# ECOfull Implementation

### References

- BNInception: https://github.com/Cadene/pretrained-models.pytorch/blob/master/pretrainedmodels/models/bninception.py
- ECO-PyTorch: https://github.com/mzolfaghari/ECO-pytorch
- ECO-Lite implementation: https://github.com/YutaroOgawa/pytorch_advanced/tree/master/9_video_classification_eco

In [13]:
import os
import torch
import torch.nn as nn
from torch.nn import init

In [14]:
# Folder the pretrained checkpoint is read from later in the notebook;
# create it on first run so the weights file can be placed there.
weights_dir = "./weights/"
if not os.path.exists(weights_dir):
    os.mkdir(weights_dir)

# Dataloader for Kinetics


In [15]:
from utils.kinetics400_eco_dataloader import make_datapath_list, VideoTransform, get_label_id_dictionary, VideoDataset

# Build the list of videos to evaluate from the dataset root
# (presumably one entry per extracted-frames folder -- confirm in utils).
root_path = './data/kinetics_videos/'
video_list = make_datapath_list(root_path)

# Preprocessing settings handed to VideoTransform.
resize = 224
crop_size = 224
mean = [104, 117, 123]
std = [1, 1, 1]
video_transform = VideoTransform(resize, crop_size, mean, std)

# Lookup tables between class label and class id (both directions, judging
# by the returned names), read from the label CSV.
label_dicitionary_path = 'kinetics_400_label_dicitionary.csv'
label_id_dict, id_label_dict = get_label_id_dictionary(label_dicitionary_path)

# Validation dataset: 16 segments per video, frames named image_00001.jpg etc.
val_dataset = VideoDataset(
    video_list,
    label_id_dict,
    num_segments=16,
    phase="val",
    transform=video_transform,
    img_tmpl='image_{:05d}.jpg')

# Fixed order (no shuffling) so the same videos come back on every run.
batch_size = 8
val_dataloader = torch.utils.data.DataLoader(val_dataset,
                                             batch_size=batch_size,
                                             shuffle=False)

# Sanity check: pull one batch and show the shape of the transformed frames.
batch_iterator = iter(val_dataloader)
imgs_transformeds, labels, label_ids, dir_path = next(batch_iterator)
print(imgs_transformeds.shape)


torch.Size([8, 16, 3, 224, 224])


# Import ECOfull


In [16]:
from utils.ecofull import ECOfull

In [17]:
# Instantiate the network; the pretrained weights are loaded in a later cell.
# The bare `net` makes the notebook display the layer listing.
net = ECOfull()
net

ECOfull(
  (conv1_7x7_s2): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3))
  (conv1_7x7_s2_bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv1_relu_7x7): ReLU(inplace)
  (pool1_3x3_s2): MaxPool2d(kernel_size=(3, 3), stride=(2, 2), padding=0, dilation=(1, 1), ceil_mode=True)
  (conv2_3x3_reduce): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1))
  (conv2_3x3_reduce_bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv2_relu_3x3_reduce): ReLU(inplace)
  (conv2_3x3): Conv2d(64, 192, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv2_3x3_bn): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv2_relu_3x3): ReLU(inplace)
  (pool2_3x3_s2): MaxPool2d(kernel_size=(3, 3), stride=(2, 2), padding=0, dilation=(1, 1), ceil_mode=True)
  (inception_3a_1x1): Conv2d(192, 64, kernel_size=(1, 1), stride=(1, 1))
  (inception_3a_1x1_bn): BatchNorm2d(64, eps=1e-05, m

# Load Pre-Trained Model

In [18]:
def load_pretrained_ECO(model_dict, pretrained_model_dict):
    """Build a state dict for the model from a checkpoint, pairing entries by position.

    The checkpoint's key names are not used for matching: the i-th value of
    ``pretrained_model_dict`` is stored under the i-th key of ``model_dict``.
    Each pairing is printed as one line (checkpoint key, arrow, model key) so
    the mapping can be checked by eye.

    Args:
        model_dict: State dict of the target model (name -> value), in the
            model's own entry order.
        pretrained_model_dict: State dict read from the checkpoint
            (name -> value); its entries must appear in the same order as the
            model's, because order is the only thing that links them.

    Returns:
        A shallow copy of ``model_dict`` whose first
        ``len(pretrained_model_dict)`` entries hold the checkpoint values.
        Entries beyond that keep the values from ``model_dict``. Neither input
        mapping is modified.

    Raises:
        IndexError: If the checkpoint has more entries than the model, so the
            surplus entries have no positional counterpart. Raised before
            anything is printed or copied.
    """
    param_names = list(model_dict.keys())

    # Check the sizes up front: otherwise the loop below would print and copy
    # a partial mapping before failing with an uninformative index error.
    if len(pretrained_model_dict) > len(param_names):
        raise IndexError(
            "pretrained checkpoint has {} entries but the model only has {}; "
            "cannot map them by position".format(
                len(pretrained_model_dict), len(param_names)))

    new_state_dict = model_dict.copy()

    print("Load learned parameters")
    for name, (key_name, value) in zip(param_names,
                                       pretrained_model_dict.items()):
        new_state_dict[name] = value

        print(str(key_name)+"→"+str(name))

    return new_state_dict

In [19]:
# Path of the checkpoint file; it must already be present in ./weights/.
net_model_ECO = "./weights/ECO_Full_rgb_model_Kinetics.pth.tar"
# NOTE(review): torch.load unpickles the file, which can execute arbitrary
# code -- only load checkpoints obtained from a trusted source.
# map_location='cpu' places the loaded tensors on the CPU.
pretrained_model = torch.load(net_model_ECO, map_location='cpu')
pretrained_model_dict = pretrained_model['state_dict']

# Re-key the checkpoint values onto this model's parameter names (by position).
model_dict = net.state_dict()
new_state_dict = load_pretrained_ECO(model_dict, pretrained_model_dict)

net.eval()  # evaluation mode for inference
# Last expression of the cell: its return value is shown as the cell output.
net.load_state_dict(new_state_dict)

Load learned parameters
module.base_model.conv1_7x7_s2.weight→conv1_7x7_s2.weight
module.base_model.conv1_7x7_s2.bias→conv1_7x7_s2.bias
module.base_model.conv1_7x7_s2_bn.weight→conv1_7x7_s2_bn.weight
module.base_model.conv1_7x7_s2_bn.bias→conv1_7x7_s2_bn.bias
module.base_model.conv1_7x7_s2_bn.running_mean→conv1_7x7_s2_bn.running_mean
module.base_model.conv1_7x7_s2_bn.running_var→conv1_7x7_s2_bn.running_var
module.base_model.conv1_7x7_s2_bn.num_batches_tracked→conv1_7x7_s2_bn.num_batches_tracked
module.base_model.conv2_3x3_reduce.weight→conv2_3x3_reduce.weight
module.base_model.conv2_3x3_reduce.bias→conv2_3x3_reduce.bias
module.base_model.conv2_3x3_reduce_bn.weight→conv2_3x3_reduce_bn.weight
module.base_model.conv2_3x3_reduce_bn.bias→conv2_3x3_reduce_bn.bias
module.base_model.conv2_3x3_reduce_bn.running_mean→conv2_3x3_reduce_bn.running_mean
module.base_model.conv2_3x3_reduce_bn.running_var→conv2_3x3_reduce_bn.running_var
module.base_model.conv2_3x3_reduce_bn.num_batches_tracked→conv2_3x

In [20]:
# Make sure the network is in evaluation mode before running inference.
net.eval()

# Take the first batch of the validation loader.
batch_iterator = iter(val_dataloader)
imgs_transformeds, labels, label_ids, dir_path = next(batch_iterator)

# Forward pass with gradient tracking switched off.
with torch.no_grad():
    outputs = net(imgs_transformeds)

print(outputs.shape)

('inception_3c_relu_double_3x3_1_out:', torch.Size([128, 96, 28, 28]))
torch.Size([8, 16, 96, 28, 28])
torch.Size([8, 96, 16, 28, 28])
('fc_final_out:', torch.Size([8, 400]))
torch.Size([8, 400])


# Classification of video data


In [21]:
# Show the source folder of every sample in the batch, in batch order.
dir_path

('./data/kinetics_videos/bungee jumping/zkXOcxGnUhs_000025_000035',
 './data/kinetics_videos/bungee jumping/dAeUFSdYG1I_000010_000020',
 './data/kinetics_videos/bungee jumping/TUvSX0pYu4o_000002_000012',
 './data/kinetics_videos/bungee jumping/b6yQZjPE26c_000023_000033',
 './data/kinetics_videos/arm wrestling/BdMiTo_OtnU_000024_000034',
 './data/kinetics_videos/arm wrestling/5JzkrOVhPOw_000027_000037',
 './data/kinetics_videos/arm wrestling/ehLnj7pXnYE_000027_000037',
 './data/kinetics_videos/arm wrestling/C4lCVBZ3ux0_000028_000038')

In [22]:
def show_eco_inference_result(dir_path, outputs_input, id_label_dict, idx, top_k=5):
    """Print the highest-scoring class labels for one sample of a batch.

    Args:
        dir_path: Sequence of per-sample source paths; ``dir_path[idx]`` is
            printed as the header line.
        outputs_input: Tensor of class scores. ``outputs_input[idx]`` must be
            the 1-D score vector of the sample. The tensor is only read, never
            modified.
        id_label_dict: Mapping from integer class index to label text.
        idx: Position of the sample within the batch.
        top_k: Number of predictions to print, best first (default 5). If it
            exceeds the number of classes, every class is printed once.

    Returns:
        None; the result is written to standard output.
    """
    print("File：", dir_path[idx])

    scores = outputs_input[idx]

    # Never ask for more entries than there are classes.
    k = min(top_k, scores.size(0))

    # topk yields the k largest scores in descending order. Unlike repeatedly
    # taking the max and overwriting it with a sentinel, this does not depend
    # on the score range and needs no copy of the input. tolist() also works
    # for tensors that track gradients or live on another device.
    _, top_indices = torch.topk(scores, k, dim=0)

    for rank, class_idx in enumerate(top_indices.tolist(), start=1):
        print("Forcast{}：{}".format(rank, id_label_dict[class_idx]))

### Top 5 Predictions

In [23]:
# Top-5 predictions for the sample at batch position 0.
idx = 0
show_eco_inference_result(dir_path, outputs, id_label_dict, idx)

('File\xef\xbc\x9a', './data/kinetics_videos/bungee jumping/zkXOcxGnUhs_000025_000035')
Forcast1：bungee jumping
Forcast2：diving cliff
Forcast3：swinging on something
Forcast4：skydiving
Forcast5：abseiling


In [24]:
# Top-5 predictions for the sample at batch position 5.
idx = 5
show_eco_inference_result(dir_path, outputs, id_label_dict, idx)


('File\xef\xbc\x9a', './data/kinetics_videos/arm wrestling/5JzkrOVhPOw_000027_000037')
Forcast1：arm wrestling
Forcast2：headbutting
Forcast3：rock scissors paper
Forcast4：sniffing
Forcast5：shaking hands


In [25]:
# Top-5 predictions for the sample at batch position 7 (last of the batch of 8).
idx = 7
show_eco_inference_result(dir_path, outputs, id_label_dict, idx)


('File\xef\xbc\x9a', './data/kinetics_videos/arm wrestling/C4lCVBZ3ux0_000028_000038')
Forcast1：arm wrestling
Forcast2：shaking hands
Forcast3：rock scissors paper
Forcast4：tai chi
Forcast5：cutting nails


In [26]:
# Top-5 predictions for the sample at batch position 2.
idx = 2
show_eco_inference_result(dir_path, outputs, id_label_dict, idx)


('File\xef\xbc\x9a', './data/kinetics_videos/bungee jumping/TUvSX0pYu4o_000002_000012')
Forcast1：bungee jumping
Forcast2：abseiling
Forcast3：swinging on something
Forcast4：trapezing
Forcast5：climbing ladder


### Top 10 Predictions

In [27]:
def show_eco_inference_result_10(dir_path, outputs_input, id_label_dict, idx, top_k=10):
    """Print the ten highest-scoring class labels for one sample of a batch.

    Args:
        dir_path: Sequence of per-sample source paths; ``dir_path[idx]`` is
            printed as the header line.
        outputs_input: Tensor of class scores. ``outputs_input[idx]`` must be
            the 1-D score vector of the sample. The tensor is only read, never
            modified.
        id_label_dict: Mapping from integer class index to label text.
        idx: Position of the sample within the batch.
        top_k: Number of predictions to print, best first (default 10). If it
            exceeds the number of classes, every class is printed once.

    Returns:
        None; the result is written to standard output.
    """
    print("File：", dir_path[idx])

    scores = outputs_input[idx]

    # Never ask for more entries than there are classes.
    k = min(top_k, scores.size(0))

    # topk yields the k largest scores in descending order. Unlike repeatedly
    # taking the max and overwriting it with a sentinel, this does not depend
    # on the score range and needs no copy of the input. tolist() also works
    # for tensors that track gradients or live on another device.
    _, top_indices = torch.topk(scores, k, dim=0)

    for rank, class_idx in enumerate(top_indices.tolist(), start=1):
        print("Forcast{}：{}".format(rank, id_label_dict[class_idx]))

In [28]:
# Top-10 predictions for the sample at batch position 2.
idx = 2
show_eco_inference_result_10(dir_path, outputs, id_label_dict, idx)


('File\xef\xbc\x9a', './data/kinetics_videos/bungee jumping/TUvSX0pYu4o_000002_000012')
Forcast1：bungee jumping
Forcast2：abseiling
Forcast3：swinging on something
Forcast4：trapezing
Forcast5：climbing ladder
Forcast6：rock climbing
Forcast7：parasailing
Forcast8：bouncing on trampoline
Forcast9：somersaulting
Forcast10：exercising with an exercise ball
