**Install requirements**

In [None]:
!pip3 install 'tensorboardX' 



**Import Google Drive**

In [None]:
# Load the Drive helper and mount Google Drive into the Colab filesystem.
from google.colab import drive
import os
drive.mount('/content/drive')

# Absolute path so this cell can be re-run: the previous relative path
# ('drive/My Drive/ego-rnn/') only resolves from the initial working directory
# and raises FileNotFoundError once the cwd has already been changed.
path = '/content/drive/My Drive/ego-rnn/'
os.chdir(path)
cwd = os.getcwd()
print("Current dir: "+cwd)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Current dir: /content/drive/My Drive/ego-rnn


**Import libraries**

In [None]:
from __future__ import print_function, division
from spatial_transforms import (Compose, ToTensor, CenterCrop, Scale, Normalize, MultiScaleCornerCrop,
                                RandomHorizontalFlip)
from tensorboardX import SummaryWriter
from makeDatasetRGB import *

import argparse
import sys
import matplotlib.pyplot as plt

import os
import torch
from torch.utils.data import Dataset
from PIL import Image
import numpy as np
import glob
from random import random

import torch.nn as nn
import math
import torch.utils.model_zoo as model_zoo
import torchvision

from torchvision import transforms
from itertools import permutations, combinations
import spatial_transforms 

In [None]:
def build_permutations(frame, classes=30):
  """Pick `classes` orderings of the frame indices 0..frame-1.

  Every permutation of the indices is enumerated and scored by its total
  Hamming distance (number of differing positions) to all enumerated
  permutations. The `classes` highest-scoring ones are kept (ties keep
  enumeration order) and then shuffled in place with the global NumPy RNG.

  NOTE(review): since the full permutation set is enumerated, every
  permutation appears to get the same total score, so the ranking seems to
  reduce to "the first `classes` permutations in itertools order" - confirm
  this is the intended selection.

  Args:
    frame: number of frames (length of each permutation).
    classes: maximum number of permutations to return.

  Returns:
    Integer array with one permutation per row, at most `classes` rows.
  """
  frame_ids = np.linspace(0, frame, frame, endpoint=False, dtype=int)
  all_perms = np.array(list(permutations(frame_ids)))

  # Total Hamming distance of each permutation to every enumerated permutation.
  total_dist = [
      sum(np.count_nonzero(candidate != other) for other in all_perms)
      for candidate in all_perms
  ]

  # Stable descending sort of the permutation indices by their score.
  ranked = sorted(range(len(total_dist)), key=lambda k: total_dist[k], reverse=True)
  selected = np.array([all_perms[k] for k in ranked])[:classes]

  # Row order (and therefore the label -> permutation mapping) is randomised.
  np.random.shuffle(selected)
  return selected

def gen_split_mmaps(root_dir, stackSize, dir_users):
    """List the RGB / motion-map frame folders and class labels for some users.

    Expected layout (as read by this function):
    ``root_dir/<user>/<action>/<instance>/{rgb,mmaps}/*.png``.

    Class indices are built from the sorted union of the action folders of
    users S1-S4, so the same action gets the same index whichever subset of
    users is requested in `dir_users`.

    Args:
        root_dir: dataset root containing the S1..S4 user folders.
        stackSize: minimum number of PNG frames an instance must have in BOTH
            its ``rgb`` and ``mmaps`` folders to be kept.
        dir_users: iterable of user folder names to collect instances from.

    Returns:
        Tuple ``(Dataset, Mmaps, Labels)`` of parallel lists: rgb folder paths,
        mmaps folder paths and integer class indices.

    Raises:
        KeyError: if a requested user has an action folder that none of
            S1..S4 contains.
    """
    def count_png(folder):
        # glob.escape keeps special characters in the folder path literal.
        # A missing folder simply yields zero matches.
        return len(glob.glob(os.path.join(glob.escape(folder), '*.png')))

    # Build the action-name -> index table from all four users.
    class_names = set()
    for user in ['S1', 'S2', 'S3', 'S4']:
        user_dir = os.path.join(root_dir, user)
        class_names.update(
            entry for entry in os.listdir(user_dir)
            if os.path.isdir(os.path.join(user_dir, entry))
        )
    class_to_idx = {name: idx for idx, name in enumerate(sorted(class_names))}

    rgb_dirs = []
    mmaps_dirs = []
    labels = []

    for dir_user in dir_users:
        user_dir = os.path.join(root_dir, dir_user)

        for target in sorted(os.listdir(user_dir)):  # action folders of the user
            action_dir = os.path.join(user_dir, target)
            if not os.path.isdir(action_dir):
                continue

            for inst in sorted(os.listdir(action_dir)):  # instances of the action
                mmaps_dir = os.path.join(action_dir, inst + '/mmaps')
                rgb_dir = os.path.join(action_dir, inst + '/rgb')
                # Both modalities must provide enough frames; the original
                # condition tested the mmaps count twice and ignored rgb.
                if count_png(mmaps_dir) >= stackSize and count_png(rgb_dir) >= stackSize:
                    mmaps_dirs.append(mmaps_dir)
                    rgb_dirs.append(rgb_dir)
                    labels.append(class_to_idx[target])

    return rgb_dirs, mmaps_dirs, labels

class makeDatasetMmaps(Dataset):
    """Dataset for the frame-order pretext task.

    Each item is the first `numFrame` RGB frames of a video instance, returned
    in a (possibly) permuted temporal order together with the index of the
    order that was applied.

    Args:
        root_dir: dataset root passed to `gen_split_mmaps`.
        dir_users: user folders to include.
        numFrame: number of frames loaded per item; also the minimum frame
            count an instance needs to be listed.
        bias_whole_image: falsy to disable; otherwise the probability with
            which the order is forced to 0 (frames left in original order).
        orders_classes: number of permutations requested from
            `build_permutations`.
        spatial_transform: optional transform applied to every PIL frame; when
            given it must provide `randomize_parameters()`.
        normalize: optional transform appended after `ToTensor`.
        seqLen, train, mulSeg, numSeg: stored on the instance; not used by
            the code in this class.
        fmt: frame file extension.
    """
    def __init__(self, root_dir, dir_users, numFrame, bias_whole_image=False, orders_classes=30, spatial_transform=None, normalize=None, seqLen=20,
                 train=True, mulSeg=False, numSeg=1, fmt='.png'):

        self.images, self.mmaps, self.labels = gen_split_mmaps(root_dir, numFrame, dir_users)
        self.spatial_transform = spatial_transform
        self.normalize = normalize
        self.train = train
        self.mulSeg = mulSeg
        self.numSeg = numSeg
        self.numFrame = numFrame
        self.seqLen = seqLen
        self.fmt = fmt

        self.permutations = build_permutations(numFrame, orders_classes)
        self.bias_whole_image = bias_whole_image

        # Only append the normalisation step when one was supplied, so the
        # default `normalize=None` no longer puts None inside the Compose.
        rgb_steps = [spatial_transforms.ToTensor()]
        if self.normalize is not None:
            rgb_steps.append(self.normalize)
        self.spatial_transform_rgb = spatial_transforms.Compose(rgb_steps)
        self.spatial_transform_mmaps = transforms.Compose([transforms.Resize(7), transforms.ToTensor()])

    def __len__(self):
        """Number of video instances listed by `gen_split_mmaps`."""
        return len(self.images)

    def __getitem__(self, idx):
        """Load one frame stack and shuffle it according to a random order.

        Returns:
            Tuple ``(inpSeq, order)``: the per-frame tensors stacked along
            dim 0, and the integer order label.

        NOTE(review): order 0 means "frames left in original order" and
        `self.permutations[0]` is never applied. Nothing here guarantees that
        the identity ordering is absent from `self.permutations[1:]`, so two
        labels may describe the same frame order - confirm.
        """
        vid_name = self.images[idx]

        order = np.random.randint(len(self.permutations))

        if self.bias_whole_image:
            if self.bias_whole_image > random():
                order = 0

        # Draw the augmentation parameters once so every frame of the clip
        # receives the same spatial transform.
        if self.spatial_transform is not None:
            self.spatial_transform.randomize_parameters()

        inpSeq = []
        for frame_no in range(1, self.numFrame + 1):
            fl_name = vid_name + '/' + 'rgb' + str(frame_no).zfill(4) + self.fmt
            # The context manager releases the file handle; convert() returns
            # an independent in-memory image that stays valid afterwards.
            with Image.open(fl_name) as img:
                rgb_frame = img.convert('RGB')
            if self.spatial_transform is not None:
                rgb_frame = self.spatial_transform(rgb_frame)
            inpSeq.append(self.spatial_transform_rgb(rgb_frame))

        if order > 0:
            inpSeq = [inpSeq[self.permutations[order][t]] for t in range(self.numFrame)]

        inpSeq = torch.stack(inpSeq, 0)
        return inpSeq, int(order)

In [None]:
def build_combinations(frame):
  """Return every unordered pair of frame indices drawn from 0..frame-1.

  Args:
    frame: number of frames.

  Returns:
    Array with one (i, j) pair per row, i < j, in itertools.combinations order.
  """
  frame_ids = np.linspace(0, frame, frame, endpoint=False, dtype=int)
  index_pairs = list(combinations(frame_ids, 2))
  return np.array(index_pairs)


class ordersModel(nn.Module):
    """Pairwise frame-order classification head.

    Each frame's feature map is flattened and projected by `fc6`; for every
    pair of frames (see `build_combinations`) the two projections are
    concatenated and projected by `fc7`; all pair features are concatenated
    and classified by dropout + `fc8`.

    Args:
        mem_size: channel count of the incoming feature maps; the spatial size
            is fixed to 7x7 by the `fc6` input width.
        orders_classes: number of order classes predicted.
        frame: number of frames per sample.
    """
    def __init__(self, mem_size=512, orders_classes = 30, frame = 5):
        super(ordersModel, self).__init__()
        self.mem_size = mem_size
        self.frame = frame
        self.combinations = build_combinations(frame)
        self.fc6 = nn.Linear(mem_size*7*7, mem_size*2)
        self.fc7 = nn.Linear(mem_size*4, mem_size)
        self.fc8 = nn.Linear(mem_size*len(self.combinations), orders_classes)
        self.dropout = nn.Dropout(0.5)
        self.orders_classifier = nn.Sequential(self.dropout, self.fc8)

    def forward(self, feat_orders):
        """Score the order classes for a batch of per-frame feature maps.

        Args:
            feat_orders: tensor whose dim 0 is the batch and dim 1 the frames;
                the remaining dims are flattened to mem_size*7*7 per frame.

        Returns:
            Tensor of shape (batch, orders_classes) with unnormalised scores.
        """
        batch_size = feat_orders.size(0)
        num_frames = feat_orders.size(1)

        # nn.Linear acts on the last dim, so all frames of all samples are
        # projected in one call instead of looping over the batch.
        frame_feats = self.fc6(feat_orders.reshape(batch_size, num_frames, self.mem_size*7*7))

        # Concatenating the two selected frames replaces index_select with a
        # CUDA-only index tensor, so the head runs on whatever device the
        # input lives on.
        pair_feats = []
        for first, second in self.combinations:
            pair_input = torch.cat([frame_feats[:, int(first)], frame_feats[:, int(second)]], dim=1)
            pair_feats.append(self.fc7(pair_input))

        # (batch, mem_size * number_of_pairs), pairs kept in combination order.
        pair_feats = torch.cat(pair_feats, dim=1)

        return self.orders_classifier(pair_feats)

In [None]:
from torch.autograd import Variable
from torch.nn import functional as F
from resnetMod import *

class Model(nn.Module):
    """ResNet-34 backbone followed by the frame-order classification head.

    Args:
        num_classes: stored on the instance; not used by the code in this class.
        mem_size: feature width handed to `ordersModel`.
        orders_classes: number of order classes predicted by the head.
        frame: number of frames per sample handed to `ordersModel`.
    """
    def __init__(self, num_classes=61, mem_size=512, orders_classes = 30, frame = 5):
        super(Model, self).__init__()
        self.num_classes = num_classes
        # resnet34 comes from resnetMod; the meaning of its two positional
        # flags is defined there and is not visible in this notebook.
        self.resNet = resnet34(True, True)
        self.mem_size = mem_size
        self.weight_softmax = self.resNet.fc.weight
        self.orders_classifier = ordersModel(mem_size, orders_classes, frame)

    def forward(self, inputVariable):
        """Return the order-class scores for a clip batch.

        Args:
            inputVariable: tensor indexed by frame along dim 0; each slice is
                passed to the backbone as one batch.

        Returns:
            Output of the order classifier for the batch.
        """
        per_frame_maps = []
        for step in range(inputVariable.size(0)):
            # The backbone returns three values; only the second one (the
            # convolutional feature map) feeds the order head.
            _logit, conv_map, _conv_map_nbn = self.resNet(inputVariable[step])
            per_frame_maps.append(conv_map)

        # Stack along a new leading frame dim, then swap it with the batch dim.
        batch_first = torch.stack(per_frame_maps, 0).permute(1, 0, 2, 3, 4)
        return self.orders_classifier(batch_first)

**Set Arguments**

In [None]:
# Paths, relative to the current working directory.
data_dir = "GTEA61/processed_frames2"
out_dir = 'experiments'

# Users whose recordings form the training split, and the loader batch size.
user_train = ['S1', 'S2', 'S3', 'S4']
trainBatchSize = 32

# Feature width handed to the model as `mem_size`.
memSize = 512

# Frames loaded per sample; the sequence length follows the same value.
frame = 4
seqLen = frame

# Number of frame-order classes (permutations) to predict.
orders_classes = 12

**Set Training Hyperparameters, Prepare Dataset and Dataloader**

In [None]:
# Optimisation schedule: learning rate lr1 is multiplied by decay_factor at
# each epoch listed in decay_step.
numEpochs = 300
lr1 = 1e-3
decay_step = [25, 75, 150]
decay_factor = 0.1

# Dir for saving models and log files. The last component is derived from
# `frame` so the folder name always matches the configured frame count
# (it was hard-coded to '5frm' while `frame` is 4).
model_folder = os.path.join('./', out_dir, 'pretrain', 'stage1', '{}frm'.format(frame))

weight_decay = 4e-5
weight_mmaps = 1

In [None]:
# Data loader
normalize = Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
spatial_transform = Compose([Scale(256), RandomHorizontalFlip(), MultiScaleCornerCrop([1, 0.875, 0.75, 0.65625], 224)])

vid_seq_train = makeDatasetMmaps(data_dir, user_train, frame, orders_classes = orders_classes, 
                            spatial_transform=spatial_transform, normalize=normalize, seqLen=seqLen, fmt='.png')


def seed_numpy_worker(worker_id):
    """Give each DataLoader worker its own NumPy RNG seed.

    The dataset draws the frame order with np.random inside __getitem__.
    Worker processes may otherwise start from duplicated NumPy RNG state and
    produce identical "random" orders (see the PyTorch notes on randomness in
    multi-process data loading). torch.initial_seed() is already distinct per
    worker, so it is reused as the NumPy seed.
    """
    np.random.seed(torch.initial_seed() % 2**32)


train_loader = torch.utils.data.DataLoader(vid_seq_train, batch_size=trainBatchSize,
                        shuffle=True, num_workers=4, pin_memory=True,
                        worker_init_fn=seed_numpy_worker)

trainInstances = len(vid_seq_train)

print('Number of samples in the dataset: training = {} '.format(trainInstances))

Number of samples in the dataset: training = 457 


**Create Output Directory and Log Files**

In [None]:
# Create the dir
# The run is aborted when the folder already exists (presumably to avoid
# overwriting the checkpoint and logs of an earlier run).
# NOTE(review): when sys.exit() fires, the lines below never run, so `writer`
# and the two log handles stay undefined (or stale from an earlier execution)
# for the training cell - pick a fresh model_folder before re-running.
if os.path.exists(model_folder):
    print('Directory {} exists!'.format(model_folder))
    sys.exit()
os.makedirs(model_folder)

# Log files
# `writer` receives the TensorBoard scalars; the two text files get one line
# per epoch and are closed at the end of the training cell.
writer = SummaryWriter(model_folder)
train_log_loss = open((model_folder + '/train_log_loss.txt'), 'w')
train_log_acc = open((model_folder + '/train_log_acc.txt'), 'w')

Directory ./experiments/pretrain/stage1/5frm exists!


**Prepare Network**

In [None]:
model = Model(mem_size=memSize, orders_classes=orders_classes, frame=frame)

# Start from a fully frozen network in evaluation mode.
model.train(False)
for weight in model.parameters():
    weight.requires_grad = False

# Unfreeze the order-classification head only; these are the parameters
# handed to the optimizer.
train_params = []
for weight in model.orders_classifier.parameters():
    weight.requires_grad = True
    train_params.append(weight)

model.orders_classifier.train(True)
model.cuda()

convLSTMModel(
  (resNet): ResNet(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (1): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, trac

**Define Loss, Optimizer and Scheduler**

In [None]:
# Cross-entropy over the frame-order classes.
loss_fn = nn.CrossEntropyLoss()

# Adam over the trainable parameters collected in `train_params`.
optimizer_fn = torch.optim.Adam(
    train_params,
    lr=lr1,
    weight_decay=weight_decay,
    eps=1e-4,
)

# Multiply the learning rate by `decay_factor` at every epoch in `decay_step`.
optim_scheduler = torch.optim.lr_scheduler.MultiStepLR(
    optimizer_fn,
    milestones=decay_step,
    gamma=decay_factor,
)

**Train**

In [None]:
train_iter = 0
max_accuracy = 0


for epoch in range(numEpochs):
    epoch_loss = 0
    numCorrTrain = 0

    trainSamples = 0
    iterPerEpoch = 0

    # Only the order head is trained. model.train(True) would also switch the
    # frozen backbone to training mode, where BatchNorm layers use batch
    # statistics and update their running estimates even though their weights
    # are frozen - undoing the eval mode chosen when the network was set up.
    model.train(False)
    model.orders_classifier.train(True)
    writer.add_scalar('lr', optimizer_fn.param_groups[0]['lr'], epoch+1)

    for i, (inputs, ords_lb) in enumerate(train_loader):
        train_iter += 1
        iterPerEpoch += 1
        optimizer_fn.zero_grad()

        # The loader yields batch-first clips; the model indexes frames on
        # dim 0, so swap the first two dims.
        inputVariable = inputs.permute(1, 0, 2, 3, 4).cuda()
        # view(-1) keeps the labels 1-D even for a batch of one sample, where
        # torch.squeeze would collapse them to a 0-d tensor.
        ords_lbVariable = ords_lb.view(-1).cuda()
        trainSamples += inputs.size(0)

        output_label = model(inputVariable)

        loss = loss_fn(output_label, ords_lbVariable)

        loss.backward()
        optimizer_fn.step()
        predicted = torch.max(output_label.data, 1)[1]
        # Accumulate a plain Python int so the epoch summary works without a
        # trailing .item() on a tensor.
        numCorrTrain += (predicted == ords_lbVariable).sum().item()

        epoch_loss += loss.item()

    avg_loss = epoch_loss/iterPerEpoch
    trainAccuracy = (numCorrTrain / trainSamples) * 100

    print('Train: Epoch = {} | Loss = {} | Accuracy = {} '.format(epoch+1, avg_loss, trainAccuracy))

    train_log_loss.write('Train Loss after {} epochs = {}\n'.format(epoch + 1, avg_loss))
    train_log_acc.write('Train Accuracy after {} epochs = {}%\n'.format(epoch + 1, trainAccuracy))
    writer.add_scalar('train/epoch_loss', avg_loss, epoch+1)
    writer.add_scalar('train/accuracy', trainAccuracy, epoch+1)

    # Keep the weights of the epoch with the best training accuracy so far.
    if trainAccuracy > max_accuracy:
        save_path_model = (model_folder + '/model_rgb_state_dict.pth')
        torch.save(model.state_dict(), save_path_model)
        max_accuracy = trainAccuracy

    # Step the scheduler
    optim_scheduler.step()


train_log_loss.close()
train_log_acc.close()
writer.export_scalars_to_json(model_folder + "/all_scalars.json")
writer.close()

# `epoch` is zero-based, so report the number of epochs actually run.
print('Best accuracy after {} epochs = {}'.format(numEpochs, max_accuracy))

Train: Epoch = 1 | Loss = 133.0079298178355 | Accuracy = 10.284463894967178 
Train: Epoch = 2 | Loss = 132.74149856567382 | Accuracy = 7.87746170678337 
Train: Epoch = 3 | Loss = 42.55154685974121 | Accuracy = 9.409190371991247 
Train: Epoch = 4 | Loss = 33.19914067586263 | Accuracy = 9.190371991247265 
Train: Epoch = 5 | Loss = 24.966293589274088 | Accuracy = 5.908096280087528 
Train: Epoch = 6 | Loss = 17.6816743850708 | Accuracy = 10.065645514223196 
Train: Epoch = 7 | Loss = 17.81949259440104 | Accuracy = 10.940919037199125 
Train: Epoch = 8 | Loss = 16.45088914235433 | Accuracy = 10.940919037199125 
Train: Epoch = 9 | Loss = 14.318210474650066 | Accuracy = 10.065645514223196 
Train: Epoch = 10 | Loss = 16.531242243448894 | Accuracy = 10.940919037199125 
Train: Epoch = 11 | Loss = 18.550516827901205 | Accuracy = 8.315098468271334 
Train: Epoch = 12 | Loss = 18.36432736714681 | Accuracy = 10.50328227571116 
Train: Epoch = 13 | Loss = 17.599269485473634 | Accuracy = 11.81619256017505