**Install requirements**

In [None]:
# tensorboardX provides the SummaryWriter imported below for training logs.
!pip3 install 'tensorboardX' 



**Mount Google Drive**

In [None]:
# Load the Drive helper and mount
from google.colab import drive
import os

MOUNT_POINT = '/content/drive'
drive.mount(MOUNT_POINT)

# Build the project path from the mount point (absolute) instead of relying on
# the current working directory: with a relative path this cell fails when it
# is re-run after the working directory has already been changed.
path = os.path.join(MOUNT_POINT, 'My Drive/ego-rnn/')
os.chdir(path)
cwd = os.getcwd()
print("Current dir: "+cwd)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Current dir: /content/drive/My Drive/ego-rnn


**Import libraries**

In [None]:
from __future__ import print_function, division
from spatial_transforms import (Compose, ToTensor, CenterCrop, Scale, Normalize, MultiScaleCornerCrop,
                                RandomHorizontalFlip)
from tensorboardX import SummaryWriter
from makeDatasetRGB import *
from MyConvLSTMCell import *

import argparse
import sys
import matplotlib.pyplot as plt

import os
import torch
from torch.utils.data import Dataset
from PIL import Image
import numpy as np
import glob
from random import random

import torch.nn as nn
import math
import torch.utils.model_zoo as model_zoo
import torchvision

from torchvision import transforms
from itertools import permutations, combinations
import spatial_transforms 

from torch.autograd import Variable
from torch.nn import functional as F
from resnetMod import *

In [None]:
def build_permutations(frame, classes=100):
  """Select `classes` frame-order permutations that are far apart from each other.

  The first 2020 permutations of ``0 .. frame-1`` (in the lexicographic order
  produced by ``itertools.permutations``) are taken as candidates. Each
  candidate is scored by the sum of its Hamming distances to all candidates,
  the `classes` highest-scoring ones are kept (ties keep candidate order) and
  the selection is finally shuffled in place with ``np.random.shuffle``.

  Args:
    frame: number of frames, i.e. the length of every permutation.
    classes: maximum number of permutations to return.

  Returns:
    Integer array of shape ``(min(classes, n_candidates), frame)``; row ``k``
    is the frame order associated with class ``k``. The row order depends on
    the global NumPy random state.
  """
  from itertools import islice  # local import: keeps this block self-contained

  max_candidates = 2020
  a_list = np.arange(frame, dtype=int)

  # Only the first `max_candidates` permutations are ever used, so stop the
  # generator there instead of materialising all frame! tuples first.
  candidates = np.array(list(islice(permutations(a_list), max_candidates)), dtype=int)

  # hamming_dist[i] = sum over every candidate B of the number of positions in
  # which candidate i and B differ.
  hamming_dist = (candidates[:, None, :] != candidates[None, :, :]).sum(axis=(1, 2))

  # Descending by distance; a stable sort on the negated scores keeps tied
  # candidates in their original relative order.
  ranking = np.argsort(-hamming_dist, kind='stable')
  permutations_list = candidates[ranking][:classes]
  np.random.shuffle(permutations_list)

  return permutations_list

def gen_split_mmaps(root_dir, stackSize, dir_users):
    """Collect the RGB / motion-map folders and labels of every usable clip.

    The expected layout is ``root_dir/<user>/<action>/<instance>/{rgb,mmaps}``.
    Class indices are built from the action folders of users S1-S4 (sorted by
    name), independently of `dir_users`, so labels are identical across splits.

    Args:
        root_dir: dataset root containing one folder per user.
        stackSize: minimum number of ``*.png`` frames a clip must have in BOTH
            its ``rgb`` and its ``mmaps`` folder to be kept.
        dir_users: iterable of user folder names to include in this split.

    Returns:
        Tuple ``(Dataset, Mmaps, Labels)`` of parallel lists: RGB folder path,
        motion-map folder path and integer class index of each kept clip.

    Raises:
        OSError: if one of the S1-S4 folders or a folder in `dir_users` is missing.
        KeyError: if `dir_users` contains an action folder unknown to S1-S4.
    """
    def _count_png(folder):
        # glob.escape protects special characters that may appear in the path.
        return len(glob.glob(os.path.join(glob.escape(folder), '*.png')))

    rgb_dirs = []
    mmap_dirs = []
    labels = []

    class_names = set()
    for user in ['S1', 'S2', 'S3', 'S4']:
        user_dir = os.path.join(root_dir, user)
        class_names.update(entry for entry in os.listdir(user_dir)
                           if os.path.isdir(os.path.join(user_dir, entry)))
    class_to_idx = {name: idx for idx, name in enumerate(sorted(class_names))}

    for dir_user in dir_users:
        user_dir = os.path.join(root_dir, dir_user)

        for target in sorted(os.listdir(user_dir)):  # action folders of the user
            action_dir = os.path.join(user_dir, target)
            if not os.path.isdir(action_dir):
                continue

            for inst in sorted(os.listdir(action_dir)):  # instances of the action
                mmaps_dir = os.path.join(action_dir, inst + '/mmaps')
                rgb_dir = os.path.join(action_dir, inst + '/rgb')
                # Both modalities must provide enough frames (the RGB count was
                # computed but never checked before).
                if _count_png(mmaps_dir) >= stackSize and _count_png(rgb_dir) >= stackSize:
                    mmap_dirs.append(mmaps_dir)
                    rgb_dirs.append(rgb_dir)
                    labels.append(class_to_idx[target])

    return rgb_dirs, mmap_dirs, labels

class makeDatasetMmaps(Dataset):
    """Dataset yielding RGB frames, motion maps, an action label and a random order class.

    Clips are discovered with ``gen_split_mmaps(root_dir, numFrame, dir_users)``.
    Each item is built from the first `numFrame` frames of the clip.

    Args:
        root_dir: dataset root folder.
        dir_users: user folders that make up this split.
        numFrame: number of frames read per clip (also the minimum clip length).
        orders_classes: number of frame-order classes; the order label of an
            item is drawn uniformly from ``[0, orders_classes)``.
        spatial_transform: transform applied to every PIL frame and motion map;
            must expose ``randomize_parameters()``.
        normalize: transform appended after ``ToTensor`` for the RGB frames.
        seqLen, train, mulSeg, numSeg: stored on the instance; not used by this class.
        fmt: file extension of the frame images.
    """

    def __init__(self, root_dir, dir_users, numFrame, orders_classes = 1000, spatial_transform=None, normalize=None, seqLen=20,
                 train=True, mulSeg=False, numSeg=1, fmt='.png'):

        self.images, self.mmaps, self.labels = gen_split_mmaps(root_dir, numFrame, dir_users)
        # Keep the constructor argument: __getitem__ used to read a notebook
        # global with the same name, silently ignoring the value passed here.
        self.orders_classes = orders_classes
        self.spatial_transform = spatial_transform
        self.normalize = normalize
        self.train = train
        self.mulSeg = mulSeg
        self.numSeg = numSeg
        self.numFrame = numFrame
        self.seqLen = seqLen
        self.fmt = fmt

        self.spatial_transform_rgb = spatial_transforms.Compose([spatial_transforms.ToTensor(), self.normalize])
        self.spatial_transform_mmaps = transforms.Compose([transforms.Resize(7), transforms.ToTensor()])

    def __len__(self):
        """Number of clips in the split."""
        return len(self.images)

    def __getitem__(self, idx):
        """Return ``(rgb_frames, motion_maps, label, order)`` for clip `idx`.

        ``rgb_frames`` and ``motion_maps`` are the per-frame tensors stacked
        along a new leading dimension; ``order`` is a random integer in
        ``[0, orders_classes)``.
        """
        vid_name = self.images[idx]
        vid_mmaps = self.mmaps[idx]
        label = self.labels[idx]
        inpSeq = []
        inpSeq_mmaps = []

        # NOTE(review): np.random state inside DataLoader workers depends on the
        # PyTorch version; seed it in worker_init_fn if orders must differ per worker.
        order = np.random.randint(self.orders_classes)

        # Called once per clip, before any frame is transformed; presumably this
        # makes every frame and motion map share the same sampled crop/flip.
        self.spatial_transform.randomize_parameters()

        for frame_idx in range(1, self.numFrame + 1):
            frame_tag = str(frame_idx).zfill(4)

            fl_name = vid_name + '/' + 'rgb' + frame_tag + self.fmt
            with Image.open(fl_name) as img:
                inpSeq.append(self.spatial_transform_rgb(self.spatial_transform(img.convert('RGB'))))

            fl_name_mmaps = vid_mmaps + '/' + 'map' + frame_tag + self.fmt
            if not os.path.exists(fl_name_mmaps):
                # Fall back to the next motion map when this index is missing.
                fl_name_mmaps = vid_mmaps + '/' + 'map' + str(frame_idx + 1).zfill(4) + self.fmt

            with Image.open(fl_name_mmaps) as img_mmap:
                inpSeq_mmaps.append(self.spatial_transform_mmaps(self.spatial_transform(img_mmap.convert('1'))))

        inpSeq = torch.stack(inpSeq, 0)
        inpSeq_mmaps = torch.stack(inpSeq_mmaps, 0)
        return inpSeq, inpSeq_mmaps, label, int(order)

In [None]:
class MyMotionSegCell(nn.Module):
    """Head mapping a 512-channel 7x7 feature map to 2-channel 7x7 logits.

    The constructor arguments are accepted for interface compatibility only:
    the convolution is always built as 1x1, stride 1, no padding.
    """

    def __init__(self, kernel_size=1, stride=1, padding=0):
        super().__init__()

        self.relu = nn.ReLU()
        self.ms_conv = nn.Conv2d(in_channels=512, out_channels=100,
                                 kernel_size=1, stride=1, padding=0, bias=False)
        self.ms_fc = nn.Linear(in_features=100 * 7 * 7, out_features=2 * 7 * 7)

    def forward(self, x):
        """ReLU -> 1x1 conv -> flatten -> linear -> reshape to (batch, 2, 7, 7)."""
        batch = x.size(0)
        reduced = self.ms_conv(self.relu(x))
        logits = self.ms_fc(reduced.view(batch, 100 * 7 * 7))
        return logits.view(batch, 2, 7, 7)


In [None]:
def build_combinations(frame):
  """Return all index pairs ``(i, j)`` with ``i < j`` drawn from ``0 .. frame-1``.

  The pairs come in the order produced by ``itertools.combinations`` and are
  packed into a NumPy array with one pair per row.
  """
  frame_ids = np.arange(frame, dtype=int)
  index_pairs = [pair for pair in combinations(frame_ids, 2)]
  return np.array(index_pairs)


class ordersModel(nn.Module):
    """Self-supervised head that classifies the order in which frames were shuffled.

    Per-frame features are embedded with ``fc6``, re-ordered according to the
    permutation selected by the order label, combined pairwise through ``fc7``
    and classified by ``orders_classifier``.

    Args:
        mem_size: channel count of the incoming 7x7 feature maps.
        orders_classes: number of permutation classes to predict.
        frame: number of frames per clip; determines the permutation length
            and the number of frame pairs.

    Note:
        ``self.permutations`` is a plain NumPy attribute (not a registered
        buffer), so it is not part of ``state_dict()``; it is rebuilt by
        ``build_permutations`` every time the model is constructed.
    """

    def __init__(self, mem_size=512, orders_classes = 100, frame = 7):
        super(ordersModel, self).__init__()
        self.mem_size = mem_size
        self.frame = frame
        # Use the requested frame count (it used to be hard-coded to 7).
        self.combinations = build_combinations(frame)
        self.permutations = build_permutations(frame, orders_classes)
        self.fc6 = nn.Linear(mem_size*7*7, mem_size*2)
        self.fc7 = nn.Sequential(nn.Linear(mem_size*4, mem_size))
        self.orders_classifier = nn.Linear(mem_size*len(self.combinations), orders_classes)

    def forward(self, feat_orders, orderVariable):
        """Predict order-class logits.

        Args:
            feat_orders: per-frame features indexed as (sample, frame, ...),
                where each frame flattens to ``mem_size*7*7`` values.
            orderVariable: one order label per sample; row ``label`` of
                ``self.permutations`` is the frame order applied to that sample.

        Returns:
            Tensor of shape ``(samples, orders_classes)``.
        """
        device = feat_orders.device
        batch_size = feat_orders.size(0)
        # reshape(-1) also accepts a 0-dim label tensor (single-sample batch).
        orders = orderVariable.reshape(-1)

        shuffled = []
        for sample_idx in range(batch_size):
            sample = feat_orders[sample_idx]
            embedded = self.fc6(sample.reshape(sample.size(0), self.mem_size*7*7))
            perm = torch.as_tensor(self.permutations[int(orders[sample_idx].item())],
                                   dtype=torch.long, device=device)
            shuffled.append(torch.index_select(embedded, 0, perm))
        shuffled = torch.stack(shuffled, 0)  # (samples, frames, 2*mem_size)

        # One fc7 embedding per frame pair, built from the two concatenated frame features.
        pair_feats = [self.fc7(torch.cat((shuffled[:, int(r)], shuffled[:, int(c)]), dim=1))
                      for r, c in self.combinations]
        pair_feats = torch.stack(pair_feats, 1)  # (samples, pairs, mem_size)

        return self.orders_classifier(pair_feats.reshape(batch_size, self.mem_size*len(self.combinations)))

In [None]:
class convLSTMModel(nn.Module):
    """ResNet-34 + ConvLSTM action classifier with motion-segmentation and frame-order heads.

    Args:
        num_classes: number of action classes predicted by ``classifier``.
        mem_size: number of ConvLSTM memory channels.
        orders_classes: number of permutation classes of the order head.
        frame: number of frames per clip, forwarded to ``ordersModel``.
    """

    def __init__(self, num_classes=61, mem_size=512, orders_classes = 100, frame = 7):
        super(convLSTMModel, self).__init__()
        self.num_classes = num_classes
        self.resNet = resnet34(True, True)
        self.mem_size = mem_size
        self.weight_softmax = self.resNet.fc.weight
        self.lstm_cell = MyConvLSTMCell(512, mem_size)
        self.ms_cell = MyMotionSegCell()
        self.avgpool = nn.AvgPool2d(7)
        self.dropout = nn.Dropout(0.7)
        self.fc = nn.Linear(mem_size, self.num_classes)
        self.classifier = nn.Sequential(self.dropout, self.fc)
        self.orders_classifier = ordersModel(mem_size, orders_classes, frame)

    def forward(self, inputVariable, orderVariable = None, ORD = True, CAM = False, MS = False):
        """Run the network on a clip batch.

        Args:
            inputVariable: frames indexed as (time, batch, ...); every
                ``inputVariable[t]`` is fed to the ResNet backbone.
            orderVariable: per-sample order labels; required when `ORD` is True.
            ORD: compute the frame-order logits.
            CAM: weight the backbone features with a class-activation attention
                map before the ConvLSTM update.
            MS: compute the per-frame motion-segmentation logits.

        Returns:
            Tuple ``(feats, feats_ms, feats1, feat_orders)``:
            action logits; stacked motion-segmentation outputs (empty list when
            `MS` is False); pooled ConvLSTM features fed to the classifier;
            order logits (the list of per-frame backbone features when `ORD`
            is False).

        Raises:
            ValueError: if `ORD` is True and `orderVariable` is None.
        """
        if ORD and orderVariable is None:
            raise ValueError('orderVariable is required when ORD=True')

        # Allocate the recurrent state on the input's device rather than forcing CUDA.
        device = inputVariable.device
        state_shape = (inputVariable.size(1), self.mem_size, 7, 7)
        state = (torch.zeros(state_shape, device=device),
                 torch.zeros(state_shape, device=device))

        feats_ms = []
        feat_orders = []

        for t in range(inputVariable.size(0)):
            logit, feature_conv, feature_convNBN = self.resNet(inputVariable[t])
            feat_orders.append(feature_conv)

            if MS:
                feats_ms.append(self.ms_cell(feature_conv))

            if CAM:
                bz, nc, h, w = feature_conv.size()
                feature_conv1 = feature_conv.view(bz, nc, h*w)
                # Build the attention map from the fc weights of the top-scoring class.
                probs, idxs = logit.sort(1, True)
                class_idx = idxs[:, 0]
                cam = torch.bmm(self.weight_softmax[class_idx].unsqueeze(1), feature_conv1)
                attentionMAP = F.softmax(cam.squeeze(1), dim=1)
                attentionMAP = attentionMAP.view(attentionMAP.size(0), 1, 7, 7)
                attentionFeat = feature_convNBN * attentionMAP.expand_as(feature_conv)
                state = self.lstm_cell(attentionFeat, state)
            else:
                state = self.lstm_cell(feature_conv, state)

        if MS:
            feats_ms = torch.stack(feats_ms, 0)

        feats1 = self.avgpool(state[1]).view(state[1].size(0), -1)
        feats = self.classifier(feats1)

        if ORD:
            # (time, batch, C, H, W) -> (batch, time, C, H, W) for the order head.
            feat_orders = self.orders_classifier(torch.stack(feat_orders, 0).permute(1,0,2,3,4), orderVariable)

        return feats, feats_ms, feats1, feat_orders
        

**Set Arguments**

In [None]:
# Dataset root and output root (relative to the current working directory).
data_dir = "GTEA61/processed_frames2"
out_dir = 'experiments'

# User folders used for the training and validation splits.
user_train = ['S1','S3','S4']
user_val = ['S2']
trainBatchSize = 32
valBatchSize = 64
memSize = 512      # passed to convLSTMModel as mem_size
num_classes = 61   # number of action classes

# Frames read per clip; passed to the datasets and to the model.
frame = 7
seqLen = frame

# Number of frame-order (permutation) classes of the self-supervised head.
orders_classes = 1000

# Forwarded as the CAM flag of the model's forward pass.
CAM = False

**Set Training Hyperparameters and Prepare Dataset/Dataloader**

In [None]:
numEpochs = 150
lr1 = 1e-4                # initial learning rate of the optimizer
decay_step = [25, 75]     # scheduler milestones (epochs)
decay_factor = 0.1        # scheduler gamma
MS = True                 # enable the motion-segmentation branch and its loss
weight_jig = 1            # multiplier of the frame-order loss

model_folder = os.path.join('./', out_dir, 'self-supervised-orders', 'Conv', '1000', '7frm')  # Dir for saving models and log files
# Checkpoint loaded (non-strictly) into the model before training.
stage1_dict = (out_dir + '/rgb/ConvLSMT/16frame/stage1/model_rgb_state_dict.pth')

weight_decay = 4e-5
weight_mmaps = 1          # multiplier of the motion-map loss

In [None]:
# Data loader
normalize = Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
# Training-time augmentation; ToTensor/normalize are appended inside makeDatasetMmaps.
spatial_transform = Compose([Scale(256), RandomHorizontalFlip(), MultiScaleCornerCrop([1, 0.875, 0.75, 0.65625], 224)])

# Training set: RGB frames + motion maps + action label + random order label.
vid_seq_train = makeDatasetMmaps(data_dir, user_train, frame, orders_classes = orders_classes,
                            spatial_transform=spatial_transform, normalize=normalize, seqLen=seqLen, fmt='.png')

train_loader = torch.utils.data.DataLoader(vid_seq_train, batch_size=trainBatchSize,
                        shuffle=True, num_workers=4, pin_memory=True)


# Validation set: makeDataset comes from the makeDatasetRGB star import; the
# training loop unpacks its items as (inputs, targets).
vid_seq_val = makeDataset(data_dir, user_val, frame, 
                            spatial_transform=Compose([Scale(256), CenterCrop(224), ToTensor(), normalize]),
                            seqLen=seqLen, fmt='.png')

val_loader = torch.utils.data.DataLoader(vid_seq_val, batch_size=valBatchSize, 
                        shuffle=False, num_workers=2, pin_memory=True)

valInstances = vid_seq_val.__len__()
trainInstances = vid_seq_train.__len__()

print('Number of samples in the dataset: training = {} | validation = {}'.format(trainInstances, valInstances))

Number of samples in the dataset: training = 333 | validation = 116


**Create Output Directory and Log Files**

In [None]:
# Create the output directory explicitly (re-using it when it already exists)
# instead of relying on SummaryWriter to create it as a side effect.
if os.path.exists(model_folder):
    print('Directory {} exists!'.format(model_folder))
os.makedirs(model_folder, exist_ok=True)

# Log files (opened in 'w' mode: logs of a previous run in this folder are overwritten)
writer = SummaryWriter(model_folder)
train_log_loss = open((model_folder + '/train_log_loss.txt'), 'w')
train_log_acc = open((model_folder + '/train_log_acc.txt'), 'w')
val_log_loss = open((model_folder + '/val_log_loss.txt'), 'w')
val_log_acc = open((model_folder + '/val_log_acc.txt'), 'w')

Directory ./experiments/self-supervised-orders/Conv/stage21000/7frm exists!


**Prepare Network and Train**

In [None]:
# Build the model and initialise it from the stage-1 checkpoint
# (strict=False: keys that do not match the checkpoint are skipped).
model = convLSTMModel(num_classes=num_classes, orders_classes=orders_classes, mem_size=memSize, frame=frame)
model.load_state_dict(torch.load(stage1_dict), strict=False)

# Freeze everything and put the whole network in eval mode first ...
model.train(False)
for frozen in model.parameters():
    frozen.requires_grad = False

# ... then re-enable only the parts that are fine-tuned: the conv layers of the
# three layer4 blocks, the ResNet fc layer and the heads on top of the backbone.
finetuned_modules = []
for block_idx in range(3):
    res_block = model.resNet.layer4[block_idx]
    finetuned_modules.extend([res_block.conv1, res_block.conv2])
finetuned_modules.extend([model.resNet.fc, model.ms_cell, model.lstm_cell,
                          model.classifier, model.orders_classifier])

# Parameters handed to the optimizer, collected in the order listed above.
train_params = []
for module in finetuned_modules:
    for weight in module.parameters():
        weight.requires_grad = True
        train_params.append(weight)
    module.train(True)

model.cuda()

convLSTMModel(
  (resNet): ResNet(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (1): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, trac

**Define Loss, Optimizer and Scheduler**

In [None]:
# The same cross-entropy criterion is used for the action, frame-order and motion-map losses.
loss_fn = nn.CrossEntropyLoss()

# Adam over the parameters collected in train_params only.
optimizer_fn = torch.optim.Adam(train_params, lr=lr1, weight_decay=weight_decay, eps=1e-4)

# Multiply the learning rate by decay_factor at every epoch listed in decay_step.
optim_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer_fn, milestones=decay_step,
                                                        gamma=decay_factor)

**Train**

In [None]:
train_iter = 0
min_accuracy = 0   # best validation accuracy so far (name kept for compatibility)
val_interval = 1   # run validation every `val_interval` epochs

# Modules switched back to training mode at the start of every epoch; the rest
# of the network stays in eval mode.
modules_in_train_mode = [model.lstm_cell, model.ms_cell, model.orders_classifier,
                         model.classifier, model.resNet.fc]
for res_block in (model.resNet.layer4[0], model.resNet.layer4[1], model.resNet.layer4[2]):
    modules_in_train_mode += [res_block.conv1, res_block.conv2]

for epoch in range(numEpochs):
    epoch_loss = 0
    epoch_loss_ord = 0
    numCorrTrain = 0
    numCorrTrainOrd = 0
    numCorrTrainMmap = 0

    trainSamples = 0
    iterPerEpoch = 0

    for module in modules_in_train_mode:
        module.train(True)
    writer.add_scalar('lr', optimizer_fn.param_groups[0]['lr'], epoch+1)

    for i, (inputs, mmaps, targets, ords_lb) in enumerate(train_loader):
        train_iter += 1
        iterPerEpoch += 1
        optimizer_fn.zero_grad()

        # (batch, time, C, H, W) -> (time, batch, C, H, W), as indexed by the model.
        inputVariable = inputs.permute(1, 0, 2, 3, 4).cuda()
        labelVariable = targets.cuda()
        # view(-1) keeps a 1-D label tensor even for a batch of one sample.
        ords_lbVariable = ords_lb.view(-1).cuda()
        trainSamples += inputs.size(0)

        output_label, output_mmaps, _ , orders_logit = model(inputVariable, ords_lbVariable, CAM=CAM, MS=MS)

        loss_ord = loss_fn(orders_logit, ords_lbVariable)
        loss = loss_fn(output_label, labelVariable) + loss_ord * weight_jig

        if MS:
            # Drop only the singleton channel dimension of the motion maps; a
            # bare squeeze() would also drop the batch dimension when it is 1.
            mmapsVariable = mmaps.squeeze(2).cuda().long()
            # (time, batch, 2, 7, 7) -> (batch, 2, time, 7, 7): class dimension second.
            output_mmaps = output_mmaps.permute(1, 2, 0, 3, 4)
            loss = loss + loss_fn(output_mmaps, mmapsVariable) * weight_mmaps

            _, predictedMmap = torch.max(output_mmaps.data, 1)
            numCorrTrainMmap += (predictedMmap == mmapsVariable).sum().item()

        loss.backward()
        optimizer_fn.step()
        _, predicted = torch.max(output_label.data, 1)
        _, predictedOrd = torch.max(orders_logit.data, 1)
        numCorrTrain += (predicted == labelVariable).sum().item()
        numCorrTrainOrd += (predictedOrd == ords_lbVariable).sum().item()

        epoch_loss += loss.item()
        epoch_loss_ord += loss_ord.item()

    avg_loss = epoch_loss / max(iterPerEpoch, 1)
    # Epoch average of the order loss (previously only the last batch was reported).
    avg_loss_ord = epoch_loss_ord / max(iterPerEpoch, 1)
    trainAccuracy = (numCorrTrain / max(trainSamples, 1)) * 100
    ordersAccuracy = (numCorrTrainOrd / max(trainSamples, 1)) * 100
    # Always defined, so the report below also works when MS is False.
    if MS:
        mmapAccuracy = (numCorrTrainMmap / max(trainSamples*frame*49, 1)) * 100
    else:
        mmapAccuracy = float('nan')

    print('Train: Epoch = {} | Loss = {} | Loss_orders = {} | Accuracy = {} | Accuracy_orders = {} | Accuracy_mmap = {}'.format(epoch+1, avg_loss, avg_loss_ord, trainAccuracy, ordersAccuracy, mmapAccuracy))

    train_log_loss.write('Train Loss after {} epochs = {} \n'.format(epoch + 1, avg_loss))
    train_log_acc.write('Train Accuracy after {} epochs = {} | Accuracy_orders = {}%\n'.format(epoch + 1, trainAccuracy,ordersAccuracy))
    writer.add_scalar('train/epoch_loss', avg_loss, epoch+1)
    writer.add_scalar('train/accuracy', trainAccuracy, epoch+1)

    if (epoch+1) % val_interval == 0:
        model.train(False)
        val_loss_epoch = 0
        val_iter = 0
        val_samples = 0
        numCorr = 0
        # No gradients are needed for evaluation; this avoids building the graph.
        with torch.no_grad():
            for j, (inputs, targets) in enumerate(val_loader):
                val_iter += 1
                val_samples += inputs.size(0)
                inputVariable = inputs.permute(1, 0, 2, 3, 4).cuda()
                labelVariable = targets.cuda(non_blocking=True)
                output_label, _, _, _ = model(inputVariable, ORD=False, CAM=CAM, MS=False)
                val_loss = loss_fn(output_label, labelVariable)
                val_loss_epoch += val_loss.item()
                _, predicted = torch.max(output_label.data, 1)
                numCorr += (predicted == labelVariable).sum().item()

        val_accuracy = (numCorr / max(val_samples, 1)) * 100

        avg_val_loss = val_loss_epoch / max(val_iter, 1)
        print('Val: Epoch = {} | Loss {} | Accuracy = {}'.format(epoch + 1, avg_val_loss, val_accuracy))
        writer.add_scalar('val/epoch_loss', avg_val_loss, epoch + 1)
        writer.add_scalar('val/accuracy', val_accuracy, epoch + 1)
        val_log_loss.write('Val Loss after {} epochs = {}\n'.format(epoch + 1, avg_val_loss))
        val_log_acc.write('Val Accuracy after {} epochs = {}%\n'.format(epoch + 1, val_accuracy))

        # Keep only the checkpoint with the best validation accuracy.
        if val_accuracy > min_accuracy:
            save_path_model = (model_folder + '/model_rgb_state_dict.pth')
            torch.save(model.state_dict(), save_path_model)
            min_accuracy = val_accuracy

    # Step the scheduler
    optim_scheduler.step()


train_log_loss.close()
train_log_acc.close()
val_log_acc.close()
val_log_loss.close()
writer.export_scalars_to_json(model_folder + "/all_scalars.json")
writer.close()

# Report the number of epochs actually run (the loop variable is one short,
# and undefined when numEpochs is 0).
print('Best accuracy after {} epochs = {}'.format(numEpochs, min_accuracy))

Train: Epoch = 1 | Loss = 10.043387933210893 | Loss_orders = 5.97104549407959 | Accuracy = 28.22822822822823 | Accuracy_orders = 1.8018018018018018 | Accuracy_mmap = 85.62936113956522
Val: Epoch = 1 | Loss 3.260405659675598 | Accuracy = 20.689655172413794
Train: Epoch = 2 | Loss = 9.61958594755693 | Loss_orders = 6.589447975158691 | Accuracy = 18.01801801801802 | Accuracy_orders = 0.9009009009009009 | Accuracy_mmap = 94.84323974119893
Val: Epoch = 2 | Loss 3.0401822328567505 | Accuracy = 18.103448275862068
Train: Epoch = 3 | Loss = 8.11908500844782 | Loss_orders = 4.279300212860107 | Accuracy = 23.423423423423422 | Accuracy_orders = 0.9009009009009009 | Accuracy_mmap = 94.5578231292517
Val: Epoch = 3 | Loss 2.9515483379364014 | Accuracy = 25.862068965517242
Train: Epoch = 4 | Loss = 7.928595499558882 | Loss_orders = 5.035255432128906 | Accuracy = 26.726726726726728 | Accuracy_orders = 1.2012012012012012 | Accuracy_mmap = 94.28816571673715
Val: Epoch = 4 | Loss 2.6303025484085083 | Accu