**Install requirements**

In [2]:
#!pip3 install 'torch==0.3.1'
!pip3 install 'tensorboardX' 



**Import Google Drive**

In [3]:
# Mount Google Drive and make the project folder the working directory, so that the
# project-local modules and the relative dataset/experiment paths used later resolve.
from google.colab import drive
import os

drive.mount('/content/drive')

# Absolute path: a relative 'drive/My Drive/...' only resolves while the working
# directory is still /content, so re-running this cell after the chdir would fail.
path = '/content/drive/My Drive/ego-rnn/'
os.chdir(path)
cwd = os.getcwd()
print("Current dir: "+cwd)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Current dir: /content/drive/My Drive/ego-rnn


**Import libraries**

In [4]:
from __future__ import print_function, division
#from objectAttentionModelConvLSTM import *
from objectModelConvLSTM import *
from spatial_transforms import (Compose, ToTensor, CenterCrop, Scale, Normalize, MultiScaleCornerCrop,
                                RandomHorizontalFlip)
from tensorboardX import SummaryWriter
from makeDatasetRGB import *

import argparse
import sys
import matplotlib.pyplot as plt

**Set Arguments**

In [5]:
# Dataset root and the users assigned to the training / validation splits.
data_dir = "GTEA61/processed_frames2"
user_train, user_val = ['S1', 'S3', 'S4'], ['S2']

# Root folder under which models and logs are written.
out_dir = 'experiments'

# Batch sizes of the two data loaders.
trainBatchSize, valBatchSize = 32, 64

# Values handed to the model constructors (mem_size and num_classes).
memSize = 512
num_classes = 61

# Passed to makeDataset both as its third positional argument and as seqLen.
seqLen = frame = 7


**Prepare Dataset and Dataloader**

In [6]:
# Normalisation shared by the training and validation pipelines.
normalize = Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])

# Training pipeline: includes the random flip and the multi-scale corner crop.
spatial_transform = Compose([
    Scale(256),
    RandomHorizontalFlip(),
    MultiScaleCornerCrop([1, 0.875, 0.75, 0.65625], 224),
    ToTensor(),
    normalize,
])

# Training split: reshuffled every epoch.
vid_seq_train = makeDataset(
    data_dir, user_train, frame,
    spatial_transform=spatial_transform,
    seqLen=seqLen,
    fmt='.png',
)
train_loader = torch.utils.data.DataLoader(
    vid_seq_train,
    batch_size=trainBatchSize,
    shuffle=True,
    num_workers=4,
    pin_memory=True,
)

# Validation split: no random flip / corner crop, fixed order.
vid_seq_val = makeDataset(
    data_dir, user_val, frame,
    spatial_transform=Compose([Scale(256), CenterCrop(224), ToTensor(), normalize]),
    seqLen=seqLen,
    fmt='.png',
)
val_loader = torch.utils.data.DataLoader(
    vid_seq_val,
    batch_size=valBatchSize,
    shuffle=False,
    num_workers=2,
    pin_memory=True,
)

# Split sizes, reported once as a sanity check.
valInstances = len(vid_seq_val)
trainInstances = len(vid_seq_train)

print('Number of samples in the dataset: training = {} | validation = {}'.format(trainInstances, valInstances))

Number of samples in the dataset: training = 336 | validation = 116


**Stage 1**

**Set Parameters**

In [6]:
# Stage 1 schedule: number of epochs and initial learning rate ...
numEpochs, lr1 = 200, 1e-3
# ... and the step decay handed to MultiStepLR (milestones / gamma).
decay_factor = 0.1
decay_step = [25, 75, 150]

In [14]:
# Folder that receives the stage-1 checkpoint, the TensorBoard events and the text logs.
model_folder = os.path.join('./', out_dir, 'rgb', 'ConvLSTM-Attention', 'stage1')
if os.path.exists(model_folder):
    # Not fatal: files with the same names are overwritten by the 'w' opens below.
    print('Directory {} exists!'.format(model_folder))
# Create the folder explicitly instead of counting on SummaryWriter to have created it
# before the open() calls; exist_ok keeps the cell re-runnable.
os.makedirs(model_folder, exist_ok=True)

# Log files (closed at the end of the training cell).
writer = SummaryWriter(model_folder)
train_log_loss = open(os.path.join(model_folder, 'train_log_loss.txt'), 'w')
train_log_acc = open(os.path.join(model_folder, 'train_log_acc.txt'), 'w')
val_log_loss = open(os.path.join(model_folder, 'val_log_loss.txt'), 'w')
val_log_acc = open(os.path.join(model_folder, 'val_log_acc.txt'), 'w')

**Prepare Network and Training**

In [13]:
# Build the stage-1 network.
# (Alternative kept from the original cell: convLSTMModel(num_classes=num_classes, mem_size=memSize).)
model = attentionModel(num_classes=num_classes, mem_size=memSize)

# Start from a fully frozen model in eval mode ...
model.train(False)
for frozen in model.parameters():
    frozen.requires_grad = False

# ... then unfreeze only the LSTM cell and the classifier, collecting their
# parameters (in that order) for the optimizer.
train_params = []
for head in (model.lstm_cell, model.classifier):
    for weight in head.parameters():
        weight.requires_grad = True
        train_params.append(weight)
    head.train(True)

# Last expression of the cell: moves the model to the GPU and displays its structure.
model.cuda()

attentionModel(
  (resNet): ResNet(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (1): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, tra

**Define Loss, Optimizer and LR Scheduler**

In [9]:
# Cross-entropy between the model's class scores and the integer labels.
loss_fn = nn.CrossEntropyLoss()

# Adam over the parameters collected in train_params.
optimizer_fn = torch.optim.Adam(
    train_params,
    lr=lr1,
    weight_decay=4e-5,
    eps=1e-4,
)

# Scale the learning rate by decay_factor at every epoch listed in decay_step.
optim_scheduler = torch.optim.lr_scheduler.MultiStepLR(
    optimizer_fn,
    milestones=decay_step,
    gamma=decay_factor,
)

**Train**

In [10]:
# Stage-1 training loop.
# Every epoch: one pass over train_loader, then (every `val_every` epochs) one pass over
# val_loader; the weights with the best validation accuracy so far are checkpointed.
train_iter = 0
min_accuracy = 0   # best validation accuracy seen so far (historical name kept)
val_every = 1      # validate every `val_every` epochs
epochs_done = 0    # number of fully completed epochs, used in the final report
stage1_dict = (model_folder + '/model_rgb_state_dict.pth')

try:
    for epoch in range(numEpochs):
        epoch_loss = 0
        numCorrTrain = 0
        trainSamples = 0
        iterPerEpoch = 0
        # Validation puts the whole model in eval mode; only the trained parts are
        # switched back, the rest stays in eval mode.
        model.lstm_cell.train(True)
        model.classifier.train(True)
        writer.add_scalar('lr', optimizer_fn.param_groups[0]['lr'], epoch+1)
        for inputs, targets in train_loader:
            train_iter += 1
            iterPerEpoch += 1
            optimizer_fn.zero_grad()
            # The model is fed the batch with its first two axes swapped.
            # Plain tensors are used: the deprecated Variable wrapper is a no-op on
            # PyTorch versions that provide Tensor.item().
            inputVariable = inputs.permute(1, 0, 2, 3, 4).cuda()
            labelVariable = targets.cuda()
            trainSamples += inputs.size(0)
            output_label, _ = model(inputVariable)
            loss = loss_fn(output_label, labelVariable)
            loss.backward()
            optimizer_fn.step()
            _, predicted = torch.max(output_label.data, 1)
            # Reuse the labels already on the GPU and keep the counter a Python int.
            numCorrTrain += (predicted == labelVariable).sum().item()
            epoch_loss += loss.item()
        avg_loss = epoch_loss/iterPerEpoch
        trainAccuracy = (numCorrTrain / trainSamples) * 100

        print('Train: Epoch = {} | Loss = {} | Accuracy = {}'.format(epoch+1, avg_loss, trainAccuracy))
        writer.add_scalar('train/epoch_loss', avg_loss, epoch+1)
        writer.add_scalar('train/accuracy', trainAccuracy, epoch+1)
        train_log_loss.write('Train Loss after {} epochs = {}\n'.format(epoch + 1, avg_loss))
        train_log_acc.write('Train Accuracy after {} epochs = {}%\n'.format(epoch + 1, trainAccuracy))

        if (epoch+1) % val_every == 0:
            model.train(False)
            val_loss_epoch = 0
            val_iter = 0
            val_samples = 0
            numCorr = 0
            # No gradients are needed for evaluation; skipping the autograd graph saves memory.
            with torch.no_grad():
                for inputs, targets in val_loader:
                    val_iter += 1
                    val_samples += inputs.size(0)
                    inputVariable = inputs.permute(1, 0, 2, 3, 4).cuda()
                    labelVariable = targets.cuda(non_blocking=True)
                    output_label, _ = model(inputVariable)
                    val_loss = loss_fn(output_label, labelVariable)
                    val_loss_epoch += val_loss.item()
                    _, predicted = torch.max(output_label.data, 1)
                    numCorr += (predicted == labelVariable).sum().item()
            val_accuracy = (numCorr / val_samples) * 100
            avg_val_loss = val_loss_epoch / val_iter
            print('Val: Epoch = {} | Loss {} | Accuracy = {}'.format(epoch + 1, avg_val_loss, val_accuracy))
            writer.add_scalar('val/epoch_loss', avg_val_loss, epoch + 1)
            writer.add_scalar('val/accuracy', val_accuracy, epoch + 1)
            val_log_loss.write('Val Loss after {} epochs = {}\n'.format(epoch + 1, avg_val_loss))
            val_log_acc.write('Val Accuracy after {} epochs = {}%\n'.format(epoch + 1, val_accuracy))
            if val_accuracy > min_accuracy:
                # Keep only the best-performing weights.
                torch.save(model.state_dict(), stage1_dict)
                min_accuracy = val_accuracy

        optim_scheduler.step()
        epochs_done = epoch + 1
finally:
    # Runs on normal completion and on interruption alike, so the text logs and the
    # TensorBoard data are flushed even when the cell is stopped by hand.
    train_log_loss.close()
    train_log_acc.close()
    val_log_acc.close()
    val_log_loss.close()
    writer.export_scalars_to_json(model_folder + "/all_scalars.json")
    writer.close()

# Report the number of epochs actually completed (the raw loop index is 0-based).
print('Best accuracy after {} epochs = {}'.format(epochs_done, min_accuracy))

Train: Epoch = 1 | Loss = 4.078752149235118 | Accuracy = 2.976190476190476
Val: Epoch = 1 | Loss 3.961909055709839 | Accuracy = 6.0344827586206895
Train: Epoch = 2 | Loss = 4.051681865345348 | Accuracy = 6.845238095238096
Val: Epoch = 2 | Loss 3.9288421869277954 | Accuracy = 6.0344827586206895
Train: Epoch = 3 | Loss = 4.01118523424322 | Accuracy = 6.845238095238096
Val: Epoch = 3 | Loss 3.8730334043502808 | Accuracy = 8.620689655172415
Train: Epoch = 4 | Loss = 3.9233627536080102 | Accuracy = 7.738095238095238
Val: Epoch = 4 | Loss 3.923434257507324 | Accuracy = 6.896551724137931
Train: Epoch = 5 | Loss = 3.8082044558091597 | Accuracy = 10.119047619047619
Val: Epoch = 5 | Loss 3.691694498062134 | Accuracy = 6.896551724137931
Train: Epoch = 6 | Loss = 3.7218796123157847 | Accuracy = 8.928571428571429
Val: Epoch = 6 | Loss 3.6967068910598755 | Accuracy = 16.379310344827587
Train: Epoch = 7 | Loss = 3.6786505092274058 | Accuracy = 8.630952380952381
Val: Epoch = 7 | Loss 3.586186289787292

KeyboardInterrupt: ignored

**Stage 2**

**Set Parameters**

In [7]:
# Stage 2 schedule: number of epochs and initial learning rate ...
numEpochs, lr1 = 150, 1e-4
# ... and the step decay handed to MultiStepLR (milestones / gamma).
decay_factor = 0.1
decay_step = [25, 75]

In [8]:
# Folder that receives the stage-2 checkpoint, the TensorBoard events and the text logs.
model_folder = os.path.join('./', out_dir, 'rgb', 'ConvLSTM-Attention', 'stage2')
if os.path.exists(model_folder):
    # Not fatal: files with the same names are overwritten by the 'w' opens below.
    print('Directory {} exists!'.format(model_folder))
# Create the folder explicitly instead of counting on SummaryWriter to have created it
# before the open() calls; exist_ok keeps the cell re-runnable.
os.makedirs(model_folder, exist_ok=True)

# Log files (closed at the end of the training cell).
writer = SummaryWriter(model_folder)
train_log_loss = open(os.path.join(model_folder, 'train_log_loss.txt'), 'w')
train_log_acc = open(os.path.join(model_folder, 'train_log_acc.txt'), 'w')
val_log_loss = open(os.path.join(model_folder, 'val_log_loss.txt'), 'w')
val_log_acc = open(os.path.join(model_folder, 'val_log_acc.txt'), 'w')

Directory ./experiments/rgb/ConvLSTM-Attention/stage2 exists!


**Prepare Network and Train**

In [9]:
# Checkpoint produced by a stage-1 run.
# NOTE(review): this points at 'ConvLSMT/7frame/stage1', not at the folder the stage-1
# cells of this notebook write to - confirm that this is the intended checkpoint.
stage1_dict = (out_dir + '/rgb/ConvLSMT/7frame/stage1/model_rgb_state_dict.pth')

# Build the stage-2 network and initialise it from the stage-1 weights.
# (Alternative kept from the original cell: attentionModel(num_classes=num_classes, mem_size=memSize).)
model = convLSTMModel(num_classes=num_classes, mem_size=memSize)
model.load_state_dict(torch.load(stage1_dict))

# Start from a fully frozen model in eval mode ...
model.train(False)
for frozen in model.parameters():
    frozen.requires_grad = False

# ... then unfreeze, in this order: both convolutions of the three layer4 blocks,
# the ResNet fc layer, the LSTM cell and the classifier.
final_stage = model.resNet.layer4
trainable_modules = [
    final_stage[0].conv1, final_stage[0].conv2,
    final_stage[1].conv1, final_stage[1].conv2,
    final_stage[2].conv1, final_stage[2].conv2,
    model.resNet.fc,
    model.lstm_cell,
    model.classifier,
]

# Parameters handed to the optimizer, in the order listed above.
train_params = []
for module in trainable_modules:
    for weight in module.parameters():
        weight.requires_grad = True
        train_params.append(weight)
    module.train(True)

# Last expression of the cell: moves the model to the GPU and displays its structure.
model.cuda()

convLSTMModel(
  (resNet): ResNet(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (1): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, trac

**Define Loss, Optimizer and LR Scheduler**

In [10]:
# Cross-entropy between the model's class scores and the integer labels.
loss_fn = nn.CrossEntropyLoss()

# Adam over the parameters collected in train_params.
optimizer_fn = torch.optim.Adam(
    train_params,
    lr=lr1,
    weight_decay=4e-5,
    eps=1e-4,
)

# Scale the learning rate by decay_factor at every epoch listed in decay_step.
optim_scheduler = torch.optim.lr_scheduler.MultiStepLR(
    optimizer_fn,
    milestones=decay_step,
    gamma=decay_factor,
)

**Train**

In [None]:
# Stage-2 training loop.
# Every epoch: one pass over train_loader, then (every `val_every` epochs) one pass over
# val_loader; the weights with the best validation accuracy so far are checkpointed.
train_iter = 0
min_accuracy = 0   # best validation accuracy seen so far (historical name kept)
val_every = 1      # validate every `val_every` epochs
epochs_done = 0    # number of fully completed epochs, used in the final report
save_path_model = (model_folder + '/model_rgb_state_dict.pth')

try:
    for epoch in range(numEpochs):
        epoch_loss = 0
        numCorrTrain = 0
        trainSamples = 0
        iterPerEpoch = 0
        # Validation puts the whole model in eval mode; only the fine-tuned parts are
        # switched back, the rest stays in eval mode.
        model.lstm_cell.train(True)
        model.classifier.train(True)
        writer.add_scalar('lr', optimizer_fn.param_groups[0]['lr'], epoch+1)

        for block_idx in range(3):
            model.resNet.layer4[block_idx].conv1.train(True)
            model.resNet.layer4[block_idx].conv2.train(True)
        model.resNet.fc.train(True)

        for inputs, targets in train_loader:
            train_iter += 1
            iterPerEpoch += 1
            optimizer_fn.zero_grad()
            # The model is fed the batch with its first two axes swapped.
            # Plain tensors are used: the deprecated Variable wrapper is a no-op on
            # PyTorch versions that provide Tensor.item().
            inputVariable = inputs.permute(1, 0, 2, 3, 4).cuda()
            labelVariable = targets.cuda()
            trainSamples += inputs.size(0)
            output_label, _ = model(inputVariable)
            loss = loss_fn(output_label, labelVariable)
            loss.backward()
            optimizer_fn.step()
            _, predicted = torch.max(output_label.data, 1)
            # Reuse the labels already on the GPU and keep the counter a Python int.
            numCorrTrain += (predicted == labelVariable).sum().item()
            epoch_loss += loss.item()
        avg_loss = epoch_loss/iterPerEpoch
        trainAccuracy = (numCorrTrain / trainSamples) * 100

        print('Train: Epoch = {} | Loss = {} | Accuracy = {}'.format(epoch+1, avg_loss, trainAccuracy))

        train_log_loss.write('Train Loss after {} epochs = {}\n'.format(epoch + 1, avg_loss))
        train_log_acc.write('Train Accuracy after {} epochs = {}%\n'.format(epoch + 1, trainAccuracy))
        writer.add_scalar('train/epoch_loss', avg_loss, epoch+1)
        writer.add_scalar('train/accuracy', trainAccuracy, epoch+1)

        if (epoch+1) % val_every == 0:
            model.train(False)
            val_loss_epoch = 0
            val_iter = 0
            val_samples = 0
            numCorr = 0
            # No gradients are needed for evaluation; skipping the autograd graph saves memory.
            with torch.no_grad():
                for inputs, targets in val_loader:
                    val_iter += 1
                    val_samples += inputs.size(0)
                    inputVariable = inputs.permute(1, 0, 2, 3, 4).cuda()
                    labelVariable = targets.cuda(non_blocking=True)
                    output_label, _ = model(inputVariable)
                    val_loss = loss_fn(output_label, labelVariable)
                    val_loss_epoch += val_loss.item()
                    _, predicted = torch.max(output_label.data, 1)
                    numCorr += (predicted == labelVariable).sum().item()
            val_accuracy = (numCorr / val_samples) * 100
            avg_val_loss = val_loss_epoch / val_iter
            print('Val: Epoch = {} | Loss {} | Accuracy = {}'.format(epoch + 1, avg_val_loss, val_accuracy))
            writer.add_scalar('val/epoch_loss', avg_val_loss, epoch + 1)
            writer.add_scalar('val/accuracy', val_accuracy, epoch + 1)
            val_log_loss.write('Val Loss after {} epochs = {}\n'.format(epoch + 1, avg_val_loss))
            val_log_acc.write('Val Accuracy after {} epochs = {}%\n'.format(epoch + 1, val_accuracy))
            if val_accuracy > min_accuracy:
                # Keep only the best-performing weights.
                torch.save(model.state_dict(), save_path_model)
                min_accuracy = val_accuracy

        optim_scheduler.step()
        epochs_done = epoch + 1
finally:
    # Runs on normal completion and on interruption alike, so the text logs and the
    # TensorBoard data are flushed even when the cell is stopped by hand.
    train_log_loss.close()
    train_log_acc.close()
    val_log_acc.close()
    val_log_loss.close()
    writer.export_scalars_to_json(model_folder + "/all_scalars.json")
    writer.close()

# Report the number of epochs actually completed (the raw loop index is 0-based).
print('Best accuracy after {} epochs = {}'.format(epochs_done, min_accuracy))

Train: Epoch = 1 | Loss = 3.471258661963723 | Accuracy = 14.285714285714285
Val: Epoch = 1 | Loss 3.194247245788574 | Accuracy = 16.379310344827587
Train: Epoch = 2 | Loss = 3.039696129885587 | Accuracy = 20.238095238095237
Val: Epoch = 2 | Loss 2.91312313079834 | Accuracy = 22.413793103448278
Train: Epoch = 3 | Loss = 2.898478551344438 | Accuracy = 25.0
Val: Epoch = 3 | Loss 2.8342032432556152 | Accuracy = 22.413793103448278
Train: Epoch = 4 | Loss = 2.723279606212269 | Accuracy = 27.976190476190478
Val: Epoch = 4 | Loss 2.8456143140792847 | Accuracy = 23.275862068965516
Train: Epoch = 5 | Loss = 2.73574237389998 | Accuracy = 24.702380952380953
Val: Epoch = 5 | Loss 2.8176686763763428 | Accuracy = 21.551724137931032
Train: Epoch = 6 | Loss = 2.7690666805614126 | Accuracy = 23.809523809523807
Val: Epoch = 6 | Loss 3.038485288619995 | Accuracy = 22.413793103448278
Train: Epoch = 7 | Loss = 2.7501590685410933 | Accuracy = 24.107142857142858
Val: Epoch = 7 | Loss 2.7722268104553223 | Accu