In [1]:
# Install the runtime dependencies for this notebook.
# torch is pinned to 0.3.1: the cells below use the 0.3.x API
# (Variable wrappers, loss.data[0]).
!pip3 install 'torch==0.3.1'
# NOTE(review): tensorboardX is not version-pinned; consider pinning it so
# that a fresh runtime reproduces the same environment.
!pip3 install 'tensorboardX' 



In [2]:
# Mount Google Drive and make the project folder the working directory, so the
# project-local modules imported below and the relative data / experiment
# paths used by later cells resolve against it.
from google.colab import drive
import os
drive.mount('/content/drive')

# Absolute path on purpose: the former relative 'drive/My Drive/ego-rnn/' only
# resolved while the kernel was still in /content, so re-running this cell
# after the first chdir raised FileNotFoundError.
path = '/content/drive/My Drive/ego-rnn/'
os.chdir(path)
cwd = os.getcwd()
print("Current dir: "+cwd)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Current dir: /content/drive/My Drive/ego-rnn


In [0]:
from __future__ import print_function, division
from spatial_transforms import (Compose, ToTensor, CenterCrop, Scale, Normalize, MultiScaleCornerCrop,
 RandomHorizontalFlip)
from tensorboardX import SummaryWriter
import torch.nn as nn
from twoStreamModel import *
from torch.autograd import Variable
from torch.utils.data.sampler import WeightedRandomSampler
from makeDatasetTwoStream import *
import argparse

import sys

In [0]:
# ---- Data ------------------------------------------------------------------
data_dir = 'GTEA61/flow_x_processed'  # dataset root handed to makeDataset
user_train = ['S1', 'S3', 'S4']       # user IDs used for training
user_val = ['S2']                     # user ID held out for validation
num_classes = 61                      # number of output classes of the model

# ---- Clip sampling ---------------------------------------------------------
seqLen = 25  # forwarded to makeDataset(seqLen=...)
frame = 5    # forwarded as stackSize to both the dataset and the model

# ---- Model / loader sizes --------------------------------------------------
memSize = 512                       # forwarded to the model as memSize
trainBatchSize = valBatchSize = 32  # DataLoader batch sizes

# ---- Outputs and pre-trained checkpoints -----------------------------------
out_dir = 'experiments'
flowModel = './experiments/flow/model_flow_state_dict.pth'
rgbModel = './experiments/rgb/ConvLSMT-Attention/16frames/stage2/model_rgb_state_dict.pth'

In [0]:
# Per-channel normalisation (the usual ImageNet statistics); the same object is
# reused by the validation pipeline built in the dataset cell.
normalize = Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])

# Training-time augmentation: resize, random flip, multi-scale corner crop to
# 224, then tensor conversion and normalisation.
train_crop_scales = [1, 0.875, 0.75, 0.65625]
train_transform_steps = [
    Scale(256),
    RandomHorizontalFlip(),
    MultiScaleCornerCrop(train_crop_scales, 224),
    ToTensor(),
    normalize,
]
spatial_transform = Compose(train_transform_steps)



In [0]:
# Keyword arguments that the training and validation datasets have in common.
shared_dataset_args = dict(sequence=False, stackSize=frame, numSeg=1, fmt='.png', seqLen=seqLen)

# Training split: augmented transform, reshuffled every epoch.
vid_seq_train = makeDataset(data_dir, user_train, spatial_transform=spatial_transform,
                            **shared_dataset_args)
train_loader = torch.utils.data.DataLoader(
    vid_seq_train,
    batch_size=trainBatchSize,
    shuffle=True,
    num_workers=4,
    pin_memory=True,
)

# Validation split: deterministic centre crop, fixed order.
# phase='Test' is passed through to makeDataset as-is (its effect lives in
# makeDatasetTwoStream — not visible from this notebook).
val_transform = Compose([Scale(256), CenterCrop(224), ToTensor(), normalize])
vid_seq_val = makeDataset(data_dir, user_val, spatial_transform=val_transform,
                          phase='Test', **shared_dataset_args)
val_loader = torch.utils.data.DataLoader(
    vid_seq_val,
    batch_size=valBatchSize,
    shuffle=False,
    num_workers=2,
    pin_memory=True,
)

In [7]:
# Report how many samples each split contains.
trainInstances = len(vid_seq_train)
valInstances = len(vid_seq_val)
size_summary = 'Number of samples in the dataset: training = {} | validation = {}'
print(size_summary.format(trainInstances, valInstances))

Number of samples in the dataset: training = 341 | validation = 116


In [0]:
# Optimisation schedule.
numEpochs = 250      # number of epochs run by the training loop
lr1 = 0.01           # default SGD learning rate
decay_step = 1       # StepLR step_size (in epochs)
decay_factor = 0.99  # StepLR gamma: multiplicative lr decay per step

In [0]:
# Run directory that receives the checkpoint, the TensorBoard events and the
# plain-text logs.
# NOTE(review): the folder name says "16frm" while seqLen is set to 25 in the
# configuration cell — confirm the name is intentional.
model_folder = os.path.join('./', out_dir, 'twoStream16frm')

# Never write into an existing run directory: stop here instead of overwriting
# the results of a previous experiment.
if not os.path.exists(model_folder):
    os.makedirs(model_folder)
else:
    print('Directory {} exists!'.format(model_folder))
    sys.exit()

# TensorBoard writer plus four text logs; the training cell closes all of them.
writer = SummaryWriter(model_folder)
train_log_loss, train_log_acc, val_log_loss, val_log_acc = [
    open(model_folder + '/' + log_name, 'w')
    for log_name in ('train_log_loss.txt', 'train_log_acc.txt',
                     'val_log_loss.txt', 'val_log_acc.txt')
]


In [10]:
 # Build the two-stream network and select the parameters that get fine-tuned.
model = twoStreamAttentionModel(flowModel=flowModel, frameModel=rgbModel, stackSize=frame,
                                memSize=memSize, num_classes=num_classes)


def _unfreeze(module):
  """Enable gradients on every parameter of `module`.

  Returns the parameters as a list, in `module.parameters()` order.
  """
  unfrozen = []
  for param in module.parameters():
    param.requires_grad = True
    unfrozen.append(param)
  return unfrozen


# Start from a fully frozen model in eval mode; the training loop re-enables
# train mode only on the sub-modules it needs.
for params in model.parameters():
  params.requires_grad = False

model.train(False)

# RGB-stream side, optimised with the default learning rate: the classifier,
# the LSTM cell, conv1/conv2 of the three layer4 blocks and the ResNet fc
# layer. Only those two convolutions of each block are unfrozen; every other
# parameter of the block stays frozen.
frame_resnet = model.frameModel.resNet
rgb_trainable_modules = [model.classifier, model.frameModel.lstm_cell]
for block_idx in range(3):
  block = frame_resnet.layer4[block_idx]
  rgb_trainable_modules += [block.conv1, block.conv2]
rgb_trainable_modules.append(frame_resnet.fc)

train_params = []
for module in rgb_trainable_modules:
  train_params += _unfreeze(module)

# Flow-stream layer4 is collected separately so the optimiser can give it its
# own learning rate.
base_params = _unfreeze(model.flowModel.layer4)

# Last expression of the cell: moves the model to the GPU and displays it.
model.cuda()

twoStreamAttentionModel(
  (flowModel): ResNet(
    (conv1): Conv2d(10, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True)
    (relu): ReLU(inplace)
    (maxpool): MaxPool2d(kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), dilation=(1, 1), ceil_mode=False)
    (layer1): Sequential(
      (0): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True)
        (relu): ReLU(inplace)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True)
      )
      (1): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True)
        (relu): ReLU(inplace)
        (conv2): Conv2d(64, 

In [0]:
# Classification loss.
loss_fn = nn.CrossEntropyLoss()

# Two parameter groups: train_params use the optimiser-wide learning rate,
# base_params (flow-stream layer4) get their own, smaller one.
param_groups = [
    {'params': train_params},
    {'params': base_params, 'lr': 1e-4},
]
optimizer_fn = torch.optim.SGD(param_groups, lr=lr1, momentum=0.9, weight_decay=5e-4)

# Multiply every group's learning rate by decay_factor each decay_step epochs.
optim_scheduler = torch.optim.lr_scheduler.StepLR(
    optimizer_fn, step_size=decay_step, gamma=decay_factor)

# Split sizes, used as accuracy denominators by the training loop.
trainSamples = len(vid_seq_train)
valSamples = len(vid_seq_val)

In [12]:
# Fine-tune the two-stream model. After every epoch the model is validated and
# the weights with the best validation accuracy so far are saved to
# model_folder. Losses and accuracies go to TensorBoard and the text logs.
train_iter = 0
# Best validation accuracy seen so far (the name is historical; the next cell
# prints it, so it is kept).
min_accuracy = 0

for epoch in range(numEpochs):
  # The scheduler is stepped at the start of the epoch, as in the original
  # code written for the pinned torch 0.3.1.
  optim_scheduler.step()
  epoch_loss = 0
  numCorrTrain = 0
  iterPerEpoch = 0
  # Only these two sub-modules are switched to train mode; everything else
  # stays in the eval mode set by model.train(False).
  model.classifier.train(True)
  model.flowModel.layer4.train(True)
  for inputFlow, inputFrame, targets in train_loader:
    train_iter += 1
    iterPerEpoch += 1
    optimizer_fn.zero_grad()
    inputVariableFlow = Variable(inputFlow.cuda())
    # Swap the first two axes of the frame tensor (assumes the loader yields
    # batch-first and the model expects sequence-first — TODO confirm).
    inputVariableFrame = Variable(inputFrame.permute(1, 0, 2, 3, 4).cuda())
    labelVariable = Variable(targets.cuda())
    output_label = model(inputVariableFlow, inputVariableFrame)
    # CrossEntropyLoss applies log-softmax itself, so the raw scores are
    # passed directly. The extra F.log_softmax was redundant (log-softmax is
    # idempotent), so the loss value is unchanged.
    loss = loss_fn(output_label, labelVariable)
    loss.backward()
    optimizer_fn.step()
    _, predicted = torch.max(output_label.data, 1)
    # Compare against the labels already on the GPU instead of uploading
    # `targets` a second time.
    numCorrTrain += (predicted == labelVariable.data).sum()
    epoch_loss += loss.data[0]
  avg_loss = epoch_loss / iterPerEpoch
  trainAccuracy = (numCorrTrain / trainSamples) * 100
  print('Average training loss after {} epoch = {} '.format(epoch + 1, avg_loss))
  print('Training accuracy after {} epoch = {}% '.format(epoch + 1, trainAccuracy))
  writer.add_scalar('train/epoch_loss', avg_loss, epoch + 1)
  writer.add_scalar('train/accuracy', trainAccuracy, epoch + 1)
  train_log_loss.write('Training loss after {} epoch = {}\n'.format(epoch + 1, avg_loss))
  train_log_acc.write('Training accuracy after {} epoch = {}\n'.format(epoch + 1, trainAccuracy))
  # Validation interval is 1, i.e. validate after every epoch.
  if (epoch + 1) % 1 == 0:
    model.train(False)
    val_loss_epoch = 0
    val_iter = 0
    numCorr = 0
    for inputFlow, inputFrame, targets in val_loader:
      val_iter += 1
      # volatile=True (torch 0.3.x inference mode): no autograd graph is
      # built for the validation forward pass, which saves GPU memory.
      inputVariableFlow = Variable(inputFlow.cuda(), volatile=True)
      inputVariableFrame = Variable(inputFrame.permute(1, 0, 2, 3, 4).cuda(), volatile=True)
      labelVariable = Variable(targets.cuda(), volatile=True)
      output_label = model(inputVariableFlow, inputVariableFrame)
      loss = loss_fn(output_label, labelVariable)
      val_loss_epoch += loss.data[0]
      _, predicted = torch.max(output_label.data, 1)
      numCorr += (predicted == labelVariable.data).sum()
    val_accuracy = (numCorr / valSamples) * 100
    avg_val_loss = val_loss_epoch / val_iter
    print('Val Loss after {} epochs, loss = {}'.format(epoch + 1, avg_val_loss))
    print('Val Accuracy after {} epochs = {}%'.format(epoch + 1, val_accuracy))
    writer.add_scalar('val/epoch_loss', avg_val_loss, epoch + 1)
    writer.add_scalar('val/accuracy', val_accuracy, epoch + 1)
    val_log_loss.write('Val Loss after {} epochs = {}\n'.format(epoch + 1, avg_val_loss))
    val_log_acc.write('Val Accuracy after {} epochs = {}%\n'.format(epoch + 1, val_accuracy))
    # Keep only the best checkpoint: overwrite it when validation improves.
    if val_accuracy > min_accuracy:
      save_path_model = (model_folder + '/model_twoStream_state_dict.pth')
      torch.save(model.state_dict(), save_path_model)
      min_accuracy = val_accuracy
  # Push this epoch's log lines to disk so an interrupted run keeps them.
  for log_file in (train_log_loss, train_log_acc, val_log_loss, val_log_acc):
    log_file.flush()
train_log_loss.close()
train_log_acc.close()
val_log_acc.close()
val_log_loss.close()
writer.export_scalars_to_json(model_folder + "/all_scalars.json")
writer.close()

Average training loss after 1 epoch = 3.6909544467926025 
Training accuracy after 1 epoch = 16.129032258064516% 
Val Loss after 1 epochs, loss = 2.837019383907318
Val Accuracy after 1 epochs = 24.137931034482758%
Average training loss after 2 epoch = 2.302162755619396 
Training accuracy after 2 epoch = 39.589442815249264% 
Val Loss after 2 epochs, loss = 2.309513747692108
Val Accuracy after 2 epochs = 43.96551724137931%
Average training loss after 3 epoch = 1.9724411856044421 
Training accuracy after 3 epoch = 48.09384164222874% 
Val Loss after 3 epochs, loss = 1.8431525528430939
Val Accuracy after 3 epochs = 48.275862068965516%
Average training loss after 4 epoch = 1.6041024814952503 
Training accuracy after 4 epoch = 56.598240469208214% 
Val Loss after 4 epochs, loss = 1.8304044306278229
Val Accuracy after 4 epochs = 48.275862068965516%
Average training loss after 5 epoch = 1.353655148636211 
Training accuracy after 5 epoch = 66.27565982404691% 
Val Loss after 5 epochs, loss = 1.8523

In [13]:
# Summarise the run: `epoch` is the last index of the training loop and
# `min_accuracy` holds the best validation accuracy recorded there.
epochs_run = epoch + 1
print('Best accuracy after {} epochs = {}'.format(epochs_run, min_accuracy))

Best accuracy after 250 epochs = 70.6896551724138
