In [1]:
from glob import glob
from time import time
import os, cv2, copy
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from reader import readShortVideo, getVideoList
from utils import showFrames

# 1. import the table contains video and label info.

In [2]:
task = "train"
afterCnt = 300 # we want to down-sample to 300 images for each videos

names = []
train_x = []
train_y = []

for label_path in glob('./hw4_data/FullLengthVideos/labels/'+task+'/*') :
    name = label_path.split('/')[-1].replace('.txt','')
    print('[INFO] load images and label for video :', name, '...', end='')
    
    # load images paths
    img_paths = glob(os.path.join('./hw4_data/FullLengthVideos/videos/'+task, name, '*'))
    img_paths.sort()
    
    # load labels
    labels = []
    with open(label_path, 'r') as file:
        for label in file:
            labels.append(int(label.replace('\n','')))
    
    # check if lengths of img_paths and labels are match
    if not len(img_paths) == len(labels) :
        raise ValueError('[ERROR] Mismatch of length between frames and labels.')
    
    # down-sample
    idx_lst = []
    rawCnt = len(img_paths)
    interval = rawCnt / afterCnt
    for idx in range(1, afterCnt+1):
        idx_lst.append(int(interval * idx) - 1) # select index for down-sample step
    
    # select down-sample labels and image paths
    labels = np.array(labels)
    img_paths = np.array(img_paths)
    
    labels = labels[idx_lst]
    img_paths = img_paths[idx_lst]
    
    # load images
    images = []
    for img_path in img_paths:
        images.append(cv2.imread(img_path))
    images = np.array(images)
    images = images[:,:,:,::-1]
    
    # finally put into train_x and train_y
    names.append(name)
    train_x.append(images)
    train_y.append(labels)
    
    print('finish') 
    
    
train_x = np.array(train_x)
train_y = np.array(train_y)

[INFO] load images and label for video : OP04-R05-Cheeseburger ...finish
[INFO] load images and label for video : OP02-R05-Cheeseburger ...finish
[INFO] load images and label for video : OP04-R03-BaconAndEggs ...finish
[INFO] load images and label for video : OP05-R03-BaconAndEggs ...finish
[INFO] load images and label for video : OP06-R07-Pizza ...finish
[INFO] load images and label for video : OP03-R01-PastaSalad ...finish
[INFO] load images and label for video : OP03-R07-Pizza ...finish
[INFO] load images and label for video : OP01-R01-PastaSalad ...finish
[INFO] load images and label for video : OP04-R07-Pizza ...finish
[INFO] load images and label for video : OP01-R06-GreekSalad ...finish
[INFO] load images and label for video : OP01-R05-Cheeseburger ...finish
[INFO] load images and label for video : OP05-R07-Pizza ...finish
[INFO] load images and label for video : OP02-R03-BaconAndEggs ...finish
[INFO] load images and label for video : OP02-R07-Pizza ...finish
[INFO] load images 

# load pretrained model

In [3]:
import torch
from torch.autograd import Variable
from torch import nn
from torch import optim

In [4]:
# Full Video VGG model
class FVrnnVGG(nn.Module):
    def __init__(self, backend='vgg16', pretrained=True, n_label=11):
        super(FVrnnVGG, self).__init__()
        
        ### check valid 
        if backend in ['vgg13', 'vgg13_bn', 'vgg16', 'vgg16_bn']:
            pass
        else :
            print("[INFO] invalid backend '%s', change to 'vgg16_bn'" % backend)
            backend = 'vgg16_bn'
            
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        
        ### init param
        self.backend = backend
        self.pretrained = pretrained
        # model flow
        self.features = None
        self.avgpool = None
        self.RNN = None
        self.h0 = None # follow RNN
        self.c0 = None # follow RNN
        self.classifier = None
        
        ### init process
        self.load_pretrained() # load features
        self.create_RNN() # create RNN 
        self.create_classifier(n_label) # create last layer
        self.fix_features() # fix features weights
        self.load_Problem2_pretrain()
        
    def forward(self, input, h=None):
        '''
        input shape : (frame, channel, height, weight)
        output shape : (1, cls)
        '''
        
        # regard f:frames as b:batch
        x = self.features(input) # shape : (f, 512, 7, 10)
        x = self.avgpool(x) # shape (f, 512, 7, 7)      
        
        x = torch.flatten(x, start_dim=1) # (f, 25088)
        x = torch.unsqueeze(x,0) # (1, f, 25088)
        
        if type(h) == None :
            h = self.h0
        out, h = self.RNN(x, h) # out(1, f, 1024) & (num_layers=1, 1, 1024)
        x = torch.squeeze(out, 0) # (f, 1024)        
        
        x = self.classifier(x) # out shape : (f, 11)
        return x, h # (f, 11) & (1, 1, 1024)
    
    def load_pretrained(self):
        import torchvision.models as models
        backend_model = None
        try:
            if self.backend == 'vgg13' :
                backend_model = models.vgg13(pretrained=self.pretrained)
            elif self.backend == 'vgg13_bn' :
                backend_model = models.vgg13_bn(pretrained=self.pretrained)
            elif self.backend == 'vgg16' :
                backend_model = models.vgg16(pretrained=self.pretrained)
            elif self.backend == 'vgg16_bn':
                backend_model = models.vgg16_bn(pretrained=self.pretrained)
            
            
            else :
                raise ValueError("[ERROR] Unexpected backend name pass through previous check then into load_pretrained() .")
            # copy features flow
            self.features = copy.deepcopy(backend_model.features) 
            self.avgpool = copy.deepcopy(backend_model.avgpool)
            print("[INFO] load pretrained features successfully, backend : %s" % self.backend)
        except Exception as e:
            print(e)
    
    def create_RNN(self, rnn='GRU', hidden_size=1024, num_layers=1, batch_first=True):
        '''
        output (batch, seq, hidden_size)
        h_out (n_layer, batch, hidden_size)
        '''
        try:
            input_size = None
            if self.backend in ['vgg13', 'vgg13_bn', 'vgg16', 'vgg16_bn']:
                input_size = 25088
            else :
                raise ValueError("[ERROR] Unexpected backend name pass through previous check then into create_outLayer() .")
            
            if rnn == 'GRU' :
                self.RNN = nn.GRU(
                    input_size=input_size,
                    hidden_size=hidden_size,
                    num_layers=num_layers, 
                    batch_first=batch_first,
                )
                self.h0 = Variable(torch.zeros((num_layers,1,hidden_size)), requires_grad=False).to(self.device) # bach_size = 1
            
            else :
                raise ValueError("[ERROR] Unexpected rnn '%s', please select one in ['GRU']" & rnn)
                
            print("[INFO] create RNN component successfully, rnn : %s ." % rnn)
        except Exception as e:
            print(e)
        
        
    def create_classifier(self, n_label=11):
        try:
            if self.backend in ['vgg13', 'vgg13_bn', 'vgg16', 'vgg16_bn'] :
                self.classifier = nn.Sequential(
                    nn.Linear(1024, 1024),
                    nn.ReLU(inplace=True),
                    nn.Dropout(0.5),
                    nn.Linear(1024, 11),
                    nn.Softmax(),
                )
            else :
                raise ValueError("[ERROR] Unexpected backend name pass through previous check then into create_outLayer() .")
        
            print("[INFO] create classifier successfully.")
        except Exception as e:
            print(e)
                
    def fix_features(self): # fix features weights
        for param in self.features.parameters():
            param.requires_grad = False
    
    def load_Problem2_pretrain(self, path='./storage/'):
        if self.device == 'cuda':
            self.RNN.load_state_dict(torch.load(os.path.join(path, 'MFrnnVGG_RNN.pkl')))
            self.classifier.load_state_dict(torch.load(os.path.join(path, 'MFrnnVGG_classifier.pkl')))
        else :
            self.RNN.load_state_dict(torch.load(os.path.join(path, 'MFrnnVGG_RNN.pkl'), map_location=lambda storage, loc: storage))
            self.classifier.load_state_dict(torch.load(os.path.join(path, 'MFrnnVGG_classifier.pkl'), map_location=lambda storage, loc: storage))
        
        print("[INFO] load Problem2 pretrained weight successfully.")


model = FVrnnVGG(backend='vgg13_bn')

[INFO] load pretrained features successfully, backend : vgg13_bn
[INFO] create RNN component successfully, rnn : GRU .
[INFO] create classifier successfully.
[INFO] load Problem2 pretrained weight successfully.


# train model

In [5]:
# GPU is useless when batch size = 1
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [6]:
from torch import FloatTensor, LongTensor


epochs = 200
lr=1e-3

criterion = nn.CrossEntropyLoss().to(device)
optimizer = optim.SGD(list(model.RNN.parameters()), lr=lr)

model.to(device)
# model.h0.to(device)


#################### history ####################
history = {}
history['loss'] = []
history['acc'] = []

history['err'] = {}
history['err']['epoch'] = []
history['err']['err_idx'] = []
history['err']['err_msg'] = []
#################################################

for epoch in range(epochs):
    start_time = time()
    total_loss = 0.
    acc = 0.
    cnt = 0
    
    for idx, (X, Y) in enumerate(zip(train_x, train_y)):
        X = np.transpose(X, (0,3,1,2)) # transpose for torch input : shape (f, 240, 320, 3) --> (f, 3, 240, 320)
        
        
#         try:
            # because of lack of calculate resources, i devide each videos to 10 parts with 20 frames foreach.
        interval = 20
        h = model.h0
        for idx in range(0, X.shape[0])[::interval]:
            x = X[idx:idx+interval]
            y = Y[idx:idx+interval]

            x = Variable(FloatTensor(x)).to(device)
            y = Variable(LongTensor(y)).to(device)

            optimizer.zero_grad()
            pred, h = model(x, h)
            loss = criterion(pred, y)
            loss.backward(retain_graph=True)
            optimizer.step()

            acc += (sum(pred.argmax(dim=1) == y).item() / interval)
            total_loss += loss.item()
            cnt += 1


#         except Exception as e:
#             history['err']['epoch'].append(epoch)
#             history['err']['err_idx'].append(idx)
#             history['err']['err_msg'].append(str(e))
        
        
    history['loss'].append(total_loss/cnt)
    history['acc'].append((100*acc)/cnt)
    print('[INFO] epoch (%d/%d), cost: %d sec | loss : %.6f | acc : %.2f%%' % (epoch, epochs, (time()-start_time), (total_loss/cnt), (100*acc/cnt)))


  input = module(input)


[INFO] epoch (0/200), cost: 93 sec | loss : 2.118162 | acc : 42.99%
[INFO] epoch (1/200), cost: 93 sec | loss : 2.055091 | acc : 49.38%
[INFO] epoch (2/200), cost: 93 sec | loss : 2.040490 | acc : 50.88%
[INFO] epoch (3/200), cost: 93 sec | loss : 2.028677 | acc : 51.90%
[INFO] epoch (4/200), cost: 93 sec | loss : 2.020675 | acc : 52.91%
[INFO] epoch (5/200), cost: 93 sec | loss : 2.014509 | acc : 53.42%
[INFO] epoch (6/200), cost: 93 sec | loss : 2.009074 | acc : 54.06%
[INFO] epoch (7/200), cost: 93 sec | loss : 2.004512 | acc : 54.28%
[INFO] epoch (8/200), cost: 93 sec | loss : 2.001054 | acc : 54.55%
[INFO] epoch (9/200), cost: 93 sec | loss : 1.995099 | acc : 55.52%
[INFO] epoch (10/200), cost: 93 sec | loss : 1.989475 | acc : 55.90%
[INFO] epoch (11/200), cost: 93 sec | loss : 1.986925 | acc : 56.12%
[INFO] epoch (12/200), cost: 93 sec | loss : 1.983295 | acc : 56.62%
[INFO] epoch (13/200), cost: 93 sec | loss : 1.979444 | acc : 56.81%
[INFO] epoch (14/200), cost: 93 sec | loss :

[INFO] epoch (119/200), cost: 93 sec | loss : 1.885164 | acc : 66.46%
[INFO] epoch (120/200), cost: 93 sec | loss : 1.885074 | acc : 66.48%
[INFO] epoch (121/200), cost: 93 sec | loss : 1.884904 | acc : 66.43%
[INFO] epoch (122/200), cost: 93 sec | loss : 1.883594 | acc : 66.64%
[INFO] epoch (123/200), cost: 93 sec | loss : 1.884718 | acc : 66.49%
[INFO] epoch (124/200), cost: 93 sec | loss : 1.882836 | acc : 66.62%
[INFO] epoch (125/200), cost: 93 sec | loss : 1.882990 | acc : 66.81%
[INFO] epoch (126/200), cost: 93 sec | loss : 1.880938 | acc : 66.80%
[INFO] epoch (127/200), cost: 93 sec | loss : 1.881171 | acc : 66.81%
[INFO] epoch (128/200), cost: 93 sec | loss : 1.881026 | acc : 66.90%
[INFO] epoch (129/200), cost: 93 sec | loss : 1.880094 | acc : 66.90%
[INFO] epoch (130/200), cost: 93 sec | loss : 1.879983 | acc : 66.84%
[INFO] epoch (131/200), cost: 93 sec | loss : 1.879718 | acc : 67.13%
[INFO] epoch (132/200), cost: 93 sec | loss : 1.878240 | acc : 67.09%
[INFO] epoch (133/20

In [7]:
torch.save(model.classifier.state_dict(), './storage/FVrnnVGG_classifier.pkl')
torch.save(model.RNN.state_dict(), './storage/FVrnnVGG_RNN.pkl')

In [8]:
import pickle


with open('./storage/history_p2_FVrnnVGG_vgg13bn_epoch200', 'wb') as handle:
    pickle.dump(history, handle, protocol=pickle.HIGHEST_PROTOCOL)

## valid and report

In [20]:
task = "valid"

names = []
label_y = []
pred_y = []
correct = 0 
cnt = 0

for label_path in glob('./hw4_data/FullLengthVideos/labels/'+task+'/*') :
    name = label_path.split('/')[-1].replace('.txt','')
#     print('[INFO] load images and label for video :', name, '...', end='')
    pred = []
    
    
    # load labels
    labels = []
    with open(label_path, 'r') as file:
        for label in file:
            labels.append(int(label.replace('\n','')))
    
    with open(os.path.join('./output/', name+'.txt'), 'r') as file:
        for line in file:
            pred.append(int(line.replace('\n','')))
   
    # to numpy
    labels = np.array(labels)
    pred = np.array(pred)
    
    # agg
    if not len(pred) == len(labels):
        raise ValueError('[ERROR] Mismatch between pred and label, pred:%d  , label:%d' % (len(pred, len(labels))))
    cnt += len(pred)
    correct += sum(labels == pred)
    localAcc = 100 * correct/cnt
    
    # finally put into train_x and train_y
    names.append(name)
    label_y.append(labels)
    pred_y.append(pred)
    print('len: %d ... finish, local accuracy:%.2f%%' % (len(labels), localAcc)) 
    
    
print('[INFO] finish, correct:%d and total:%d, accuracy is %.2f%%' % (correct, cnt, 100*correct/cnt))

len: 1012 ... finish, local accuracy:16.50%
len: 1085 ... finish, local accuracy:16.07%
len: 2471 ... finish, local accuracy:11.97%
len: 982 ... finish, local accuracy:12.68%
len: 889 ... finish, local accuracy:13.36%
len: 948 ... finish, local accuracy:13.01%
len: 1551 ... finish, local accuracy:13.11%
[INFO] finish, correct:1172 and total:8938, accuracy is 13.11%


## visual result

In [32]:
# choose first video (acc:16.50%)
idx = 0
name = names[idx]
label = label_y[0]
pred = pred_y[0]

idx = list(np.arange(len(label)))
set(label)

{0, 2, 3, 4, 5, 6, 8}