In [None]:
from glob import glob
from time import time
import os, cv2, copy
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from reader import readShortVideo, getVideoList
from utils import showFrames

# 1. Import the table that contains the video and label info.

In [None]:
# Dataset split to work on and the root folder of the trimmed-video dataset.
task = 'train'
path = './hw4_data/TrimmedVideos/'

# Ground-truth table for the chosen split lives at <path>/label/gt_<task>.csv.
train_table = pd.read_csv(
    os.path.join(path, 'label/gt_%s.csv' % task)
)
# Preview the first three rows.
train_table.head(3)

In [None]:
# Show the column labels of the label table loaded above.
train_table.columns

# 2. Load the training data and labels

In [None]:
# train data
# Read every clip listed in `train_table` and collect (frames, label) pairs.
video_path = './hw4_data/TrimmedVideos/video/'+task+'/'
train_x = []
train_y = []

# Optional cap on how many clips are loaded: set to a small int for a quick
# smoke run, leave as None to load the whole table.
max_videos = None


def custom_VideoNameExtractor(video_path, video_category, video_name):
    """Resolve the on-disk file name of a clip from its name prefix.

    Globs for entries under ``video_path/video_category`` whose name starts
    with ``video_name`` and returns the base name of the first match
    (matches are sorted so the choice is deterministic).

    Raises:
        FileNotFoundError: if nothing matches the prefix.
    """
    pattern = os.path.join(video_path, video_category, video_name) + '*'
    matches = sorted(glob(pattern))
    if not matches:
        raise FileNotFoundError("no video file matches '%s'" % pattern)
    # basename instead of split('/') so the separator of the host OS is honoured.
    return os.path.basename(matches[0])


for idx, value in train_table[['Video_category', 'Video_name', 'Action_labels']].iterrows():
    if max_videos is not None and len(train_x) >= max_videos:
        break

    video_category = value.Video_category
    try:
        video_name = custom_VideoNameExtractor(video_path, video_category, value.Video_name)
        frames = readShortVideo(video_path=video_path,
                                video_category=video_category,
                                video_name=video_name)
        # Divide by 255 as before, but in float32 to halve memory.
        # NOTE(review): presumably frames are 8-bit images, giving [0, 1] values - confirm.
        clip = np.asarray(frames, dtype=np.float32) / 255
        label = value.Action_labels
    except Exception as err:
        # Report and skip the clip instead of silently swallowing the error.
        print("[WARN] skip video '%s' (row %s): %s" % (value.Video_name, idx, err))
        continue

    # Append both only after everything succeeded so the two lists stay aligned.
    train_x.append(clip)
    train_y.append(label)

    if idx % 100 == 0:
        print("[INFO] loading progress, (%s/%s)" % (idx, len(train_table)))

assert len(train_x) == len(train_y), "train_x and train_y are out of sync"
print("[INFO] load train_x successfully, train_x length :", len(train_x))

# load pretrained model

In [None]:
import torch
from torch.autograd import Variable
from torch import nn
from torch import optim

In [None]:
# Multi Frame VGG model
class MFVGG(nn.Module):
    """VGG16-based classifier that scores a whole clip of frames at once.

    Each frame goes through the VGG convolutional features (frozen by
    ``fix_features``), the per-frame feature vectors are averaged over the
    frame axis, and the averaged vector is passed through the VGG classifier
    followed by a task-specific output layer.

    Args:
        backend: 'vgg16' or 'vgg16_bn'; any other value falls back to
            'vgg16_bn' with an informational message.
        pretrained: forwarded to the torchvision model constructor.
        n_label: number of output classes.
    """

    # Backends this wrapper knows how to build.
    _BACKENDS = ('vgg16', 'vgg16_bn')

    def __init__(self, backend='vgg16', pretrained=True, n_label=11):
        super(MFVGG, self).__init__()

        ### check valid
        if backend not in self._BACKENDS:
            print("[INFO] invalid backend '%s', change to 'vgg16_bn'" % backend)
            backend = 'vgg16_bn'

        ### init param
        self.backend = backend
        self.pretrained = pretrained
        # model flow
        self.features = None
        self.avgpool = None
        self.classifier = None
        self.outLayer = None # customize output for task : Linear(1000, n_label)

        ### init process
        self.load_pretrained() # load features / avgpool / classifier
        self.create_outLayer(n_label) # create last layer
        self.fix_features() # fix features weights

    def forward(self, input):
        '''
        input shape : (frame, channel, height, width)
        output shape : (1, n_label), rows are softmax probabilities
        '''
        if input.dim() != 4:
            raise ValueError("expected a 4-D (frame, channel, height, width) input, got %d-D" % input.dim())

        # regard f:frames as b:batch
        x = self.features(input) # (f, C, H', W'); (f, 512, 7, 10) for 240x320 frames
        x = self.avgpool(x) # (f, 512, 7, 7) with the torchvision VGG avgpool

        x = torch.flatten(x, start_dim=1) # (f, 25088)
        x = torch.mean(x, 0, keepdim=True) # average over frames -> (1, 25088)

        x = self.classifier(x) # (1, 1000)
        x = self.outLayer(x) # (1, n_label)
        return x

    def load_pretrained(self):
        """Build the torchvision backend and adopt its sub-modules.

        Sets ``self.features``, ``self.avgpool`` and ``self.classifier``.

        Raises:
            ValueError: if ``self.backend`` is not a supported backend. Errors
                from building the torchvision model also propagate, so a failed
                load is reported here rather than as an AttributeError on None later.
        """
        if self.backend not in self._BACKENDS:
            raise ValueError("[ERROR] Unexpected backend name pass through previous check then into load_pretrained() .")

        import torchvision.models as models
        if self.backend == 'vgg16':
            backend_model = models.vgg16(pretrained=self.pretrained)
        else:
            backend_model = models.vgg16_bn(pretrained=self.pretrained)

        # backend_model is a throw-away local, so its sub-modules can be
        # adopted directly without a deep copy.
        self.features = backend_model.features
        self.avgpool = backend_model.avgpool
        self.classifier = backend_model.classifier
        print("[INFO] load pretrained features successfully, backend : %s" % self.backend)

    def create_outLayer(self, n_label=11):
        """Create the task head: Linear(1000, n_label) followed by Softmax.

        The head therefore emits probabilities, not raw logits; a loss that
        expects logits (e.g. nn.CrossEntropyLoss) would apply a second softmax.

        Raises:
            ValueError: if ``self.backend`` is not a supported backend.
        """
        if self.backend not in self._BACKENDS:
            raise ValueError("[ERROR] Unexpected backend name pass through previous check then into create_outLayer() .")
        self.outLayer = nn.Sequential(
            nn.Linear(1000, n_label),
            nn.Softmax(dim=1),
        )

    def fix_features(self): # fix features weights
        """Disable gradients for every parameter of ``self.features``.

        Only ``requires_grad`` is changed; the train/eval mode of the modules
        is left untouched.
        """
        for param in self.features.parameters():
            param.requires_grad = False


# Build the model on the batch-norm VGG16 backend; the remaining arguments use
# the MFVGG defaults (pretrained=True, n_label=11).
model = MFVGG(backend='vgg16_bn')
            

# train model

In [None]:
# Pick CUDA when it is available, otherwise fall back to the CPU.
# NOTE(review): the original note says a GPU is useless at batch size 1 - confirm.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [None]:
from torch import FloatTensor, LongTensor


epochs = 1
lr=1e-3
# zip() below stops at the shorter list, so that is the number of samples trained on.
n_data = min(len(train_x), len(train_y))
if n_data == 0:
    raise ValueError("[ERROR] no training samples loaded; train_x / train_y are empty.")

# The model's output layer already ends in Softmax, so the loss is NLLLoss over
# log-probabilities; nn.CrossEntropyLoss expects raw logits and would apply a
# second softmax on top.
criterion = nn.NLLLoss().to(device)
# Move the model first so the optimizer is built over the on-device parameters.
model.to(device)
optimizer = optim.Adam(model.parameters(), lr=lr)


for epoch in range(epochs):
    start_time = time()
    total_loss = 0.
    acc = 0.

    for x, y in zip(train_x, train_y):
        # Move axis 3 to position 1: (f, h, w, c) --> (f, c, h, w).
        # NOTE(review): assumes each clip is channels-last with 3 channels - confirm.
        x = np.transpose(x, (0,3,1,2))
        y = np.array([y]) # shape (1,)

        x = FloatTensor(x).to(device)
        y = LongTensor(y).to(device)

        optimizer.zero_grad()
        pred = model(x) # (1, n_label) probabilities
        # Clamp before the log so a probability of exactly 0 cannot yield -inf.
        loss = criterion(torch.log(pred.clamp(min=1e-12)), y)
        loss.backward()
        optimizer.step()

        # y holds the class index itself, so compare against its value;
        # argmax over a one-element label tensor is always 0.
        acc += 1. if pred.argmax(dim=1).item() == y.item() else 0.
        total_loss += loss.item()

    elapsed = time() - start_time
    print('[INFO] epoch (%d/%d), cost: %d sec | loss : %.6f | acc : %.2f' % (epoch + 1, epochs, elapsed, (total_loss/n_data), (100*acc/n_data)))
