In [None]:
from glob import glob
from time import time
import os, cv2, copy
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from reader import readShortVideo, getVideoList
from utils import showFrames

# 1. Import the table that contains the video and label info.

In [None]:
task = 'train'
path = './hw4_data/TrimmedVideos/'

# The ground-truth CSV for the chosen split is named gt_<task>.csv and lives
# under the label/ sub-folder of `path`.
label_csv = os.path.join(path, 'label/gt_%s.csv' % task)
train_table = pd.read_csv(label_csv)

# Preview the first three rows.
train_table.head(3)

# 2. Load the training data and labels

In [None]:
# train data
video_path = './hw4_data/TrimmedVideos/video/'+task+'/'

# The original loop stopped unconditionally after the first 10 rows (a bare
# `break` inside the progress branch). That cap is kept as the default but is
# now explicit: set MAX_VIDEOS = None to load the whole table.
MAX_VIDEOS = 10
PROGRESS_EVERY = 10  # print a progress line every N rows

train_x = []
train_y = []


def custom_VideoNameExtractor(video_path, video_category, video_name):
    """Resolve the on-disk file name for a video whose table entry is a prefix.

    Globs for ``<video_path>/<video_category>/<video_name>*`` and returns the
    base name (no directory part) of the first match in sorted order.

    Raises:
        FileNotFoundError: if no file matches the prefix.
    """
    pattern = os.path.join(video_path, video_category, video_name) + '*'
    matches = sorted(glob(pattern))  # sorted -> deterministic pick when several files match
    if not matches:
        raise FileNotFoundError("no video file matches '%s'" % pattern)
    # basename instead of split('/') so this also works with '\\' separators
    return os.path.basename(matches[0])


n_total = len(train_table)
rows = train_table[['Video_category', 'Video_name', 'Action_labels']].iterrows()

# n_seen counts processed rows from 1, so progress and the cap do not depend on
# the DataFrame index being a 0-based RangeIndex.
for n_seen, (idx, value) in enumerate(rows, start=1):
    video_category = value.Video_category

    try:
        # File lookup is inside the try: one missing video is skipped instead
        # of aborting the whole load with an IndexError.
        video_name = custom_VideoNameExtractor(video_path, video_category, value.Video_name)
        frames = readShortVideo(video_path=video_path,
                                video_category=video_category,
                                video_name=video_name)
        # NOTE(review): assumes readShortVideo returns a 0-255 valued array of
        # shape (frames, height, width, 3) — confirm against reader.py.
        train_x.append(frames / 255)
        train_y.append(value.Action_labels)
    except Exception as e:
        print("[WARN] skip video '%s' (row %s) : %s" % (value.Video_name, idx, e))

    if n_seen % PROGRESS_EVERY == 0:
        print("[INFO] loading progress, (%s/%s)" % (n_seen, n_total))

    if MAX_VIDEOS is not None and n_seen >= MAX_VIDEOS:
        break


print("[INFO] load train_x successfully, train_x length :", len(train_x))

# load pretrained model

In [None]:
import torch
from torch.autograd import Variable
from torch import nn
from torch import optim

In [None]:
# Multi Frame VGG model
class MFrnnVGG(nn.Module):
    """Multi-frame video classifier: frozen VGG features -> GRU -> MLP head.

    One forward call processes ONE video. Its frames are pushed through the
    VGG convolutional stack as if they were a batch, the per-frame feature
    vectors are fed to a GRU as a length-``f`` sequence, and the final hidden
    state is classified.

    Args:
        backend: one of 'vgg13', 'vgg13_bn', 'vgg16', 'vgg16_bn'. Any other
            value is replaced by 'vgg16_bn' (with an info message).
        pretrained: forwarded to the torchvision VGG constructor.
        n_label: number of output classes.

    Note:
        ``forward`` returns raw logits (no softmax). ``nn.CrossEntropyLoss``
        applies log-softmax itself; use ``torch.softmax(out, dim=1)`` when
        probabilities are needed.
    """

    _VGG_BACKENDS = ('vgg13', 'vgg13_bn', 'vgg16', 'vgg16_bn')
    _VGG_FEATURE_SIZE = 25088   # 512 channels * 7 * 7 after VGG's avgpool
    _DEFAULT_HIDDEN_SIZE = 4096

    def __init__(self, backend='vgg16', pretrained=True, n_label=11):
        super(MFrnnVGG, self).__init__()

        ### check valid
        if backend not in self._VGG_BACKENDS:
            print("[INFO] invalid backend '%s', change to 'vgg16_bn'" % backend)
            backend = 'vgg16_bn'

        ### init param
        self.backend = backend
        self.pretrained = pretrained
        # model flow
        self.features = None
        self.avgpool = None
        self.RNN = None
        self.h0 = None # follow RNN
        self.c0 = None # follow RNN (unused by the GRU variant)
        self.classifier = None

        ### init process
        self.load_pretrained() # load features
        self.create_RNN() # create RNN
        self.create_classifier(n_label) # create last layer
        self.fix_features() # fix features weights

    def forward(self, input):
        '''
        input shape : (frame, channel, height, width)
        output shape : (1, n_label), raw logits
        '''
        if input.dim() != 4:
            raise ValueError("[ERROR] expected input of shape (frame, channel, height, width), got %s"
                             % (tuple(input.shape),))

        # regard f:frames as b:batch
        x = self.features(input) # e.g. (f, 512, 7, 10) for 240x320 frames
        x = self.avgpool(x) # shape (f, 512, 7, 7)

        x = torch.flatten(x, start_dim=1) # (f, 25088)
        x = torch.unsqueeze(x, 0) # (1, f, 25088)

        # h0 is a plain tensor attribute, so Module.to()/cuda() does not move
        # it; align it with the activations here to avoid a device mismatch.
        h0 = self.h0.to(device=x.device, dtype=x.dtype)
        _, h_n = self.RNN(x, h0) # h_n : (num_layers, 1, hidden_size)

        # Last layer's final hidden state. Equals squeeze(h_n, 0) when
        # num_layers == 1 and stays (1, hidden_size) for deeper GRUs.
        x = h_n[-1]

        return self.classifier(x) # (1, n_label)

    def load_pretrained(self):
        """Copy ``features`` and ``avgpool`` from the torchvision VGG backend.

        Raises:
            ValueError: if ``self.backend`` is not a supported VGG variant.
            Exception: anything raised by torchvision (e.g. a failed weight
                download) propagates instead of leaving a half-built model.
        """
        import torchvision.models as models

        if self.backend not in self._VGG_BACKENDS:
            raise ValueError("[ERROR] Unexpected backend name pass through previous check then into load_pretrained() .")

        # The supported backend names match the torchvision constructor names.
        backend_model = getattr(models, self.backend)(pretrained=self.pretrained)

        # copy features flow
        self.features = copy.deepcopy(backend_model.features)
        self.avgpool = copy.deepcopy(backend_model.avgpool)
        print("[INFO] load pretrained features successfully, backend : %s" % self.backend)

    def create_RNN(self, rnn='GRU', hidden_size=4096, num_layers=1, batch_first=True):
        '''
        Build the recurrent component and its zero initial hidden state.

        output (batch, seq, hidden_size)
        h_out (n_layer, batch, hidden_size)

        Raises ValueError for an unsupported backend or rnn type.
        forward() feeds a (1, frames, features) tensor, i.e. it relies on
        batch_first=True and a batch size of 1.
        '''
        if self.backend not in self._VGG_BACKENDS:
            raise ValueError("[ERROR] Unexpected backend name pass through previous check then into create_outLayer() .")
        if rnn != 'GRU':
            raise ValueError("[ERROR] Unexpected rnn '%s', please select one in ['GRU']" % rnn)

        self.RNN = nn.GRU(
            input_size=self._VGG_FEATURE_SIZE,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=batch_first,
        )
        self.h0 = torch.zeros((num_layers, 1, hidden_size)) # batch_size = 1

        print("[INFO] create RNN component successfully, rnn : %s ." % rnn)

    def create_classifier(self, n_label=11):
        """Build the MLP head mapping the GRU hidden state to ``n_label`` logits.

        The input width follows the GRU's ``hidden_size`` when the RNN already
        exists, otherwise the default of 4096.

        Raises:
            ValueError: if ``self.backend`` is not a supported VGG variant.
        """
        if self.backend not in self._VGG_BACKENDS:
            raise ValueError("[ERROR] Unexpected backend name pass through previous check then into create_outLayer() .")

        rnn = getattr(self, 'RNN', None)
        in_features = rnn.hidden_size if rnn is not None else self._DEFAULT_HIDDEN_SIZE

        # No trailing Softmax: the head emits logits, which is what
        # nn.CrossEntropyLoss expects.
        self.classifier = nn.Sequential(
            nn.Linear(in_features, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            nn.Linear(4096, 1000),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            nn.Linear(1000, n_label),
        )

        print("[INFO] create classifier successfully.")

    def fix_features(self): # fix features weights
        """Disable gradients for every parameter of the convolutional features."""
        for param in self.features.parameters():
            param.requires_grad = False


# Build the network on the batch-norm VGG16 backbone; `pretrained` and
# `n_label` are left at the constructor defaults (True and 11).
model = MFrnnVGG(backend='vgg16_bn')

In [None]:
# Smoke test: push one random 4-frame clip through the model and check the
# output shape. The tensor is not called `input`, so the builtin stays usable
# for the rest of the kernel session, and no_grad() avoids building an
# autograd graph for a throw-away forward pass.
dummy_clip = torch.randn(4, 3, 240, 320) # (frame, channel, height, width)
with torch.no_grad():
    a = model(dummy_clip)
print(a.shape)

In [None]:
# Tensor.unsqueeze is not in-place: it returns a new view with the extra
# dimension, so the result has to be bound to a name to be inspected.
a = torch.ones((2,3))
b = a.unsqueeze(0)
print(a.shape, b.shape)

In [None]:
# Stand-alone check of nn.GRU tensor shapes with batch_first=True.
input_size = 30

h_size = 20
batch = 5
n_layer = 2
seq_len = 3

gru = nn.GRU(input_size=input_size, hidden_size=h_size, num_layers=n_layer, batch_first=True)

# Named `gru_input` rather than `input` so the builtin is not shadowed at
# kernel scope.
gru_input = torch.randn(batch, seq_len, input_size) # (batch, seq_len, input_size)
h0 = torch.ones(n_layer, batch, h_size) # (num_layers, batch, hidden_size)
output, hn = gru(gru_input, h0)
# output : (batch, seq_len, hidden_size) ; hn : (num_layers, batch, hidden_size)
print(output.size(), hn.size())

# train model

In [None]:
# Author's note: GPU is useless when batch size = 1
# Use CUDA when torch reports it as available, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [None]:
from torch import FloatTensor, LongTensor


epochs = 100
lr=1e-3

# Move the model to the target device before handing its parameters to the
# optimizer.
model.to(device)
criterion = nn.CrossEntropyLoss().to(device)
optimizer = optim.Adam(model.parameters(), lr=lr)


for epoch in range(epochs):
    start_time = time()
    total_loss = 0.
    acc = 0.
    cnt = 0  # number of samples that completed a full optimisation step

    # One video per step: the model treats the frames of a clip as its batch.
    for idx, (x, y) in enumerate(zip(train_x, train_y)):
        x = np.transpose(x, (0,3,1,2)) # transpose for torch input : shape (f, 240, 320, 3) --> (f, 3, 240, 320)

        try:
            # Plain tensors replace the deprecated Variable/FloatTensor/LongTensor wrappers.
            x = torch.as_tensor(x, dtype=torch.float32).to(device)
            y = torch.as_tensor([int(y)], dtype=torch.long).to(device) # shape (1,)

            optimizer.zero_grad()
            pred = model(x)
            loss = criterion(pred, y)
            loss.backward()
            optimizer.step()

            # y holds the class index itself; y.argmax() would always be 0 for
            # a one-element tensor, so compare against y.item().
            acc += 1. if pred.argmax(dim=1).item() == y.item() else 0.
            total_loss += loss.item()
            cnt += 1
        except Exception as e:
            # Best effort: report the failing sample and keep training on the rest.
            print('[ERROR] idx : %d, error msg : %s ' % (idx, e))

    elapsed = time() - start_time
    if cnt == 0:
        # Every sample failed (or there was no data); avoid dividing by zero.
        print('[WARN] epoch (%d/%d), cost: %d sec | no sample was trained successfully' % (epoch + 1, epochs, elapsed))
        continue

    print('[INFO] epoch (%d/%d), cost: %d sec | loss : %.6f | acc : %.2f' % (epoch + 1, epochs, elapsed, (total_loss/cnt), (100*acc/cnt)))


In [None]:
# 0epoch
# Display `pred`, the model output left over from the last training step that
# completed. NOTE(review): "0epoch" presumably marks a snapshot taken before
# or at the first epoch — confirm.
pred

In [None]:
# Display the parameter tensors of the classifier head for manual inspection.
model.classifier.state_dict()

In [None]:
# 0epoch
# Display the parameter tensors of the convolutional feature extractor.
# NOTE(review): presumably used to check that the frozen weights stay
# unchanged across training — confirm.
model.features.state_dict()