Import the required libraries and the project modules.

In [None]:
import torch
import numpy as np
import torch.nn as nn
import pickle
from matplotlib import pyplot as plt
from torchvision import transforms
from torch.autograd import Variable
from torch.utils.data.dataloader import DataLoader
from torch.utils.data import TensorDataset
import PretrainedCNN
import VideoPooling as VP
import PredictionHead as PH
import VideoDataLoader as VDL
import DataPreprocessing as DP
#from solver import Solver

Paths to the datasets. Each path identifies the top-level folder of one dataset.

In [None]:
# Both datasets live next to each other in the same parent folder, so the
# common prefix is spelled out once and the two top-level folders are derived from it.
dataset_root = r'C:\Users\Janis\FinalDeepLearningProject\TUM_DeepLearningProject-master\datasets\Frames_for _Damian'

# Reduced dataset
path_small = dataset_root + '\\' + 'Frames_for _Damian_small'
# Full dataset
path = dataset_root + '\\' + 'Frames_for _Damian'

Load the video data into the dictionary `data`.
The dictionary has three entries:
    data['data']  -> torch.FloatTensor with video frames
    data['targets'] -> np array with classes
    data['video_frames'] -> np array with number of frames belonging to each video

If we have already loaded the pictures, normalized them and stored them in a pickle file, we can load them from the pickle file instead.

In [None]:
# Alternative data sources (use one of them instead of the default below):
#   - load the raw frames from disk:   data = VDL.load_videos(path, resize_images=True)
#   - load the small normalized set:   replace the file name below with 'data_small_norm_56.p'
#
# NOTE: unpickling can execute arbitrary code, so only open pickle files you created yourself.
# The context manager makes sure the file handle is closed again after loading.
with open('data_norm_56.p', 'rb') as data_file:
    data = pickle.load(data_file)

The following two steps only need to be executed if the data is loaded for the first time. Normalize the data -> zero mean, std = 1

In [None]:
# Replace the stored frame tensor by the result of DP.normalize.
data.update({'data': DP.normalize(data['data'])})

Dump the result in a pickle file for later usage

In [None]:
# Persist the data dictionary so that later sessions can load it directly from the pickle file.
# The context manager closes (and thereby flushes) the file even if pickling fails.
with open('data_norm_56.p', 'wb') as data_file:
    pickle.dump(data, data_file)

Run all the training examples through the CNN and apply a pooling strategy to obtain one representation per video. Due to memory issues, we store the result in a pickle file after every 5 training examples.

In [None]:
#Number of training examples
N = data['targets'].shape[0]

#Number of videos whose pooled features are buffered in memory before they are
#written to disk. The cell that merges the 'out<i>.p' files assumes a value of 5.
chunk_size = 5

#Customized dataloader.
batchloader = VDL.iterate_videos(data)

#Get fully convolutional network for feature extraction on frame level
pretrained_model = PretrainedCNN.Fully_Conv_Block('vgg11')

#The network is only used for inference here, so switch it to evaluation mode
#(assumes Fully_Conv_Block is a torch.nn.Module, as the .cuda() call suggests)
pretrained_model.eval()

#Check once whether cuda is available and utilize it if possible
use_cuda = torch.cuda.is_available()
if use_cuda:
    pretrained_model.cuda()


def dump_video_representations(rows, upto):
    """Stack the buffered per-video rows and pickle them to 'out<upto>.p'.

    rows -- list of tensors, one row of pooled features per video
    upto -- number used in the file name
    """
    with open('out' + str(upto) + '.p', 'wb') as out_file:
        pickle.dump(torch.cat(rows, 0), out_file)


#Some funny starting signal
print('gOooo o_O O_o')

#pooled features of the videos that have not been written to disk yet
chunk_rows = []

#No gradients are needed for feature extraction. Disabling autograd keeps the
#computation graph from being retained, which is the main memory consumer here.
#(torch.no_grad requires PyTorch >= 0.4)
with torch.no_grad():

    #iterate over training examples
    for i in range(N):

        #store output of the convolutional network after every chunk_size examples
        #and empty the buffer. This is done to prevent OutOfMemoryError
        if i != 0 and i % chunk_size == 0:
            dump_video_representations(chunk_rows, i)
            chunk_rows = []

        print('...forwarding frame ' + str(i) + '...')

        #next batch; the second element yielded by the loader is not needed here
        batch, _targets = next(batchloader)

        if use_cuda:
            batch = batch.cuda()

        #output of CNN
        xout = pretrained_model(batch)

        #apply pooling strategy.
        #here we use simple average pooling
        xout = VP.average_pooling(xout)

        #buffer the pooled output as one row
        chunk_rows.append(xout.view(1, xout.size()[0]))

        del batch, xout

#store the output of the last iterations in a pickle file
#(nothing to store if there were no training examples at all)
if chunk_rows:
    dump_video_representations(chunk_rows, N)

Load the CNN outputs produced above and merge them into a single tensor

In [None]:
N = data['targets'].shape[0]

#The feature extraction cell writes one file after every 5 videos
#('out5.p', 'out10.p', ...) plus a final 'out<N>.p' with the remaining videos.
chunk_size = 5

if N == 0:
    raise ValueError('no training examples, nothing to merge')

#File indices in the order in which the videos were processed. Building the list
#explicitly also works for N < chunk_size, where only 'out<N>.p' exists.
chunk_ends = list(range(chunk_size, N, chunk_size)) + [N]

chunks = []
for end in chunk_ends:
    print('...loading frames ' + str(end) + '...')
    #NOTE: only unpickle files that were produced by this notebook
    with open('out' + str(end) + '.p', 'rb') as chunk_file:
        chunk = pickle.load(chunk_file)
    print(chunk.shape)
    chunks.append(chunk)

#concatenate once instead of growing the tensor file by file
conv_out = torch.cat(chunks, 0)
print(conv_out.shape)

Dump into pickle file for later usage

In [None]:
# Store the merged CNN output as a plain numpy array (moved to the CPU first).
# The context manager closes (and thereby flushes) the file even if pickling fails.
with open('conv_out_56_all.p', 'wb') as conv_file:
    pickle.dump(conv_out.detach().cpu().numpy(), conv_file)

Load the output of the CNN from a pickle file and split the training and the validation data

In [None]:
#load from pickle (only unpickle files that were produced by this notebook)
with open('conv_out_56_all.p', 'rb') as conv_file:
    conv_out = pickle.load(conv_file)

#number of training examples
N = conv_out.shape[0]

#every row of the CNN output needs exactly one target
assert data['targets'].shape[0] == N, 'number of CNN outputs and number of targets differ'

#separate validation from training set: one eighth of the examples, drawn without
#replacement. A dedicated, seeded generator makes the split reproducible.
split_rng = np.random.RandomState(0)
idx_val = split_rng.choice(np.arange(0, N), size=N // 8, replace=False)
val_data = conv_out[idx_val]
val_targets = data['targets'][idx_val]
train_data = np.delete(conv_out, idx_val, axis=0)
train_targets = np.delete(data['targets'], idx_val, axis=0)

#store training and validation data in TensorDataset objects.
#CrossEntropyLoss expects the class indices as int64, so cast the targets explicitly
#(numpy integer arrays are e.g. int32 by default on Windows).
train_dataset = TensorDataset(torch.from_numpy(train_data), torch.from_numpy(train_targets).long())
val_dataset = TensorDataset(torch.from_numpy(val_data), torch.from_numpy(val_targets).long())

Now we can train a classifier on the pooled output of the CNN to predict the genre of the video game.

In [None]:
# Defining hyperparameters
epochs = 200
learning_rate = 0.01
output_classes = 2

# Fix the seed so that weight initialization and shuffling are reproducible
torch.manual_seed(0)

# define the network that is being used for prediction:
# input size = feature size of the CNN output, hidden size = half of it
pred_head = PH.ThreeLayerFCN([conv_out.shape[1], conv_out.shape[1] // 2, output_classes])

# Check once whether cuda is available and utilize it if possible
use_cuda = torch.cuda.is_available()
if use_cuda:
    pred_head.cuda()

# Get a dataloader for training and validation data.
# The training examples are reshuffled every epoch so that SGD does not see
# them in the same order over and over again.
train_dataloader = DataLoader(train_dataset, batch_size=1, shuffle=True)
val_dataloader = DataLoader(val_dataset, batch_size=1)

# Used optimization method + its parameters
optimizer = torch.optim.SGD(pred_head.parameters(), lr=learning_rate, momentum=0.9)

# Used loss function (created once instead of once per training step)
loss_func = torch.nn.CrossEntropyLoss()

# store the history of the learning process
loss_history = []          # one python float per training step
pred_scores_history = []   # validation accuracy, one entry per epoch

# iterate over epochs (exactly `epochs` of them)
for epoch in range(epochs):

    # training mode (assumes ThreeLayerFCN is a torch.nn.Module, as .parameters()/.cuda() suggest)
    pred_head.train()

    # iterate over batches
    for inputs, targets in train_dataloader:

        if use_cuda:
            inputs = inputs.cuda()
            targets = targets.cuda()

        # Zero the gradients of the model parameters
        optimizer.zero_grad()

        # Compute the output of the model
        outputs = pred_head(inputs)

        # Compute loss with respect to the targets
        loss = loss_func(outputs, targets)

        # compute gradients of parameters
        loss.backward()

        # update parameters
        optimizer.step()

        # Store only the number. Appending the tensor itself would keep the
        # computation graph of every single step alive.
        loss_history.append(loss.item())

    # iterate over the validation set and collect the classification results;
    # no gradients are needed here (torch.no_grad requires PyTorch >= 0.4)
    pred_head.eval()
    pred_scores = []
    with torch.no_grad():
        for inputs, targets in val_dataloader:

            if use_cuda:
                inputs = inputs.cuda()
                targets = targets.cuda()

            outputs = pred_head(inputs)

            # get the prediction -> index of maximum of the output
            preds = torch.max(outputs, 1)[1]

            # zero for wrong and 1 for correct classification
            scores = (preds.cpu() == targets.cpu()).numpy()

            pred_scores.append(scores)

    # store the accuracy for each epoch
    epoch_accuracy = np.mean(pred_scores)
    pred_scores_history.append(epoch_accuracy)
    print(epoch_accuracy)

In [None]:
# plot accuracy on validation set depending on the number of epochs;
# axis labels and title let the figure stand on its own
fig, ax = plt.subplots()
ax.plot(pred_scores_history)
ax.set(xlabel='epoch', ylabel='validation accuracy', title='Validation accuracy per epoch')
plt.show()