In [1]:
import torch , torchvision
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torchvision import transforms
import time
from collections import OrderedDict
from torchvision import models
import pytz
from datetime import datetime

from custom_dataset import Train_Dataset, Valid_Dataset, Test_Dataset

import warnings
warnings.filterwarnings('ignore')

In [2]:
# Hyperparameters
features = 64
num_layers = 10 # the sequence 
hidden_size = 32
batch_size = 8
lr = 0.001
epoch = 1
device = 'cuda'

In [3]:
class CnnGru(nn.Module):
    def __init__(self,features,num_layers,hidden_size,batch_size):
        super(CnnGru,self).__init__()
        self.features = features
        self.num_layers = num_layers # the sequence 
        self.hidden_size = hidden_size
        self.batch_size = batch_size
        
        # CNN
        self.conv2d_1 = nn.Conv2d(3, 16, 3, 1, 1) # 224 x224
        self.conv2d_2 = nn.Conv2d(16, 32, 3, 1, 1) # 112 x112
        self.conv2d_3 = nn.Conv2d(32, 64, 3, 1, 1) # 

        self.maxPool2d_1 = nn.MaxPool2d((2, 2))
        self.maxPool2d_2 = nn.MaxPool2d((2, 2))
        self.maxPool2d_3 = nn.MaxPool2d((2, 2))

        self.flatten_1 = nn.Flatten()
        self.fc1 = nn.Linear(50176,features)

        # RNN
        self.grucell = nn.GRUCell(input_size=features, hidden_size=hidden_size) #, num_layers = num_layers, batch_first = True)
        self.fc2 = nn.Linear(hidden_size, 1)  
        
    def forward_cnn(self, x: torch.Tensor):
        x = F.relu(self.conv2d_1(x))
        x = self.maxPool2d_1(x)
        x = F.relu(self.conv2d_2(x))
        x = self.maxPool2d_2(x)
        x = F.relu(self.conv2d_3(x))
        x = self.maxPool2d_3(x)
        x = self.flatten_1(x)
        x = self.fc1(x) 
        x = F.relu(x)
        return x

    def forward_rnn(self, x: torch.Tensor):
        output = []
#         h0 = torch.zeros(self.num_layers,x.size(0),self.hidden_size).to(device)
#         print(f'input in rnn:{x.shape}')
#         print(f'input of h0: {h0.shape}')
#         out,_ = self.gru(x,h0)
#         print(f'out shape:{out.shape}')
#         out = out.reshape(out.shape[0],-1)
#         out = self.fc2(out)
#         print(f'rnn output:{out.shape}')
#         out = torch.squeeze(out)
#         out = out.permute(1,0)
#         print(f'reshape out shape: {out.shape}')

        #h0 = torch.zeros(x.size(0),self.hidden_size).to(device)
        h0 = torch.zeros(8,self.hidden_size).to(device)
        
        #print("ho: ", h0)
        for frame in x.split(1,dim=1):
           # print("frame: ", frame)
            frame = torch.squeeze(frame)
#             print(f'each frame: {frame.shape}')
            h0 = self.grucell(frame,h0)
            out =  self.fc2(h0)
            output += [out]
        output = torch.cat(output, dim=1)
#         print(f'output shape:{output.shape}')
        # 8x10x1
        return output

    def forward(self, x: torch.Tensor):
        batch_size, timesteps, C, H, W = x.size()
        gru_in = torch.zeros(batch_size, timesteps, self.features).to(device)
#         print(f'x.shape is:{x.shape}')
        
        for i in range(self.num_layers):
            temp = x[:,i,:,:,:]
#             print(f'temp shape: {temp.shape}')
            temp = self.forward_cnn(temp)
#             print(f'cnn output: {temp.shape}')
            gru_in[:,i,:] = temp
            
#         print(f'gru_in: {gru_in}')  
#         print(f'gru_in shape: {gru_in.shape}')
        
        output = self.forward_rnn(gru_in)

        return output

In [4]:
train_dataset = Train_Dataset()
valid_dataset = Valid_Dataset()
test_dataset = Test_Dataset()

train_loader = DataLoader(train_dataset, batch_size = batch_size, shuffle = True)
test_loader = DataLoader(test_dataset, batch_size = batch_size, shuffle = True)
val_loader = DataLoader(valid_dataset, batch_size = batch_size, shuffle = True)
print_every = len(train_loader)

In [5]:
# dataiter = iter(train_loader)
# f, a, v = dataiter.next()
# print(a.shape)

In [6]:
def validation_arousal(model, validloader, criterion, device):
    test_loss = 0

    for f,a,v in validloader:
        f,a,v = f.to(device), a.to(device), v.to(device)

        output = model.forward(f)
        test_loss += criterion(output, a).item()

    return test_loss

In [7]:
def train_arousal(model, n_epochs=5):
    print(f'Training custom CNN-GRU model to predict arousal values')
    print(f'total epochs: {n_epochs}')
    start = time.time()
    model_name = 'arousal model'
    model.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    criterion = torch.nn.MSELoss()
    
    steps = 0 
    train_loss = 0
    val_loss_lst=[]
    train_loss_lst=[]
    Singapore = pytz.timezone('Asia/Singapore')
    
    for epoch in range(1, n_epochs + 1):
        # Training
        model.train()
        batch = 1
        for frames, arousal, valence in train_loader:
            print(f'epoch:{epoch} batch:{batch}')
            frames, arousal, valence = frames.to(device),arousal.to(device),valence.to(device)
            steps+=1
            
            optimizer.zero_grad()
            print(frames.shape)
            output = model.forward(frames)
#             print(f'output is: {output}')
            # getting loss
            loss = criterion(output, arousal)
            loss.backward()
            optimizer.step()
            
            train_loss += loss.item()

            # At the end of every epoch ... (print_every is the length of train_loader)
            if steps % print_every == 0:
                # Eval mode
                model.eval()
                # Turn off gradients for validation
                with torch.no_grad():
                    val_loss = validation_arousal(model, val_loader, criterion, device)

                print("Epoch: {}/{} - ".format(epoch, n_epochs),
                      "Time: {} ".format(datetime.now(Singapore)),
                      "Training Loss: {:.3f} - ".format(train_loss/len(train_loader)),
                      "Validation Loss: {:.3f} - ".format(val_loss/len(val_loader)))               
                    
#                 print("Epoch: {}/{} Train Loss: {:.6f} Validation Loss: {:.6f}".format(epoch, n_epochs, train_loss/len(train_loader), val_loss/len(val_loader)))
#                 if accuracy > best_accuracy:
#                     best_accuracy = accuracy
#                     torch.save(model.state_dict(), "model.pth")
                                                  
                val_loss_lst.append(val_loss/len(val_loader))
                train_loss_lst.append(train_loss/len(train_loader))
                train_loss = 0
            batch+=1
    
    print('model:', model_name,'- epochs:', n_epochs)
    print(f"Run time: {(time.time() - start)/60:.3f} min")
    
    return model, train_loss_lst, val_loss_lst

In [8]:
arousal_model, train_lost, val_lost = train_arousal(model = CnnGru(features,num_layers,hidden_size,batch_size), n_epochs = epoch)

Training custom CNN-GRU model to predict arousal values
total epochs: 1
epoch:1 batch:1
torch.Size([8, 10, 3, 224, 224])
epoch:1 batch:2
torch.Size([8, 10, 3, 224, 224])
epoch:1 batch:3
torch.Size([8, 10, 3, 224, 224])
epoch:1 batch:4
torch.Size([8, 10, 3, 224, 224])
epoch:1 batch:5
torch.Size([8, 10, 3, 224, 224])
epoch:1 batch:6
torch.Size([8, 10, 3, 224, 224])
epoch:1 batch:7
torch.Size([8, 10, 3, 224, 224])
epoch:1 batch:8
torch.Size([8, 10, 3, 224, 224])
epoch:1 batch:9
torch.Size([8, 10, 3, 224, 224])
epoch:1 batch:10
torch.Size([8, 10, 3, 224, 224])
epoch:1 batch:11
torch.Size([8, 10, 3, 224, 224])
epoch:1 batch:12
torch.Size([8, 10, 3, 224, 224])
epoch:1 batch:13
torch.Size([8, 10, 3, 224, 224])
epoch:1 batch:14
torch.Size([8, 10, 3, 224, 224])
epoch:1 batch:15
torch.Size([8, 10, 3, 224, 224])
epoch:1 batch:16
torch.Size([8, 10, 3, 224, 224])
epoch:1 batch:17
torch.Size([8, 10, 3, 224, 224])
epoch:1 batch:18
torch.Size([8, 10, 3, 224, 224])
epoch:1 batch:19
torch.Size([8, 10, 3

In [None]:
print(train_lost)
print(val_lost)

In [None]:
import matplotlib.pyplot as plt
import numpy as np
model_name = 'arousal train'
e = [i for i in range(1, epoch+1)]
print(e)
#     train_loss = loss_acc[0]
#     val_loss = loss_acc[1]       
plt.plot(e,train_lost, label='Training Loss')
plt.plot(e,val_lost, label='Validation Loss')
plt.xticks(np.arange(min(e), max(e)+1, 1.0))
plt.legend()
plt.title(f'{model_name} loss',color='black')
plt.xlabel('epoch',color='black')
plt.ylim(ymin=0)
plt.ylabel('loss',color='black')
plt.tick_params(colors='black')
# plt.savefig(loss_graph,dpi=100,bbox_inches = 'tight')
plt.show()

In [11]:
# for frame, arousal, valence in test_loader:
#     print(arousal[0])
#     pred = arousal_model(frame[0])

tensor([0.0000, 0.0000, 0.0000, 0.0110, 0.0585, 0.0585, 0.0645, 0.0700, 0.0385,
        0.0385])


ValueError: not enough values to unpack (expected 5, got 4)

In [12]:
dataiter = iter(test_loader)
f, a, v = dataiter.next()
print(a)

tensor([[ 0.0015,  0.0217,  0.0298,  0.0335,  0.0140,  0.0450,  0.0590,  0.0450,
          0.0510,  0.0630],
        [-0.2195, -0.1995, -0.3235, -0.5670, -0.5670, -0.1060, -0.4200, -0.4200,
         -0.4320, -0.4320],
        [ 0.0980,  0.0220, -0.0340, -0.1050, -0.1615, -0.1930, -0.2475, -0.1645,
         -0.0310,  0.3650],
        [ 0.1330,  0.1880,  0.1090,  0.2190,  0.3380,  0.1090, -0.0550,  0.1880,
          0.2750,  0.2670],
        [ 0.1400,  0.0770,  0.2430,  0.1400,  0.1250,  0.1250,  0.2430,  0.2430,
          0.0930,  0.2110],
        [ 0.1030,  0.0810,  0.0655,  0.0855,  0.0735,  0.0455,  0.0385,  0.0935,
          0.1090,  0.1010],
        [ 0.1560,  0.3140,  0.2750,  0.2270,  0.2040,  0.1960,  0.0770,  0.0460,
          0.0380, -0.0070],
        [ 0.0000,  0.0000,  0.0000,  0.0110,  0.0585,  0.0585,  0.0645,  0.0700,
          0.0385,  0.0385]])


In [25]:
f.shape[0]

8

In [13]:
f = f.to(device)
f[0].shape
#arousal_model(f[0:1])

torch.Size([10, 3, 224, 224])

In [21]:
for frame in f[0:1].split(1,dim=1):
    print(frame.shape)
    frame = torch.squeeze(frame)
    print(frame.shape)

torch.Size([1, 1, 3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([1, 1, 3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([1, 1, 3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([1, 1, 3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([1, 1, 3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([1, 1, 3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([1, 1, 3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([1, 1, 3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([1, 1, 3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([1, 1, 3, 224, 224])
torch.Size([3, 224, 224])


In [15]:
arousal_model(f[0:1])

IndexError: Dimension out of range (expected to be in range of [-1, 0], but got 1)