In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, random_split
from torch.optim.lr_scheduler import StepLR, ReduceLROnPlateau
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, random_split
from torch.optim.lr_scheduler import StepLR, ReduceLROnPlateau

import os
import numpy as np
from torch.utils.data import Dataset, DataLoader
from tqdm.auto import tqdm


In [None]:
# Colab-only setup: mount Google Drive under /content/drive so that the
# project folder inside MyDrive can be entered by the `%cd` cell below.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
%pwd

In [None]:
%cd drive/MyDrive/2022DimitriosGagatsis_Thesis/src-seq

In [None]:
%ls

In [None]:
# !unzip data_seq.zip

# Dataset 

In [None]:
# Code from dataset.py
class VideoDataset(Dataset):
    """Dataset of frame sequences stored as ``.npy`` files.

    Every ``.npy`` file found (recursively) under ``sequences_folder`` is
    loaded into memory at construction time. The label of a file is the name
    of the *parent of the directory that contains the file*, and must be one
    of the keys of ``LABEL_TO_INDEX``.

    Args:
        sequences_folder: Root folder that is walked for ``.npy`` files.
        seq_length: Number of leading-axis entries (frames) every returned
            sequence is cropped or zero-padded to.
    """

    # Folder name -> integer class index used as the training target.
    LABEL_TO_INDEX = {
        'fake_angry': 0,
        'fake_contempt': 1,
        'fake_disgust': 2,
        'fake_happy': 3,
        'fake_sad': 4,
        'fake_surprise': 5,
        'real_angry': 6,
        'real_contempt': 7,
        'real_disgust': 8,
        'real_happy': 9,
        'real_sad': 10,
        'real_surprise': 11,
    }

    def __init__(self, sequences_folder, seq_length):
        self.sequences_folder = sequences_folder
        self.seq_length = seq_length
        self.sequences = []
        self.labels = []

        # Walk through the sequences folder and load all the sequences.
        for dirpath, _dirnames, filenames in os.walk(sequences_folder):
            for filename in filenames:
                if not filename.endswith('.npy'):
                    continue
                sequence_path = os.path.join(dirpath, filename)
                # NOTE(review): allow_pickle=True lets a crafted .npy file run
                # arbitrary code on load; only use this on trusted data.
                self.sequences.append(np.load(sequence_path, allow_pickle=True))

                # The label is the name of the folder one level above the
                # directory holding the file.
                self.labels.append(os.path.basename(os.path.dirname(dirpath)))

    def __len__(self):
        """Return the number of loaded sequences."""
        return len(self.sequences)

    def __getitem__(self, idx):
        """Return ``(sequence, label)`` tensors for the sample at ``idx``.

        Sequences longer than ``seq_length`` are cropped to a random window
        (drawn from NumPy's global RNG); shorter ones are zero-padded at the
        end of the first axis. Sequences of any rank are supported.

        Raises:
            ValueError: If the stored folder name is not a known class.
        """
        sequence = self.sequences[idx]
        label = self.labels[idx]

        n_frames = len(sequence)
        if n_frames > self.seq_length:
            start_idx = np.random.randint(n_frames - self.seq_length + 1)
            sequence = sequence[start_idx:start_idx + self.seq_length]
        elif n_frames < self.seq_length:
            # Pad only the leading (frame) axis, whatever the array rank is.
            pad_width = [(0, self.seq_length - n_frames)] + [(0, 0)] * (sequence.ndim - 1)
            sequence = np.pad(sequence, pad_width, 'constant')

        # Convert the folder name to its integer class index.
        try:
            label_index = self.LABEL_TO_INDEX[label]
        except KeyError:
            raise ValueError(
                f"Unknown label folder {label!r}; expected one of {sorted(self.LABEL_TO_INDEX)}"
            ) from None

        return torch.tensor(sequence), torch.tensor(label_index)



def remove_ds_store(directory):
    """Delete every ``.DS_Store`` file found under ``directory``.

    ``os.walk`` already descends into all subdirectories, so a single walk
    visits the whole tree; no explicit recursion is needed (recursing on top
    of the walk re-scans each subtree once per ancestor).

    Args:
        directory: Root folder to clean.
    """
    for dirpath, _dirnames, filenames in os.walk(directory):
        if ".DS_Store" in filenames:
            os.remove(os.path.join(dirpath, ".DS_Store"))
            print(f"Removed .DS_Store file from {dirpath}")

            
            
def get_data_loaders(train_sequences_folder, val_sequences_folder, seq_length, batch_size, num_workers):
    """Build the training and validation ``DataLoader`` objects.

    Args:
        train_sequences_folder: Root folder of the training ``.npy`` sequences.
        val_sequences_folder: Root folder of the validation ``.npy`` sequences.
        seq_length: Number of frames every sample is cropped/padded to.
        batch_size: Batch size used by both loaders.
        num_workers: Number of worker processes used by both loaders.

    Returns:
        Tuple ``(train_dataloader, val_dataloader)``.
    """
    train_dataset = VideoDataset(train_sequences_folder, seq_length)
    train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers)

    val_dataset = VideoDataset(val_sequences_folder, seq_length)
    # Validation metrics do not depend on sample order, so keep the order
    # fixed instead of reshuffling every epoch.
    val_dataloader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers)
    return train_dataloader, val_dataloader
    


In [None]:
# Select the compute device (GPU when one is visible to PyTorch) and build
# the training / validation loaders.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print("Using device: ", device)

train_dataloader, val_dataloader = get_data_loaders(
    train_sequences_folder='train_seq',
    val_sequences_folder='val_seq',
    seq_length=20,
    batch_size=8,
    num_workers=2,
)


In [None]:
# A DataLoader has no `.shape` attribute (accessing it raises AttributeError);
# show the number of samples and the number of batches instead.
len(train_dataloader.dataset), len(train_dataloader)

In [None]:
# Requirements for src-seq (expected to be available in the runtime already):
# torch
# matplotlib
# numpy
# pandas
# opencv-python

# fvcore is installed from the tip of its GitHub repository (no version pin).
# NOTE(review): presumably needed by the pytorchvideo hub models loaded in the
# Models section — confirm, and consider pinning a commit for reproducibility.
!pip install git+https://github.com/facebookresearch/fvcore.git

# Models

In [None]:

class MODEL_3DCNN(nn.Module):
    """Three-stage 3D CNN classifier (Conv3d -> BatchNorm3d -> ReLU -> MaxPool3d).

    Input is expected as ``(batch, 3, T, H, W)``. The spatial size is divided
    by 16 overall (stride-2 conv plus three 2x poolings) and the temporal size
    by 2 (last pooling), and the final stage has 64 channels, so the flattened
    feature size is ``64 * (T // 2) * (H // 16) * (W // 16)``.

    Args:
        num_classes: Number of output logits.
        flattened_features: Input size of the first fully connected layer.
            The default, 125440, corresponds to ``T=20, H=W=224``; pass a
            different value to use other clip sizes.
    """

    def __init__(self, num_classes, flattened_features=125440):
        super().__init__()
        self.conv1 = nn.Conv3d(3, 16, kernel_size=(3, 5, 5), stride=(1, 2, 2), padding=(1, 2, 2))
        self.bn1 = nn.BatchNorm3d(16)
        self.relu1 = nn.ReLU(inplace=True)
        self.pool1 = nn.MaxPool3d(kernel_size=(1, 2, 2), stride=(1, 2, 2))
        self.conv2 = nn.Conv3d(16, 32, kernel_size=(3, 5, 5), stride=(1, 1, 1), padding=(1, 2, 2))
        self.bn2 = nn.BatchNorm3d(32)
        self.relu2 = nn.ReLU(inplace=True)
        self.pool2 = nn.MaxPool3d(kernel_size=(1, 2, 2), stride=(1, 2, 2))
        self.conv3 = nn.Conv3d(32, 64, kernel_size=(3, 5, 5), stride=(1, 1, 1), padding=(1, 2, 2))
        self.bn3 = nn.BatchNorm3d(64)
        self.relu3 = nn.ReLU(inplace=True)
        # Only the last pooling also halves the temporal axis.
        self.pool3 = nn.MaxPool3d(kernel_size=(2, 2, 2), stride=(2, 2, 2))
        self.flatten = nn.Flatten()
        self.fc1 = nn.Linear(flattened_features, 512)
        self.relu4 = nn.ReLU(inplace=True)
        self.fc2 = nn.Linear(512, num_classes)

    def forward(self, x):
        """Return class logits of shape ``(batch, num_classes)``."""
        x = self.pool1(self.relu1(self.bn1(self.conv1(x))))
        x = self.pool2(self.relu2(self.bn2(self.conv2(x))))
        x = self.pool3(self.relu3(self.bn3(self.conv3(x))))
        x = self.flatten(x)
        x = self.relu4(self.fc1(x))
        return self.fc2(x)




# LSTM model - a simple LSTM baseline (it trains, but slowly)
class MODEL_LSTM(nn.Module):
    """Two-layer LSTM classifier over flattened per-step features.

    The input is reshaped to ``(batch, steps, features)`` by keeping the first
    two axes and flattening the rest; the LSTM output at the last step feeds a
    small MLP head.

    Args:
        num_classes: Number of output logits.
        input_size: Flattened feature size per step. The default, 1003520,
            equals 20 * 224 * 224.
        hidden_size: LSTM hidden size (default 256).
    """

    def __init__(self, num_classes, input_size=1003520, hidden_size=256):
        super().__init__()
        self.lstm1 = nn.LSTM(input_size, hidden_size, num_layers=2, batch_first=True)
        self.fc1 = nn.Linear(hidden_size, 512)
        self.relu1 = nn.ReLU(inplace=True)
        self.fc2 = nn.Linear(512, num_classes)

    def forward(self, x):
        """Return class logits of shape ``(batch, num_classes)``."""
        batch_size, timesteps = x.shape[:2]
        # reshape (not view) so permuted / non-contiguous inputs also work.
        x = x.reshape(batch_size, timesteps, -1)
        x, _ = self.lstm1(x)
        x = x[:, -1, :]  # keep only the output of the last step
        x = self.fc1(x)
        x = self.relu1(x)
        x = self.fc2(x)
        return x
    
    

# GRU Model - simple GRU model
class MODEL_GRU(nn.Module):
    """Single-layer GRU classifier fed with the fully flattened clip.

    The whole sample is flattened into ONE step of size ``input_size``, so the
    GRU sees a sequence of length 1.

    Args:
        num_classes: Number of output logits.
        input_size: Flattened size of one sample. The default, 3010560,
            equals 3 * 20 * 224 * 224.
        hidden_size: GRU hidden size (default 256).
    """

    def __init__(self, num_classes, input_size=3010560, hidden_size=256):
        super().__init__()
        self.gru1 = nn.GRU(input_size=input_size, hidden_size=hidden_size, num_layers=1, batch_first=True)
        self.fc1 = nn.Linear(hidden_size, 128)
        self.relu1 = nn.ReLU(inplace=True)
        self.fc2 = nn.Linear(128, num_classes)

    def forward(self, x):
        """Return class logits of shape ``(batch, num_classes)``."""
        # Reshape the input to (batch_size, sequence_length=1, input_size).
        x = x.reshape(x.size(0), 1, -1)
        # Pass through the GRU layer.
        x, _ = self.gru1(x)
        # Drop the length-1 sequence axis and apply the fully connected head.
        x = x.reshape(x.size(0), -1)
        x = self.fc1(x)
        x = self.relu1(x)
        x = self.fc2(x)
        return x


# X3D Model from X3D networks pretrained on the Kinetics 400 dataset 
# PyTorchVideo: https://pytorch.org/hub/facebookresearch_pytorchvideo_x3d/
# Choose the `x3d_s` model
# model_name = 'x3d_s'
# model = torch.hub.load('facebookresearch/pytorchvideo', model_name, pretrained=True)

class MODEL_X3D(nn.Module):
    """Pretrained PyTorchVideo ``x3d_s`` with its classification head replaced.

    PyTorchVideo hub networks keep their layers in ``model.blocks`` and the
    final classifier is the ``proj`` linear layer of the last block. Assigning
    a new ``model.fc`` attribute (as done previously) only attaches an unused
    layer, leaving the 400-way Kinetics output in place; the projection itself
    has to be swapped.

    NOTE: checkpoints saved with the previous definition (unused ``model.fc``
    plus the 400-way head) are not compatible with this one.

    Args:
        num_classes: Number of output logits.
    """

    def __init__(self, num_classes):
        super().__init__()
        self.model = torch.hub.load('facebookresearch/pytorchvideo', 'x3d_s', pretrained=True)
        head = self.model.blocks[-1]
        # Reuse the head's own input width instead of hard-coding it.
        head.proj = nn.Linear(head.proj.in_features, num_classes)

    @property
    def fc(self):
        """The trainable classification layer (lets ``freeze_baseline`` find it)."""
        return self.model.blocks[-1].proj

    def forward(self, x):
        """Run the backbone and return its output."""
        return self.model(x)


# 3D RESNET pretrained on Kinetics dataset.
# Try this one: 3D RESNET https://pytorch.org/hub/facebookresearch_pytorchvideo_resnet/
# model = torch.hub.load('facebookresearch/pytorchvideo', 'slow_r50', pretrained=True)

class MODEL_3D_RESNET(nn.Module):
    """Pretrained PyTorchVideo ``slow_r50`` with its classification head replaced.

    PyTorchVideo hub networks keep their layers in ``model.blocks`` and the
    final classifier is the ``proj`` linear layer of the last block. Assigning
    a new ``model.fc`` attribute (as done previously) only attaches an unused
    layer, leaving the 400-way Kinetics output in place; the projection itself
    has to be swapped.

    NOTE: checkpoints saved with the previous definition (unused ``model.fc``
    plus the 400-way head) are not compatible with this one.

    Args:
        num_classes: Number of output logits.
    """

    def __init__(self, num_classes):
        super().__init__()
        self.model = torch.hub.load('facebookresearch/pytorchvideo', 'slow_r50', pretrained=True)
        head = self.model.blocks[-1]
        # Reuse the head's own input width instead of hard-coding it.
        head.proj = nn.Linear(head.proj.in_features, num_classes)

    @property
    def fc(self):
        """The trainable classification layer (lets ``freeze_baseline`` find it)."""
        return self.model.blocks[-1].proj

    def forward(self, x):
        """Run the backbone and return its output."""
        return self.model(x)


class MODEL_efficient_x3d(nn.Module):
    """Pretrained PyTorchVideo ``efficient_x3d_s`` followed by a new linear head.

    The extra ``Linear(400, num_classes)`` layer is stored as ``model.fc`` (the
    same attribute name as before, so state-dict keys are unchanged), but the
    backbone's own ``forward`` never calls it. It is therefore applied
    explicitly here on top of the backbone's 400-way output.

    NOTE(review): the backbone presumably emits softmax probabilities rather
    than logits in eval mode — confirm against the PyTorchVideo source.

    Args:
        num_classes: Number of output logits.
    """

    def __init__(self, num_classes):
        super().__init__()
        self.model = torch.hub.load('facebookresearch/pytorchvideo', 'efficient_x3d_s', pretrained=True)
        self.model.fc = nn.Linear(400, num_classes)

    @property
    def fc(self):
        """The trainable classification layer (lets ``freeze_baseline`` find it)."""
        return self.model.fc

    def forward(self, x):
        """Map the backbone's 400-way output to ``num_classes`` logits."""
        x = self.model(x)
        return self.model.fc(x)





# Utils

In [None]:
# Code from utils.py
import pandas as pd
import numpy as np
import torch
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
import seaborn as sns

def save_model(epochs, model, optimizer, criterion, path='model.pth'):
    """Save a training checkpoint to disk.

    The checkpoint is a dict with the keys ``'epoch'``, ``'model_state_dict'``,
    ``'optimizer_state_dict'`` and ``'loss'``.

    Args:
        epochs: Number of epochs trained; stored under ``'epoch'``.
        model: Module whose ``state_dict()`` is saved.
        optimizer: Optimizer whose ``state_dict()`` is saved.
        criterion: Stored as-is under ``'loss'``. Note this pickles the loss
            *object*, so the file must be loaded with full unpickling enabled.
        path: Destination file (default ``'model.pth'``, the previous
            hard-coded location).
    """
    checkpoint = {
        'epoch': epochs,
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'loss': criterion,
    }
    torch.save(checkpoint, path)


def _plot_curves(train_values, valid_values, train_color, valid_color, metric, filename):
    """Plot one train/validation curve pair on a new figure and save it to ``filename``."""
    plt.figure(figsize=(10, 7))
    plt.plot(
        train_values, color=train_color, linestyle='-',
        label=f'train {metric.lower()}'
    )
    plt.plot(
        valid_values, color=valid_color, linestyle='-',
        label=f'validation {metric.lower()}'
    )
    plt.xlabel('Epochs')
    plt.ylabel(metric)
    plt.legend()
    plt.savefig(filename)


def save_plots(train_acc, valid_acc, train_loss, valid_loss):
    """
    Function to save the loss and accuracy plots to disk.

    Writes ``accuracy.png`` and ``loss.png`` into the current working
    directory. Each argument is a per-epoch sequence of values. The figures
    are left open so the notebook still renders them inline.
    """
    # Accuracy plots.
    _plot_curves(train_acc, valid_acc, 'green', 'blue', 'Accuracy', "accuracy.png")

    # Loss plots.
    _plot_curves(train_loss, valid_loss, 'orange', 'red', 'Loss', "loss.png")



def print_model_parameters(model):
    """Print how many scalar parameters of ``model`` are trainable and how many are frozen."""
    counts = {True: 0, False: 0}
    for parameter in model.parameters():
        # Bucket by whether the parameter currently receives gradients.
        counts[bool(parameter.requires_grad)] += parameter.numel()

    print(f"Trainable parameters: {counts[True]}")
    print(f"Non-trainable parameters: {counts[False]}")
    
def freeze_baseline(model):
    """Freeze every parameter of ``model`` except those of ``model.fc``.

    Prints a separator line, modifies ``model`` in place and returns it.
    """
    print("-" * 50)
    # Switch gradients off everywhere, then back on for the head only.
    model.requires_grad_(False)
    model.fc.requires_grad_(True)
    return model

def ConfusionMatrix(net, test_loader, dataset_classes):
    '''Plot the confusion matrix of ``net`` on ``test_loader`` and save it as ``conf_matrix.png``.

    Args:
        net: Classifier returning per-class scores of shape (batch, classes).
        test_loader: Iterable of ``(inputs, labels)`` batches. Inputs are fed
            to ``net`` unchanged (no axis permutation is applied here).
        dataset_classes: Class names, ordered by class index; used as the
            row/column labels of the matrix.
    '''
    classes = list(dataset_classes)

    # Run on whatever device the network lives on instead of assuming CUDA.
    try:
        device = next(net.parameters()).device
    except StopIteration:
        device = torch.device("cpu")

    y_pred = []
    y_true = []

    # Evaluate in eval mode without gradient tracking, then restore the mode.
    was_training = net.training
    net.eval()
    with torch.no_grad():
        for inputs, labels in test_loader:
            output = net(inputs.to(device))
            # argmax of the raw scores equals argmax of exp(scores).
            y_pred.extend(output.argmax(dim=1).cpu().numpy())  # Save Prediction
            y_true.extend(labels.cpu().numpy())  # Save Truth
    net.train(was_training)

    # Build confusion matrix. Passing `labels` keeps one row/column per class
    # even when a class never occurs in the data or in the predictions.
    cf_matrix = confusion_matrix(y_true, y_pred, labels=list(range(len(classes))))
    # NOTE(review): the "* 10" scaling is kept from the original code; its
    # intent is unclear (a per-row normalisation may have been meant).
    df_cm = pd.DataFrame(cf_matrix / np.sum(cf_matrix) * 10, index=classes, columns=classes)
    plt.figure(figsize=(12, 7))
    sns.heatmap(df_cm, annot=True)
    plt.savefig('conf_matrix.png')




# Model, Optimizer, Loss, Epochs

In [None]:
## Define the Model
# Exactly one of the following lines should be active; the others are kept as
# ready-to-use alternatives.

# model = MODEL_3DCNN(num_classes=12).to(device)

# model = MODEL_LSTM(num_classes=12).to(device)

# model = MODEL_GRU(num_classes=12).to(device)

# model = MODEL_X3D(num_classes=12).to(device)


model = MODEL_3D_RESNET(num_classes=12).to(device)


# model = MODEL_efficient_x3d(num_classes=12).to(device)

print("*Model parameters before freezing*")
print_model_parameters(model)

# freeze_baseline(model)
# print("*Model parameters after freezing*")
# print_model_parameters(model)


# Compile the model
# model = torch.compile(model)

## Loss Function
criterion = nn.CrossEntropyLoss()

## Optimizer
# optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
# NOTE(review): lr=0.1 is 100x Adam's default (1e-3) and is likely to
# destabilise fine-tuning of a pretrained backbone — confirm this is intended.
optimizer = optim.Adam(model.parameters(), lr=0.1)
# optimizer = optim.Adagrad(model.parameters(), lr=0.01, lr_decay=0, weight_decay=0, initial_accumulator_value=0, eps=1e-10)
# optimizer = optim.RMSprop(model.parameters(), lr=0.01, alpha=0.99, eps=1e-08, weight_decay=0, momentum=0, centered=False)


## LR-Scheduler
# Halve the learning rate when the monitored value (validation loss, passed to
# scheduler.step in the training loop) has not improved for more than 2 epochs.
scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=2, verbose=True)

## Set number of epochs
num_epochs = 60

# Training

In [None]:
## Train & Validate the model

def _run_epoch(model, dataloader, criterion, device, optimizer=None):
    """Run one pass over ``dataloader`` and return ``(mean_batch_loss, accuracy_percent)``.

    When ``optimizer`` is given the model is put in training mode and updated;
    otherwise it is put in eval mode and run without gradient tracking, so
    BatchNorm statistics are not updated from validation data.
    """
    is_training = optimizer is not None
    model.train(is_training)

    loss_sum = 0.0
    num_correct = 0
    num_batches = 0
    with torch.set_grad_enabled(is_training):
        for inputs, labels in tqdm(dataloader, total=len(dataloader)):
            # Move the last axis to position 1 — assumes batches arrive as
            # (B, T, H, W, C) and the model wants (B, C, T, H, W); TODO confirm.
            inputs = inputs.permute(0, 4, 1, 2, 3).to(device)
            labels = labels.to(device)

            if is_training:
                optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            if is_training:
                loss.backward()
                optimizer.step()

            loss_sum += loss.item()
            num_correct += (outputs.argmax(dim=1) == labels).sum().item()
            num_batches += 1

    # Loss is averaged over batches for BOTH phases so the curves are comparable.
    mean_loss = loss_sum / max(num_batches, 1)
    accuracy = 100. * (num_correct / max(len(dataloader.dataset), 1))
    return mean_loss, accuracy


train_loss = []
train_acc = []
val_loss = []
val_acc = []
for epoch in range(num_epochs):
    print("\nTraining...")
    epoch_loss, epoch_acc = _run_epoch(model, train_dataloader, criterion, device, optimizer)
    train_loss.append(epoch_loss) # for plotting
    train_acc.append(epoch_acc) # for plotting
    print('Epoch [%d], Training Loss: %.4f, Training Accuracy: %.4f' % (epoch+1, epoch_loss, epoch_acc))

    # Evaluate the model on the validation set
    print("Validating...")
    epoch_loss, epoch_acc = _run_epoch(model, val_dataloader, criterion, device)
    val_loss.append(epoch_loss) # for plotting
    val_acc.append(epoch_acc) # for plotting
    print('Epoch [%d], Validation Loss: %.4f, Validation Accuracy: %.4f' % (epoch+1, epoch_loss, epoch_acc))
    # The scheduler monitors the validation loss.
    scheduler.step(epoch_loss)

# Plot the loss and accuracy curves
save_plots(train_acc, val_acc, train_loss, val_loss)
# save the model
save_model(num_epochs, model, optimizer, criterion)

# Evaluate

In [None]:
## Evaluate the model on validation set.
path = 'model.pth' # set the path

loaded_model = model

# map_location lets a checkpoint saved on GPU be restored on a CPU-only runtime.
# weights_only=False is needed because the checkpoint stores the pickled loss
# object under 'loss'; only load checkpoints you created yourself.
loaded_checkpoint = torch.load(path, map_location=device, weights_only=False)
loaded_model.load_state_dict(loaded_checkpoint['model_state_dict'])
optimizer.load_state_dict(loaded_checkpoint['optimizer_state_dict'])
epoch = loaded_checkpoint['epoch']

loaded_model.eval()
correct = 0
num_batches = 0
running_loss = 0.0
print("Evaluate the model.")
with torch.no_grad():
    for inputs, labels in tqdm(val_dataloader, total=len(val_dataloader)):
        # Same axis reordering as in the training loop.
        inputs = inputs.permute(0, 4, 1, 2, 3).to(device)
        labels = labels.to(device)
        outputs = loaded_model(inputs)
        loss = criterion(outputs, labels)
        running_loss += loss.item()
        correct += (outputs.argmax(dim=1) == labels).sum().item()
        num_batches += 1

# criterion returns a per-batch mean, so average over batches (not samples).
epoch_loss = running_loss / max(num_batches, 1)
epoch_acc = 100. * (correct / max(len(val_dataloader.dataset), 1))
print(f"Test loss: {epoch_loss:.3f}, Test acc: {epoch_acc:.3f}")