In [1]:
import os
import sys

import pickle
import numpy as np
import pandas as pd

from PIL import Image
from pathlib import Path
from random import shuffle
import matplotlib.pyplot as plt

import torch 
import torchvision
import torch.nn as nn
import torch.utils.data as data
import torch.nn.functional as F
import torchvision.models as models
from torch.autograd import Variable
import torchvision.transforms as transforms

from skimage.transform import resize
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, LabelEncoder
from sklearn.metrics import accuracy_score, mean_absolute_error, r2_score, f1_score
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
from sklearn.svm import SVR, SVC

%matplotlib inline

In [2]:
torch.device(0)
os.environ["CUDA_VISIBLE_DEVICES"]= "0"

# Data Loader

In [3]:
class Dataset_Loader(data.Dataset):
    def __init__(self, data_path, folders, labels, transform=None):
        self.data_path = data_path
        self.labels = labels
        self.folders = folders
        self.transform = transform

    def __len__(self):
        return len(self.folders)

    def read_images(self, path, selected_folder, use_transform):
        X = []

        for i in range(0,15):        
            image = Image.open(path+selected_folder+"/frame"+str(i+1)+".jpg")
        
            if use_transform is not None:
                image = use_transform(image)

            X.append(image.squeeze_(0))
        X = torch.stack(X, dim=0)
        X = X.permute(1,0,2,3)
        return X

    def __getitem__(self, index):
        folder = self.folders[index][:-4]
        X = self.read_images(self.data_path, folder, self.transform)
        y = torch.FloatTensor(self.labels[index])

        return X, y

# 3D CNN Model

In [4]:
# 3D CNN model using Video Resnet 18 pretrained
class CNNModel3D(nn.Module):
    def __init__(self, out= 6, pretrained=True):
        super(CNNModel3D, self).__init__()
        
        self.VideoResNet = models.video.r3d_18(pretrained)
        num_features = self.VideoResNet.fc.in_features

        modules = list(self.VideoResNet.fc.children())
        modules.extend([nn.Linear(num_features, 128)]) 
        modules.extend([nn.ReLU(inplace=True)]) 
        modules.extend([nn.Linear(128, 32)])
        modules.extend([nn.ReLU(inplace=True)]) 
        modules.extend([nn.Linear(32, out)]) 
        
        self.VideoResNet.fc = nn.Sequential(*modules)
        
        self.sigmoid = nn.Sigmoid()
        
    def forward(self, x_3d):
        
        x = self.VideoResNet(x_3d) 
        x = self.sigmoid(x)
            
        return x

In [5]:
# Train Function
def train(model, device, train_loader, optimizer, epoch, log_interval):
    model.train()

    losses = []
    N_count = 0 
    for batch_idx, (X, y) in enumerate(train_loader):
        X, y = X.to(device), y.to(device)
        
        N_count += X.size(0)

        optimizer.zero_grad()
        output = model(X) 
        
        criterion = nn.MSELoss(reduction = 'sum')
        loss = criterion(output, y)
        losses.append(loss.item())
        
        loss.backward()
        optimizer.step()
        
        if (batch_idx + 1) % log_interval == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\t\tLoss: {:.6f}'.format(
                epoch + 1, N_count, len(train_loader.dataset), 100. * (batch_idx + 1) / len(train_loader), loss.item()))

    return losses

# Evaluation Function
def evaluation(model, device, loader):
    model.eval()
    
    loss = 0
    all_y = []
    all_y_pred = []
    score = []
    with torch.no_grad():
        for (X, y) in loader:
            X, y = X.to(device), y.to(device)

            output = model(X)
            
            criterion = nn.MSELoss(reduction = 'sum')
            loss = criterion(output, y)
            loss += loss.item() 
            
            # collect all y and y_pred in all batches
            all_y.extend(y.cpu().detach().numpy())
            all_y_pred.extend(output.cpu().detach().numpy())

    loss /= len(loader.dataset)
    
    all_y = np.asarray(all_y)
    all_y_pred = np.asarray(all_y_pred)
    
    for i in range(all_y.shape[1]):
        score.extend([1 - mean_absolute_error(all_y[:,i], all_y_pred[:,i])])

    return loss.cpu().detach().numpy(), np.asarray(score), all_y, all_y_pred

# Setting up the Data Loading

In [6]:
#Set Path
training_data_path = "/home/ramsub/first-impressions/data/image_data/training_data/"    
validation_data_path = "/home/ramsub/first-impressions/data/image_data/validation_data/"
test_data_path = "/home/ramsub/first-impressions/data/image_data/test_data/"
save_model_path = "./saved_models/"

#Read CSV files
aud_train = pd.read_csv('../audio/pickle_files/training_df_all.csv')
aud_test = pd.read_csv('../audio/pickle_files/test_df_all.csv')
aud_val = pd.read_csv('../audio/pickle_files/validation_df_all.csv')

labels = ['interview_score', 'openness', 'conscientiousness', 'extraversion', 'agreeableness', 'neuroticism']

train_label = aud_train[labels].values
train_list = aud_train['video_id'].values

test_label = aud_test[labels].values
test_list = aud_test['video_id'].values

val_label = aud_val[labels].values
val_list = aud_val['video_id'].values

# Setting up the CNN Model params

In [7]:
img_size = 112
epochs = 20
batch_size = 4
learning_rate = 1e-4
l_decay = 5e-4
log_interval = 25 

use_cuda = torch.cuda.is_available()                   # check if GPU exists
device = torch.device("cuda" if use_cuda else "cpu")   # use CPU or GPU
print(device)

train_params = {'batch_size': batch_size, 'shuffle': True, 'num_workers': 0, 'pin_memory': True} if use_cuda else {}
test_params = {'batch_size': batch_size, 'shuffle': False, 'num_workers': 0, 'pin_memory': True} if use_cuda else {}

transform = transforms.Compose([transforms.Resize([img_size, img_size]),
                                transforms.ToTensor(),
                                transforms.Normalize(mean=[0.43216, 0.394666, 0.37645], std=[0.22803, 0.22145, 0.216989]),
                               ])

cuda


In [8]:
train_set = Dataset_Loader(training_data_path, train_list, train_label, transform=transform)
train_loader = data.DataLoader(train_set, **train_params)

val_set = Dataset_Loader(validation_data_path, val_list, val_label, transform=transform)
val_loader = data.DataLoader(val_set, **test_params)

test_set = Dataset_Loader(test_data_path, test_list, test_label, transform=transform)
test_loader = data.DataLoader(test_set, **test_params)

model = CNNModel3D().to(device)
optimizer = torch.optim.Adam(model.VideoResNet.parameters(), lr=learning_rate)

val_losses = []
val_scores = []

# Training

In [None]:
for epoch in range(epochs):
    train(model, device, train_loader, optimizer, epoch, log_interval)    
    
    epoch_val_loss, epoch_val_score, test_annot, test_val_pred = evaluation(model, device, val_loader)
    
    val_losses.append(epoch_val_loss)    
    val_scores.append(epoch_val_score)
    
    if min(val_losses) == epoch_val_loss:
        torch.save(model.state_dict(), os.path.join(save_model_path, 'resnet3d_model_trained.pth'))

# Evaluating

In [9]:
saved_model_t = CNNModel3D().to(device)
saved_model_t.load_state_dict(torch.load(os.path.join(save_model_path, 'resnet3d_model_trained.pth')))

test_loss, test_score, test_annot, test_annot_pred = evaluation(saved_model_t, device, test_loader)
print('\nTesting loss: {:.7f}\n Interview: {:.4f}\n openness: {:.4f}\n conscientiousness: {:.4f}\n extraversion: {:.4f}\n agreeableness: {:.4f}\n neuroticism: {:.4f}'.format(test_loss, test_score[0],test_score[1],test_score[2],test_score[3],test_score[4],test_score[5]))


Testing loss: 0.0002155
 Interview: 0.9140
 openness: 0.9089
 conscientiousness: 0.9148
 extraversion: 0.9086
 agreeableness: 0.9096
 neuroticism: 0.9057
