### Dependencies Setup

In [38]:
import time
import re
import os
import pandas as pd
import torch
import torch.nn as nn
import torchvision
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms as transforms
import torchvision.models as models
from torch.utils.data import Dataset, random_split, DataLoader
from skimage import io
from skimage.transform import resize
import cv2
import numpy as np
import matplotlib.pyplot as plt

In [35]:
# Secrets must not be committed to the notebook: read the API key from the
# API_KEY_AUTH3 environment variable (set it before starting the kernel).
# Falls back to an empty string when the variable is not set.
# NOTE(review): the key that used to be hard-coded here should be treated as
# leaked and rotated.
api_key_auth3 = os.environ.get('API_KEY_AUTH3', '')
# Base location of the data files; the Colab cell below replaces it with the
# mounted Google Drive folder.
path_prefix = './'
# Side length, in pixels, of the square poster images.
img_size = 250

### Set the path prefix for loading data from Google Drive

In [36]:
# Mount Google Drive when running on Colab. Outside Colab the google.colab
# package cannot be imported, so keep the path_prefix configured earlier and
# let the notebook run against local files instead of crashing here.
try:
    from google.colab import drive
except ImportError:
    drive = None

if drive is not None:
    # This will prompt for authorization.
    drive.mount('/content/drive')
    path_prefix = '/content/drive/My Drive/'

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


### Data setup

In [26]:
def img_rename(img_name):
    """Turn a free-text name into the poster file name used on disk.

    Every run of non-word characters becomes a single separator, the text is
    lower-cased and trimmed, the words are joined with ``+`` and the ``.jpg``
    extension is appended.
    """
    # Collapse punctuation/whitespace runs into single spaces first.
    normalised = re.sub(r'\W+', ' ', img_name)
    normalised = normalised.lower().strip()
    # Joining the space-separated pieces with '+' mirrors a plain replace.
    joined = '+'.join(normalised.split(' '))
    return joined + '.jpg'

def resize_poster(img, target_size):
    """Scale an image to fit a square of side ``target_size`` and zero-pad it.

    The longer side is resized to exactly ``target_size`` pixels and the
    shorter side is scaled by the same factor (truncated to an integer). The
    remaining space is filled with zeros, split as evenly as possible between
    the two sides so the image ends up centred.

    Unlike the previous version, the output size depends only on
    ``target_size`` (not on the global ``img_size``), landscape images are
    padded vertically instead of raising on a negative pad width, and
    2-D (single-channel) arrays are accepted.

    Args:
        img: Image array of shape (height, width) or (height, width, channels).
        target_size: Side length, in pixels, of the square output.

    Returns:
        The resized and padded array; its first two dimensions are both
        ``target_size`` and any trailing dimensions are left unpadded.
    """
    height, width = img.shape[:2]
    if height >= width:
        # Portrait or square: the height fills the square.
        new_height = target_size
        # max(1, ...) guards against a zero-width result for extreme ratios.
        new_width = max(1, int(width * (target_size / height)))
    else:
        # Landscape: the width fills the square.
        new_width = target_size
        new_height = max(1, int(height * (target_size / width)))

    img_resized = resize(img, (new_height, new_width), anti_aliasing=True)

    # Split the leftover space around the image; any odd pixel goes to the
    # bottom/right side.
    pad_top = (target_size - new_height) // 2
    pad_bottom = target_size - new_height - pad_top
    pad_left = (target_size - new_width) // 2
    pad_right = target_size - new_width - pad_left

    # Only the two spatial axes are padded; channel axes (if any) are kept.
    pad_width = [(pad_top, pad_bottom), (pad_left, pad_right)]
    pad_width += [(0, 0)] * (img_resized.ndim - 2)
    return np.pad(img_resized, pad_width, mode='constant', constant_values=0)

Import the movie data CSV and append an empty (NaN) column to the resulting NumPy array to hold the poster images

In [29]:
# Load the movie metadata table from the archive under path_prefix.
md_df = pd.read_csv(path_prefix + 'MovieDataEnhanced.zip')
# TODO: prepare feature values (convert strings and arrays to numeric values?)
# Append one NaN-filled column on the right-hand side; it is a placeholder
# that the poster-loading cell fills with image arrays.
movie_data = np.pad(
    md_df.to_numpy(),
    [(0, 0), (0, 1)],
    mode='constant',
    constant_values=np.nan,
)

Read poster images and add to numpy array

In [None]:
# Read every poster from disk, normalise it to an img_size x img_size square
# and store it in the placeholder (last) column of the matching row.
# NOTE(review): column 7 is used to derive the file name - presumably the
# movie title; confirm against the CSV header.
for i, poster_key in enumerate(movie_data[:, 7]):
    img_path = path_prefix + 'posters/' + img_rename(poster_key)
    img = resize_poster(io.imread(img_path), img_size)
    movie_data[i, -1] = img

In [33]:
# Sanity check: report the table dimensions and the shape of the first poster.
print("Data shape: {}, Poster shape: {}".format(
    movie_data.shape,
    movie_data[0, -1].shape,
))

Data shape: (1407, 15), Poster shape: (250, 250, 3)


### Setup DNN Classes

#### Define base class



In [None]:
def accuracy(outputs, labels):
    """Return the fraction of rows whose highest-scoring class equals the label.

    ``outputs`` holds one row of class scores per sample (the maximum is taken
    along dim 1); ``labels`` holds the expected class index for each row. The
    result is a scalar tensor built from a Python float.
    """
    predicted = torch.max(outputs, dim=1).indices
    n_correct = (predicted == labels).sum().item()
    return torch.tensor(n_correct / len(predicted))
    
class DNNBase(nn.Module):
    """Base module providing per-batch loss/accuracy and epoch bookkeeping.

    Subclasses implement ``forward``. This class evaluates a batch with
    cross-entropy loss and the ``accuracy`` helper, averages validation
    metrics over an epoch and prints a one-line epoch summary.
    """

    def _loss_and_accuracy(self, batch):
        """Run the model on an ``(inputs, targets)`` batch; return ``(loss, acc)``."""
        inputs, targets = batch
        predictions = self(inputs)
        return F.cross_entropy(predictions, targets), accuracy(predictions, targets)

    def training_step(self, batch):
        """Return the ``(loss, accuracy)`` pair for one training batch."""
        return self._loss_and_accuracy(batch)

    def validation_step(self, batch):
        """Return the detached accuracy and loss of one validation batch as a dict."""
        loss, acc = self._loss_and_accuracy(batch)
        return {'val_acc': acc.detach(), 'val_loss': loss.detach()}

    def validation_epoch_end(self, outputs):
        """Average per-batch validation metrics into plain Python floats.

        ``outputs`` is a sequence of dicts with ``'val_loss'`` and
        ``'val_acc'`` tensors, as returned by ``validation_step``. The result
        is the unweighted mean over batches, so every batch counts equally
        regardless of its size.
        """
        mean_loss = torch.stack([step['val_loss'] for step in outputs]).mean()
        mean_acc = torch.stack([step['val_acc'] for step in outputs]).mean()
        return {'val_loss': mean_loss.item(), 'val_acc': mean_acc.item()}

    def epoch_end(self, epoch, result):
        """Print the metrics of one epoch on a single line.

        ``result`` must provide the keys ``train_loss``, ``train_acc``,
        ``val_loss``, ``val_acc`` and ``test_acc``.
        """
        metric_keys = ("train_loss", "train_acc", "val_loss", "val_acc", "test_acc")
        metrics = [result[key] for key in metric_keys]
        print("Epoch [{}] : train_loss: {:.4f}, train_acc: {:.4f}, val_loss: {:.4f}, val_acc: {:.4f}, test_acc: {:.4f}".format(
                epoch, *metrics
            ))

#### Define DNN class with CNN input

In [None]:
class PreTrainedResnet18(DNNBase):
    """ResNet-18 backbone whose final layer is replaced by a small classifier head.

    Args:
        num_classes: Number of output classes. Defaults to 2, the value that
            was previously hard-coded.
        hidden_units: Width of the hidden layer in the new head. Defaults to
            128, the value that was previously hard-coded.
        pretrained: When True (default) load torchvision's ImageNet weights;
            when False start from randomly initialised weights.
    """

    def __init__(self, num_classes=2, hidden_units=128, pretrained=True):
        super().__init__()

        # Newer torchvision releases deprecate ``pretrained=`` in favour of
        # ``weights=``. Use the weights enum when it exists and fall back to
        # the legacy keyword on older installations.
        if hasattr(models, 'ResNet18_Weights'):
            weights = models.ResNet18_Weights.IMAGENET1K_V1 if pretrained else None
            self.network = models.resnet18(weights=weights)
        else:
            self.network = models.resnet18(pretrained=pretrained)

        # Replace the last fully connected layer with a new head.
        in_features = self.network.fc.in_features
        self.network.fc = nn.Sequential(
            nn.Linear(in_features, hidden_units),
            nn.ReLU(),
            nn.Linear(hidden_units, num_classes),
            # NOTE(review): DNNBase feeds these outputs to F.cross_entropy,
            # which applies log-softmax itself, so this layer is redundant for
            # the loss. It is kept so the model still returns log-probabilities.
            nn.LogSoftmax(dim=1)
        )

    def forward(self, xb):
        """Return the network's log-probabilities for the input batch ``xb``."""
        return self.network(xb)