In [None]:
!pip install -U scikit-learn

In [None]:
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [1]:
import numpy as np # linear algebra
import matplotlib.pyplot as plt
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import shutil
import json
import re
import os

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.autograd import Variable
import torch.utils.data as data_utils
from torch.nn.modules import MSELoss, L1Loss

import sklearn.preprocessing
from sklearn.preprocessing import MultiLabelBinarizer
import glob
import csv
import cv2
import random
from PIL import Image
from itertools import product

In [2]:
path1 = "./Movie_Poster_Metadata/groundtruth"
temp_path = "./Movie_Poster_Metadata/temp_groundtruth"
path2 = "./Movie_Poster_Metadata/updated_groundtruth"
cropped_dataset = "./croppedrandom"

# Jay's gdrive paths:
#path1 = "/content/drive/MyDrive/Uppsala University/Study material/NCML/Movie_Poster_Metadata/groundtruth"
#temp_path = "./Movie_Poster_Metadata/temp_groundtruth"
#path2 = "/content/gdrive/MyDrive/Uppsala University/Study material/NCML/Movie_Poster_Metadata/updated_groundtruth"

### Function to append all the json objects into dataframe 

In [3]:
dir_list = os.listdir(path2)

movies_df = pd.DataFrame()

for file_name in dir_list:    

    df = pd.read_json(path2+'/'+file_name,encoding='utf-8',orient='records')
    df = df[['imdbID','Director','Genre','imdbRating']]
    movies_df = pd.concat([movies_df,df], ignore_index=True)

### Creating multi-hot encoded genre vectors

In [4]:
#remove duplicates and set imdbID as index
movies_df = movies_df.drop_duplicates(subset=["imdbID"], keep="last")
movies_df.set_index("imdbID", inplace=True)

In [5]:
mlb = MultiLabelBinarizer()
multihot_temp = mlb.fit_transform(movies_df["Genre"].dropna().str.split(", "))

In [6]:
#['Action' 'Adult' 'Adventure' 'Animation' 'Biography' 
#'Comedy' 'Crime' 'Documentary' 'Drama' 'Family' 
#'Fantasy' 'Game-Show' 'History' 'Horror' 'Music' 
#'Musical' 'Mystery' 'N/A' 'News' 'Reality-TV' 
#'Romance' 'Sci-Fi' 'Short' 'Sport' 'Talk-Show' 
#'Thriller' 'War' 'Western']

multihot = [0]*len(multihot_temp)

item = 0
for vec in multihot_temp:
    
    new_vec = np.empty([20])
    for i in range(20):
        new_vec[i] = 0
    
    index = 0
    for i in range(28):
        
        #If genre is set in multihot-encoded vector
        if vec[i] == 1:
            
            #Adult, Game-Show, News, Reality-TV, Short, Talk-Show, Western -> Other
            other_genres = [1, 11, 18, 19, 22, 24, 27]
            if i in other_genres:
                new_vec[17] = 1
                #continue
                
            #Put Musical in same category as Music
            if i == 15:
                new_vec[index-1] = 1
        
            #Skip N/A
            if i == 17:
                continue
            
            else:
                new_vec[index] = 1
                index += 1
                
        if vec[i] == 0:
            relevant_genres = [0, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 13, 14, 16, 20, 21, 23, 25]
            if i in relevant_genres:
                index += 1
                
    multihot[item] = new_vec
    item += 1

In [7]:
genres_df = pd.DataFrame({"multihot":[multihot_temp.astype(int)]}, index = movies_df.index)
movies_df = pd.concat([movies_df, genres_df], axis=1 )
print(mlb.classes_)

['Action' 'Adult' 'Adventure' 'Animation' 'Biography' 'Comedy' 'Crime'
 'Documentary' 'Drama' 'Family' 'Fantasy' 'Game-Show' 'History' 'Horror'
 'Music' 'Musical' 'Mystery' 'N/A' 'News' 'Reality-TV' 'Romance' 'Sci-Fi'
 'Short' 'Sport' 'Talk-Show' 'Thriller' 'War' 'Western']


In [8]:
#create a dictionary with multi-hot encoded vectors; index = imdbID
multihot_dict = {movies_df.index.tolist()[i] : multihot[i] for i in range(0, len(multihot))}

## Hyper Parameters

In [9]:
#training controls
batch_size = 1
number_of_labels = 20
epochs = 10
training_size = 0.7
learning_rate = 0.5 #0.1 #0.01 #0.001
dropout = [0.3, 0.3, 0.3, 0.3, 0.2, 0.2, 0.2, 0.2, 0.15]
# input image dimensions
img_rows, img_cols = 150, 150

### Passing the images through a convolutional network

In [10]:
# the data holders
x_test = []
x_train = []
y_test = []
y_train = []

#images need to have the same size!!
flist=glob.glob('./cropped_random_by_size/*.jpg')
random.shuffle(flist)

length=int(len(flist)*training_size)
i = 0

genre_count = np.empty([number_of_labels])
for l in range(number_of_labels):
    genre_count[l] = 0

#create lists with input data (images) and output data (multi-hot encoded genre vectors)
for filename in flist:
        
    imdb_id = filename[filename.index("tt"):filename.index(".x")]
      
    if imdb_id in multihot_dict:

        img = np.array(cv2.imread(filename))
        img = np.swapaxes(img, 2,0)
        img = np.swapaxes(img, 2,1)
        
        genre_arr = np.empty([number_of_labels])
        
        for j in range(len(multihot_dict[imdb_id])):
            genre_arr[j] = multihot_dict[imdb_id][j]
            if multihot_dict[imdb_id][j] == 1:
                genre_count[j] += 1
        if(i<length):  
            x_train.append(img)
            y_train.append(genre_arr)
        else:
            x_test.append(img)
            y_test.append(genre_arr)
        
        i +=1 

classes = ["Action", "Adventure", "Animation", "Biography", "Comedy", "Crime", "Documentary", "Drama", "Family", "Fantasy", "History", "Horror", "Music", "Mystery", "Romance", "Sci-Fi", "Sport", "War", "Other"]
img_sum = 0
for c in range(19):
    print(classes[c] + ": "+ str(genre_count[c]))
    img_sum += genre_count[c]
    

Action: 5888.0
Adventure: 5452.0
Animation: 2991.0
Biography: 3487.0
Comedy: 9998.0
Crime: 3809.0
Documentary: 4408.0
Drama: 14690.0
Family: 3404.0
Fantasy: 2753.0
History: 3047.0
Horror: 2951.0
Music: 2782.0
Mystery: 3272.0
Romance: 4643.0
Sci-Fi: 2888.0
Sport: 3162.0
War: 4017.0
Other: 2944.0


In [11]:
#converting the data from lists to numpy arrays
x_train=np.asarray(x_train,dtype=float)
x_test=np.asarray(x_test,dtype=float)
y_train=np.asarray(y_train,dtype=float)
y_test=np.asarray(y_test,dtype=float)

#scaling down the RGB data
x_train /= 255
x_test /= 255

#printing stats about the features
print('x_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')

train_length = x_train.shape[0]

x_train=torch.from_numpy(x_train)
x_test=torch.from_numpy(x_test)
y_train=torch.from_numpy(y_train)
y_test=torch.from_numpy(y_test)

train = data_utils.TensorDataset(x_train, y_train)
train_loader = data_utils.DataLoader(train, batch_size=batch_size, shuffle=True)

test = data_utils.TensorDataset(x_test, y_test)
test_loader = data_utils.DataLoader(test, batch_size=batch_size, shuffle=False)


x_train shape: (23045, 3, 150, 150)
23045 train samples
9877 test samples


### DenseNet-121

In [12]:
model = torch.hub.load('pytorch/vision:v0.10.0', 'densenet121', pretrained=True)

for param in model.parameters():
    param.requires_grad = False 

model.classifier = nn.Sequential(
    nn.Linear(1024, number_of_labels),
    # nn.Linear(1024, 512),
    # nn.Dropout(p=0.1),
    # nn.ReLU(),
    # nn.Linear(512, 28),
    nn.Sigmoid()
)


model.double() 
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(f"model loaded on {device}")
model.to(device) #hopefully runs model on cuda core.
print(model)

Using cache found in C:\Users\carol/.cache\torch\hub\pytorch_vision_v0.10.0


model loaded on cpu
DenseNet(
  (features): Sequential(
    (conv0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (norm0): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu0): ReLU(inplace=True)
    (pool0): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (denseblock1): _DenseBlock(
      (denselayer1): _DenseLayer(
        (norm1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu1): ReLU(inplace=True)
        (conv1): Conv2d(64, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu2): ReLU(inplace=True)
        (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      )
      (denselayer2): _DenseLayer(
        (norm1): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats

## Custom CNN

In [15]:
class Network(nn.Module):
    def __init__(self, input_shape=(3, img_rows, img_cols)):
        super(Network, self).__init__()
        self.conv1 = nn.Conv2d(3, 128, kernel_size=2)
        self.conv1_drop = nn.Dropout2d(p=dropout[0])
        self.conv2 = nn.Conv2d(128, 64, kernel_size=2)
        self.conv2_drop = nn.Dropout2d(p=dropout[1])
        self.conv3 = nn.Conv2d(64, 64, kernel_size=2)
        self.conv3_drop = nn.Dropout2d(p=dropout[2])
        self.conv4 = nn.Conv2d(64, 64, kernel_size=2)
        self.conv4_drop = nn.Dropout2d(p=dropout[3])
        self.conv5 = nn.Conv2d(64, 32, kernel_size=2)
        self.conv5_drop = nn.Dropout2d(p=dropout[4])
        self.conv6 = nn.Conv2d(32, 16, kernel_size=2)
        self.conv6_drop = nn.Dropout2d(p=dropout[5])
        
        self.n_size = self._get_conv_output(input_shape)
        """
        self.fc1 = nn.Linear(n_size, 16)
        self.fc1_drop = nn.Dropout(p=dropout[6])
        self.fc2 = nn.Linear(16, 16)
        self.fc2_drop = nn.Dropout(p=dropout[7])
        self.fc3 = nn.Linear(16, 8)
        self.fc3_drop = nn.Dropout(p=dropout[8])
        self.fc4 = nn.Linear(8, 28)
        """
        self.fc1 = nn.Linear(self.n_size, 20)
        self.sigmoid = nn.Sigmoid()

        
    def _get_conv_output(self, shape):
        bs = 1
        input = Variable(torch.rand(bs, *shape))
        output_feat = self._forward_features(input)
        #output_feat = self.forward(input)
        n_size = output_feat.data.view(bs, -1).size(1)
        return n_size
        
    def _forward_features(self, x):
        x = F.relu(F.max_pool2d(self.conv1_drop(self.conv1(x)), 2))
        x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2))
        x = F.relu(F.max_pool2d(self.conv3_drop(self.conv3(x)), 2))
        x = F.relu(F.max_pool2d(self.conv4_drop(self.conv4(x)), 2))
        x = F.relu(F.max_pool2d(self.conv5_drop(self.conv5(x)), 2))
        #x = F.relu(F.max_pool2d(self.conv6_drop(self.conv6(x)), 2))
        return x
        
    def forward(self, x):
        x = F.relu(F.max_pool2d(self.conv1_drop(self.conv1(x)), 2))
        x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2))
        x = F.relu(F.max_pool2d(self.conv3_drop(self.conv3(x)), 2))
        x = F.relu(F.max_pool2d(self.conv4_drop(self.conv4(x)), 2))
        x = F.relu(F.max_pool2d(self.conv5_drop(self.conv5(x)), 2))
        #x = F.relu(F.max_pool2d(self.conv6_drop(self.conv6(x)), 2))
        x = x.view(x.size(0), -1)
        # x = self.sigmoid(x)
        # x = torch.sigmoid(self.fc1(x))

        #x = F.relu(self.fc1_drop(self.fc1(x)))
        #x = F.relu(self.fc2_drop(self.fc2(x)))
        #x = F.relu(self.fc3_drop(self.fc3(x)))
        #x = self.fc4(x)
        return x

#model = Network()
#model.double() 

Network(
  (conv1): Conv2d(3, 128, kernel_size=(2, 2), stride=(1, 1))
  (conv1_drop): Dropout2d(p=0.3, inplace=False)
  (conv2): Conv2d(128, 64, kernel_size=(2, 2), stride=(1, 1))
  (conv2_drop): Dropout2d(p=0.3, inplace=False)
  (conv3): Conv2d(64, 64, kernel_size=(2, 2), stride=(1, 1))
  (conv3_drop): Dropout2d(p=0.3, inplace=False)
  (conv4): Conv2d(64, 64, kernel_size=(2, 2), stride=(1, 1))
  (conv4_drop): Dropout2d(p=0.3, inplace=False)
  (conv5): Conv2d(64, 32, kernel_size=(2, 2), stride=(1, 1))
  (conv5_drop): Dropout2d(p=0.2, inplace=False)
  (conv6): Conv2d(32, 16, kernel_size=(2, 2), stride=(1, 1))
  (conv6_drop): Dropout2d(p=0.2, inplace=False)
  (fc1): Linear(in_features=288, out_features=20, bias=True)
  (sigmoid): Sigmoid()
)

## Loss function

In [13]:
loss_fn = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=0.0) #TODO: check best params here

## Train and Test functions

In [14]:
# Function to save the model
def saveModel():
    path = "./trained_model.pth"
    torch.save(model.state_dict(), path)

# Function to test the model with the test dataset and print the accuracy for the test images
def testAccuracy():
    model.eval() #TODO: check model.train() which is used along model.eval()
    accuracy = 0.0
    total = 0.0

    classes = ["Action", "Adventure", "Animation", "Biography", "Comedy", "Crime", "Documentary", "Drama", "Family", "Fantasy", "History", "Horror", "Music", "Mystery", "Romance", "Sci-Fi", "Sport", "War", "Other"]

    class_correct = list(0. for i in range(number_of_labels))
    class_total = list(0. for i in range(number_of_labels))

    with torch.no_grad():
        device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
        print(f"model testing on {device}")
        some_accuracy_measure = 0
        final_accuracy = 0
        for i, (images, labels) in enumerate(test_loader,0):
            sum = 0
            images = Variable(images.to(device))
            labels = Variable(labels.to(device))
            outputs = model(images) 
            for j, label in enumerate(labels):
              n = sum + label.sum().item()
              # get the top n values of outputs
              _, predicted = torch.topk(outputs[j], int(n)) #TODO: find best threshold and calc F1.

              correct_predictions = 0
              for _, k in enumerate(predicted):
                class_total[k] += 1
                if(label[k].item() == 1):
                    correct_predictions += 1
                    class_correct[k] +=1
                some_accuracy_measure = correct_predictions/n
            final_accuracy += some_accuracy_measure  
            # the label with the highest energy will be our prediction
            total += labels.size(0)
    print("Total: ", total)
    # compute the accuracy over all test images
    accuracy = final_accuracy * 100 /total
    print("Accuracy;: ", accuracy)

    for i in range(number_of_labels):
        if class_total[i]!=0:
            print('Accuracy of %5s : %2d %%' % (
                    classes[i], 100 * class_correct[i] / class_total[i]))

    model.train()
    return(accuracy)


# Training function. We simply have to loop over our data iterator and feed the inputs to the network and optimize.
def train(num_epochs):
    
    best_accuracy = 0.0

    # Define your execution device
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print("The model will be running on", device, "device")
    # Convert model parameters and buffers to CPU or Cuda
    model.to(device)

    for epoch in range(num_epochs):  # loop over the dataset multiple times
        print("Starting epoch", epoch)
        running_loss = 0.0
        running_acc = 0.0

        for i, (images, labels) in enumerate(train_loader, 0):
            
            # get the inputs
            images = Variable(images.to(device))
            labels = Variable(labels.to(device))
            # zero the parameter gradients
            optimizer.zero_grad()
            # predict classes using images from the training set
            outputs = model(images)
            # compute the loss based on model output and real labels
            loss = loss_fn(outputs, labels)
            # backpropagate the loss
            loss.backward()
            # adjust parameters based on the calculated gradients
            optimizer.step()

            # Let's print statistics for every 1,000 images
            running_loss += loss.item()     # extract the loss value
            if i % 1000 == 0:    
                # print every 1000 (twice per epoch) 
                #print('[%d, %5d] loss: %.3f' %
                #      (epoch + 1, i + 1, running_loss / 1000))
                # zero the loss
                running_loss = 0.0

        # Compute and print the average accuracy fo this epoch when tested over all 10000 test images
        #accuracy = testAccuracy() #BUG: why is this in for loop? 
        #print('For epoch', epoch+1,'the test accuracy over the whole test set is %d %%' % (accuracy))
        
        # we want to save the model if the accuracy is the best
        #if accuracy > best_accuracy:
         #   saveModel()
          #  best_accuracy = accuracy

## Functions to display sample output

In [15]:
# Function to show the images
def imageshow(img):
    img = img / 2 + 0.5     # unnormalize
    npimg = img.numpy()
    plt.imshow(np.transpose(npimg, (1, 2, 0)))
    plt.show()


# Function to test the model with a batch of images and show the labels predictions
def testBatch():
    # get batch of images from the test DataLoader  
    images, labels = next(iter(test_loader))

    # show all images as one image grid
    imageshow(torchvision.utils.make_grid(images))
   
    # Show the real labels on the screen 
    print('Real labels: ', ' '.join('%5s' % classes[labels[j]] 
                               for j in range(batch_size)))
  
    # Let's see what if the model identifiers the  labels of those example
    outputs = model(images)
    
    # We got the probability for every 10 labels. The highest (max) probability should be correct label
    _, predicted = torch.max(outputs, 1)
    
    # Let's show the predicted labels on the screen to compare with the real ones
    print('Predicted: ', ' '.join('%5s' % classes[predicted[j]] 
                              for j in range(batch_size)))

## Run Model

In [16]:
train(1)
print("Training done.")

The model will be running on cpu device
Starting epoch 0
Training done.


In [17]:
model.eval() 
accuracy = 0.0
total = 0.0

classes = ["Action", "Adventure", "Animation", "Biography", "Comedy", "Crime", "Documentary", "Drama", "Family", "Fantasy", "History", "Horror", "Music", "Mystery", "Romance", "Sci-Fi", "Sport", "War", "Other"]

class_correct = list(0. for i in range(number_of_labels))
class_total = list(0. for i in range(number_of_labels))

with torch.no_grad():
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print(f"model testing on {device}")
    some_accuracy_measure = 0
    final_accuracy = 0
    for i, (images, labels) in enumerate(test_loader,0):
        sum = 0
        images = Variable(images.to(device))
        labels = Variable(labels.to(device))
        outputs = model(images) 
        for j, label in enumerate(labels):
            n = sum + label.sum().item()
            # get the top n values of outputs
            _, predicted = torch.topk(outputs[j], int(n)) #TODO: find best threshold and calc F1.
            _, groundtruth = torch.topk(labels[j], int(n))
                
            correct_predictions = 0
            for _, k in enumerate(predicted):
                
                if(label[k].item() == 1):
                    correct_predictions += 1
                some_accuracy_measure = correct_predictions/n
                if (0):
                    imageshow(images[j])
                    print(outputs[j])
                    print(labels[j])
                    print(some_accuracy_measure)
            for l in range(number_of_labels):
                
                if (label[l].item() == 1):  
                    class_total[l] += 1
                    if (l in predicted):
                        class_correct[l] +=1
        final_accuracy += some_accuracy_measure 
        
        total += labels.size(0)
        
print("Total: ", total)
# compute the accuracy over all test images
accuracy = final_accuracy * 100 /total
print("Accuracy;: ", accuracy)

model testing on cpu
Total:  9877.0
Accuracy;:  25.704498666936793
