In [1]:
import torch
from torch import optim, cuda
from torch.utils.data import DataLoader, sampler
import torch.nn as nn

import torchvision
from torchvision import transforms, datasets, models
import torch.nn.functional as F

import numpy as np
import ast

import os
import pandas as pd
from torchvision.io import read_image
from torch.utils.data import Dataset
from PIL import Image
from timeit import default_timer as timer

In [2]:
torch.cuda.is_available()

True

In [3]:
# Image transformations
image_transforms = {
    # Train uses data augmentation
    'train':
    transforms.Compose([
        transforms.RandomResizedCrop(size=256, scale=(0.8, 1.0)),
        transforms.RandomRotation(degrees=15),
        transforms.ColorJitter(),
        transforms.RandomHorizontalFlip(),
        transforms.CenterCrop(size=224),  # Image net standards
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406],
                             [0.229, 0.224, 0.225])  # Imagenet standards
    ]),
    # Validation does not use augmentation
    'val':
    transforms.Compose([
        transforms.Resize(size=256),
        transforms.CenterCrop(size=224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    # Test does not use augmentation
    'test':
    transforms.Compose([
        transforms.Resize(size=256),
        transforms.CenterCrop(size=224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
}

In [4]:
class MovieAttributeDataset(Dataset):
    def __init__(self, annotations_file, img_dir, transform=None):
        self.img_labels = pd.read_csv(annotations_file)
        self.img_dir = img_dir
        self.transform = transform

    def __len__(self):
        return len(self.img_labels)

    def __getitem__(self, idx):
        img_path = os.path.join(self.img_dir, self.img_labels.iloc[idx, 0])
        image = Image.open(img_path)
        budget = self.img_labels.iloc[idx, 1]
        genres = torch.Tensor(ast.literal_eval(self.img_labels.iloc[idx, 2]))
        if self.transform:
            image = self.transform(image)
        return image, budget, genres

In [11]:
training_data = pd.read_csv("training_data.csv")
train_set = training_data.sample(frac=0.9)
val_set = training_data.loc[~training_data.index.isin(train_set.index)]
# print(train_set)
# print(val_set)
train_set.to_csv("train_set.csv", index=False)
val_set.to_csv("val_set.csv", index=False)

In [5]:
train_dataset = MovieAttributeDataset(annotations_file="train_set.csv", img_dir="", transform=image_transforms["train"])
val_dataset = MovieAttributeDataset(annotations_file="val_set.csv", img_dir="", transform=image_transforms["val"])

train_dataloader = DataLoader(train_dataset, batch_size=64, shuffle=True)
val_dataloader = DataLoader(val_dataset, batch_size=64, shuffle=True)

In [6]:
for images, budgets, genres in train_dataloader:
    print(images.shape) # (batch_size, color_channels, height, width)
    print(budgets.shape)
    print(genres.shape)
    break

torch.Size([64, 3, 224, 224])
torch.Size([64])
torch.Size([64, 23])


In [6]:
class MyModel(nn.Module):
    def __init__(self):
        super(MyModel, self).__init__()
        # m x m x 3 image
        # 224x224x3 => 222x222x32, kernel: k -> m - k + 1
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3)
        self.d1 = nn.Linear(222 * 222 * 32, 256)
        self.d2 = nn.Linear(256, 256)
        self.d3 = nn.Linear(256, 256)
        self.d4 = nn.Linear(256, 5)

        self.d5 = nn.Linear(256, 23)

    def forward(self, x):
        # 64x3x224x224 => 32x32x222x222
        x = self.conv1(x)
        x = F.relu(x)

        # flatten => 64 x (32*222*222)
        x = x.flatten(start_dim = 1)

        # 64 x (32*222*222) => 64x256
        x = self.d1(x)
        x = F.relu(x)
        x = F.dropout(x, 0.5)

        # 64 x 256 => 64x256
        x = self.d2(x)
        x = F.relu(x)
        x = F.dropout(x, 0.5)

        # 64 x 256 => 64x256
        x = self.d3(x)
        x = F.relu(x)
        x = F.dropout(x, 0.5)

        # logits (the raw, unnormalized predictions) => 64x5
        out = self.d4(x)
        # out = F.softmax(logits, dim=1) # turn logits into a set of probabilities.

        out2 = self.d5(x)
        # out2 = F.sigmoid(logits2, dim=1)
        return out, out2

In [76]:
# model = MyModel()
# for images, labels in train_dataloader:
#     print("batch size:", images.shape)
#     out = model(images)
#     print(out.shape)
#     break

In [7]:
learning_rate = 0.01
num_epochs = 5

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = MyModel()
model = model.to(device)
criterion = nn.CrossEntropyLoss()
criterion2 = nn.BCEWithLogitsLoss(reduction='mean') # binary cross entropy loss + softmax
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [8]:
def train(model,
          criterion1,
          criterion2,
          optimizer,
          train_loader,
          valid_loader,
          save_file_name,
          max_epochs_stop=3,
          n_epochs=20,
          print_every=2):
    """Train a PyTorch Model

    Params
    --------
        model (PyTorch model): cnn to train
        criterion (PyTorch loss): objective to minimize
        optimizer (PyTorch optimizier): optimizer to compute gradients of model parameters
        train_loader (PyTorch dataloader): training dataloader to iterate through
        valid_loader (PyTorch dataloader): validation dataloader used for early stopping
        save_file_name (str ending in '.pt'): file path to save the model state dict
        max_epochs_stop (int): maximum number of epochs with no improvement in validation loss for early stopping
        n_epochs (int): maximum number of training epochs
        print_every (int): frequency of epochs to print training stats

    Returns
    --------
        model (PyTorch model): trained cnn with best weights
        history (DataFrame): history of train and validation loss and accuracy
    """

    # Early stopping intialization
    epochs_no_improve = 0
    valid_loss_min = np.Inf

    valid_max_acc = 0
    history = []

    # Number of epochs already trained (if using loaded in model weights)
    try:
        print(f'Model has been trained for: {model.epochs} epochs.\n')
    except:
        model.epochs = 0
        print(f'Starting Training from Scratch.\n')

    overall_start = timer()

    # Main loop
    for epoch in range(n_epochs):

        # keep track of training and validation loss each epoch
        train_loss = 0.0
        valid_loss = 0.0

        train_budget_acc = 0
        train_genre_acc = 0
        valid_budget_acc = 0
        valid_genre_acc = 0

        # Set to training
        model.train()
        start = timer()

        ## training step
        for ii, (images, budgets, genres) in enumerate(train_dataloader):

            images = images.to(device)
            budgets = budgets.to(device)
            genres = genres.to(device)

            # Clear gradients
            optimizer.zero_grad()
            # Predicted outputs are non-log probabilities
            budget_pred, genre_pred = model(images)
            # 64 x 23

            # Loss and backpropagation of gradients
            loss1 = criterion1(budget_pred, budgets)
            loss2 = criterion2(genre_pred, genres)
            loss = loss1 + loss2
            loss.backward()

            # Update the parameters
            optimizer.step()

            # Track train loss by multiplying average loss by number of examples in batch
            train_loss += loss.item() * images.size(0)

            # Calculate accuracy by finding max log probability
            _, pred1 = torch.max(budget_pred, dim=1)
            correct_tensor = pred1.eq(budgets.data.view_as(pred1))

            _, idx = genre_pred.topk(3, dim=1)
            pred2 = torch.zeros_like(genre_pred)
            pred2[torch.arange(genres.size(0)).unsqueeze(1), idx] = 1
            correct_tensor2 = pred2.eq(genres.data.view_as(pred2))
            
            # Need to convert correct tensor from int to float to average
            accuracy = torch.mean(correct_tensor.type(torch.FloatTensor))
            accuracy2 = torch.mean(correct_tensor2.type(torch.FloatTensor))
            # Multiply average accuracy times the number of examples in batch
            train_budget_acc += accuracy.item() * images.size(0)
            train_genre_acc += accuracy2.item() * genres.size(0)

            # Track training progress
            print(
                f'Epoch: {epoch}\t{100 * (ii + 1) / len(train_loader):.2f}% complete. {timer() - start:.2f} seconds elapsed in epoch.',
                end='\r')

        # After training loops ends, start validation
        else:
            model.epochs += 1

            # Don't need to keep track of gradients
            with torch.no_grad():
                # Set to evaluation mode
                model.eval()

                # Validation loop
                for images, budgets, genres in valid_loader:
                    # Tensors to gpu
                    images = images.to(device)
                    budgets = budgets.to(device)
                    genres = genres.to(device)

                    # Forward pass
                    budget_pred, genre_pred = model(images)

                    # Validation loss
                    loss = criterion(budget_pred, budgets)
                    # Multiply average loss times the number of examples in batch
                    valid_loss += loss.item() * images.size(0)

                    # Calculate validation accuracy
                    _, pred = torch.max(budget_pred, dim=1)
                    correct_tensor = pred.eq(budgets.data.view_as(pred))

                    _, idx = genre_pred.topk(3, dim=1)
                    pred2 = torch.zeros_like(genre_pred)
                    pred2[torch.arange(genres.size(0)).unsqueeze(1), idx] = 1
                    correct_tensor2 = pred2.eq(genres.data.view_as(pred2))

                    accuracy = torch.mean(
                        correct_tensor.type(torch.FloatTensor))
                    accuracy2 = torch.mean(correct_tensor2.type(torch.FloatTensor))
                    # Multiply average accuracy times the number of examples
                    valid_budget_acc += accuracy.item() * images.size(0)
                    valid_genre_acc += accuracy2.item() * genres.size(0)

                # Calculate average losses
                train_loss = train_loss / len(train_loader.dataset)
                valid_loss = valid_loss / len(valid_loader.dataset)

                # Calculate average accuracy
                train_budget_acc = train_budget_acc / len(train_loader.dataset)
                train_genre_acc = train_genre_acc / len(train_loader.dataset)
                valid_budget_acc = valid_budget_acc / len(valid_loader.dataset)
                valid_genre_acc = valid_genre_acc / len(valid_loader.dataset)

                history.append([train_loss, valid_loss, train_budget_acc, valid_budget_acc, train_genre_acc, valid_genre_acc])

                # Print training and validation results
                if (epoch + 1) % print_every == 0:
                    print(
                        f'\nEpoch: {epoch} \tTraining Loss: {train_loss:.4f} \tValidation Loss: {valid_loss:.4f}'
                    )
                    print(
                        f'\t\tBudget Training Accuracy: {100 * train_budget_acc:.2f}%\t Budget Validation Accuracy: {100 * valid_budget_acc:.2f}%'
                    )
                    print(
                        f'\t\tGenre Training Accuracy: {100 * train_genre_acc:.2f}%\t Genre Validation Accuracy: {100 * valid_genre_acc:.2f}%'
                    )

                # Save the model if validation loss decreases
                if valid_loss < valid_loss_min:
                    # Save model
                    torch.save(model.state_dict(), save_file_name)
                    # Track improvement
                    epochs_no_improve = 0
                    valid_loss_min = valid_loss
                    valid_best_acc = valid_budget_acc
                    best_epoch = epoch

                # Otherwise increment count of epochs with no improvement
                else:
                    epochs_no_improve += 1
                    # Trigger early stopping
                    if epochs_no_improve >= max_epochs_stop:
                        print(
                            f'\nEarly Stopping! Total epochs: {epoch}. Best epoch: {best_epoch} with loss: {valid_loss_min:.2f} and acc: {100 * valid_best_acc:.2f}%'
                        )
                        total_time = timer() - overall_start
                        print(
                            f'{total_time:.2f} total seconds elapsed. {total_time / (epoch+1):.2f} seconds per epoch.'
                        )

                        # Load the best state dict
                        model.load_state_dict(torch.load(save_file_name))
                        # Attach the optimizer
                        model.optimizer = optimizer

                        # Format history
                        history = pd.DataFrame(
                            history,
                            columns=[
                                'train_loss', 'valid_loss', 'train_budget_acc', 
                                'valid_budget_acc', 'train_genre_acc', 'valid_genre_acc'
                            ])
                        return model, history

    # Attach the optimizer
    model.optimizer = optimizer
    # Record overall time and print out stats
    total_time = timer() - overall_start
    print(
        f'\nBest epoch: {best_epoch} with loss: {valid_loss_min:.2f} and acc: {100 * valid_best_acc:.2f}%'
    )
    print(
        f'{total_time:.2f} total seconds elapsed. {total_time / (epoch):.2f} seconds per epoch.'
    )
    # Format history
    history = pd.DataFrame(
        history,
        columns=['train_loss', 'valid_loss', 'train_budget_acc', 'valid_budget_acc', 'train_genre_acc', 'valid_genre_acc'])
    return model, history

In [9]:
train(model, criterion, criterion2, optimizer, train_dataloader, val_dataloader, "test.pt", 3, 10, 2)

Starting Training from Scratch.

Epoch: 1	100.00% complete. 45.97 seconds elapsed in epoch.
Epoch: 1 	Training Loss: 1.9756 	Validation Loss: 1.3319
		Budget Training Accuracy: 56.40%	 Budget Validation Accuracy: 50.60%
		Genre Training Accuracy: 86.01%	 Genre Validation Accuracy: 84.78%
Epoch: 3	100.00% complete. 45.39 seconds elapsed in epoch.
Epoch: 3 	Training Loss: 1.4991 	Validation Loss: 1.3007
		Budget Training Accuracy: 56.60%	 Budget Validation Accuracy: 50.60%
		Genre Training Accuracy: 86.08%	 Genre Validation Accuracy: 84.68%
Epoch: 5	100.00% complete. 49.91 seconds elapsed in epoch.
Epoch: 5 	Training Loss: 1.4907 	Validation Loss: 1.2919
		Budget Training Accuracy: 56.66%	 Budget Validation Accuracy: 50.60%
		Genre Training Accuracy: 86.20%	 Genre Validation Accuracy: 84.78%
Epoch: 7	100.00% complete. 49.39 seconds elapsed in epoch.
Epoch: 7 	Training Loss: 1.4928 	Validation Loss: 1.2880
		Budget Training Accuracy: 56.66%	 Budget Validation Accuracy: 50.60%
		Genre Trai

(MyModel(
   (conv1): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1))
   (d1): Linear(in_features=1577088, out_features=256, bias=True)
   (d2): Linear(in_features=256, out_features=256, bias=True)
   (d3): Linear(in_features=256, out_features=256, bias=True)
   (d4): Linear(in_features=256, out_features=5, bias=True)
   (d5): Linear(in_features=256, out_features=23, bias=True)
 ),
    train_loss  valid_loss  train_budget_acc  valid_budget_acc  \
 0  224.895041    1.530242          0.385224          0.500000   
 1    1.975552    1.331917          0.563984          0.505952   
 2    1.547209    1.301177          0.566623          0.505952   
 3    1.499132    1.300748          0.565963          0.505952   
 4    1.510030    1.289033          0.563325          0.505952   
 5    1.490667    1.291912          0.566623          0.505952   
 6    1.501748    1.291521          0.566623          0.505952   
 7    1.492839    1.287957          0.566623          0.505952   
 8    1.487282    1.

In [16]:
test_dataset = MovieAttributeDataset(annotations_file="test_data.csv", img_dir="", transform=image_transforms["test"])
test_dataloader = DataLoader(test_dataset, batch_size=1, shuffle=True)

In [18]:
final = MyModel()
final.load_state_dict(torch.load('test.pt'))

<All keys matched successfully>

In [32]:
def test(model, test_dataloader):
    budget_acc = 0
    genre_acc = 0
    for (images, budgets, genres) in test_dataloader:
        out1, out2 = model(images)

        budget_pred = torch.argmax(out1, dim=1)
        correct = budget_pred.eq(budgets.data.view_as(budget_pred))

        _, idx = out2.topk(3, dim=1)
        genre_pred = torch.zeros_like(out2)
        genre_pred[torch.arange(genres.size(0)).unsqueeze(1), idx] = 1
        correct2 = genre_pred.eq(genres.data.view_as(genre_pred))
        # Need to convert correct tensor from int to float to average
        budget_accuracy = torch.mean(correct.type(torch.FloatTensor))
        genre_accuracy = torch.mean(correct2.type(torch.FloatTensor))
        # Multiply average accuracy times the number of examples in batch
        budget_acc += budget_accuracy.item() * images.size(0)
        genre_acc += genre_accuracy.item() * genres.size(0)
    
    budget_acc = budget_acc / len(test_dataloader.dataset)
    genre_acc = genre_acc / len(test_dataloader.dataset)
    print(
        f'Budget Accuracy: {100 * budget_acc:.2f}%\t Genre Accuracy: {100 * genre_acc:.2f}%'
    )

In [33]:
test(final, test_dataloader)

Budget Accuracy: 100.00%	 Genre Accuracy: 85.60%
