# CNN-RNN Model Training, Dependent Labels
## Author: Ian Scarff (iie728)

In [25]:
import pandas as pd
import numpy as np
import json
import os
import imageio
import matplotlib.pyplot as plt
import seaborn as sns
import random
from sklearn.preprocessing import MultiLabelBinarizer
import time
import datetime

import torch.nn as nn
import torch.nn.parallel
import torch.backends.cudnn as cudnn
import torch.optim as optim
import torchvision
import torch.nn.functional as F
from torchvision import transforms
from torchvision import models
from torch.utils.data import DataLoader, SubsetRandomSampler
from sklearn.model_selection import KFold
import skimage

from sklearn.metrics import accuracy_score, f1_score, recall_score, precision_score

import warnings
warnings.filterwarnings('ignore')

## Setup

In [2]:
### The 22 propaganda-technique label names used throughout the notebook
classes = [
    'Smears',
    'Loaded Language',
    'Name calling/Labeling',
    'Glittering generalities (Virtue)',
    'Appeal to (Strong) Emotions',
    'Appeal to fear/prejudice',
    'Transfer',
    'Doubt',
    'Exaggeration/Minimisation',
    'Whataboutism',
    'Slogans',
    'Flag-waving',
    "Misrepresentation of Someone's Position (Straw Man)",
    'Causal Oversimplification',
    'Thought-terminating cliché',
    'Black-and-white Fallacy/Dictatorship',
    'Appeal to authority',
    'Reductio ad hitlerum',
    'Repetition',
    'Obfuscation, Intentional vagueness, Confusion',
    'Presenting Irrelevant Data (Red Herring)',
    'Bandwagon',
]

### Create Class Binarizer
### Fitted on a single "sample" that contains every label, so all 22 are known.
### NOTE(review): the fitted `classes_` order is chosen by the binarizer and may
### differ from the order of this list - verify before indexing by position.
one_hot = MultiLabelBinarizer()
one_hot.fit([classes])

MultiLabelBinarizer()

In [3]:
from CustomLoader import ImageLoader

In [4]:
### Both splits use the same preprocessing; each dataset gets its own Compose
def _image_pipeline():
    """Build the preprocessing pipeline: tensor conversion, 224x224 resize, normalisation."""
    steps = [
        transforms.ToTensor(),
        transforms.Resize(size = (224,224)),
        transforms.Normalize((0.5,0.5,0.5), (0.5,0.5,0.5)), ### Pixel range [-1,1]
    ]
    return transforms.Compose(steps)


### Training split
training_data = ImageLoader(
    json_file = 'training_data.json',
    root_dir = 'Images',
    transform = _image_pipeline(),
)

### Testing split
testing_data = ImageLoader(
    json_file = 'testing_data.json',
    root_dir = 'Images',
    transform = _image_pipeline(),
)

In [5]:
### Mini-batch iterators: 25 samples per batch, reshuffled on every pass
train_loader = DataLoader(
    training_data,
    batch_size = 25,
    shuffle = True,
)
test_loader = DataLoader(
    testing_data,
    batch_size = 25,
    shuffle = True,
)

# 
# 

## Define CNN-RNN Architectures (Set Hidden Size)

### ResNet18-LSTM

In [36]:
### Define Class
class ResNet18_LSTM(nn.Module):
    """Frozen pretrained ResNet18 encoder with an LSTM decoder scoring 22 labels.

    The flattened CNN embedding is used as the LSTM's initial hidden state.
    The LSTM is then unrolled for one time step per label on all-zero inputs,
    and each step's hidden state is mapped to one sigmoid probability.

    Args:
        dpout: Value forwarded to ``nn.LSTM(dropout=...)``.
    """

    def __init__(self, dpout):
        super().__init__()

        ### Number of labels (one LSTM time step per label)
        self.num_labels = 22

        ### Import ResNet18 and remove its classifier (last child module)
        backbone = models.resnet18(pretrained = True)
        encoder = nn.Sequential(*list(backbone.children())[:-1])

        ### Freeze parameters so the encoder weights receive no gradient
        for param in encoder.parameters():
            param.requires_grad = False

        ### CNN Encoder
        self.cnn = encoder

        ### CNN output size (length of the flattened embedding per image)
        self.cnn_output_size = 512

        ### Create LSTM decoder; the hidden size equals the embedding size
        ### because the embedding is used directly as the initial hidden state.
        ### NOTE: PyTorch applies LSTM dropout only between stacked layers, so
        ### with num_layers = 1 the dropout value has no effect.
        self.lstm = nn.LSTM(input_size = 1,
                            hidden_size = self.cnn_output_size,
                            num_layers = 1,
                            dropout = dpout,
                            batch_first = True)

        ### Prediction layer: one logit per LSTM hidden state
        self.prediction_layer = nn.Linear(self.cnn_output_size, 1)

    ### Forward pass
    def forward(self, images):
        """Return per-label probabilities for a batch of images.

        Args:
            images: Batch of image tensors accepted by the CNN encoder.

        Returns:
            Tensor of shape ``(batch, num_labels, 1)`` with values in [0, 1].
        """
        ### Encode, then flatten to one embedding vector per image
        image_emb = torch.flatten(self.cnn(images), start_dim = 1)

        ### Prep initial hidden state (image embedding) and cell state (zeros).
        ### Both are derived from the embedding so they follow its device and
        ### dtype; the model therefore also runs without CUDA.
        h0 = image_emb.unsqueeze(0)
        c0 = torch.zeros_like(h0)

        ### The decoder gets no real input: one all-zero step per label
        zero_input = image_emb.new_zeros(image_emb.size(0), self.num_labels, 1)

        ### Run through LSTM decoder -> (batch, num_labels, hidden)
        hidden_layers, _ = self.lstm(zero_input, (h0, c0))

        ### nn.Linear acts on the last dimension, so every step is scored in
        ### a single call instead of unbinding and re-stacking the steps.
        logits = self.prediction_layer(hidden_layers)

        return torch.sigmoid(logits)

### ResNet50-LSTM

In [None]:
### Define Class
class ResNet50_LSTM(nn.Module):
    """Frozen pretrained ResNet50 encoder with an LSTM decoder scoring 22 labels.

    The flattened CNN embedding is used as the LSTM's initial hidden state.
    The LSTM is then unrolled for one time step per label on all-zero inputs,
    and each step's hidden state is mapped to one sigmoid probability.

    Args:
        dpout: Value forwarded to ``nn.LSTM(dropout=...)``.
    """

    def __init__(self, dpout):
        super().__init__()

        ### Number of labels (one LSTM time step per label)
        self.num_labels = 22

        ### Import ResNet50 and remove its classifier (last child module)
        backbone = models.resnet50(pretrained = True)
        encoder = nn.Sequential(*list(backbone.children())[:-1])

        ### Freeze parameters so the encoder weights receive no gradient
        for param in encoder.parameters():
            param.requires_grad = False

        ### CNN Encoder
        self.cnn = encoder

        ### CNN output size (length of the flattened embedding per image)
        self.cnn_output_size = 2048

        ### Create LSTM decoder; the hidden size equals the embedding size
        ### because the embedding is used directly as the initial hidden state.
        ### NOTE: PyTorch applies LSTM dropout only between stacked layers, so
        ### with num_layers = 1 the dropout value has no effect.
        self.lstm = nn.LSTM(input_size = 1,
                            hidden_size = self.cnn_output_size,
                            num_layers = 1,
                            dropout = dpout,
                            batch_first = True)

        ### Prediction layer: one logit per LSTM hidden state
        self.prediction_layer = nn.Linear(self.cnn_output_size, 1)

    ### Forward pass
    def forward(self, images):
        """Return per-label probabilities for a batch of images.

        Args:
            images: Batch of image tensors accepted by the CNN encoder.

        Returns:
            Tensor of shape ``(batch, num_labels, 1)`` with values in [0, 1].
        """
        ### Encode, then flatten to one embedding vector per image
        image_emb = torch.flatten(self.cnn(images), start_dim = 1)

        ### Prep initial hidden state (image embedding) and cell state (zeros).
        ### Both are derived from the embedding so they follow its device and
        ### dtype; the model therefore also runs without CUDA.
        h0 = image_emb.unsqueeze(0)
        c0 = torch.zeros_like(h0)

        ### The decoder gets no real input: one all-zero step per label
        zero_input = image_emb.new_zeros(image_emb.size(0), self.num_labels, 1)

        ### Run through LSTM decoder -> (batch, num_labels, hidden)
        hidden_layers, _ = self.lstm(zero_input, (h0, c0))

        ### nn.Linear acts on the last dimension, so every step is scored in
        ### a single call instead of unbinding and re-stacking the steps.
        logits = self.prediction_layer(hidden_layers)

        return torch.sigmoid(logits)

### ResNet101-LSTM

In [None]:
### Define Class
class ResNet101_LSTM(nn.Module):
    """Frozen pretrained ResNet101 encoder with an LSTM decoder scoring 22 labels.

    The flattened CNN embedding is used as the LSTM's initial hidden state.
    The LSTM is then unrolled for one time step per label on all-zero inputs,
    and each step's hidden state is mapped to one sigmoid probability.

    Args:
        dpout: Value forwarded to ``nn.LSTM(dropout=...)``.
    """

    def __init__(self, dpout):
        super().__init__()

        ### Number of labels (one LSTM time step per label)
        self.num_labels = 22

        ### Import ResNet101 and remove its classifier (last child module)
        backbone = models.resnet101(pretrained = True)
        encoder = nn.Sequential(*list(backbone.children())[:-1])

        ### Freeze parameters so the encoder weights receive no gradient
        for param in encoder.parameters():
            param.requires_grad = False

        ### CNN Encoder
        self.cnn = encoder

        ### CNN output size (length of the flattened embedding per image)
        self.cnn_output_size = 2048

        ### Create LSTM decoder; the hidden size equals the embedding size
        ### because the embedding is used directly as the initial hidden state.
        ### NOTE: PyTorch applies LSTM dropout only between stacked layers, so
        ### with num_layers = 1 the dropout value has no effect.
        self.lstm = nn.LSTM(input_size = 1,
                            hidden_size = self.cnn_output_size,
                            num_layers = 1,
                            dropout = dpout,
                            batch_first = True)

        ### Prediction layer: one logit per LSTM hidden state
        self.prediction_layer = nn.Linear(self.cnn_output_size, 1)

    ### Forward pass
    def forward(self, images):
        """Return per-label probabilities for a batch of images.

        Args:
            images: Batch of image tensors accepted by the CNN encoder.

        Returns:
            Tensor of shape ``(batch, num_labels, 1)`` with values in [0, 1].
        """
        ### Encode, then flatten to one embedding vector per image
        image_emb = torch.flatten(self.cnn(images), start_dim = 1)

        ### Prep initial hidden state (image embedding) and cell state (zeros).
        ### Both are derived from the embedding so they follow its device and
        ### dtype; the model therefore also runs without CUDA.
        h0 = image_emb.unsqueeze(0)
        c0 = torch.zeros_like(h0)

        ### The decoder gets no real input: one all-zero step per label
        zero_input = image_emb.new_zeros(image_emb.size(0), self.num_labels, 1)

        ### Run through LSTM decoder -> (batch, num_labels, hidden)
        hidden_layers, _ = self.lstm(zero_input, (h0, c0))

        ### nn.Linear acts on the last dimension, so every step is scored in
        ### a single call instead of unbinding and re-stacking the steps.
        logits = self.prediction_layer(hidden_layers)

        return torch.sigmoid(logits)

### DenseNet121-LSTM

In [None]:
### Define Class
class DenseNet121_LSTM(nn.Module):
    """Frozen pretrained DenseNet121 encoder with an LSTM decoder scoring 22 labels.

    The pooled CNN embedding is used as the LSTM's initial hidden state.
    The LSTM is then unrolled for one time step per label on all-zero inputs,
    and each step's hidden state is mapped to one sigmoid probability.

    Args:
        dpout: Value forwarded to ``nn.LSTM(dropout=...)``.
    """

    def __init__(self, dpout):
        super().__init__()

        ### Number of labels (one LSTM time step per label)
        self.num_labels = 22

        ### Import DenseNet121 and remove its classifier (last child module)
        backbone = models.densenet121(pretrained = True)
        encoder = nn.Sequential(*list(backbone.children())[:-1])

        ### Freeze parameters so the encoder weights receive no gradient
        for param in encoder.parameters():
            param.requires_grad = False

        ### CNN Encoder
        self.cnn = encoder

        ### CNN output size (length of the pooled embedding per image)
        self.cnn_output_size = 1024

        ### Create LSTM decoder; the hidden size equals the embedding size
        ### because the embedding is used directly as the initial hidden state.
        ### NOTE: PyTorch applies LSTM dropout only between stacked layers, so
        ### with num_layers = 1 the dropout value has no effect.
        self.lstm = nn.LSTM(input_size = 1,
                            hidden_size = self.cnn_output_size,
                            num_layers = 1,
                            dropout = dpout,
                            batch_first = True)

        ### Prediction layer: one logit per LSTM hidden state
        self.prediction_layer = nn.Linear(self.cnn_output_size, 1)

    ### Forward pass
    def forward(self, images):
        """Return per-label probabilities for a batch of images.

        Args:
            images: Batch of image tensors accepted by the CNN encoder.

        Returns:
            Tensor of shape ``(batch, num_labels, 1)`` with values in [0, 1].
        """
        ### The stripped DenseNet stops at its convolutional feature maps, so
        ### apply the ReLU + global average pooling that torchvision's
        ### DenseNet.forward performs before its classifier. Without this,
        ### flattening to cnn_output_size folds the spatial positions into the
        ### batch dimension and yields more rows than images.
        feature_maps = self.cnn(images)
        pooled = F.adaptive_avg_pool2d(F.relu(feature_maps), (1, 1))
        image_emb = torch.flatten(pooled, start_dim = 1)

        ### Prep initial hidden state (image embedding) and cell state (zeros).
        ### Both are derived from the embedding so they follow its device and
        ### dtype; the model therefore also runs without CUDA.
        h0 = image_emb.unsqueeze(0)
        c0 = torch.zeros_like(h0)

        ### The decoder gets no real input: one all-zero step per label
        zero_input = image_emb.new_zeros(image_emb.size(0), self.num_labels, 1)

        ### Run through LSTM decoder -> (batch, num_labels, hidden)
        hidden_layers, _ = self.lstm(zero_input, (h0, c0))

        ### nn.Linear acts on the last dimension, so every step is scored in
        ### a single call instead of unbinding and re-stacking the steps.
        logits = self.prediction_layer(hidden_layers)

        return torch.sigmoid(logits)

### DenseNet169-LSTM

In [None]:
### Define Class
class DenseNet169_LSTM(nn.Module):
    """Frozen pretrained DenseNet169 encoder with an LSTM decoder scoring 22 labels.

    The pooled CNN embedding is used as the LSTM's initial hidden state.
    The LSTM is then unrolled for one time step per label on all-zero inputs,
    and each step's hidden state is mapped to one sigmoid probability.

    Args:
        dpout: Value forwarded to ``nn.LSTM(dropout=...)``.
    """

    def __init__(self, dpout):
        super().__init__()

        ### Number of labels (one LSTM time step per label)
        self.num_labels = 22

        ### Import DenseNet169 and remove its classifier (last child module)
        backbone = models.densenet169(pretrained = True)
        encoder = nn.Sequential(*list(backbone.children())[:-1])

        ### Freeze parameters so the encoder weights receive no gradient
        for param in encoder.parameters():
            param.requires_grad = False

        ### CNN Encoder
        self.cnn = encoder

        ### CNN output size (length of the pooled embedding per image)
        self.cnn_output_size = 1664

        ### Create LSTM decoder; the hidden size equals the embedding size
        ### because the embedding is used directly as the initial hidden state.
        ### NOTE: PyTorch applies LSTM dropout only between stacked layers, so
        ### with num_layers = 1 the dropout value has no effect.
        self.lstm = nn.LSTM(input_size = 1,
                            hidden_size = self.cnn_output_size,
                            num_layers = 1,
                            dropout = dpout,
                            batch_first = True)

        ### Prediction layer: one logit per LSTM hidden state
        self.prediction_layer = nn.Linear(self.cnn_output_size, 1)

    ### Forward pass
    def forward(self, images):
        """Return per-label probabilities for a batch of images.

        Args:
            images: Batch of image tensors accepted by the CNN encoder.

        Returns:
            Tensor of shape ``(batch, num_labels, 1)`` with values in [0, 1].
        """
        ### The stripped DenseNet stops at its convolutional feature maps, so
        ### apply the ReLU + global average pooling that torchvision's
        ### DenseNet.forward performs before its classifier. Without this,
        ### flattening to cnn_output_size folds the spatial positions into the
        ### batch dimension and yields more rows than images.
        feature_maps = self.cnn(images)
        pooled = F.adaptive_avg_pool2d(F.relu(feature_maps), (1, 1))
        image_emb = torch.flatten(pooled, start_dim = 1)

        ### Prep initial hidden state (image embedding) and cell state (zeros).
        ### Both are derived from the embedding so they follow its device and
        ### dtype; the model therefore also runs without CUDA.
        h0 = image_emb.unsqueeze(0)
        c0 = torch.zeros_like(h0)

        ### The decoder gets no real input: one all-zero step per label
        zero_input = image_emb.new_zeros(image_emb.size(0), self.num_labels, 1)

        ### Run through LSTM decoder -> (batch, num_labels, hidden)
        hidden_layers, _ = self.lstm(zero_input, (h0, c0))

        ### nn.Linear acts on the last dimension, so every step is scored in
        ### a single call instead of unbinding and re-stacking the steps.
        logits = self.prediction_layer(hidden_layers)

        return torch.sigmoid(logits)

### DenseNet201-LSTM

In [None]:
### Define Class
class DenseNet201_LSTM(nn.Module):
    """Frozen pretrained DenseNet201 encoder with an LSTM decoder scoring 22 labels.

    The pooled CNN embedding is used as the LSTM's initial hidden state.
    The LSTM is then unrolled for one time step per label on all-zero inputs,
    and each step's hidden state is mapped to one sigmoid probability.

    Args:
        dpout: Value forwarded to ``nn.LSTM(dropout=...)``.
    """

    def __init__(self, dpout):
        super().__init__()

        ### Number of labels (one LSTM time step per label)
        self.num_labels = 22

        ### Import DenseNet201 and remove its classifier (last child module)
        backbone = models.densenet201(pretrained = True)
        encoder = nn.Sequential(*list(backbone.children())[:-1])

        ### Freeze parameters so the encoder weights receive no gradient
        for param in encoder.parameters():
            param.requires_grad = False

        ### CNN Encoder
        self.cnn = encoder

        ### CNN output size (length of the pooled embedding per image)
        self.cnn_output_size = 1920

        ### Create LSTM decoder; the hidden size equals the embedding size
        ### because the embedding is used directly as the initial hidden state.
        ### NOTE: PyTorch applies LSTM dropout only between stacked layers, so
        ### with num_layers = 1 the dropout value has no effect.
        self.lstm = nn.LSTM(input_size = 1,
                            hidden_size = self.cnn_output_size,
                            num_layers = 1,
                            dropout = dpout,
                            batch_first = True)

        ### Prediction layer: one logit per LSTM hidden state
        self.prediction_layer = nn.Linear(self.cnn_output_size, 1)

    ### Forward pass
    def forward(self, images):
        """Return per-label probabilities for a batch of images.

        Args:
            images: Batch of image tensors accepted by the CNN encoder.

        Returns:
            Tensor of shape ``(batch, num_labels, 1)`` with values in [0, 1].
        """
        ### The stripped DenseNet stops at its convolutional feature maps, so
        ### apply the ReLU + global average pooling that torchvision's
        ### DenseNet.forward performs before its classifier. Without this,
        ### flattening to cnn_output_size folds the spatial positions into the
        ### batch dimension and yields more rows than images.
        feature_maps = self.cnn(images)
        pooled = F.adaptive_avg_pool2d(F.relu(feature_maps), (1, 1))
        image_emb = torch.flatten(pooled, start_dim = 1)

        ### Prep initial hidden state (image embedding) and cell state (zeros).
        ### Both are derived from the embedding so they follow its device and
        ### dtype; the model therefore also runs without CUDA.
        h0 = image_emb.unsqueeze(0)
        c0 = torch.zeros_like(h0)

        ### The decoder gets no real input: one all-zero step per label
        zero_input = image_emb.new_zeros(image_emb.size(0), self.num_labels, 1)

        ### Run through LSTM decoder -> (batch, num_labels, hidden)
        hidden_layers, _ = self.lstm(zero_input, (h0, c0))

        ### nn.Linear acts on the last dimension, so every step is scored in
        ### a single call instead of unbinding and re-stacking the steps.
        logits = self.prediction_layer(hidden_layers)

        return torch.sigmoid(logits)

### VGG11_BN-LSTM

In [None]:
### Define Class
class VGG11_BN_LSTM(nn.Module):
    """Frozen pretrained VGG11-BN encoder with an LSTM decoder scoring 22 labels.

    The flattened CNN embedding is used as the LSTM's initial hidden state.
    The LSTM is then unrolled for one time step per label on all-zero inputs,
    and each step's hidden state is mapped to one sigmoid probability.

    Args:
        dpout: Value forwarded to ``nn.LSTM(dropout=...)``.
    """

    def __init__(self, dpout):
        super().__init__()

        ### Number of labels (one LSTM time step per label)
        self.num_labels = 22

        ### Import VGG11-BN and remove its classifier (last child module)
        backbone = models.vgg11_bn(pretrained = True)
        encoder = nn.Sequential(*list(backbone.children())[:-1])

        ### Freeze parameters so the encoder weights receive no gradient
        for param in encoder.parameters():
            param.requires_grad = False

        ### CNN Encoder
        self.cnn = encoder

        ### CNN output size (length of the flattened embedding per image)
        self.cnn_output_size = 25088

        ### Create LSTM decoder; the hidden size equals the embedding size
        ### because the embedding is used directly as the initial hidden state.
        ### NOTE: PyTorch applies LSTM dropout only between stacked layers, so
        ### with num_layers = 1 the dropout value has no effect.
        ### NOTE(review): with hidden_size = 25088 the recurrent weight matrix
        ### alone holds 4 * 25088 * 25088 (about 2.5 billion) values; projecting
        ### the embedding to a smaller size first may be needed - TODO confirm
        ### this fits in memory on the target hardware.
        self.lstm = nn.LSTM(input_size = 1,
                            hidden_size = self.cnn_output_size,
                            num_layers = 1,
                            dropout = dpout,
                            batch_first = True)

        ### Prediction layer: one logit per LSTM hidden state
        self.prediction_layer = nn.Linear(self.cnn_output_size, 1)

    ### Forward pass
    def forward(self, images):
        """Return per-label probabilities for a batch of images.

        Args:
            images: Batch of image tensors accepted by the CNN encoder.

        Returns:
            Tensor of shape ``(batch, num_labels, 1)`` with values in [0, 1].
        """
        ### Encode, then flatten to one embedding vector per image
        image_emb = torch.flatten(self.cnn(images), start_dim = 1)

        ### Prep initial hidden state (image embedding) and cell state (zeros).
        ### Both are derived from the embedding so they follow its device and
        ### dtype; the model therefore also runs without CUDA.
        h0 = image_emb.unsqueeze(0)
        c0 = torch.zeros_like(h0)

        ### The decoder gets no real input: one all-zero step per label
        zero_input = image_emb.new_zeros(image_emb.size(0), self.num_labels, 1)

        ### Run through LSTM decoder -> (batch, num_labels, hidden)
        hidden_layers, _ = self.lstm(zero_input, (h0, c0))

        ### nn.Linear acts on the last dimension, so every step is scored in
        ### a single call instead of unbinding and re-stacking the steps.
        logits = self.prediction_layer(hidden_layers)

        return torch.sigmoid(logits)

### VGG16_BN-LSTM

In [None]:
### Define Class
class VGG16_BN_LSTM(nn.Module):
    """Frozen pretrained VGG16-BN encoder with an LSTM decoder scoring 22 labels.

    The flattened CNN embedding is used as the LSTM's initial hidden state.
    The LSTM is then unrolled for one time step per label on all-zero inputs,
    and each step's hidden state is mapped to one sigmoid probability.

    Args:
        dpout: Value forwarded to ``nn.LSTM(dropout=...)``.
    """

    def __init__(self, dpout):
        super().__init__()

        ### Number of labels (one LSTM time step per label)
        self.num_labels = 22

        ### Import VGG16-BN and remove its classifier (last child module)
        backbone = models.vgg16_bn(pretrained = True)
        encoder = nn.Sequential(*list(backbone.children())[:-1])

        ### Freeze parameters so the encoder weights receive no gradient
        for param in encoder.parameters():
            param.requires_grad = False

        ### CNN Encoder
        self.cnn = encoder

        ### CNN output size (length of the flattened embedding per image)
        self.cnn_output_size = 25088

        ### Create LSTM decoder; the hidden size equals the embedding size
        ### because the embedding is used directly as the initial hidden state.
        ### NOTE: PyTorch applies LSTM dropout only between stacked layers, so
        ### with num_layers = 1 the dropout value has no effect.
        ### NOTE(review): with hidden_size = 25088 the recurrent weight matrix
        ### alone holds 4 * 25088 * 25088 (about 2.5 billion) values; projecting
        ### the embedding to a smaller size first may be needed - TODO confirm
        ### this fits in memory on the target hardware.
        self.lstm = nn.LSTM(input_size = 1,
                            hidden_size = self.cnn_output_size,
                            num_layers = 1,
                            dropout = dpout,
                            batch_first = True)

        ### Prediction layer: one logit per LSTM hidden state
        self.prediction_layer = nn.Linear(self.cnn_output_size, 1)

    ### Forward pass
    def forward(self, images):
        """Return per-label probabilities for a batch of images.

        Args:
            images: Batch of image tensors accepted by the CNN encoder.

        Returns:
            Tensor of shape ``(batch, num_labels, 1)`` with values in [0, 1].
        """
        ### Encode, then flatten to one embedding vector per image
        image_emb = torch.flatten(self.cnn(images), start_dim = 1)

        ### Prep initial hidden state (image embedding) and cell state (zeros).
        ### Both are derived from the embedding so they follow its device and
        ### dtype; the model therefore also runs without CUDA.
        h0 = image_emb.unsqueeze(0)
        c0 = torch.zeros_like(h0)

        ### The decoder gets no real input: one all-zero step per label
        zero_input = image_emb.new_zeros(image_emb.size(0), self.num_labels, 1)

        ### Run through LSTM decoder -> (batch, num_labels, hidden)
        hidden_layers, _ = self.lstm(zero_input, (h0, c0))

        ### nn.Linear acts on the last dimension, so every step is scored in
        ### a single call instead of unbinding and re-stacking the steps.
        logits = self.prediction_layer(hidden_layers)

        return torch.sigmoid(logits)

### VGG19_BN-LSTM

In [None]:
### Define Class
class VGG19_BN_LSTM(nn.Module):
    """Frozen pretrained VGG19-BN encoder with an LSTM decoder scoring 22 labels.

    The flattened CNN embedding is used as the LSTM's initial hidden state.
    The LSTM is then unrolled for one time step per label on all-zero inputs,
    and each step's hidden state is mapped to one sigmoid probability.

    Args:
        dpout: Value forwarded to ``nn.LSTM(dropout=...)``.
    """

    def __init__(self, dpout):
        super().__init__()

        ### Number of labels (one LSTM time step per label)
        self.num_labels = 22

        ### Import VGG19-BN and remove its classifier (last child module)
        backbone = models.vgg19_bn(pretrained = True)
        encoder = nn.Sequential(*list(backbone.children())[:-1])

        ### Freeze parameters so the encoder weights receive no gradient
        for param in encoder.parameters():
            param.requires_grad = False

        ### CNN Encoder
        self.cnn = encoder

        ### CNN output size (length of the flattened embedding per image)
        self.cnn_output_size = 25088

        ### Create LSTM decoder; the hidden size equals the embedding size
        ### because the embedding is used directly as the initial hidden state.
        ### NOTE: PyTorch applies LSTM dropout only between stacked layers, so
        ### with num_layers = 1 the dropout value has no effect.
        ### NOTE(review): with hidden_size = 25088 the recurrent weight matrix
        ### alone holds 4 * 25088 * 25088 (about 2.5 billion) values; projecting
        ### the embedding to a smaller size first may be needed - TODO confirm
        ### this fits in memory on the target hardware.
        self.lstm = nn.LSTM(input_size = 1,
                            hidden_size = self.cnn_output_size,
                            num_layers = 1,
                            dropout = dpout,
                            batch_first = True)

        ### Prediction layer: one logit per LSTM hidden state
        self.prediction_layer = nn.Linear(self.cnn_output_size, 1)

    ### Forward pass
    def forward(self, images):
        """Return per-label probabilities for a batch of images.

        Args:
            images: Batch of image tensors accepted by the CNN encoder.

        Returns:
            Tensor of shape ``(batch, num_labels, 1)`` with values in [0, 1].
        """
        ### Encode, then flatten to one embedding vector per image
        image_emb = torch.flatten(self.cnn(images), start_dim = 1)

        ### Prep initial hidden state (image embedding) and cell state (zeros).
        ### Both are derived from the embedding so they follow its device and
        ### dtype; the model therefore also runs without CUDA.
        h0 = image_emb.unsqueeze(0)
        c0 = torch.zeros_like(h0)

        ### The decoder gets no real input: one all-zero step per label
        zero_input = image_emb.new_zeros(image_emb.size(0), self.num_labels, 1)

        ### Run through LSTM decoder -> (batch, num_labels, hidden)
        hidden_layers, _ = self.lstm(zero_input, (h0, c0))

        ### nn.Linear acts on the last dimension, so every step is scored in
        ### a single call instead of unbinding and re-stacking the steps.
        logits = self.prediction_layer(hidden_layers)

        return torch.sigmoid(logits)