## Import Libraries

In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.models as models
from efficientnet_pytorch import EfficientNet
from torch.nn import BCEWithLogitsLoss, BCELoss
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler
from keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.metrics import classification_report, confusion_matrix, multilabel_confusion_matrix, f1_score, accuracy_score, roc_auc_score
import pickle
from transformers import RobertaTokenizer,RobertaModel, XLNetTokenizer, RobertaTokenizer, BertForSequenceClassification, XLNetForSequenceClassification, RobertaModel, AdamW
from tqdm import tqdm, trange
from ast import literal_eval

Using TensorFlow backend.


In [2]:
# from google.colab import drive
# drive.mount('/content/drive')

In [3]:
device_name = tf.test.gpu_device_name()
if device_name != '/device:GPU:0':
  raise SystemError('GPU device not found')
print('Found GPU at: {}'.format(device_name))

Found GPU at: /device:GPU:0


In [4]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
n_gpu = torch.cuda.device_count()
torch.cuda.get_device_name(0)

'GeForce GTX 1060'

## Load and Preprocess Training Data

Dataset will be tokenized then split into training and validation sets. The validation set will be used to monitor training. For testing a separate test set will be loaded for analysis.

In [5]:
sample_size = 4365 #4365

In [6]:
with open('../data/public_train/train_np_img_norm','rb') as f: X_img_train = pickle.load(f)
X_img_train.shape

(4365, 224, 224, 3)

In [7]:
with open('../data/public_train/test_np_img_norm', 'rb') as f: X_img_test = pickle.load(f)
X_img_test.shape

(1213, 224, 224, 3)

In [8]:
with open('../data/public_train/val_np_img_norm', 'rb') as f: X_img_val = pickle.load(f)
X_img_val.shape


(486, 224, 224, 3)

In [9]:
X_img_train = X_img_train[:sample_size]
X_img_val = X_img_val
X_img_test = X_img_test

In [10]:
X_img_val = np.reshape(X_img_val, (X_img_val.shape[0], 3, 224, 224))
X_img_val.shape

(486, 3, 224, 224)

In [11]:
X_img_test = np.reshape(X_img_test, (X_img_test.shape[0], 3, 224, 224))

In [12]:
X_img_train = np.reshape(X_img_train, (X_img_train.shape[0], 3, 224, 224))

In [13]:
df = pd.read_csv('../data/public_train/train_data.csv')
df.head()

Unnamed: 0,id,image_id,Angry,Disgust,Fear,Happy,Sad,Surprise,Neutral,Others,dialog,narration,text,text_clean,emotion_list
0,575.0,1308_48_2,1,0,0,1,0,0,0,0,['wait a minute im not going to hurt you !'],[],['wait a minute im not going to hurt you !'],['wait a minute i am not going to hurt you !'],"['Angry', 'Happy']"
1,5395.0,3766_29_2,0,1,0,1,0,0,1,0,[' hear that trody ? they meed a nsw carew maa...,[],[' hear that trody ? they meed a nsw carew maa...,"['he thought they need a new careman , looks l...","['Disgust', 'Happy', 'Neutral']"
2,2004.0,2112_17_7,1,1,0,0,0,0,0,0,['the comet leaps into action his bouyancy all...,['the comet leaps into action his bouyancy all...,['the comet leaps into action his bouyancy all...,"['the comet leaps into action , his bouyancy a...","['Angry', 'Disgust']"
3,4863.0,3458_16_7,0,0,0,0,0,0,1,0,"['its in there . isnt mate ?', ""yeah - t ' s i...",[],"['its in there . isnt mate ?', ""yeah - t ' s i...","['is it there .is not mate?', 'yeah t is in ...",['Neutral']
4,5146.0,2338_19_3,0,0,1,0,0,1,1,0,"['listen und pass der yord along . bzzzz21', '...",[],"['listen und pass der yord along . bzzzz21', '...","['listen and pass your way . bzzzz21 .', '...","['Fear', 'Surprise', 'Neutral']"


In [14]:
len(df)

4365

In [15]:
df = df[:sample_size]

In [16]:
print('Unique ocr_texts: ', df.text_clean.nunique() == df.shape[0])
print('Null values: ', df.isnull().values.any())
# df[df.isna().any(axis=1)]

Unique comments:  False
Null values:  False


In [17]:
print('average sentence length: ', df.text_clean.str.split().str.len().mean())
print('stdev sentence length: ', df.text_clean.str.split().str.len().std())

average sentence length:  25.596334478808707
stdev sentence length:  24.795639258733278


In [18]:
cols = df.columns
label_cols = list(cols[2:10])
num_labels = len(label_cols)
print('Label columns: ', label_cols)

Label columns:  ['Angry', 'Disgust', 'Fear', 'Happy', 'Sad', 'Surprise', 'Neutral', 'Others']


In [19]:
print('Count of 1 per label: \n', df[label_cols].sum(), '\n') # Label counts, may need to downsample or upsample
print('Count of 0 per label: \n', df[label_cols].eq(0).sum())

Count of 1 per label: 
 Angry       1746
Disgust     1589
Fear        1472
Happy       1776
Sad          632
Surprise    1502
Neutral     2962
Others       303
dtype: int64 

Count of 0 per label: 
 Angry       2619
Disgust     2776
Fear        2893
Happy       2589
Sad         3733
Surprise    2863
Neutral     1403
Others      4062
dtype: int64


In [20]:
# df = df.sample(frac=1).reset_index(drop=True) #shuffle rows

In [21]:
df['one_hot_labels'] = list(df[label_cols].values)
df.head()

Unnamed: 0,id,image_id,Angry,Disgust,Fear,Happy,Sad,Surprise,Neutral,Others,dialog,narration,text,text_clean,emotion_list,one_hot_labels
0,575.0,1308_48_2,1,0,0,1,0,0,0,0,['wait a minute im not going to hurt you !'],[],['wait a minute im not going to hurt you !'],['wait a minute i am not going to hurt you !'],"['Angry', 'Happy']","[1, 0, 0, 1, 0, 0, 0, 0]"
1,5395.0,3766_29_2,0,1,0,1,0,0,1,0,[' hear that trody ? they meed a nsw carew maa...,[],[' hear that trody ? they meed a nsw carew maa...,"['he thought they need a new careman , looks l...","['Disgust', 'Happy', 'Neutral']","[0, 1, 0, 1, 0, 0, 1, 0]"
2,2004.0,2112_17_7,1,1,0,0,0,0,0,0,['the comet leaps into action his bouyancy all...,['the comet leaps into action his bouyancy all...,['the comet leaps into action his bouyancy all...,"['the comet leaps into action , his bouyancy a...","['Angry', 'Disgust']","[1, 1, 0, 0, 0, 0, 0, 0]"
3,4863.0,3458_16_7,0,0,0,0,0,0,1,0,"['its in there . isnt mate ?', ""yeah - t ' s i...",[],"['its in there . isnt mate ?', ""yeah - t ' s i...","['is it there .is not mate?', 'yeah t is in ...",['Neutral'],"[0, 0, 0, 0, 0, 0, 1, 0]"
4,5146.0,2338_19_3,0,0,1,0,0,1,1,0,"['listen und pass der yord along . bzzzz21', '...",[],"['listen und pass der yord along . bzzzz21', '...","['listen and pass your way . bzzzz21 .', '...","['Fear', 'Surprise', 'Neutral']","[0, 0, 1, 0, 0, 1, 1, 0]"


In [22]:
train_labels = list(df.one_hot_labels.values)
ocr_texts = list(df.text_clean.values)

Load the pretrained tokenizer that corresponds to your choice in model. e.g.,

```
BERT:
tokenizer = RobertaTokenizer.from_pretrained('roberta-base', do_lower_case=True) 

XLNet:
tokenizer = XLNetTokenizer.from_pretrained('xlnet-base-cased', do_lower_case=False) 

RoBERTa:
tokenizer = RobertaTokenizer.from_pretrained('roberta-base', do_lower_case=False)
```


In order to avoid memory issues with Google Colab, I enforce a max_length of 100 tokens. Note that some sentences may not adequately represent each label because of this.

In [23]:
max_length = 35
tokenizer = RobertaTokenizer.from_pretrained('roberta-base', do_lower_case=True) # tokenizer
encodings = tokenizer.batch_encode_plus(ocr_texts,max_length=max_length,pad_to_max_length=True) # tokenizer's encoding method
print('tokenizer outputs: ', encodings.keys())

Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.


tokenizer outputs:  dict_keys(['input_ids', 'attention_mask'])


In [24]:
encodings.keys()

dict_keys(['input_ids', 'attention_mask'])

In [25]:
train_inputs = encodings['input_ids'] # tokenized and encoded sentences
# train_token_types = encodings['token_type_ids'] # token type ids
train_masks = encodings['attention_mask'] # attention masks

Be sure to handle all classes during validation using "stratify" during train/validation split:

In [28]:
# Convert all of our data into torch tensors, the required datatype for our model
train_inputs = torch.tensor(train_inputs)
train_labels = torch.tensor(train_labels)
train_masks = torch.tensor(train_masks)

In [29]:
validation_df = pd.read_csv('../data/public_train/val_data.csv')
# validation_labels_df = pd.read_csv('validation_labels.csv')
# validation_df = validation_df.merge(validation_labels_df, on='id', how='left')
validation_label_cols = list(validation_df.columns[2:10])
print('Null values: ', validation_df.isnull().values.any()) #should not be any null sentences or labels
print('Same columns between train and validation: ', label_cols == validation_label_cols) #columns should be the same
validation_df.head()

Null values:  False
Same columns between train and validation:  True


Unnamed: 0,id,image_id,Angry,Disgust,Fear,Happy,Sad,Surprise,Neutral,Others,dialog,narration,text,text_clean,emotion_list
0,340.0,1179_7_2,0,0,0,0,1,1,0,0,['oops ! cheap twine or must have gained weigh...,"['plummets down', 'he vaul7 front']",['oops ! cheap twine or must have gained weigh...,['ops ! cheap twine or must have gained weig...,"['Sad', 'Surprise']"
1,3831.0,2258_29_2,1,1,1,1,0,0,1,0,"[' just a minute , you ...']",[],"[' just a minute , you ...']","['just a minute , you . . . .']","['Angry', 'Disgust', 'Fear', 'Happy', 'Neutral']"
2,2465.0,3832_23_3,1,1,0,0,1,0,1,0,"[""they think they bungled and are going to rep...",[],"[""they think they bungled and are going to rep...",['they think they bungled and are going to rep...,"['Angry', 'Disgust', 'Sad', 'Neutral']"
3,672.0,1377_39_7,1,0,0,0,0,1,1,0,['owuch'],[],['owuch'],['wow .'],"['Angry', 'Surprise', 'Neutral']"
4,5145.0,777_20_0,1,0,0,0,0,0,1,0,"[""look nightmare ! o he ' s gone through w the...",[],"[""look nightmare ! o he ' s gone through w the...",['look nightmare ! he is gone through the we...,"['Angry', 'Neutral']"


In [30]:
validation_df = validation_df[~validation_df[validation_label_cols].eq(-1).any(axis=1)] #remove irrelevant rows/ocr_texts with -1 values
validation_df['one_hot_labels'] = list(validation_df[validation_label_cols].values)
validation_df.head()

Unnamed: 0,id,image_id,Angry,Disgust,Fear,Happy,Sad,Surprise,Neutral,Others,dialog,narration,text,text_clean,emotion_list,one_hot_labels
0,340.0,1179_7_2,0,0,0,0,1,1,0,0,['oops ! cheap twine or must have gained weigh...,"['plummets down', 'he vaul7 front']",['oops ! cheap twine or must have gained weigh...,['ops ! cheap twine or must have gained weig...,"['Sad', 'Surprise']","[0, 0, 0, 0, 1, 1, 0, 0]"
1,3831.0,2258_29_2,1,1,1,1,0,0,1,0,"[' just a minute , you ...']",[],"[' just a minute , you ...']","['just a minute , you . . . .']","['Angry', 'Disgust', 'Fear', 'Happy', 'Neutral']","[1, 1, 1, 1, 0, 0, 1, 0]"
2,2465.0,3832_23_3,1,1,0,0,1,0,1,0,"[""they think they bungled and are going to rep...",[],"[""they think they bungled and are going to rep...",['they think they bungled and are going to rep...,"['Angry', 'Disgust', 'Sad', 'Neutral']","[1, 1, 0, 0, 1, 0, 1, 0]"
3,672.0,1377_39_7,1,0,0,0,0,1,1,0,['owuch'],[],['owuch'],['wow .'],"['Angry', 'Surprise', 'Neutral']","[1, 0, 0, 0, 0, 1, 1, 0]"
4,5145.0,777_20_0,1,0,0,0,0,0,1,0,"[""look nightmare ! o he ' s gone through w the...",[],"[""look nightmare ! o he ' s gone through w the...",['look nightmare ! he is gone through the we...,"['Angry', 'Neutral']","[1, 0, 0, 0, 0, 0, 1, 0]"


In [31]:
len(validation_df)

486

In [32]:
X_img_val.shape

(486, 3, 224, 224)

In [33]:
# Gathering input data
validation_labels = list(validation_df.one_hot_labels.values)
validation_ocr_texts = list(validation_df.text_clean.values)

In [34]:
# Encoding input data
validation_encodings = tokenizer.batch_encode_plus(validation_ocr_texts,max_length=max_length,pad_to_max_length=True)
validation_input_ids = validation_encodings['input_ids']
# validation_token_type_ids = validation_encodings['token_type_ids']
validation_attention_masks = validation_encodings['attention_mask']

In [35]:
# Make tensors out of data
validation_inputs = torch.tensor(validation_input_ids)
validation_labels = torch.tensor(validation_labels)
validation_masks = torch.tensor(validation_attention_masks)

In [36]:
test_df = pd.read_csv('../data/public_train/test_data.csv')
# test_labels_df = pd.read_csv('test_labels.csv')
# test_df = test_df.merge(test_labels_df, on='id', how='left')
test_label_cols = list(test_df.columns[2:10])
print('Null values: ', test_df.isnull().values.any()) #should not be any null sentences or labels
print('Same columns between train and test: ', label_cols == test_label_cols) #columns should be the same
test_df.head()

Null values:  False
Same columns between train and test:  True


Unnamed: 0,id,image_id,Angry,Disgust,Fear,Happy,Sad,Surprise,Neutral,Others,dialog,narration,text,text_clean,emotion_list
0,4184.0,3812_3_3,1,1,1,0,0,0,1,0,['or this what kind of reputation are these pi...,"[""tha day prog ahranean the recovery ou thats ...",['or this what kind of reputation are these pi...,['or this what kind of reputation are these pi...,"['Angry', 'Disgust', 'Fear', 'Neutral']"
1,132.0,1088_28_3,1,0,1,0,0,0,1,0,"[""he ' s not telling all he knows do you think...",[],"[""he ' s not telling all he knows do you think...","['he is not telling all he knows , do you thin...","['Angry', 'Fear', 'Neutral']"
2,3543.0,479_14_0,1,0,0,0,0,0,1,0,"[""you big stupid why don ' t you watch where y...",[],"[""you big stupid why don ' t you watch where y...",['you big stupid why do not you watch where yo...,"['Angry', 'Neutral']"
3,4692.0,859_24_1,1,1,1,0,0,1,1,0,"[""fight it out with him it ' s the gallows if ...",[],"[""fight it out with him it ' s the gallows if ...",['fight it out with him it is the gallows if h...,"['Angry', 'Disgust', 'Fear', 'Surprise', 'Neut..."
4,4762.0,2260_47_8,0,0,1,1,0,0,1,0,['this way to the roof'],['the fleeing boxer and photographer'],"['this way to the roof', 'the fleeing boxer an...","['this way to the roof .', 'the fleeing boxe...","['Fear', 'Happy', 'Neutral']"


In [37]:
test_df = test_df[~test_df[test_label_cols].eq(-1).any(axis=1)] #remove irrelevant rows/ocr_texts with -1 values
test_df['one_hot_labels'] = list(test_df[test_label_cols].values)
test_df.head()

Unnamed: 0,id,image_id,Angry,Disgust,Fear,Happy,Sad,Surprise,Neutral,Others,dialog,narration,text,text_clean,emotion_list,one_hot_labels
0,4184.0,3812_3_3,1,1,1,0,0,0,1,0,['or this what kind of reputation are these pi...,"[""tha day prog ahranean the recovery ou thats ...",['or this what kind of reputation are these pi...,['or this what kind of reputation are these pi...,"['Angry', 'Disgust', 'Fear', 'Neutral']","[1, 1, 1, 0, 0, 0, 1, 0]"
1,132.0,1088_28_3,1,0,1,0,0,0,1,0,"[""he ' s not telling all he knows do you think...",[],"[""he ' s not telling all he knows do you think...","['he is not telling all he knows , do you thin...","['Angry', 'Fear', 'Neutral']","[1, 0, 1, 0, 0, 0, 1, 0]"
2,3543.0,479_14_0,1,0,0,0,0,0,1,0,"[""you big stupid why don ' t you watch where y...",[],"[""you big stupid why don ' t you watch where y...",['you big stupid why do not you watch where yo...,"['Angry', 'Neutral']","[1, 0, 0, 0, 0, 0, 1, 0]"
3,4692.0,859_24_1,1,1,1,0,0,1,1,0,"[""fight it out with him it ' s the gallows if ...",[],"[""fight it out with him it ' s the gallows if ...",['fight it out with him it is the gallows if h...,"['Angry', 'Disgust', 'Fear', 'Surprise', 'Neut...","[1, 1, 1, 0, 0, 1, 1, 0]"
4,4762.0,2260_47_8,0,0,1,1,0,0,1,0,['this way to the roof'],['the fleeing boxer and photographer'],"['this way to the roof', 'the fleeing boxer an...","['this way to the roof .', 'the fleeing boxe...","['Fear', 'Happy', 'Neutral']","[0, 0, 1, 1, 0, 0, 1, 0]"


In [38]:
# Gathering input data
test_labels = list(test_df.one_hot_labels.values)
test_ocr_texts = list(test_df.text_clean.values)

In [39]:
# Encoding input data
test_encodings = tokenizer.batch_encode_plus(test_ocr_texts,max_length=max_length,pad_to_max_length=True)
test_input_ids = test_encodings['input_ids']
# test_token_type_ids = test_encodings['token_type_ids']
test_attention_masks = test_encodings['attention_mask']

In [40]:
# Make tensors out of data
test_inputs = torch.tensor(test_input_ids)
test_labels = torch.tensor(test_labels)
test_masks = torch.tensor(test_attention_masks)

In [41]:
text_train_data = TensorDataset(train_inputs, train_masks, train_labels)
img_train_data = TensorDataset(torch.from_numpy(X_img_train), train_labels)

text_val_data = TensorDataset(validation_inputs, validation_masks, validation_labels)
img_val_data = TensorDataset(torch.from_numpy(X_img_val), validation_labels)

text_test_data = TensorDataset(test_inputs, test_masks, test_labels)
img_test_data = TensorDataset(torch.from_numpy(X_img_test), test_labels)

batch_size = 16

text_train_loader = DataLoader(text_train_data, batch_size=batch_size)
img_train_loader = DataLoader(img_train_data, batch_size=batch_size)

text_val_loader = DataLoader(text_val_data, batch_size=batch_size)
img_val_loader = DataLoader(img_val_data, batch_size=batch_size)

text_test_loader = DataLoader(text_test_data, batch_size=batch_size)
img_test_loader = DataLoader(img_test_data, batch_size=batch_size)

print(len(text_train_loader), len(img_train_loader))
print(len(text_val_loader), len(img_val_loader))
print(len(text_test_loader), len(img_test_loader))

273 273
31 31
76 76


## Load Model & Set Params

In [42]:
class CNN_BERT(nn.Module):
  def __init__(self):
    super(CNN_BERT, self).__init__()

    # BERT for the text overview
    self.text_model = RobertaModel.from_pretrained('roberta-base')
    self.dropout = nn.Dropout(0.3)
    self.text_fc = nn.Linear(768,32)

    # CNN for the posters
    self.effnet = EfficientNet.from_pretrained('efficientnet-b2')
    self.effnet_fc = nn.Linear(1000, 32)
    self.n_out = 8
#     self.concat_dropout = nn.Dropout(0.1)
    self.output_fc = nn.Linear(64, self.n_out)


  def forward(self, input_ids, attention_mask, cnn_inp):
    text_outputs = self.text_model(input_ids, attention_mask)
#     text_outputs = text_outputs['last_hidden_state']
    text_outputs = text_outputs[0][:, 0, :]
    text_outputs = self.dropout(text_outputs)
    text_outputs = self.text_fc(text_outputs)
    
    x = self.effnet(cnn_inp)
    x = self.dropout(x)
    cnn_out = F.relu(self.effnet_fc(x))
    combined_inp = torch.cat((cnn_out, text_outputs), 1)
#     out = torch.sigmoid(self.output_fc(self.concat_dropout(combined_inp)))
    out = torch.sigmoid(self.output_fc(combined_inp))

    return out

In [43]:
model = CNN_BERT()
model.cuda()

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaModel: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Loaded pretrained weights for efficientnet-b2


CNN_BERT(
  (text_model): RobertaModel(
    (embeddings): RobertaEmbeddings(
      (word_embeddings): Embedding(50265, 768, padding_idx=1)
      (position_embeddings): Embedding(514, 768, padding_idx=1)
      (token_type_embeddings): Embedding(1, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): RobertaEncoder(
      (layer): ModuleList(
        (0): RobertaLayer(
          (attention): RobertaAttention(
            (self): RobertaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): RobertaSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,

Setting custom optimization parameters for the AdamW optimizer https://huggingface.co/transformers/main_classes/optimizer_schedules.html

In [44]:
# setting custom optimization parameters. You may implement a scheduler here as well.
param_optimizer = list(model.named_parameters())
no_decay = ['bias', 'gamma', 'beta']
optimizer_grouped_parameters = [
    {'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)],
     'weight_decay_rate': 0.01},
    {'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)],
     'weight_decay_rate': 0.01}
]

In [45]:
len(param_optimizer) #366

506

In [46]:
# optimizer_grouped_parameters

In [47]:
optimizer = AdamW(optimizer_grouped_parameters,lr=2e-5,correct_bias=True)
# optimizer = AdamW(model.parameters(),lr=1e-5,weight_decay=1e-2)  # Default optimization
# scheduler = torch.optim.lr_scheduler.OneCycleLR(optimizer, max_lr=0.1, steps_per_epoch=len(text_train_loader), epochs=5,anneal_strategy='linear')

## Train Model

In [48]:
# Store our loss and accuracy for plotting
train_loss_set = []
val_loss_set = []

# Number of training epochs (authors recommend between 2 and 4)
epochs = 6

# trange is a tqdm wrapper around the normal python range
for _ in trange(epochs, desc="Epoch"):

  # Training
  
  # Set our model to training mode (as opposed to evaluation mode)
  model.train()

  # Tracking variables
  tr_loss = 0 #running loss
  val_loss = 0 #running loss
  nb_tr_examples, nb_tr_steps = 0, 0
  nb_val_steps = 0
  
  # Train the data for one epoch
  for text_batch, img_batch in zip(text_train_loader,img_train_loader):
    # Add batch to GPU
    text_batch = tuple(t.to(device) for t in text_batch)
    img_batch = tuple(t.to(device) for t in img_batch)
    # Unpack the inputs from our dataloader
    b_input_ids, b_input_mask, b_labels = text_batch
    cnn_inp, cnn_labels = img_batch
    # Clear out the gradients (by default they accumulate)
    optimizer.zero_grad()


    # Forward pass for multilabel classification
    outputs = model(b_input_ids, b_input_mask,cnn_inp)
#     loss_func = BCEWithLogitsLoss()
    loss_func = BCELoss()
#     loss = loss_func(outputs.view(-1,num_labels),b_labels.type_as(outputs).view(-1,num_labels)) #convert labels to float for calculation
    loss = loss_func(outputs.squeeze(), b_labels.float())
    # loss_func = BCELoss() 
    # loss = loss_func(torch.sigmoid(logits.view(-1,num_labels)),b_labels.type_as(logits).view(-1,num_labels)) #convert labels to float for calculation
    train_loss_set.append(loss.item())    

    # Backward pass
    loss.backward()
    # Update parameters and take a step using the computed gradient
    optimizer.step()
#     scheduler.step()
    # Update tracking variables
    tr_loss += loss.item()
    nb_tr_examples += b_input_ids.size(0)
    nb_tr_steps += 1

  print("Train loss: {}".format(tr_loss/nb_tr_steps))

###############################################################################

  # Validation

  # Put model in evaluation mode to evaluate loss on the validation set
  model.eval()

  # Variables to gather full output
  logit_preds,true_labels,pred_labels,tokenized_texts = [],[],[],[]

  # Predict
  for text_batch, img_batch in zip(text_val_loader,img_val_loader):
    # Unpack the inputs from our dataloader
    text_batch = tuple(t.to(device) for t in text_batch)
    img_batch = tuple(t.to(device) for t in img_batch)
    b_input_ids, b_input_mask, b_labels = text_batch
    cnn_inp, cnn_labels = img_batch
    with torch.no_grad():
      # Forward pass
      outs = model(b_input_ids, b_input_mask,cnn_inp)
      
      v_loss = loss_func(outs.squeeze(), b_labels.float())
#       v_loss = loss_func(outs.view(-1,num_labels),b_labels.type_as(outs).view(-1,num_labels)) #convert labels to float for calculation
      val_loss_set.append(v_loss.item())  
      val_loss += v_loss.item()
      pred_label = outs.squeeze()

#       b_logit_pred = b_logit_pred.detach().cpu().numpy()
      pred_label = pred_label.to('cpu').numpy()
      b_labels = b_labels.to('cpu').numpy()

    tokenized_texts.append(b_input_ids)
#     logit_preds.append(b_logit_pred)
    true_labels.append(b_labels)
    pred_labels.append(pred_label)
    nb_val_steps += 1

  print("Val loss: {}".format(val_loss/nb_val_steps))
  # Flatten outputs
  pred_labels = [item for sublist in pred_labels for item in sublist]
  true_labels = [item for sublist in true_labels for item in sublist]
  
  # Calculate Accuracy
  threshold = 0.50
  pred_bools = [pl>threshold for pl in pred_labels]
  true_bools = [tl==1 for tl in true_labels]
  val_f1_accuracy = f1_score(true_bools,pred_bools,average='macro')*100
  val_flat_accuracy = accuracy_score(true_bools, pred_bools)*100
  val_roc_score = roc_auc_score(true_bools, pred_bools,average='macro')*100

  print('F1 Validation Accuracy: ', val_f1_accuracy)
  print('Flat Validation Accuracy: ', val_flat_accuracy)
  print('ROC AUC Score: ', val_roc_score)

Epoch:   0%|          | 0/6 [00:00<?, ?it/s]

Train loss: 0.5830012733464712


Epoch:  17%|█▋        | 1/6 [08:37<43:08, 517.66s/it]

Val loss: 0.5767187053157438
F1 Validation Accuracy:  19.4297565619797
Flat Validation Accuracy:  4.938271604938271
ROC AUC Score:  51.909010292892276
Train loss: 0.554130093518631


Epoch:  33%|███▎      | 2/6 [16:25<32:33, 488.40s/it]

Val loss: 0.5637662747213917
F1 Validation Accuracy:  31.199832368895947
Flat Validation Accuracy:  8.641975308641975
ROC AUC Score:  55.1909624976472
Train loss: 0.5291926261487898


Epoch:  50%|█████     | 3/6 [24:07<23:49, 476.39s/it]

Val loss: 0.5635578795786826
F1 Validation Accuracy:  33.36139870853949
Flat Validation Accuracy:  9.465020576131687
ROC AUC Score:  55.63976919743288
Train loss: 0.49686577195649617


Epoch:  67%|██████▋   | 4/6 [31:48<15:40, 470.41s/it]

Val loss: 0.5761572064891938
F1 Validation Accuracy:  35.920994789954655
Flat Validation Accuracy:  9.053497942386832
ROC AUC Score:  56.4919075159342
Train loss: 0.4610640043085748


Epoch:  83%|████████▎ | 5/6 [40:42<08:13, 493.14s/it]

Val loss: 0.5902829170227051
F1 Validation Accuracy:  36.3914379802183
Flat Validation Accuracy:  8.024691358024691
ROC AUC Score:  56.45966299829996
Train loss: 0.4182310619633713


Epoch: 100%|██████████| 6/6 [49:03<00:00, 490.64s/it]

Val loss: 0.6089247032519309
F1 Validation Accuracy:  39.327157884659755
Flat Validation Accuracy:  7.4074074074074066
ROC AUC Score:  57.44911751778554





In [50]:
# torch.save(model.state_dict(), 'roberta_effnet_model_1')

In [48]:
# model = CNN_BERT()
# model.load_state_dict(torch.load('roberta_effnet_model_1'))
# model.cuda()

## Load and Preprocess Test Data

## Prediction and Metics

In [51]:
# Test

# Put model in evaluation mode to evaluate loss on the validation set
model.eval()

#track variables
logit_preds,true_labels,pred_labels,tokenized_texts = [],[],[],[]

# Predict
for text_batch, img_batch in zip(text_test_loader,img_test_loader):
  text_batch = tuple(t.to(device) for t in text_batch)
  img_batch = tuple(t.to(device) for t in img_batch)
  b_input_ids, b_input_mask, b_labels = text_batch
  cnn_inp, cnn_labels = img_batch
    
  with torch.no_grad():
    # Forward pass
    outs = model(b_input_ids, b_input_mask,cnn_inp)
    pred_label = outs.squeeze()
    pred_label = pred_label.to('cpu').numpy()
    b_labels = b_labels.to('cpu').numpy()

  tokenized_texts.append(b_input_ids)
#   logit_preds.append(b_logit_pred)
  true_labels.append(b_labels)
  pred_labels.append(pred_label)

# Flatten outputs
tokenized_texts = [item for sublist in tokenized_texts for item in sublist]
pred_labels = [item for sublist in pred_labels for item in sublist]
true_labels = [item for sublist in true_labels for item in sublist]
# Converting flattened binary values to boolean values
true_bools = [tl==1 for tl in true_labels]

We need to threshold our sigmoid function outputs which range from [0, 1]. Below I use 0.50 as a threshold.

In [52]:
pred_bools = [pl>0.50 for pl in pred_labels] #boolean output after thresholding

# Print and save classification report
print('Test F1 Accuracy: ', f1_score(true_bools, pred_bools,average='micro'))
print('Test ROC AUC Score: ', roc_auc_score(true_bools, pred_bools,average='macro'))
print('Test Flat Accuracy: ', accuracy_score(true_bools, pred_bools),'\n')
clf_report = classification_report(true_bools,pred_bools,target_names=test_label_cols)
pickle.dump(clf_report, open('classification_report.txt','wb')) #save report
print(clf_report)

Test F1 Accuracy:  0.5262985417008028
Test ROC AUC Score:  0.5659047533037628
Test Flat Accuracy:  0.06430338004946413 

              precision    recall  f1-score   support

       Angry       0.56      0.39      0.46       474
     Disgust       0.52      0.38      0.44       450
        Fear       0.62      0.27      0.38       444
       Happy       0.58      0.66      0.61       516
         Sad       0.67      0.01      0.02       206
    Surprise       0.37      0.30      0.33       412
     Neutral       0.70      0.83      0.76       797
      Others       0.50      0.01      0.03        75

   micro avg       0.59      0.48      0.53      3374
   macro avg       0.56      0.36      0.38      3374
weighted avg       0.58      0.48      0.49      3374
 samples avg       0.61      0.51      0.52      3374



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


## Output Dataframe

In [53]:
idx2label = dict(zip(range(8),label_cols))
print(idx2label)

{0: 'Angry', 1: 'Disgust', 2: 'Fear', 3: 'Happy', 4: 'Sad', 5: 'Surprise', 6: 'Neutral', 7: 'Others'}


In [54]:
# Getting indices of where boolean one hot vector true_bools is True so we can use idx2label to gather label names
true_label_idxs, pred_label_idxs=[],[]
for vals in true_bools:
  true_label_idxs.append(np.where(vals)[0].flatten().tolist())
for vals in pred_bools:
  pred_label_idxs.append(np.where(vals)[0].flatten().tolist())

In [55]:
# Gathering vectors of label names using idx2label
true_label_texts, pred_label_texts = [], []
for vals in true_label_idxs:
  if vals:
    true_label_texts.append([idx2label[val] for val in vals])
  else:
    true_label_texts.append(vals)

for vals in pred_label_idxs:
  if vals:
    pred_label_texts.append([idx2label[val] for val in vals])
  else:
    pred_label_texts.append(vals)

In [56]:
# Decoding input ids to ocr text
ocr_texts = [tokenizer.decode(text,skip_special_tokens=True,clean_up_tokenization_spaces=False) for text in tokenized_texts]

In [57]:
# Converting lists to df
comparisons_df = pd.DataFrame({'ocr_text': ocr_texts, 'true_labels': true_label_texts, 'pred_labels':pred_label_texts})
# comparisons_df.to_csv('comparisons.csv')
comparisons_df.head()

Unnamed: 0,comment_text,true_labels,pred_labels
0,['or this what kind of reputation are these pi...,"[Angry, Disgust, Fear, Neutral]","[Surprise, Neutral]"
1,"['he is not telling all he knows , do you thin...","[Angry, Fear, Neutral]",[Neutral]
2,['you big stupid why do not you watch where yo...,"[Angry, Neutral]","[Angry, Neutral]"
3,['fight it out with him it is the gallows if h...,"[Angry, Disgust, Fear, Surprise, Neutral]","[Angry, Neutral]"
4,"['this way to the roof .', 'the fleeing boxe...","[Fear, Happy, Neutral]","[Angry, Neutral]"


## Bonus - Optimizing threshold value for macro ROC score

Doing this may result in a trade offs between precision, flat accuracy and micro F1 accuracy. You may tune the threshold however you want.

In [58]:
# Calculate Accuracy - maximize roc_auc score by tuning threshold values. First with 'macro_thresholds' on the order of e^-1 then with 'micro_thresholds' on the order of e^-2

macro_thresholds = np.array(range(1,10))/10

roc_auc_results, flat_acc_results = [], []
for th in macro_thresholds:
  pred_bools = [pl>th for pl in pred_labels]
  test_roc_auc_accuracy = roc_auc_score(true_bools,pred_bools,average='macro')
  test_flat_accuracy = accuracy_score(true_bools, pred_bools)
  roc_auc_results.append(test_roc_auc_accuracy)
  flat_acc_results.append(test_flat_accuracy)

best_macro_th = macro_thresholds[np.argmax(roc_auc_results)] #best macro threshold value

micro_thresholds = (np.array(range(10))/100)+best_macro_th #calculating micro threshold values

roc_auc_results, flat_acc_results = [], []
for th in micro_thresholds:
  pred_bools = [pl>th for pl in pred_labels]
  test_roc_auc_accuracy = roc_auc_score(true_bools,pred_bools,average='macro')
  test_flat_accuracy = accuracy_score(true_bools, pred_bools)
  roc_auc_results.append(test_roc_auc_accuracy)
  flat_acc_results.append(test_flat_accuracy)

best_roc_auc_idx = np.argmax(roc_auc_results) #best threshold value

# Printing and saving classification report
print('Best Threshold: ', micro_thresholds[best_roc_auc_idx])
print('Test roc_auc Accuracy: ', roc_auc_results[best_roc_auc_idx])
print('Test Flat Accuracy: ', flat_acc_results[best_roc_auc_idx], '\n')

best_pred_bools = [pl>micro_thresholds[best_roc_auc_idx] for pl in pred_labels]
clf_report_optimized = classification_report(true_bools,best_pred_bools, target_names=label_cols)
pickle.dump(clf_report_optimized, open('classification_report_optimized.txt','wb'))
print(clf_report_optimized)

Best Threshold:  0.4
Test roc_auc Accuracy:  0.5672297107264808
Test Flat Accuracy:  0.05358615004122012 

              precision    recall  f1-score   support

       Angry       0.52      0.45      0.48       474
     Disgust       0.50      0.53      0.51       450
        Fear       0.58      0.34      0.42       444
       Happy       0.55      0.72      0.63       516
         Sad       0.31      0.02      0.04       206
    Surprise       0.37      0.47      0.41       412
     Neutral       0.68      0.90      0.78       797
      Others       0.67      0.03      0.05        75

   micro avg       0.55      0.56      0.56      3374
   macro avg       0.52      0.43      0.42      3374
weighted avg       0.54      0.56      0.52      3374
 samples avg       0.57      0.59      0.55      3374



  _warn_prf(average, modifier, msg_start, len(result))


In [59]:
roc_auc_score(true_bools, best_pred_bools,average='macro')

0.5672297107264808