<a href="https://colab.research.google.com/github/prakashaditya369/mutlimodal-models/blob/main/Facebook_Hateful_Memes_%5BModel_1%5D.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import pandas as pd
import numpy as np
import skimage.transform
from torch.utils.data import DataLoader, Dataset
import torchvision.transforms as transforms
from torchvision.transforms.functional import to_tensor
from PIL import Image
import torchvision.models as models
import matplotlib.pyplot as plt
from torchtext.data import Field
from torch.autograd import Variable
%matplotlib inline


# Pick the compute device once. The model classes and the train/evaluate
# loops further down read this module-level `device`.
USE_CUDA = torch.cuda.is_available()
device = torch.device("cuda" if USE_CUDA else "cpu")
# Uncomment the next line to force CPU execution:
# device = "cpu"

print("Using device : ", device)

In [None]:
# Load the train and dev splits. lines=True parses the files as JSON Lines
# (one record per line). Later cells read the 'text' and 'label' columns.
train_data = pd.read_json("/content/drive/My Drive/Facebook Hateful Memes/train.jsonl",lines = True)
dev_data = pd.read_json("/content/drive/My Drive/Facebook Hateful Memes/dev.jsonl",lines = True)

In [None]:
# Number of rows (examples) in the training split.
train_data.shape[0]

In [None]:
def progress_batch(i, Len, width=30):
  """Redraw a single-line '*' progress bar for batch ``i`` out of ``Len``.

  ``i`` is the 0-based batch index, so the counter shown is ``i+1``. The bar
  is ``width`` characters wide and is written with a leading carriage return
  and no newline, so successive calls overwrite each other in place.
  """
  done = i + 1
  # Same arithmetic as a percentage rounded down, then scaled to the bar width.
  filled = int(width * done*100/Len) // 100
  bar = '*' * filled + ' ' * (width - filled)
  print(f'\r[{bar}]{done!s}/{Len!s}', end='', flush=True)
def progress(percent=0, width=30):
  """Redraw a single-line '#' progress bar for a completion percentage.

  ``percent`` is on a 0-100 scale and is echoed with two decimals after the
  bar. Output starts with a carriage return and has no trailing newline, so
  repeated calls update the same console line.
  """
  filled = int(width * percent) // 100
  bar = '#' * filled + ' ' * (width - filled)
  print(f'\r[{bar}] {percent:.2f}%', end='', flush=True)

#Text Preprocessing.

In [None]:
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

##Fit on Training Data Texts

In [None]:
# Build the vocabulary from the training captions only. The tokenizer is
# created with an explicit '<OOV>' token for out-of-vocabulary words.
texts = train_data['text']
sentences = list(texts)
tokenizer  = Tokenizer(oov_token='<OOV>')
tokenizer.fit_on_texts(sentences)
# word -> integer index mapping; its size drives VOCAB_SIZE further down.
word_index = tokenizer.word_index
print("Vocab Size: ",len(word_index))

#Data Loader

In [None]:
class TextImageLabels(Dataset):
  """Dataset of aligned (text, image, label) triples.

  The three containers are indexed in lockstep; the dataset length is taken
  from the label container.
  """

  def __init__(self, text_tensor, image_tensor, label_tensor):
    self.text, self.image, self.labels = text_tensor, image_tensor, label_tensor

  def __len__(self):
    # One label per example, so the label count is the dataset size.
    return len(self.labels)

  def __getitem__(self, idx):
    text_item = self.text[idx]
    image_item = self.image[idx]
    label_item = self.labels[idx]
    return text_item, image_item, label_item

In [None]:
def get_dataset(PATH,data,tokenizer,maxlen=30):
  """Build a TextImageLabels dataset from a dataframe and a feature file.

  Args:
    PATH: path of a ``.npy`` file with one row of image features per example.
    data: table with a 'text' column and a 'label' column.
    tokenizer: fitted tokenizer exposing ``texts_to_sequences``.
    maxlen: length every token sequence is padded/truncated to (default 30,
      the value previously hard-coded here).

  Returns:
    TextImageLabels with text ids (long), flattened image features and
    labels shaped ``(n_examples, 1)``.

  Raises:
    ValueError: if the feature file and ``data`` have different row counts.
  """
  sentences = list(data['text'])
  sequences = tokenizer.texts_to_sequences(sentences)
  padded = pad_sequences(sequences,maxlen = maxlen)
  # to_tensor returns the 2-D array with an extra leading axis; [0] drops it.
  text_tensor = to_tensor(padded)[0].long()
  print("Text Tensor:",type(text_tensor),text_tensor.shape)
  # NOTE(review): allow_pickle=True lets np.load execute pickled objects.
  # Only load feature files from a trusted source; if the file is a plain
  # numeric array the flag can probably be dropped - confirm before changing.
  image = np.load(PATH,allow_pickle=True)
  # Flatten everything after the example axis into one feature vector.
  image = image.reshape(image.shape[0],-1)
  image_tensor = to_tensor(image).squeeze(0)
  print("Image Tensor:",type(image_tensor),image_tensor.shape)
  n_labels = len(data['label'])
  if image_tensor.size(0) != n_labels:
    # Previously this surfaced as an opaque shape error from .view().
    raise ValueError(
        "Feature file has {} rows but data has {} labels".format(
            image_tensor.size(0), n_labels))
  label_tensor = torch.Tensor(data['label']).view(image_tensor.size(0),1)
  print("Label Tensor:", type(label_tensor),label_tensor.shape)
  dataset = TextImageLabels(text_tensor,image_tensor,label_tensor)
  return dataset

In [None]:
# Feature file for the training split (loaded with np.load in get_dataset),
# paired with the training captions and labels.
PATH = "/content/drive/My Drive/Facebook Hateful Memes/train_channel_features.npy"
train_dataset = get_dataset(PATH,train_data,tokenizer)

#Model 1

In [None]:
# One more than the number of known words: presumably index 0 is reserved for
# padding (Keras tokenizer indices start at 1) - TODO confirm.
VOCAB_SIZE = len(word_index)+1
# Passed to Model as hidden_size: the embedding width and the per-direction
# LSTM width inside PreLSTM.
HIDDEN_SIZE = 64

In [None]:
class PreLSTM(nn.Module):
  """Token embedding followed by a bidirectional LSTM encoder.

  Args:
    vocab_size: number of rows in the embedding table.
    hidden_size: embedding width and per-direction LSTM width.
    LSTM_layers: number of stacked LSTM layers.
    dropout: dropout between stacked layers; forced to 0 when there is a
      single layer, where inter-layer dropout has nothing to act on.
  """
  def __init__(self,vocab_size,hidden_size = 64,LSTM_layers=2,dropout = 0.3):
    super(PreLSTM,self).__init__()
    self.vocab_size = vocab_size
    self.LSTM_layers = LSTM_layers
    self.hidden_size = hidden_size
    self.embed = nn.Embedding(vocab_size, hidden_size)
    self.biLSTM = nn.LSTM(hidden_size,hidden_size,dropout=(0 if LSTM_layers == 1 else dropout),num_layers = LSTM_layers,bidirectional = True)

  def forward(self,x,hidden = None):
    """Encode a batch of token-id sequences.

    Args:
      x: integer tensor, transposed before embedding so the LSTM receives
        the sequence axis first (i.e. ``x`` is expected as (batch, seq)).
      hidden: optional ``(h_0, c_0)`` initial state for the LSTM. ``None``
        (the default) starts from zeros.

    Returns:
      ``(output, (h_n, c_n))`` exactly as produced by ``nn.LSTM``.
    """
    embedded = self.embed(x.T)
    # The initial state used to be accepted but silently dropped; pass it on.
    output,hidden = self.biLSTM(embedded,hidden)
    return output,hidden

In [None]:
class Attn(torch.nn.Module):
    """Additive attention that pools encoder outputs into one context vector.

    The decoder state is paired with every encoder step, scored through a
    tanh layer and a 1-unit projection, and the scores are passed through
    ReLU (they are not normalised across steps) before weighting the
    encoder outputs.
    """

    def __init__(self,hidden_size=2*64):
        super().__init__()
        self.hidden_size = hidden_size
        # Input is [decoder state ; encoder output], hence twice the width.
        self.attn = torch.nn.Linear(2 * self.hidden_size, hidden_size)
        self.final = torch.nn.Linear(self.hidden_size,1)

    def forward(self, hidden, encoder_outputs):
        """Return the weighted sum of ``encoder_outputs`` over their first axis."""
        steps = encoder_outputs.size(0)
        # Repeat the decoder state once per encoder step.
        tiled = hidden.expand(steps,-1,-1)
        paired = torch.cat((tiled, encoder_outputs), -1)
        scores = self.final(torch.tanh(self.attn(paired)))
        weights = F.relu(scores)
        return torch.sum(encoder_outputs*weights,dim = 0)

In [None]:
class PostLSTM(nn.Module):
    """Attention-driven LSTMCell unrolled for a fixed number of steps.

    At each step the current hidden state attends over the encoder outputs,
    the resulting context vector is fed to the LSTMCell, and the ReLU of the
    new hidden state is stored as that step's output.

    Args:
        hidden_size: width of the hidden state and of the encoder outputs.
        output_size: number of steps to unroll (first axis of the result).
        n_layers: stored for reference only; not used by the computation.
        dropout: accepted for interface compatibility; not used.
    """
    def __init__(self,hidden_size=2*64, output_size=50, n_layers=1, dropout=0.1):
        super(PostLSTM, self).__init__()

        # Keep for reference
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.n_layers = n_layers
        self.lstm = nn.LSTMCell(hidden_size, hidden_size)
        # No device pinning here: the sub-module follows whatever .to(...)
        # is applied to this module, like the LSTMCell above.
        self.attn = Attn(hidden_size)

    def forward(self,last_hidden, encoder_outputs):
        """Unroll the cell.

        Args:
            last_hidden: ``(hidden, cell)`` initial state tuple.
            encoder_outputs: tensor whose first axis is attended over; its
                second and third axis sizes define the per-step output shape.

        Returns:
            Tensor of shape ``(output_size, encoder_outputs.shape[1],
            encoder_outputs.shape[2])``.
        """
        hidden,cellState = last_hidden
        # Allocate on the same device/dtype as the inputs rather than on a
        # module-level global, so the module works wherever it is moved.
        output = encoder_outputs.new_empty(
            (self.output_size,encoder_outputs.shape[1],encoder_outputs.shape[2]))
        for i in range(self.output_size):
          context = self.attn(hidden, encoder_outputs)
          hidden,cellState = self.lstm(context,(hidden,cellState))
          output[i,:,:] = F.relu(hidden)
        return output

In [None]:
class Concatenate(nn.Module):
    """Fuses the text sequence with image features and outputs a probability.

    The text branch reduces each step of the text sequence to one scalar; the
    image branch reduces the image feature vector to ``output_size`` scalars.
    Both are laid end to end as one sequence of scalars, run through a small
    bidirectional LSTM (4 output channels per step), flattened, and passed to
    a dense head ending in a sigmoid.

    Args:
        hidden_size: feature width of each text step.
        output_size: number of scalars the image branch produces.
        n_layers: number of layers of the fusion LSTM.
        dropout: inter-layer dropout of the fusion LSTM (0 if single layer).
        dropout_fc: dropout applied after the first dense layer of the head.
        text_steps: number of steps in the text sequence passed to forward
            (default 50, the value previously hard-coded here).
        image_features: width of the image feature vector (default 25088,
            the value previously hard-coded here).
    """
    def __init__(self,hidden_size=2*64, output_size=50, n_layers=2, dropout=0.01,dropout_fc = 0.2,
                 text_steps=50, image_features=25088):
        super(Concatenate, self).__init__()
        self.hidden_size = hidden_size
        self.final = torch.nn.Linear(self.hidden_size,1)
        self.image_layer = torch.nn.Linear(image_features,4096)
        self.image_layer1 = torch.nn.Linear(4096,4096)
        self.image_layer2 = torch.nn.Linear(4096,1000)
        self.image_layer3 = nn.Linear(1000,output_size)
        # 4 LSTM output channels for each of the (text + image) steps.
        self.final_one = torch.nn.Linear(4*(text_steps+output_size),128)
        self.final_one2 = torch.nn.Linear(128,64)
        self.drop = nn.Dropout(p =dropout_fc)
        self.choose = torch.nn.Linear(64,1)
        self.biLSTM = nn.LSTM(1,2,dropout=(0 if n_layers == 1 else dropout),num_layers = n_layers,bidirectional = True)

    def forward(self,x,image):
        """Return a ``(batch, 1)`` tensor of sigmoid probabilities.

        Args:
            x: text tensor shaped ``(text_steps, batch, hidden_size)``.
            image: image features shaped ``(batch, image_features)``.
        """
        # Text branch: one non-negative scalar per (step, sample).
        text_scores = F.relu(self.final(x))
        text_scores = text_scores.view(text_scores.shape[0],text_scores.shape[1])

        # Image branch: MLP down to output_size scalars per sample. It stays
        # on whatever device the inputs and weights already live on.
        feats = F.relu(self.image_layer(image))
        feats = F.relu(self.image_layer1(feats))
        feats = F.relu(self.image_layer2(feats))
        feats = F.relu(self.image_layer3(feats))
        feats = feats.t()  # -> (output_size, batch), same layout as the text

        # One sequence of scalars per sample: text steps then image scalars.
        fused = torch.cat((text_scores,feats),0)
        fused = fused.view(fused.shape[0],fused.shape[1],1)
        output,_ = self.biLSTM(fused)
        # Lay the four LSTM channels side by side: (batch, 4 * steps).
        output = torch.cat((output[:,:,0],output[:,:,1],output[:,:,2],output[:,:,3]),0).t()

        output = F.relu(self.final_one(output))
        output = self.drop(output)
        output = F.relu(self.final_one2(output))
        output = self.choose(output)
        return torch.sigmoid(output)

In [None]:
class Model(nn.Module):
  """End-to-end text+image classifier: PreLSTM -> PostLSTM -> Concatenate.

  The ``pre_*`` arguments configure the text encoder, the ``post_*``
  arguments the attention decoder, and ``pic_output_size`` / ``con_*`` /
  ``dropout_fc`` the fusion head. All three sub-modules are moved to the
  module-level ``device`` on construction.
  """
  def __init__(self,vocab_size,hidden_size = 64,pre_n_layers=2,pre_dropout = 0,post_output_size = 50,post_n_layers = 1,post_dropout = 0,pic_output_size = 50,con_n_layers = 1,con_dropout = 0,dropout_fc=0):
    super().__init__()
    # Position in the encoder's final-state stack of the first of the two
    # consecutive entries that get concatenated. It can be 0 or 2.
    self.idx = 0
    # The encoder is bidirectional, so downstream widths are doubled.
    bi_hidden = 2*hidden_size
    self.preLSTM = PreLSTM(vocab_size,hidden_size,pre_n_layers,pre_dropout).to(device)
    self.postLSTM = PostLSTM(bi_hidden,post_output_size, post_n_layers, post_dropout).to(device)
    self.concatenationModel = Concatenate(bi_hidden, pic_output_size, con_n_layers, con_dropout,dropout_fc).to(device)

  def forward(self,x,image):
    """Return the fusion head's output for token ids ``x`` and ``image`` features."""
    encoder_out,(h_n,c_n) = self.preLSTM(x)
    first,second = self.idx,self.idx+1
    # Join the two selected final states along the feature axis.
    final_hidden = torch.cat((h_n[first],h_n[second]),1)
    final_cell = torch.cat((c_n[first],c_n[second]),1)
    post_output = self.postLSTM((final_hidden,final_cell),encoder_out)
    return self.concatenationModel(post_output,image)

##Creating Model

In [None]:
# Instantiate the network with a single-layer encoder, 50 decoder steps,
# 64 image scalars and a 2-layer fusion LSTM, then place it on `device`.
model = Model(VOCAB_SIZE,HIDDEN_SIZE,pre_n_layers=1,pre_dropout = 0,post_output_size = 50,post_dropout = 0,pic_output_size = 64,con_n_layers = 2,con_dropout = 0.01,dropout_fc=0.2)
model.to(device)

# Optimizer and Loss Function

In [None]:
import torch.optim as optim
# Binary cross-entropy on the model's sigmoid output, optimised with Adadelta
# at its default settings. train() and evaluate() read these two globals.
criterion = nn.BCELoss()
optimizer = optim.Adadelta(model.parameters())

#Training Part

##Training and Evaluation Function

In [None]:
def train(model,dataset,epochs,batch_size=10):
  """Train ``model`` on ``dataset`` and print the mean loss after each epoch.

  Uses the module-level ``optimizer``, ``criterion`` and ``device``.

  Args:
    model: network called as ``model(text, image)``.
    dataset: yields ``(text, image, label)`` triples.
    epochs: number of passes over the dataset.
    batch_size: mini-batch size (batches are reshuffled every epoch).
  """
  train_dl = DataLoader(dataset,batch_size = batch_size,shuffle = True)
  length = len(train_dl)
  data_length = len(dataset)
  # Make sure dropout is active even if the model was left in eval mode.
  model.train()
  for epoch in range(epochs):
    running_loss = 0.0
    print("Epoch: {}".format(epoch+1))
    for i, (x,image,label) in enumerate(train_dl):
      progress_batch(i,length)
      x,image,label = x.to(device),image.to(device),label.to(device)
      optimizer.zero_grad()
      outputs = model(x,image)
      loss = criterion(outputs,label)
      loss.backward()
      optimizer.step()
      # The criterion returns a per-batch mean (assumes the default 'mean'
      # reduction), so weight it by the batch size before dividing by the
      # number of samples. Summing the raw means and dividing by the sample
      # count under-reported the loss by roughly a factor of batch_size.
      running_loss+=loss.item()*label.size(0)
    running_loss/=max(data_length,1)
    print("   Loss:",f' {running_loss:.5f}')
  print("Finished Training")

In [None]:
def evaluate(model,dataset):
  """Print accuracy (threshold 0.5) and mean loss of ``model`` on ``dataset``.

  Uses the module-level ``criterion`` and ``device``. The model is switched
  to eval mode for the duration of the call, so dropout does not perturb the
  metrics, and its previous train/eval mode is restored afterwards.

  Args:
    model: network called as ``model(text, image)``, returning (batch, 1)
      probabilities.
    dataset: yields ``(text, image, label)`` triples with (1,)-shaped labels.
  """
  correct = 0
  total = len(dataset)
  BATCH_SIZE = 10
  total_loss = 0
  val_dl = DataLoader(dataset,batch_size = BATCH_SIZE)
  was_training = model.training
  model.eval()
  try:
    with torch.no_grad():
      for x,image,label in val_dl:
        x,image,label = x.to(device),image.to(device),label.to(device)
        outputs = model(x,image)
        loss = criterion(outputs,label)
        predicted = (outputs.squeeze(1)>0.5).float()
        correct+=torch.sum(predicted==label.squeeze(1)).item()
        # Per-batch mean (assumes the default 'mean' reduction) weighted by
        # batch size, so the final division by the sample count is a true
        # per-sample average.
        total_loss+=loss.item()*label.size(0)
  finally:
    # Leave the model in whatever mode the caller had it in.
    model.train(was_training)
  denominator = max(total,1)  # avoid ZeroDivisionError on an empty dataset
  total_loss/=denominator
  accuracy = correct/denominator
  print("Accuracy: {} || Loss: {:.5f}".format(accuracy,total_loss))


##Saving and Loading Function

In [None]:
from datetime import datetime
def save(model):
  """Write ``model``'s state dict to a timestamp-named file and print its path.

  The file name is the current local time formatted as day-month-year and
  hour:minute:second, placed in the Drive "Model" folder.
  """
  model_dir = "/content/drive/My Drive/Facebook Hateful Memes/Model/"
  stamp = datetime.now().strftime("%d-%m-%Y:%H:%M:%S")
  PATH = model_dir + stamp
  weights = model.state_dict()
  torch.save(weights, PATH)
  print("Successfully Saved at",PATH)

In [None]:
def load(PATH =None, **model_kwargs):
  """Rebuild a Model and load saved weights into it.

  Args:
    PATH: checkpoint written by ``save``; defaults to a fixed file in the
      Drive "Model" folder.
    **model_kwargs: extra keyword arguments forwarded to ``Model`` (e.g.
      ``pre_n_layers``, ``pic_output_size``, ``con_n_layers``). They must
      match the configuration the checkpoint was trained with, otherwise
      ``load_state_dict`` raises on mismatched keys or shapes. With no
      extras the Model defaults are used, as before.

  Returns:
    The model, on the module-level ``device``, with weights loaded.
  """
  model = Model(VOCAB_SIZE,HIDDEN_SIZE,**model_kwargs)
  model.to(device)
  if PATH is None:
    PATH = "/content/drive/My Drive/Facebook Hateful Memes/Model/20-08-2020:13:41:38"  #Change this to original file.
  # map_location remaps tensors saved from a GPU session so the checkpoint
  # can also be opened on a CPU-only runtime.
  model.load_state_dict(torch.load(PATH, map_location=device))
  return model

In [None]:
# Train for 7 epochs with mini-batches of 20 examples.
train(model,train_dataset,7,batch_size=20)

In [None]:
# Persist the trained weights under a timestamped file name.
save(model)

In [None]:
# Validation dataset, tokenised with the tokenizer fitted on the training text.
dev_dataset = get_dataset("/content/drive/My Drive/Facebook Hateful Memes/dev_channel_features.npy",dev_data,tokenizer)

In [None]:
# Report accuracy and loss on the training split, then on the dev split.
print("Training Accuracy:")
evaluate(model,train_dataset)
print("Validation Accuracy:")
evaluate(model,dev_dataset)

##Testing Part

In [None]:
# Prepare the test-set inputs. This repeats the text and image steps of
# get_dataset inline without building labels - presumably because the test
# split has no 'label' column (TODO confirm).
test_data = pd.read_json("/content/drive/My Drive/Facebook Hateful Memes/test.jsonl",lines = True)
print(test_data['id'].shape)
test_PATH = "/content/drive/My Drive/Facebook Hateful Memes/test_channel_features.npy"
print(test_data.shape[0])
# Kept so predictions can be written out next to their example ids.
ids = test_data['id']
texts = test_data['text']
sentences = list(texts)
word_index = tokenizer.word_index
sequences = tokenizer.texts_to_sequences(sentences)
# Same padded length (30) as get_dataset uses for the train/dev text.
padded = pad_sequences(sequences,maxlen = 30)
text_tensor = to_tensor(padded)[0].long()
print("Text Tensor:",type(text_tensor),text_tensor.shape)
image = np.load(test_PATH,allow_pickle=True)
# Flatten everything after the example axis into one feature vector.
image = image.reshape(image.shape[0],-1)
image_tensor = to_tensor(image).squeeze(0)
print(image_tensor.shape)

In [None]:
# Switch to inference mode first: the model contains dropout layers, and
# leaving them active would make the submitted probabilities random and
# non-reproducible.
model.eval()
with torch.no_grad():
  # The whole test set is scored in a single forward pass.
  x = text_tensor.to(device)
  image = image_tensor.to(device)
  outputs = model(x,image)
  outputs = outputs.squeeze(1)
  # Hard labels use the same 0.5 threshold as evaluate().
  predicted = (outputs>0.5).int()
final_output = outputs.cpu().numpy()
final_predicted = predicted.cpu().numpy()
print(ids.shape,final_output.shape,final_predicted.shape)

In [None]:
# Submission table: example id, predicted probability (sigmoid output) and
# the hard label obtained by thresholding that probability at 0.5.
data = {'id':list(ids),'proba':list(final_output),'label':list(final_predicted)}
df = pd.DataFrame(data)

In [None]:
# Preview the first rows of the submission table.
df.head()

In [None]:
# Write the submission as a timestamp-named CSV, without the index column.
now = datetime.now()
dt_string = now.strftime("%d-%m-%Y:%H:%M:%S")
PATH = "/content/drive/My Drive/Facebook Hateful Memes/SubmissionFile/"+dt_string+".csv"
df.to_csv(PATH,index=False)
print("Saved CSV File at",PATH)