In [None]:
!pip install transformers==3.3.1

Collecting transformers==3.3.1
[?25l  Downloading https://files.pythonhosted.org/packages/19/22/aff234f4a841f8999e68a7a94bdd4b60b4cebcfeca5d67d61cd08c9179de/transformers-3.3.1-py3-none-any.whl (1.1MB)
[K     |████████████████████████████████| 1.1MB 11.4MB/s 
[?25hCollecting tokenizers==0.8.1.rc2
[?25l  Downloading https://files.pythonhosted.org/packages/75/26/c02ba92ecb8b780bdae4a862d351433c2912fe49469dac7f87a5c85ccca6/tokenizers-0.8.1rc2-cp37-cp37m-manylinux1_x86_64.whl (3.0MB)
[K     |████████████████████████████████| 3.0MB 35.2MB/s 
Collecting sacremoses
[?25l  Downloading https://files.pythonhosted.org/packages/75/ee/67241dc87f266093c533a2d4d3d69438e57d7a90abb216fa076e7d475d4a/sacremoses-0.0.45-py3-none-any.whl (895kB)
[K     |████████████████████████████████| 901kB 36.4MB/s 
[?25hCollecting sentencepiece!=0.1.92
[?25l  Downloading https://files.pythonhosted.org/packages/f5/99/e0808cb947ba10f575839c43e8fafc9cc44e4a7a2c8f79c60db48220a577/sentencepiece-0.1.95-cp37-cp37m-many

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

plt.style.use("ggplot")

import torch
import torch.nn as nn
import torchvision.transforms as transforms
import torchvision.models as models

from zipfile import ZipFile
import os
import time
from shutil import copy2
from torch.utils.data import DataLoader
from transformers import AdamW,get_linear_schedule_with_warmup,AutoModel,AutoTokenizer
from PIL import Image

In [None]:
# Unpack the training-image archive into the current working directory.
extract_path = '/content/drive/MyDrive/Colab Notebooks/Datasets and weights/Tamil_troll_memes/training_img.zip'
with ZipFile(extract_path) as archive:
  archive.extractall()

In [None]:
def move_data(start,troll,not_troll):
  """Copy every entry of ``start`` into one of two class folders.

  Entries whose name starts with 'N' are copied into ``not_troll``; every
  other entry is copied into ``troll``. The originals stay in ``start``
  because the files are copied with ``copy2``, not moved.
  """
  for img_name in os.listdir(start):
    destination = not_troll if img_name.startswith('N') else troll
    copy2(os.path.join(start,img_name), destination)

# Create the class folders at the exact absolute paths that move_data copies
# into. If a destination is not an existing directory, copy2 writes a *file*
# with that name instead, so the folder must exist at the same path.
# exist_ok=True keeps the cell re-runnable on a kernel that already ran it.
src = '/content/uploaded_tamil_memes'
troll_dir = '/content/Troll'
non_troll_dir = '/content/Non_troll'
os.makedirs(troll_dir, exist_ok=True)
os.makedirs(non_troll_dir, exist_ok=True)
move_data(src, troll_dir, non_troll_dir)

def split_data(start,train,val,split):
  """Copy the entries of ``start`` into a validation and a training folder.

  The directory listing is sorted by name first, because ``os.listdir``
  returns entries in arbitrary order and the split would otherwise change
  between runs or machines. The first ``split`` names (in sorted order) are
  copied into ``val``; all remaining names are copied into ``train``.

  Args:
    start: directory holding the files of one class.
    train: existing directory that receives the training files.
    val: existing directory that receives the validation files.
    split: number of files sent to ``val``.
  """
  for i, img_name in enumerate(sorted(os.listdir(start))):
    src = os.path.join(start,img_name)
    if i < split:
      copy2(src,val)
    else:
      copy2(src,train)

# The split folders are created at the same absolute paths that split_data
# copies into (copy2 would otherwise create a plain file named like the
# folder). exist_ok=True lets the cell be re-run without raising.
train_dir = '/content/Train'
val_dir = '/content/Val'
os.makedirs(train_dir, exist_ok=True)
os.makedirs(val_dir, exist_ok=True)

# The last argument is the number of images of each class sent to validation.
split_data('/content/Troll', train_dir, val_dir, 128)
split_data('/content/Non_troll', train_dir, val_dir, 101)

In [None]:
class TamilDataset(torch.utils.data.Dataset):
  """Dataset yielding an image, its caption tokens and a binary label.

  Each row of ``df`` must unpack into exactly two values: the image file
  name and its caption. The label is derived from the file name: 0 when the
  name starts with 'N', 1 otherwise.
  """

  def __init__(self,df,tokenizer,max_len,path,transforms=None):
    # Directory the image file names are resolved against.
    self.data_dir = path
    self.df = df
    self.tokenizer = tokenizer
    # Optional callable applied to the loaded RGB image.
    self.transforms = transforms
    # Captions are padded / truncated to this many tokens.
    self.max_len = max_len

  def __len__(self):
    return len(self.df)

  def __getitem__(self,index):
    img_name, captions = self.df.iloc[index]
    labels = int(not img_name.startswith('N'))

    img = Image.open(os.path.join(self.data_dir,img_name)).convert('RGB')
    if self.transforms is not None:
      img = self.transforms(img)

    encoding = self.tokenizer.encode_plus(
        captions,
        add_special_tokens=True,
        max_length = self.max_len,
        return_token_type_ids = False,
        padding = 'max_length',
        return_attention_mask= True,
        return_tensors='pt',
        truncation=True
    )

    # The encoder output is flattened so that each item is one-dimensional.
    sample = {
        'image' : img,
        'text' : captions,
        'input_ids' : encoding['input_ids'].flatten(),
        'attention_mask' : encoding['attention_mask'].flatten(),
        'label' : torch.tensor(labels,dtype=torch.float)
    }
    return sample

In [None]:
def build_caption_frame(image_dir, captions_df):
  """Return a ['img_name', 'captions'] frame with one row per file in ``image_dir``.

  The caption is looked up by image name rather than by feeding an index
  label to ``.iloc`` (which is only correct for a default RangeIndex), and
  the lookup table is built once instead of scanning the whole frame for
  every image. When an image name occurs several times in ``captions_df``
  the first row wins.

  Raises:
    KeyError: if a file in ``image_dir`` has no row in ``captions_df``.
  """
  caption_by_name = (
      captions_df.drop_duplicates('imagename')
      .set_index('imagename')['captions']
  )
  rows = [[img_name, caption_by_name.loc[img_name]] for img_name in os.listdir(image_dir)]
  return pd.DataFrame(rows, columns=['img_name','captions'])


df = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/Datasets and weights/Tamil_troll_memes/train_captions.csv')
df.drop('Unnamed: 0',axis=1,inplace=True)

train_df = build_caption_frame('/content/Train', df)
val_df = build_caption_frame('/content/Val', df)

In [None]:
def create_data_loader(df,tokenizer,max_len,batch_size,mytransforms,path,shuffle):
  """Wrap ``df`` in a TamilDataset and return a DataLoader over it.

  Args:
    df: frame whose rows are (image name, caption).
    tokenizer: tokenizer handed to the dataset.
    max_len: token length captions are padded / truncated to.
    batch_size: number of samples per batch.
    mytransforms: image transform applied by the dataset (may be None).
    path: directory containing the image files.
    shuffle: whether the loader reshuffles the samples every epoch.
  """
  ds = TamilDataset(
      df,
      tokenizer,
      max_len,
      path,
      mytransforms
  )

  # Honour the caller's choice: the flag used to be ignored (hard-coded
  # False), so the training loader was never shuffled.
  return DataLoader(ds,
                    batch_size = batch_size,
                    shuffle=shuffle,
                    num_workers=2)

In [None]:
# Checkpoint name used to load the caption tokenizer.
PRE_TRAINED_MODEL_NAME = 'distilbert-base-multilingual-cased'

# Use the GPU when one is visible to torch, otherwise fall back to the CPU.
if torch.cuda.is_available():
  device = 'cuda'
else:
  device = 'cpu'

tokenizer = AutoTokenizer.from_pretrained(PRE_TRAINED_MODEL_NAME)

In [None]:
BATCH_SIZE = 16
MAX_LEN = 128

# Every image is resized to 300x300 and then converted to a tensor.
my_trans = transforms.Compose([transforms.Resize((300,300)), transforms.ToTensor()])

train_data_loader = create_data_loader(
    df=train_df,
    tokenizer=tokenizer,
    max_len=MAX_LEN,
    batch_size=BATCH_SIZE,
    mytransforms=my_trans,
    path='/content/Train',
    shuffle=True,
)
val_data_loader = create_data_loader(
    df=val_df,
    tokenizer=tokenizer,
    max_len=MAX_LEN,
    batch_size=BATCH_SIZE,
    mytransforms=my_trans,
    path='/content/Val',
    shuffle=False,
)

In [None]:
class Inception(nn.Module):
  """Inception v3 feature extractor that stops after the Mixed_6d stage.

  The full torchvision model is kept on ``self.inception`` and the stages
  used by ``forward`` are additionally exposed as attributes of this module.
  The positional ``True`` passed to ``inception_v3`` is presumably the
  ``pretrained`` flag - confirm against the installed torchvision version.
  """

  # Stages applied by forward(), in execution order.
  _STAGES = (
      'Conv2d_1a_3x3',
      'Conv2d_2a_3x3',
      'Conv2d_2b_3x3',
      'maxpool1',
      'Conv2d_3b_1x1',
      'Conv2d_4a_3x3',
      'maxpool2',
      'Mixed_5b',
      'Mixed_5c',
      'Mixed_5d',
      'Mixed_6a',
      'Mixed_6b',
      'Mixed_6c',
      'Mixed_6d',
  )

  def __init__(self):
    super().__init__()
    self.inception = models.inception_v3(True)
    for stage in self._STAGES:
      setattr(self, stage, getattr(self.inception, stage))
    # Created but never called by forward().
    self.conv1 = nn.Conv2d(768,1280,1)

  def forward(self,x):
    """Feed ``x`` through every stage in ``_STAGES`` and return the result."""
    for stage in self._STAGES:
      x = getattr(self, stage)(x)
    return x

In [None]:
class multimodal(nn.Module):
  """Text + image classifier that returns one raw logit per sample.

  The caption is encoded by the DistilBERT checkpoint, the image is cut into
  a left and a right half that are each passed through ``Inception``, and a
  multi-head attention layer uses the text tokens as queries, the left-half
  features as keys and the right-half features as values. The attended
  sequence is flattened and fed to a three-layer MLP head.

  ``fc1`` expects 98304 input features (128 * 768), so the text sequence
  length times the embedding size must equal that number.
  """

  def __init__(self):
    super(multimodal, self).__init__()
    self.auto = AutoModel.from_pretrained('distilbert-base-multilingual-cased')
    self.inception = Inception()
    self.mha = nn.MultiheadAttention(768,16,0.1)
    self.fc1 = nn.Linear(98304,2048)
    self.fc2 = nn.Linear(2048,512)
    self.drop = nn.Dropout(0.3)
    self.fc3 = nn.Linear(512,1)
    self.relu = nn.ReLU()


  def forward(self,input_ids, attention_mask,img):
    """Return logits of shape (batch, 1); apply a sigmoid to get probabilities.

    Args:
      input_ids: token ids of the captions.
      attention_mask: attention mask matching ``input_ids``.
      img: image batch indexed as (batch, channel, height, width).
    """
    output1 = self.auto(input_ids=input_ids, attention_mask=attention_mask)

    # Both halves are cut along the width (last) axis. The second half used
    # to be sliced along the height axis, which produced the bottom half of
    # the image instead of the right half.
    half_width = img.shape[-1] // 2
    f_half = img[:,:,:,:half_width]
    s_half = img[:,:,:,half_width:2 * half_width]
    output2 = self.inception(f_half)
    output3 = self.inception(s_half)

    # nn.MultiheadAttention is used in its default sequence-first layout,
    # so every input is rearranged to (sequence, batch, embedding).
    output1 = output1[0].permute(1,0,2)
    batch_size, channels, width, height = output2.shape
    output2 = output2.view(batch_size,channels,width * height).permute(2,0,1)
    output3 = output3.view(batch_size,channels,width * height).permute(2,0,1)

    out = self.mha(output1,output2,output3)[0]
    seq_len,batch_size,embed = out.shape
    out = out.permute(1,0,2)
    out = out.reshape(batch_size,seq_len * embed)

    out = self.fc1(out)
    out = self.relu(out)
    out = self.drop(out)
    out = self.fc2(out)
    out = self.relu(out)
    out = self.drop(out)
    # No activation after the last layer: the notebook trains this model with
    # BCEWithLogitsLoss, which expects unbounded logits. A trailing ReLU
    # clamps them to >= 0, so sigmoid(out) could never fall below 0.5.
    out = self.fc3(out)

    return out

In [None]:
# Build the network and place its parameters on the selected device.
model = multimodal().to(device)

In [None]:
EPOCHS = 5

# The scheduler is stepped once per batch, so the schedule spans every
# batch of every epoch.
total_steps = EPOCHS * len(train_data_loader)

optimizer = AdamW(model.parameters(), lr=2e-5, correct_bias=False)
scheduler = get_linear_schedule_with_warmup(
  optimizer,
  num_warmup_steps=0,
  num_training_steps=total_steps,
)

# Binary cross-entropy computed directly on the model's logits.
loss = nn.BCEWithLogitsLoss().to(device)

def epoch_time(start_time,end_time):
    """Split the span between two timestamps into whole minutes and seconds.

    Returns a ``(minutes, seconds)`` tuple of ints; fractional seconds are
    truncated.
    """
    elapsed = end_time - start_time
    whole_minutes = int(elapsed / 60)
    leftover_seconds = int(elapsed - (whole_minutes * 60))
    return whole_minutes, leftover_seconds

In [None]:
def train_epoch(model,data_loader,loss_fn,optimizer,device,scheduler,n_examples):
    """Train ``model`` for one pass over ``data_loader``.

    Args:
        model: network called as ``model(input_ids=..., attention_mask=..., img=...)``.
        data_loader: iterable of dicts with the keys 'input_ids',
            'attention_mask', 'label' and 'image'.
        loss_fn: loss applied to ``(outputs, labels reshaped to (batch, 1))``.
        optimizer: optimizer stepped once per batch.
        device: device the batch tensors are moved to.
        scheduler: learning-rate scheduler stepped once per batch.
        n_examples: number of samples used as the accuracy denominator.

    Returns:
        ``(accuracy, mean_loss)`` - accuracy as a 0-dim double tensor and the
        mean of the per-batch losses.
    """
    model = model.train()
    losses = []
    # A tensor accumulator keeps the return expression valid even when the
    # loader yields no batches.
    correct_predictions = torch.zeros((), dtype=torch.long, device=device)

    for data in data_loader:

        input_ids = data['input_ids'].to(device)
        attention_mask = data['attention_mask'].to(device)
        labels = data['label'].to(device)
        labelsviewed = labels.view(labels.shape[0],1)
        image = data['image'].to(device)

        # Start every step from clean gradients, so leftovers from an
        # interrupted run cannot leak into this update.
        optimizer.zero_grad()

        outputs = model(
            input_ids=input_ids,
            attention_mask=attention_mask,
            img=image
            )
        loss = loss_fn(outputs,labelsviewed)

        # Class 1 iff the sigmoid probability is at least 0.5 - the same rule
        # get_predictions applies. The raw outputs used to be compared with
        # 0.5 while the computed sigmoid was left unused.
        # NOTE(review): assumes the model emits raw, unclamped logits.
        sig_outs = torch.sigmoid(outputs).reshape(-1)
        preds = (sig_outs >= 0.5).to(labels.dtype)

        correct_predictions += torch.sum(preds == labels)
        losses.append(loss.item())

        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
        optimizer.step()
        scheduler.step()

    return correct_predictions.double() / n_examples, np.mean(losses)

def eval_model(model, data_loader, loss_fn, device, n_examples):
  """Evaluate ``model`` on ``data_loader`` without updating any weights.

  Args:
    model: network called as ``model(input_ids=..., attention_mask=..., img=...)``.
    data_loader: iterable of dicts with the keys 'input_ids',
      'attention_mask', 'label' and 'image'.
    loss_fn: loss applied to ``(outputs, labels reshaped to (batch, 1))``.
    device: device the batch tensors are moved to.
    n_examples: number of samples used as the accuracy denominator.

  Returns:
    ``(accuracy, mean_loss)`` - accuracy as a 0-dim double tensor and the
    mean of the per-batch losses.
  """
  model = model.eval()
  losses = []
  # A tensor accumulator keeps the return expression valid even when the
  # loader yields no batches.
  correct_predictions = torch.zeros((), dtype=torch.long, device=device)
  with torch.no_grad():
    for d in data_loader:
      input_ids = d["input_ids"].to(device)
      attention_mask = d["attention_mask"].to(device)
      labels = d["label"].to(device)
      labelsviewed = labels.view(labels.shape[0],1)
      image = d['image'].to(device)
      outputs = model(
        input_ids=input_ids,
        attention_mask=attention_mask,
        img=image
      )
      loss = loss_fn(outputs, labelsviewed)

      # Class 1 iff the sigmoid probability is at least 0.5 - the same rule
      # get_predictions applies. The raw outputs used to be compared with
      # 0.5 while the computed sigmoid was left unused.
      # NOTE(review): assumes the model emits raw, unclamped logits.
      sig_outs = torch.sigmoid(outputs).reshape(-1)
      preds = (sig_outs >= 0.5).to(labels.dtype)

      correct_predictions += torch.sum(preds == labels)
      losses.append(loss.item())
  return correct_predictions.double() / n_examples, np.mean(losses)

In [None]:
from collections import defaultdict

# Per-epoch metrics, keyed by 'train_acc', 'train_loss', 'val_acc', 'val_loss'.
history = defaultdict(list)
best_accuracy = 0

# Accuracy denominators come from the datasets themselves instead of
# hard-coded counts, so they stay correct if the split sizes change.
n_train_examples = len(train_data_loader.dataset)
n_val_examples = len(val_data_loader.dataset)

for epoch in range(EPOCHS):

  start_time = time.time()
  train_acc,train_loss = train_epoch(
      model,
      train_data_loader,
      loss,
      optimizer,
      device,
      scheduler,
      n_train_examples
  )

  val_acc,val_loss = eval_model(
      model,
      val_data_loader,
      loss,
      device,
      n_val_examples
  )

  # Plain floats: the history no longer holds device tensors and can be
  # plotted or serialised directly.
  train_acc = float(train_acc)
  val_acc = float(val_acc)
  best_accuracy = max(best_accuracy, val_acc)

  end_time = time.time()
  epoch_mins, epoch_secs = epoch_time(start_time, end_time)
  print(f'Epoch: {epoch+1:02} | Epoch Time: {epoch_mins}m {epoch_secs}s')
  print(f'Train Loss {train_loss} accuracy {train_acc}')
  print(f'Val Loss {val_loss} accuracy {val_acc}')
  print()

  history['train_acc'].append(train_acc)
  history['train_loss'].append(train_loss)
  history['val_acc'].append(val_acc)
  history['val_loss'].append(val_loss)

Epoch: 01 | Epoch Time: 2m 30s
Train Loss 0.46982091149458516 accuracy 0.8387252535007242
Val Loss 0.4254985024531682 accuracy 0.9388646288209607

Epoch: 02 | Epoch Time: 2m 29s
Train Loss 0.3613141480546731 accuracy 0.9623370352486721
Val Loss 0.3639034350713094 accuracy 0.9694323144104803

Epoch: 03 | Epoch Time: 2m 29s
Train Loss 0.3383737903661453 accuracy 0.9797199420569773
Val Loss 0.38473743895689644 accuracy 0.9475982532751092

Epoch: 04 | Epoch Time: 2m 28s
Train Loss 0.3165285150019022 accuracy 0.987445678416224
Val Loss 0.4382855733235677 accuracy 0.9432314410480349

Epoch: 05 | Epoch Time: 2m 28s
Train Loss 0.3139743469368953 accuracy 0.9898599710284887
Val Loss 0.44933277666568755 accuracy 0.9432314410480349



In [None]:
# Test captions, without the 'Unnamed: 0' column.
df_test = pd.read_csv(
    '/content/drive/MyDrive/Colab Notebooks/Datasets and weights/Tamil_troll_memes/test_captions.csv'
).drop(columns='Unnamed: 0')

# Unpack the test-image archive into the current working directory.
extract_path = '/content/drive/MyDrive/Colab Notebooks/Datasets and weights/Tamil_troll_memes/test_img.zip'
with ZipFile(extract_path) as archive:
  archive.extractall()

In [None]:
# Loader over the test set, built with shuffle=False.
test_data_loader = create_data_loader(
    df=df_test,
    tokenizer=tokenizer,
    max_len=MAX_LEN,
    batch_size=BATCH_SIZE,
    mytransforms=my_trans,
    path='/content/test_img',
    shuffle=False,
)

In [None]:
def get_predictions(model,data_loader, device):
    """Return the predicted class (0 or 1) for every sample in ``data_loader``.

    The model is switched to eval mode and run without gradient tracking.
    A sample is assigned 0 when the sigmoid of its output is below 0.5 and
    1 otherwise. Predictions are returned as one flat list, in loader order.
    """
    model = model.eval()
    f_preds = []
    with torch.no_grad():
        for batch in data_loader:
            logits = model(
                input_ids=batch["input_ids"].to(device),
                attention_mask=batch["attention_mask"].to(device),
                img=batch['image'].to(device)
            )
            probabilities = torch.sigmoid(logits)
            f_preds.extend(0 if p < 0.5 else 1 for p in probabilities)

    return f_preds

In [None]:
# Predicted class for every test sample, in loader order.
submission_preds = get_predictions(
    model=model,
    data_loader=test_data_loader,
    device=device,
)

In [None]:
# Gold labels for the test set.
GOLD_LABELS_CSV = '/content/drive/MyDrive/Colab Notebooks/Datasets and weights/Tamil_troll_memes/gold_labels_for_test.csv'
df_org = pd.read_csv(GOLD_LABELS_CSV)

In [None]:
def f(x):
    """Encode a gold label: 1 for the exact string 'troll', 0 for anything else."""
    return 1 if x == 'troll' else 0

# Replace the textual gold labels by their 0/1 encoding.
df_org['label'] = df_org['label'].apply(f)

In [None]:
from sklearn.metrics import confusion_matrix, plot_confusion_matrix, classification_report

In [None]:
# Per-class metrics of the predictions against the gold labels
# (class 0 is reported as 'Non-Troll', class 1 as 'Troll').
report = classification_report(
    df_org['label'],
    submission_preds,
    target_names=['Non-Troll','Troll'],
)
print(report)