
This file documents the code for the experiments on focal loss, data (image and text) augmentation, and transfer learning.


They are described in the corresponding sections. The sections before them are necessary data/baseline model code.

In [None]:

# Mount Google Drive and create/enter the project folder `dl_project` on it.
from google.colab import drive # import drive from google colab

ROOT = "/content/drive"     # default location for the drive
print(ROOT)                 # print content of ROOT (Optional)

drive.mount(ROOT)           # we mount the google drive at /content/drive
%cd /content/drive/My Drive
%pwd
# NOTE(review): presumably this reports an error on a re-run when dl_project already exists.
%mkdir dl_project
%cd /content/drive/My Drive/dl_project
%pwd

In [None]:
# Copy the project files from Drive into the local /content directory, make
# /content the working directory, then unmount Drive.
from google.colab import drive # import drive from google colab
%cp -v /content/drive/MyDrive/dl_project/* /content/
%cd /content/
# NOTE(review): later cells `%cd` into /content/drive/My Drive/dl_project again;
# presumably Drive has to be remounted for those to work after this unmount.
drive.flush_and_unmount()
%pwd

# Install packages

Install Pytorch 1.6.0+cu101

In [None]:
# torch/torchvision are pinned to the CUDA 10.1 builds; transformers is installed unpinned.
!pip install torch==1.6.0+cu101 torchvision==0.7.0+cu101 -f https://download.pytorch.org/whl/torch_stable.html
%pip install transformers

Install mmf

In [None]:

# Editable install of MMF from a local checkout. The clone is commented out, so
# an `mmf` directory must already exist in the current working directory.
#!git clone https://github.com/facebookresearch/mmf.git
%cd mmf
!pip install --editable .


# Check if cuda is enabled

In [None]:
# Print the CUDA compiler version and report whether PyTorch can see a GPU.
!nvcc --version
import torch
torch.cuda.is_available()


In [None]:
# Signed download URL and archive password for the Hateful Memes data.
# NOTE(review): these are hard-coded secrets; the same two names are reassigned
# by the next cell ("Download data"), so this cell looks redundant.
url = "https://drivendata-competition-fb-hateful-memes-data.s3.amazonaws.com/XjiOc5ycDBRRNwbhRlgH.zip?AWSAccessKeyId=AKIARVBOBDCY4MWEDJKS&Signature=3eG839TKaJuE8uryxuYkGvAqMC8%3D&Expires=1606846839"
password = "EWryfbZyNviilcDF"

# Download data

In [None]:
# Signed download URL (it carries an `Expires` timestamp) and archive password,
# consumed by the curl / mmf_convert_hm cells below via `$url` and `$password`.
# NOTE(review): hard-coded credentials; consider reading them from the
# environment or prompting for them instead of committing them.
url ="https://drivendata-competition-fb-hateful-memes-data.s3.amazonaws.com/XjiOc5ycDBRRNwbhRlgH.zip?AWSAccessKeyId=AKIARVBOBDCY4MWEDJKS&Signature=vwrcLD1%2FgzoI%2B%2Be4TlMITuWphVg%3D&Expires=1607484815"
password = "EWryfbZyNviilcDF"

In [None]:
# Switch to the project folder on the mounted Drive and show the working directory.
%cd /content/drive/My Drive/dl_project
%pwd

In [None]:
# Download the archive at `url` (Python variable, expanded by IPython) to /content/hm.zip.
!curl -o /content/hm.zip "$url" -H 'Referer: https://www.drivendata.org/competitions/64/hateful-memes/data/' --compressed

Convert the zip file into the format required by MMF.

In [None]:
# Convert the downloaded archive with MMF's converter, using `password` from the cell above.
# NOTE(review): this reads ./hm.zip relative to the current directory, whereas
# the download cell writes /content/hm.zip — confirm the working directory.
!mmf_convert_hm --zip_file ./hm.zip --password $password --bypass_checksum=1
%cd /content/drive/My Drive/dl_project
%pwd


# Visualize

In [None]:

# Build the MMF "hateful_memes" dataset and display a grid of 8 samples.
# `registry` and `MMBT` are imported here but not used in this cell.
from mmf.common.registry import registry
from mmf.models.mmbt import MMBT
from mmf.utils.build import build_dataset
from mmf.utils.env import setup_imports
setup_imports()
dataset = build_dataset("hateful_memes")

import matplotlib.pyplot as plt
plt.rcParams["figure.figsize"] = (20, 20)
dataset.visualize(num_samples=8, size=(512, 512), nrow=4)


# Test pretrained model

In [None]:

# Load the pretrained MMBT checkpoint identified by "mmbt.hateful_memes.images".
import matplotlib.pyplot as plt
import requests
from PIL import Image

from mmf.models.mmbt import MMBT

model = MMBT.from_pretrained("mmbt.hateful_memes.images")


In [None]:
# Classify one image/text pair with the pretrained model and show the image.
# `output` is indexed below with the keys "label" and "confidence".
image_url = "https://i.imgur.com/tEcsk5q.jpg" #@param {type:"string"}
text = "look how many people love you" #@param {type: "string"}
output = model.classify(image_url, text)
plt.imshow(Image.open(requests.get(image_url, stream=True).raw))
plt.axis("off")
plt.show()
hateful = "Yes" if output["label"] == 1 else "No"
print("Hateful as per the model?", hateful)
print(f"Model's confidence: {output['confidence'] * 100:.3f}%")


# Build baseline model

Unzip hm.zip

In [None]:
%cd content
!unzip -P EWryfbZyNviilcDF ./hm.zip


In [None]:
!mkdir savedata

Implement helper functions

In [None]:
import json
import numpy as np
import cv2
import pandas as pd
from transformers import BertModel, BertTokenizer

import torchvision.models
from torchvision import transforms

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.optim import Adam
from torch.utils.data import DataLoader,Dataset

from keras_preprocessing.sequence  import pad_sequences
from sklearn.metrics import accuracy_score, precision_score,recall_score
from sklearn.metrics import confusion_matrix

def read_data(arr, label=False):
    """Parse JSON-lines records into a column-oriented dict.

    :param arr: iterable of JSON strings, each with the keys "id", "img" and
        "text" (plus "label" when ``label`` is true)
    :param label: when true, also collect each record's "label"
    :return: dict of equally long lists keyed "id", "img_name", "text" and,
        optionally, "label"
    """
    records = [json.loads(line) for line in arr]

    columns = {
        "id": [rec["id"] for rec in records],
        "img_name": [rec["img"] for rec in records],
        "text": [rec["text"] for rec in records],
    }
    if label:
        columns["label"] = [rec["label"] for rec in records]

    return columns


class HM_Dataset(Dataset):
    """Torch dataset yielding one meme (image + tokenized text) per item.

    Each item is a dict with the keys "image", "text", "embeddings" (token ids,
    shape (1, 20)) and "attn" (1.0 where the token id is non-zero), plus
    "label" when the dataset was built with ``label=True``.
    """

    def __init__(self, csv, tokenizer, transforms=None, label=False, imagePreLoad = False):
        """
        :param csv: DataFrame with the columns ``img_name`` and ``text``;
            ``label`` is required when ``label`` is true and ``image`` when
            ``imagePreLoad`` is true
        :param tokenizer: object exposing ``encode(text)`` -> list of token ids
        :param transforms: optional callable applied to the loaded image
        :param label: whether items carry a "label" entry
        :param imagePreLoad: read the image from the ``image`` column instead
            of loading "data/<img_name>" from disk
        """
        self.csv = csv
        self.tokenizer = tokenizer
        self.transforms = transforms
        self.label = label
        self.imagePreLoad = imagePreLoad

    def __len__(self):
        return len(self.csv)

    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            # Tensor.tolist() (not to_list) converts the index; the result must
            # be kept, otherwise the tensor itself is handed to .iloc.
            idx = idx.tolist()
        req = self.csv.iloc[idx]
        img_name = req.img_name
        text = req.text

        # Token ids padded (post) / cut to exactly 20 entries, shape (1, 20).
        encoding = self.tokenizer.encode(text)
        encoding = pad_sequences([encoding], maxlen=20, padding="post")
        # Attention mask: 1 for every non-zero id, 0 for the padding zeros.
        mask = encoding.copy()
        mask[mask > 0] = 1

        if self.imagePreLoad:
            img = req.image
        else:
            img_path = "data/" + img_name
            # NOTE(review): cv2.imread yields BGR channel order while the
            # transforms used in this notebook go through ToPILImage, which
            # presumably interprets the array as RGB — confirm this is intended.
            img = cv2.imread(img_path)
            if img is None:
                # cv2.imread signals a missing/unreadable file by returning None.
                raise FileNotFoundError("could not read image: " + img_path)
        if self.transforms:
            img = self.transforms(img)

        sample = {"image": img, "text": text,
                  "embeddings": torch.LongTensor(encoding), "attn": torch.FloatTensor(mask)}
        if self.label:
            sample["label"] = torch.FloatTensor([req.label])

        return sample


def preprocess_data(data_file, label=False, transform = None, return_dataset = False, return_dataframe = False,
                    batch_size=8, shuffle=True):
    """Load a JSON-lines split and return it as a DataFrame, Dataset or DataLoader.

    Relies on the module-level ``tokenizer`` being defined before the call
    (unless ``return_dataframe`` is true, in which case it is not touched).

    :param data_file: path to a .jsonl file, one record per line
    :param label: whether the records carry a "label" field
    :param transform: optional image transform forwarded to ``HM_Dataset``
    :param return_dataset: return the ``HM_Dataset`` instead of a DataLoader
    :param return_dataframe: return the raw DataFrame (takes precedence)
    :param batch_size: DataLoader batch size (default 8, the previous fixed value)
    :param shuffle: whether the DataLoader shuffles (default True, as before)
    :return: DataFrame, ``HM_Dataset`` or ``DataLoader`` depending on the flags
    """
    with open(data_file) as f:
        arr = f.readlines()

    dataframe = pd.DataFrame(read_data(arr, label=label))
    if return_dataframe:
        return dataframe

    dataset = HM_Dataset(dataframe, tokenizer, transforms=transform, label=label)
    if return_dataset:
        return dataset

    return DataLoader(dataset, shuffle=shuffle, batch_size=batch_size)

    
def metrics(out, label, accuracy=True, precision=False, recall=False, specificity = False):
    """Compute binary classification metrics for one batch.

    Predictions are ``out > 0.5`` (a score of exactly 0.5 counts as negative);
    ground truth is ``label == 1``. ``out`` is never modified: thresholding in
    place on a CPU tensor would overwrite the values autograd still needs for
    the backward pass.

    :param out: tensor of predicted probabilities
    :param label: tensor of 0/1 targets with the same number of elements
    :param accuracy: include accuracy in the result
    :param precision: include precision, TP / (TP + FP)
    :param recall: include recall, TP / (TP + FN)
    :param specificity: include specificity, TN / (TN + FP)
    :return: list of the requested metrics as floats, in the order accuracy,
        precision, recall, specificity; a metric with an empty denominator is 0.0
    """
    scores = out.detach().cpu().numpy().reshape(-1)
    truth = label.detach().cpu().numpy().reshape(-1)

    pred_pos = scores > 0.5
    true_pos = truth == 1

    # Confusion-matrix cells, counted directly so a batch containing a single
    # class is handled without special cases.
    tp = int(np.sum(pred_pos & true_pos))
    fp = int(np.sum(pred_pos & ~true_pos))
    fn = int(np.sum(~pred_pos & true_pos))
    tn = int(np.sum(~pred_pos & ~true_pos))

    def _ratio(numerator, denominator):
        return numerator / denominator if denominator else 0.0

    ret = []
    if accuracy:
        ret.append(_ratio(tp + tn, tp + fp + fn + tn))
    if precision:
        ret.append(_ratio(tp, tp + fp))
    if recall:
        ret.append(_ratio(tp, tp + fn))
    if specificity:
        ret.append(_ratio(tn, tn + fp))

    return ret

# Implement Focal Loss
References:
1. https://discuss.pytorch.org/t/is-this-a-correct-implementation-for-focal-loss-in-pytorch/43327/8
2. Deep Learning class, Assignment 2 code

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np

def reweight(cls_num_list, beta=0.9999):
    '''
    Compute per-class weights by effective number of samples.

    Each class with n samples gets (1 - beta) / (1 - beta**n); the weights are
    then rescaled so that they sum to the number of classes.

    :param cls_num_list: list, array or tensor (on any device) holding the
        number of samples of each class
    :param beta: hyper-parameter for reweighting, see paper for more details
    :return: float64 tensor of per-class weights, placed on the GPU when one is
        available and on the CPU otherwise
    '''
    # as_tensor accepts plain lists as well as tensors; go through the CPU so
    # numpy can read the values regardless of where the counts live.
    counts = torch.as_tensor(cls_num_list).detach().cpu().numpy().astype(np.float64)
    num_classes = len(counts)

    per_cls_weights = (1.0 - beta) / (1.0 - np.power(beta, counts))
    per_cls_weights = per_cls_weights / np.sum(per_cls_weights) * num_classes  # normalize

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    return torch.from_numpy(per_cls_weights).to(device)

import torch
import torch.nn as nn
import torch.nn.functional as F

class FocalLoss(nn.Module):
    """Binary focal loss on logits, with optional per-class weights.

    Per element the loss is ``w * (1 - pt)**gamma * BCE`` where ``BCE`` is the
    binary cross-entropy with logits, ``pt = exp(-BCE)`` and ``w`` is the
    weight of the element's class (1 when no weights are given).
    """

    def __init__(self, weight=None, 
                 gamma=1., reduction='none'):
        """
        :param weight: optional 1-D tensor ``[w_class0, w_class1]``
        :param gamma: focusing exponent
        :param reduction: stored for compatibility only; ``forward`` always
            returns the mean, which existing callers rely on
        """
        nn.Module.__init__(self)
        self.weight = weight
        self.gamma = gamma
        self.reduction = reduction

    def forward(self, input, target):
        """
        :param input: raw logits (not probabilities)
        :param target: float tensor of 0/1 targets with the same shape as ``input``
        :return: scalar tensor, the mean focal loss over all elements
        """
        BCE_loss = F.binary_cross_entropy_with_logits(input, target, reduction='none')
        pt = torch.exp(-BCE_loss) # prevents nans when probability 0
        F_loss = (1-pt)**self.gamma * BCE_loss

        if self.weight is not None:
            # Look up each element's class weight with an index of the same
            # shape as the target, so the weights line up element-wise with the
            # loss instead of broadcasting (B,) against (B, 1) into (B, B).
            class_idx = torch.gt(target, 0.5).to(dtype=torch.int64, device=input.device)
            w = self.weight.to(input.device)[class_idx]
            F_loss = w * F_loss

        return F_loss.mean()


#https://discuss.pytorch.org/t/is-this-a-correct-implementation-for-focal-loss-in-pytorch/43327/8
def get_cls_num_list(train_df):
    """Count the samples labelled 0 and 1 in ``train_df['label']``.

    :return: integer tensor ``[n_label0, n_label1]`` on the GPU when one is
        available, otherwise on the CPU
    """
    labels = train_df['label']
    counts = [int((labels == cls).sum()) for cls in (0, 1)]
    target_device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    return torch.tensor(counts, device = target_device)

In [None]:
class VGG(nn.Module):
  """Pretrained VGG16 feature extractor producing a flat feature vector.

  Keeps every child module of torchvision's vgg16 except the last one
  (presumably the classifier head), then max-pools and flattens.
  """

  def __init__(self):
    super().__init__()
    # Weights are fetched over plain http: the registered https URL is rewritten.
    urls = torchvision.models.vgg.model_urls
    urls['vgg16'] = urls['vgg16'].replace('https://', 'http://')
    backbone = torchvision.models.vgg16(pretrained=True)
    self.model = torch.nn.Sequential(*list(backbone.children())[:-1])
    self.pooling = torch.nn.MaxPool2d(kernel_size=3)
    self.flat_layer = nn.Flatten()

  def forward(self,x):
    """Return the flattened, max-pooled backbone features of ``x``."""
    return self.flat_layer(self.pooling(self.model(x)))


class Resnet(nn.Module):
  """Pretrained ResNet-18 feature extractor producing a flat feature vector.

  Keeps every child module of torchvision's resnet18 except the last one and
  flattens the result. ``self.pooling`` is created but not used by ``forward``.
  """

  def __init__(self):
    super().__init__()
    # Weights are fetched over plain http: the registered https URL is rewritten.
    urls = torchvision.models.resnet.model_urls
    urls['resnet18'] = urls['resnet18'].replace('https://', 'http://')
    backbone = torchvision.models.resnet18(pretrained=True)
    self.model = torch.nn.Sequential(*list(backbone.children())[:-1])
    self.pooling = torch.nn.MaxPool2d(kernel_size=3)
    self.flat_layer = nn.Flatten()

  def forward(self,x):
    """Return the flattened backbone features of ``x`` (no extra pooling)."""
    return self.flat_layer(self.model(x))


class Mobilenet(nn.Module):
  """Pretrained MobileNetV2 feature extractor producing a flat feature vector.

  Keeps every child module of torchvision's mobilenet_v2 except the last one
  (presumably the classifier head), then max-pools and flattens.
  """

  def __init__(self):
    super().__init__()
    # Weights are fetched over plain http: the registered https URL is rewritten.
    urls = torchvision.models.mobilenet.model_urls
    urls['mobilenet_v2'] = urls['mobilenet_v2'].replace('https://', 'http://')
    backbone = torchvision.models.mobilenet_v2(pretrained=True)
    self.model = torch.nn.Sequential(*list(backbone.children())[:-1])
    self.pooling = torch.nn.MaxPool2d(kernel_size=3)
    self.flat_layer = nn.Flatten()

  def forward(self,x):
    """Return the flattened, max-pooled backbone features of ``x``."""
    return self.flat_layer(self.pooling(self.model(x)))


class Ensemble(nn.Module):
  """Image branch: concatenates VGG, ResNet and MobileNet features and
  projects them 7680 -> 6000 -> 3000 -> 768 through linear layers with batch
  normalisation and dropout (no activation functions in between)."""

  def __init__(self):
    super().__init__()
    self.vgg = VGG()
    self.res= Resnet()
    self.mobile = Mobilenet()
    # 7680 is the width of the three concatenated backbone feature vectors.
    self.fc_b1 = nn.Sequential(nn.Linear(7680,6000),
                               nn.BatchNorm1d(6000))
    self.d1 = nn.Dropout(0.6)
    self.fc_b2 = nn.Sequential(nn.Linear(6000,3000),
                               nn.BatchNorm1d(3000))
    self.d2 = nn.Dropout(0.6)
    self.fc3 = nn.Linear(3000,768)

  def forward(self,x):
    """Run ``x`` through the three backbones and return the projected features."""
    backbone_feats = [self.vgg(x), self.res(x), self.mobile(x)]
    hidden = torch.cat(backbone_feats, dim=1)
    hidden = self.d1(self.fc_b1(hidden))
    hidden = self.d2(self.fc_b2(hidden))
    return self.fc3(hidden)


class BERT(nn.Module):
    """Text branch: bert-base-uncased followed by average pooling and a linear
    projection to 768 features.

    ``self.dropout`` is created but not applied in ``forward``.
    """

    def __init__(self):
        super(BERT, self).__init__()
        self.model = BertModel.from_pretrained('bert-base-uncased')
        self.pooling = nn.AvgPool1d(kernel_size=3)
        self.flat_layer = nn.Flatten()
        self.dropout = nn.Dropout(0.6)
        # 5120 = 20 tokens * 256 values left after average-pooling the hidden size by 3.
        self.fc3 = nn.Linear(5120, 768)

    def forward(self, x, attn):
        """
        :param x: token ids, shape (batch, 1, seq_len) as produced by HM_Dataset
        :param attn: padding mask with the same shape as ``x`` (1 = real token)
        :return: (batch, 768) text features after ReLU
        """
        # The padding mask must be passed as `attention_mask`;
        # `encoder_attention_mask` is only consulted for cross-attention in a
        # decoder configuration, so the mask was previously ignored. It is
        # squeezed like the ids so both are (batch, seq_len).
        x = self.model(input_ids=x.squeeze(1), attention_mask=attn.squeeze(1))[0]
        x = self.pooling(x)
        x = self.flat_layer(x)
        out = F.relu(self.fc3(x), inplace=False)
        return out


class Base_Model(nn.Module):
    """Multimodal classifier: image features (Ensemble) and text features
    (BERT) are concatenated and mapped to one sigmoid probability per sample."""

    def __init__(self):
        super(Base_Model, self).__init__()
        self.bert_part = BERT()
        self.batch_norm = nn.BatchNorm1d(6000)
        self.ensemble = Ensemble()
        # 1536 = 768 image features + 768 text features.
        self.fc1 = nn.Linear(1536, 6000)
        self.fc2 = nn.Linear(6000, 3000)
        self.fc3 = nn.Linear(3000, 1)

    def _forward_logits(self, image, text, attn):
        """Return the pre-sigmoid score, shape (batch, 1)."""
        img_feat = self.ensemble(image)
        txt_feat = self.bert_part(text, attn)

        hidden = self.fc1(torch.cat((img_feat, txt_feat), dim=1))
        hidden = self.batch_norm(hidden)
        hidden = F.relu(self.fc2(hidden), inplace=False)
        return self.fc3(hidden)

    def forward(self, image, text, attn):
        """Return the predicted probability, shape (batch, 1)."""
        return torch.sigmoid(self._forward_logits(image, text, attn))

    def _batch_step(self, batch, criterion, device, use_logits):
        """Run one batch; return (loss, probabilities, labels on ``device``)."""
        img = batch["image"].to(device)
        embeddings = batch["embeddings"].to(device)
        label = batch["label"].to(device)
        mask = batch["attn"].to(device)

        logits = self._forward_logits(img, embeddings, mask)
        probs = torch.sigmoid(logits)
        loss = criterion(logits if use_logits else probs, label)
        return loss, probs, label

    def fit(self, train_data, valid_data=None, epochs=3,loss_type = "BCE", cls_num_list = None, beta = 0.9999):
        """Train with Adam (lr 1e-5, weight decay 1e-3) and optionally validate.

        :param train_data: iterable of batches with "image", "embeddings",
            "attn" and "label" entries
        :param valid_data: optional iterable of the same kind, evaluated after
            every epoch
        :param epochs: number of passes over ``train_data``
        :param loss_type: "BCE" for ``nn.BCELoss``; any other value selects
            ``FocalLoss``
        :param cls_num_list: per-class sample counts; when given, class weights
            from ``reweight`` are passed to the focal loss
        :param beta: ``reweight`` hyper-parameter
        :return: ``(losses, losses_val, losses_step, losses_val_step)``; the
            first two hold one ``[loss, acc, precision, recall, specificity]``
            row per epoch, the last two map epoch -> list of per-batch rows
        """
        losses = []
        losses_val = []
        losses_step = {}
        losses_val_step = {}
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        optim = Adam(self.parameters(), lr=1e-5, weight_decay=0.001)

        if cls_num_list is not None:
            per_cls_weights = reweight(cls_num_list, beta=beta)
        else:
            per_cls_weights = None

        # BCELoss consumes probabilities. FocalLoss is built on
        # binary_cross_entropy_with_logits, so it must receive the pre-sigmoid
        # scores; feeding it forward()'s output would apply sigmoid twice.
        use_logits = loss_type != "BCE"
        if use_logits:
            criterion = FocalLoss(weight=per_cls_weights, gamma=1)
        else:
            criterion = nn.BCELoss()

        for epoch in range(epochs):
            self.train()
            print("===================== Training ====================")
            print("Epoch # ", epoch + 1)
            train_loss = 0.0
            train_acc = 0.0
            train_pre = 0.0
            train_rec = 0.0
            train_spe = 0.0
            losses_step[epoch] = []
            losses_val_step[epoch] = []

            steps = 0
            for batch in train_data:
                loss, probs, label = self._batch_step(batch, criterion, device, use_logits)
                train_loss += loss.item()
                # Metrics get their own copy so nothing autograd needs for
                # backward() can be touched.
                [acc, prec, rec, spe] = metrics(probs.detach().clone(), label, accuracy=True, precision=True, recall=True, specificity = True)
                train_acc += acc
                train_pre += prec
                train_rec += rec
                train_spe += spe

                losses_step[epoch].append([loss.item(),acc,prec,rec,spe])

                loss.backward()
                optim.step()
                optim.zero_grad()

                # Averages divide by the number of batches processed so far
                # (the last batch index is one less than that).
                steps += 1
                if steps % 100 == 0:
                    print("Loss after ", steps, " steps: ", train_loss / steps)
                    print("Accuracy after ", steps, " steps: ", train_acc / steps)
                    print("Precision after ", steps, " steps: ", train_pre / steps)
                    print("Recall after ", steps, " steps: ", train_rec / steps)
                    print("specificity after ", steps, " steps: ", train_spe / steps)

            denom = max(steps, 1)  # avoids dividing by zero on an empty loader
            losses.append([train_loss / denom,train_acc / denom,train_pre / denom, train_rec / denom,train_spe / denom])
            print("----------------After epoch ", epoch + 1, "-------------------------")
            print("Loss after ", steps, " steps: ", train_loss / denom)
            print("Accuracy after ", steps, " steps: ", train_acc / denom)
            print("Precision after ", steps, " steps: ", train_pre / denom)
            print("Recall after ", steps, " steps: ", train_rec / denom)
            print("specificity after ", steps, " steps: ", train_spe / denom)

            self.eval()
            print("=====================Validating=====================")
            eval_acc = 0.0
            eval_loss = 0.0
            eval_prec = 0.0
            eval_rec = 0.0
            eval_spe = 0.0
            if valid_data:
                # No gradients are needed for evaluation.
                with torch.no_grad():
                    for num,batch in enumerate(valid_data):
                        loss, probs, label = self._batch_step(batch, criterion, device, use_logits)

                        eval_loss+=loss.item()
                        [acc,precision,recall,spe] = metrics(probs.detach().clone(),label,True,True,True, True)
                        eval_acc+=acc
                        eval_prec+=precision
                        eval_rec+=recall
                        eval_spe += spe

                        losses_val_step[epoch].append([loss.item(),acc,precision,recall,spe])

                        if num == 5:
                            print("Actual Labels: ",label)
                            print("Predicted Labels",probs)

                n_val = len(valid_data)
                print("Val_loss after ",epoch+1," epochs: ",eval_loss/n_val)
                losses_val.append([eval_loss/n_val,eval_acc/n_val,eval_prec /n_val, eval_rec /n_val, eval_spe /n_val])

                print("Val_accuracy after ",epoch+1," epochs: ",eval_acc/n_val)
                print("Val_precision after ",epoch+1," epochs: ",eval_prec/n_val)
                print("Val_recall after ",epoch+1," epochs: ",eval_rec/n_val)
                print("Val_specificity after ",epoch+1," epochs: ",eval_spe/n_val)
        return losses, losses_val, losses_step, losses_val_step

#https://towardsdatascience.com/understanding-auc-roc-curve-68b2303cc9c5

In [None]:
def constuct_loss_stuct(losses, losses_val, losses_step, losses_val_step):
  """Bundle the four histories returned by ``fit`` into one dict keyed by
  'losses', 'losses_val', 'losses_step' and 'losses_val_step'."""
  return {
      'losses': losses,
      'losses_val': losses_val,
      'losses_step': losses_step,
      'losses_val_step': losses_val_step,
  }

import pickle

def pickle_loss(loss_struct, name):
  """Pickle ``loss_struct`` to 'savedata/<name>.pckl'.

  The 'savedata' directory must already exist. The file is closed even when
  pickling raises.
  """
  with open('savedata/'+name+'.pckl', 'wb') as f:
    pickle.dump(loss_struct, f)

def read_pickle_data(name):
  """Load and return the object pickled at 'savedata/<name>.pckl'.

  The file is closed even when unpickling raises.
  """
  # NOTE: unpickling can execute arbitrary code; only load files you created.
  with open('savedata/'+name+'.pckl', 'rb') as f:
    return pickle.load(f)

**Train** baseline model

In [None]:

# Baseline run: BCE loss, 10 epochs.
# `tokenizer` must be a module-level name because preprocess_data reads it as a global.
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
transform = transforms.Compose([transforms.ToPILImage(), transforms.Resize((224, 224)), transforms.ToTensor()])

train_data = preprocess_data("data/train.jsonl", label=True, transform = transform)
valid_data = preprocess_data("data/dev_seen.jsonl", label=True, transform = transform)
#test_data = preprocess_data("data/test_seen.jsonl", label=False, transform = transform)

# Seed torch before the model is built.
torch.manual_seed(42)
model = Base_Model()

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

num_epochs=10
losses, losses_val, losses_step, losses_val_step = model.fit(train_data, valid_data, epochs=num_epochs)



Focal Loss with reweighting

In [None]:
# Focal-loss run with class reweighting, 20 epochs.
# NOTE(review): batch_size is assigned here but not passed to any call in this cell.
batch_size = 8
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
transform = transforms.Compose([transforms.ToPILImage(), transforms.Resize((224, 224)), transforms.ToTensor()])

# The training split is first read as a DataFrame only to count the samples per class.
train_df = preprocess_data("data/train.jsonl", label=True, transform = transform, return_dataframe=True)
cls_num_list = get_cls_num_list(train_df)

train_data = preprocess_data("data/train.jsonl", label=True, transform = transform)
valid_data = preprocess_data("data/dev_seen.jsonl", label=True, transform = transform)
#test_data = preprocess_data("data/test_seen.jsonl", label=False, transform = transform)

torch.manual_seed(42)
model = Base_Model()

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

num_epochs=20
# Any loss_type other than "BCE" selects FocalLoss; cls_num_list enables the class weights.
losses, losses_val, losses_step, losses_val_step = model.fit(train_data, valid_data, epochs=num_epochs, loss_type = "Focal", cls_num_list = cls_num_list)



Focal Loss without reweighting

In [None]:

# Focal-loss run without class weights (cls_num_list is left at None), 20 epochs.
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
transform = transforms.Compose([transforms.ToPILImage(), transforms.Resize((224, 224)), transforms.ToTensor()])

train_data = preprocess_data("data/train.jsonl", label=True, transform = transform)
valid_data = preprocess_data("data/dev_seen.jsonl", label=True, transform = transform)
#test_data = preprocess_data("data/test_seen.jsonl", label=False, transform = transform)

torch.manual_seed(42)
model = Base_Model()

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

num_epochs=20
losses, losses_val, losses_step, losses_val_step = model.fit(train_data, valid_data, epochs=num_epochs, loss_type = "Focal")



# Data (Image and text) Augmentation

In [None]:
def load_dataset(data_file, transform=None, label=False):
    """Read a JSON-lines split and wrap it in an ``HM_Dataset``.

    Uses the module-level ``tokenizer``; no DataLoader is created.

    :param data_file: path to a .jsonl file, one record per line
    :param transform: optional image transform forwarded to the dataset
    :param label: whether the records carry a "label" field
    :return: the resulting ``HM_Dataset``
    """
    with open(data_file) as handle:
        lines = handle.readlines()

    frame = pd.DataFrame(read_data(lines, label=label))
    return HM_Dataset(frame, tokenizer, transforms=transform, label=label)
    


In [None]:
# Image Augmentation
# Augmenting image pipeline: resize, colour jitter, rotation by up to 20 degrees,
# vertical flip with probability 0.1, then tensor conversion.
# NOTE(review): the same pipeline is defined again below as `trans_aug`; the
# second Resize appears redundant — confirm.
trans = transforms.Compose([transforms.ToPILImage(), 
                          transforms.Resize((224, 224)), 
                          transforms.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4, hue=0.4),
                          transforms.RandomRotation(20),
                          transforms.RandomVerticalFlip(0.1),
                          transforms.Resize((224, 224)), 
                          transforms.ToTensor()])

Custom functions to add words/sentences to the meme images

In [None]:
from PIL import Image, ImageDraw
from PIL import ImageFont

#Reference: https://stackoverflow.com/questions/41556771/is-there-a-way-to-outline-text-with-a-dark-line-in-pil
def add_subtitle(
    bg,
    text="nice",
    xy=("center", 50),
    font="arial.ttf",
    font_size=24,
    font_color=(255, 255, 255),
    stroke=2,
    stroke_color=(0, 0, 0),
    shadow=(4, 4),
    shadow_color=(0, 0, 0),
):
    """Draw (possibly multi-line) subtitle text onto an image, in place.

    Args:
        bg(PIL image): image to add subtitle to; it is modified in place
        text(str): subtitle; may contain newlines
        xy(tuple): top-left position of the text; either component may be the
            string "center" to centre the text block along that axis
        font(str): path of a TrueType font file
        font_size(int): font size passed to ``ImageFont.truetype``
        font_color(tuple): fill colour of the text
        stroke(int): outline width in pixels
        stroke_color(tuple): outline colour
        shadow(tuple): (dx, dy) offset of a drop shadow; falsy disables it
        shadow_color(tuple): fill colour of the shadow
    Returns:
        bg(PIL image): the same image object, with the subtitle drawn
    """
    stroke_width = stroke
    xy = list(xy)
    font = ImageFont.truetype(str(font), font_size)
    draw = ImageDraw.Draw(bg)

    if "center" in xy:
        # Measure the whole multi-line block: a single-line measurement treats
        # "\n" as part of one line and mis-centres wrapped text.
        if hasattr(draw, "multiline_textbbox"):
            left, top, right, bottom = draw.multiline_textbbox(
                (0, 0), text, font=font, stroke_width=stroke_width
            )
            w, h = right - left, bottom - top
        else:
            # Older Pillow releases without multiline_textbbox.
            w, h = draw.multiline_textsize(text, font=font, stroke_width=stroke_width)
        if xy[0] == "center":
            xy[0] = (bg.width - w) // 2
        if xy[1] == "center":
            xy[1] = (bg.height - h) // 2

    if shadow:
        draw.multiline_text(
            (xy[0] + shadow[0], xy[1] + shadow[1]), text, font=font, fill=shadow_color
        )
    draw.multiline_text(
        (xy[0], xy[1]),
        text,
        font=font,
        fill=font_color,
        stroke_width=stroke_width,
        stroke_fill=stroke_color,
    )
    return bg

In [None]:
#Download font
# Fetch the font archive; add_subtitle loads "arial.ttf" by default.
!curl https://www.freefontspro.com/d/14454/arial.zip -o ./arial.zip

In [None]:
# Extract the font archive into the current working directory.
!unzip ./arial.zip

In [None]:
def read_text_and_label(arr):
    """Parse JSON-lines records and return their texts and labels.

    :param arr: iterable of JSON strings, each with the keys "text" and "label"
    :return: dict with two equally long lists under "text" and "label"
    """
    parsed = [json.loads(line) for line in arr]
    return {
        "text": [rec["text"] for rec in parsed],
        "label": [rec["label"] for rec in parsed],
    }

def construct_text_library(data_file):
    """Read a JSON-lines file and return its texts and labels as produced by
    ``read_text_and_label``."""
    with open(data_file) as handle:
        return read_text_and_label(handle.readlines())
    
# Build the library of training texts and labels used for text augmentation.
data_file = "data/train.jsonl"

text_dict = construct_text_library(data_file)

In [None]:

import progressbar
def read_data_with_text_augmentation(arr, text_dict, sample_size):
    """Create ``sample_size`` augmented samples by stamping a random library
    text onto a randomly chosen meme.

    For each sample a record from ``arr`` and a text from ``text_dict`` are
    drawn (with replacement). The library text is split at its midpoint into
    two lines and drawn centred on the image; it is also appended to the
    record's own text. The label is the larger of the two source labels.

    :param arr: list of JSON strings with the keys "id", "img", "text", "label"
    :param text_dict: dict with parallel lists under "text" and "label"
    :param sample_size: number of augmented samples to generate
    :return: dict of lists keyed "id", "img_name", "image" (tensors), "text"
        and "label"
    """
    columns = {key: [] for key in ("id", "img_name", "image", "text", "label")}

    record_picks = np.random.randint(0,len(arr),size = sample_size)
    library_picks = np.random.randint(0,len(text_dict['label']),size = sample_size)

    to_pil = transforms.ToPILImage(mode='RGB')
    to_tensor = transforms.ToTensor()

    for step in progressbar.progressbar(range(sample_size)):
        record = json.loads(arr[record_picks[step]])
        lib_idx = library_picks[step]
        extra_text = text_dict['text'][lib_idx]

        pixels = cv2.imread("data/" + record["img"])
        half = int(len(extra_text)/2)

        # Draw the library text as two lines in the middle of the picture.
        canvas = to_pil(pixels)
        add_subtitle(canvas, extra_text[0:half]+"\n"+extra_text[half:],xy=("center", "center"))

        columns["id"].append(record["id"])
        columns["img_name"].append(record["img"])
        columns["image"].append(to_tensor(canvas))
        columns["text"].append(record["text"] + " " + extra_text)
        columns["label"].append(max(record["label"],text_dict['label'][lib_idx]))

    return columns

In [None]:

def aug_trainingdata_with_text(data_file, text_dict, transform=None, sample_size = 1000, return_dataset= False):
  """Build a text-augmented training set from a JSON-lines file.

  Uses the module-level ``tokenizer``. Images are pre-rendered by
  ``read_data_with_text_augmentation`` and stored in the DataFrame, so the
  dataset is created with ``imagePreLoad = True``.

  :param data_file: path to the .jsonl file to sample records from
  :param text_dict: dict with parallel "text" and "label" lists
  :param transform: optional image transform applied by the dataset
  :param sample_size: number of augmented samples to generate
  :param return_dataset: return the ``HM_Dataset`` instead of a DataLoader
  :return: ``HM_Dataset``, or a shuffling DataLoader with batch size 8
  """
  with open(data_file) as handle:
      lines = handle.readlines()

  frame = pd.DataFrame(read_data_with_text_augmentation(lines, text_dict, sample_size))
  augmented = HM_Dataset(frame, tokenizer, transforms = transform, label = True, imagePreLoad = True)
  if return_dataset:
    return augmented
  return DataLoader(augmented, shuffle=True, batch_size=8)

In [None]:
# Un-augmented training set as a Dataset (not a DataLoader); `transform` comes from an earlier cell.
train_dataset = preprocess_data("data/train.jsonl", label=True, transform = transform, return_dataset = True)

In [None]:
# Read the raw training records into a column dict.
# NOTE(review): `data_dict` is not referenced by the later cells shown in this notebook.
data_file = "data/train.jsonl"
with open(data_file) as f:
    arr = f.readlines()

data_dict = read_data(arr, label=True)

In [None]:
# Image-augmented copy of the training set: same records, randomised colour
# jitter / rotation / vertical flip applied each time an item is fetched.
trans_aug = transforms.Compose([transforms.ToPILImage(), 
                          transforms.Resize((224, 224)), 
                          transforms.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4, hue=0.4),
                          transforms.RandomRotation(20),
                          transforms.RandomVerticalFlip(0.1),
                          transforms.Resize((224, 224)), 
                          transforms.ToTensor()])
aug_img_train_dataset = preprocess_data("data/train.jsonl",label=True, transform = trans_aug, return_dataset = True)


In [None]:
# 3000 text-augmented samples; their images are rendered up front and kept in memory.
aug_text_train_dataset = aug_trainingdata_with_text(data_file, text_dict, transform = transform, sample_size = 3000, return_dataset= True)

In [None]:
# Combine the original, image-augmented and text-augmented sets into one shuffled loader.
augmented_train_dataset = torch.utils.data.ConcatDataset([train_dataset,aug_img_train_dataset,aug_text_train_dataset])
augmented_train_data = DataLoader(augmented_train_dataset, shuffle=True, batch_size=8)
# Drop the notebook-level names; the loader built above still holds the combined dataset.
del train_dataset
del aug_img_train_dataset
del aug_text_train_dataset
del augmented_train_dataset

In [None]:
# Train a fresh model on the augmented loader with the default BCE loss, 10 epochs.
valid_data = preprocess_data("data/dev_seen.jsonl", label=True, transform = transform)

torch.manual_seed(42)
model = Base_Model()

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

num_epochs=10
losses, losses_val, losses_step, losses_val_step = model.fit(augmented_train_data, valid_data, epochs=num_epochs)

# Transfer Learning

Hate Speech and Offensive Language Dataset can be downloaded via: https://www.kaggle.com/mrmorj/hate-speech-and-offensive-language-dataset

In [None]:
# twitter_labeled_data.csv comes from the Kaggle "Hate Speech and Offensive
# Language Dataset" linked above and must be present in the working directory.
twitter_df = pd.read_csv('twitter_labeled_data.csv')

# Shape the tweets like the meme DataFrame expected by HM_Dataset. There are no
# real images, so every row gets a random 3x1x1 tensor as a placeholder.
twitter_df["id"] = twitter_df.index.astype(str)
twitter_df["img_name"] = twitter_df["id"] 
twitter_df["image"] = [torch.rand(3,1,1) for k in range(len(twitter_df))]
twitter_df["text"] = twitter_df["tweet"]
# Every class other than 2 is mapped to label 1.
# NOTE(review): presumably class 2 is the dataset's "neither" category — confirm.
twitter_df["label"] = (twitter_df["class"]!=2).astype(int)



# NOTE(review): sample() is called without random_state, so the 10000-row subset
# differs between runs.
twitter_df_cleaned = twitter_df[["id","img_name","image","text","label"]].sample(10000).copy()
twitter_dataset = HM_Dataset(twitter_df_cleaned, tokenizer, transforms = transform, label = True, imagePreLoad = True)

twitter_dataloader = DataLoader(twitter_dataset, shuffle=True, batch_size=8)

del twitter_df,twitter_df_cleaned, twitter_dataset


In [None]:


# Pre-train a fresh model on the tweet data only (no validation set), 5 epochs.
# NOTE(review): later cells load 'pre_transferlearning_pre_model' through
# read_pickle_data, but no cell shown here saves it — presumably this model
# was pickled and copied to Drive by hand; confirm.
#tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
#transform = transforms.Compose([transforms.ToPILImage(), transforms.Resize((224, 224)), transforms.ToTensor()])

torch.manual_seed(42)
model = Base_Model()

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

num_epochs=5
losses, losses_val, losses_step, losses_val_step = model.fit(twitter_dataloader, epochs=num_epochs)



In [None]:
# Copy the previously saved pre-trained model from Drive into the local
# savedata/ directory, switch to /content and unmount Drive.
# Drive must already be mounted for the copy to find the file.

from google.colab import drive # import drive from google colab
%cp -v /content/drive/MyDrive/dl_project/savedata/pre_transferlearning_pre_model.pckl /content/savedata/
#%cp -v /content/drive/MyDrive/dl_project/hm.zip /content/
%cd /content/
drive.flush_and_unmount()
%pwd


In [None]:
# Transfer learning without freezing: load the pickled pre-trained model and
# fine-tune all parameters on the meme data for 10 epochs.
# `transform` and `tokenizer` must still be defined from earlier cells.
model = read_pickle_data('pre_transferlearning_pre_model')

train_data = preprocess_data("data/train.jsonl", label=True, transform = transform)
valid_data = preprocess_data("data/dev_seen.jsonl", label=True, transform = transform)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

num_epochs=10
losses, losses_val, losses_step, losses_val_step = model.fit(train_data, valid_data, epochs=num_epochs)



In [None]:
# Transfer learning with freezing: reload the pickled pre-trained model so the
# frozen run starts from the same weights as the unfrozen one.

model = read_pickle_data('pre_transferlearning_pre_model')

In [None]:

# Freeze every parameter whose qualified name contains one of these substrings
# (here: the pretrained BERT encoder inside the text branch). Names and
# parameters are taken from the same iterator, so they cannot get out of step.
set_false_if_contains = ['bert_part.model']
for param_name, param in model.named_parameters():
    if any(fragment in param_name for fragment in set_false_if_contains):
        param.requires_grad = False

In [None]:
# Fine-tune the partially frozen model for 10 epochs. `train_data` and
# `valid_data` are the loaders created in the earlier fine-tuning cell.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

num_epochs=10
losses, losses_val, losses_step, losses_val_step = model.fit(train_data, valid_data, epochs=num_epochs)
