**Installing All Necessary Libraries**

**Importing all required functions and classes**

In [None]:
from transformers import AutoModelForSequenceClassification,BertModel
from torch.utils.data import DataLoader,Dataset,IterableDataset,random_split
from transformers import AdamW
from transformers import get_scheduler
import torch
import numpy as np 
from tqdm.auto import tqdm
from datasets import load_metric
import itertools
import os 
import pandas as pd

**We use the pretrained `bert-base-cased` tokenizer via `AutoTokenizer`, and download the pretrained `bert-base-cased` model (as `bert_model`) to look at its config.**

In [None]:
from transformers import AutoTokenizer

# Tokenizer and sequence-classification model are loaded from the same
# checkpoint name.
checkpoint_name = "bert-base-cased"

tokenizer = AutoTokenizer.from_pretrained(checkpoint_name)
bert_model = AutoModelForSequenceClassification.from_pretrained(checkpoint_name)

# Show the configuration of the freshly loaded model.
model_config = bert_model.config
print(model_config)





Downloading:   0%|          | 0.00/29.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/570 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/208k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/426k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/416M [00:00<?, ?B/s]

Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForSequenceClassification: ['cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at b

BertConfig {
  "_name_or_path": "bert-base-cased",
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.18.0",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 28996
}



## Dataloader

**This is the custom dataset class. It inherits from the torch `Dataset` class and overrides `__len__` and `__getitem__`. The `get_csv` function reads the required columns from the tab-separated file, and the `prepare` function calls the tokenizer on each meme's text transcription (padded/truncated to 256 tokens). Each item is returned as a dict of input tensors plus a `labels` entry, which is the format the model expects.**

In [None]:
import csv
from itertools import cycle
import pandas as pd
from tqdm.auto import tqdm

class meme_loader(Dataset):
  """In-memory dataset of tokenized meme transcriptions and their labels.

  The whole file is read and tokenized eagerly in the constructor. Each item
  is a dict holding one tensor per tokenizer output key plus an integer
  ``'labels'`` entry taken from the ``misogynous`` column.

  Args:
    name: Path of the tab-separated file to load.
    max_length: Length every transcription is padded / truncated to.
    text_tokenizer: Callable used to tokenize a transcription. When omitted,
      the notebook-level ``tokenizer`` is used.
  """

  def __init__(self,name,max_length=256,text_tokenizer=None):
    # Resolve the tokenizer once so `prepare` does not reach for a global.
    self.tokenizer = tokenizer if text_tokenizer is None else text_tokenizer
    self.max_length = max_length
    self.rows = self.get_csv(name)
    self.data = []
    self.prepare()

  def get_csv(self,file_path):
    """Read the three required columns from a tab-separated file.

    Missing transcriptions are replaced by the empty string so that every
    value handed to the tokenizer is a ``str``.

    Returns:
      A DataFrame with the columns ``file_name``, ``misogynous`` and
      ``Text Transcription``.
    """
    fields = ["file_name", "misogynous", "Text Transcription"]
    csv_df = pd.read_csv(file_path, sep='\t', usecols=fields)
    # pandas parses empty cells as NaN (a float), which the tokenizer cannot
    # consume; normalise the text column to plain strings.
    csv_df["Text Transcription"] = csv_df["Text Transcription"].fillna("").astype(str)
    print(csv_df.head())
    return csv_df

  def __len__(self):
    # Derived from the stored samples so it can never disagree with them.
    return len(self.data)

  def __getitem__(self,idx):
    return self.data[idx]

  def prepare(self):
    """Tokenize every row of ``self.rows`` and append the result to ``self.data``."""
    for _, row in tqdm(self.rows.iterrows(), total=len(self.rows.index)):
      encoded = self.tokenizer(
          row['Text Transcription'],
          padding='max_length',
          truncation=True,
          max_length=self.max_length,
      )
      sample = {key: torch.tensor(value) for key, value in encoded.items()}
      sample['labels'] = int(row['misogynous'])
      self.data.append(sample)
    # Kept for backward compatibility with code that reads `.len` directly.
    self.len = len(self.data)



**We build the dataset from `training.csv`, randomly split it 90/10 into training and evaluation subsets, print the number of training records, and wrap both subsets in DataLoaders with a batch size of 16.**

In [None]:

all_data  = meme_loader("/content/drive/MyDrive/computationSS/training.csv")

# 90/10 train/evaluation split. The generator is seeded so that the same rows
# land in each subset every time the notebook is re-run.
train_size = int(0.9*len(all_data))
eval_size = len(all_data)-train_size
train_data,eval_data = random_split(
    all_data,
    [train_size,eval_size],
    generator=torch.Generator().manual_seed(42),
)
print(len(train_data))

train_dataloader = DataLoader(train_data,shuffle=True,batch_size=16)
# Evaluation does not need shuffling; a fixed order keeps the per-batch
# validation losses comparable between epochs.
eval_dataloader = DataLoader(eval_data,shuffle=False,batch_size=16)

   file_name  misogynous                                 Text Transcription
0      1.jpg           0                                      Milk Milk.zip
1     10.jpg           1  ROSES ARE RED, VIOLETS ARE BLUE IF YOU DON'T S...
2   1000.jpg           0  BREAKING NEWS: Russia releases photo of DONALD...
3  10000.jpg           0                       MAN SEEKING WOMAN Ignad 18 O
4  10006.jpg           0  Me explaining the deep lore of. J.R.R. Tolkein...


  0%|          | 0/10000 [00:00<?, ?it/s]

9000


## Model

**This is the custom class for the model. It inherits from `nn.Module`; we initialize the BERT model, a dense layer and a dropout layer. The `forward` function defines how the inputs are processed and in which order the layers are called: the inputs go through BERT, then dropout is applied to the pooled output, then the dense layer produces the logits, and finally the loss is computed. Both the loss and the logits are returned.**




In [None]:
from torch import nn
from torch.nn import CrossEntropyLoss

class BertForMEME(nn.Module):
  """BERT encoder followed by dropout and a linear classification layer.

  Args:
    name: Checkpoint name or path passed to ``BertModel.from_pretrained``.
    num_labels: Number of output classes of the linear layer.
    dropout: Dropout probability applied to the pooled encoder output. Pass
      ``None`` to use the encoder config's ``hidden_dropout_prob``.
    trained: Accepted for interface compatibility; not used.
  """

  def __init__(self,name,num_labels,dropout,trained=False):
    super(BertForMEME, self).__init__()
    self.num_labels = num_labels
    self.model = BertModel.from_pretrained(name)
    self.dense = nn.Linear(self.model.config.hidden_size,num_labels)
    # Honour the caller-supplied rate instead of silently ignoring it.
    if dropout is None:
      dropout = self.model.config.hidden_dropout_prob
    self.dropout = nn.Dropout(dropout)

  def forward(
        self,
        input_ids=None,
        attention_mask=None,
        token_type_ids=None,
        labels=None,):
    """Run the encoder and the classification head.

    Returns:
      A ``(loss, logits)`` tuple. ``loss`` is the cross-entropy against
      ``labels``, or ``None`` when no labels are given (inference).
    """
    outputs = self.model(
        input_ids=input_ids,
        attention_mask=attention_mask,
        token_type_ids=token_type_ids,
    )

    # Second element of the encoder output is the pooled representation.
    pooled_outputs = self.dropout(outputs[1])
    logits = self.dense(pooled_outputs)

    loss = None
    if labels is not None:
      loss_fn = CrossEntropyLoss()
      loss = loss_fn(logits.view(-1,self.num_labels),labels.view(-1))
    return loss,logits

# Instantiate the custom classifier: 'bert-base-cased' encoder, 2 output
# labels, dropout argument 0.1.
# NOTE(review): the optimizer, the device move and the training loop in the
# later cells all operate on `bert_model`, not on `bert_my` — confirm which
# model is meant to be trained.
bert_my = BertForMEME('bert-base-cased',2,0.1)
# Disabled smoke test: tokenize one sentence, attach a dummy label, add a
# batch dimension to every field and run a forward pass.
# inputs = tokenizer("hello, I have bought a New Car",padding='max_length',truncation=True)
# print(inputs.keys())
# inputs['labels']=[0]
# inputs={k:torch.unsqueeze(torch.tensor(v),0) for k,v in inputs.items()}
# print(bert_my(**inputs))

# Disabled: save the untrained weights to Drive.
# torch.save(bert_my.state_dict(), 'drive/MyDrive/IMDB/test.pt')



Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


**Preparing the optimizer and the scheduler, and listing the number of training steps the model will perform, the total epochs and the batch size.**

In [None]:
from transformers import AdamW
from transformers import get_scheduler
# Run-level settings.
num_epochs = 5
# Read the batch size back from the DataLoader so this value cannot drift
# from the one the loader was actually built with.
batch_size = train_dataloader.batch_size

optimizer = AdamW(bert_model.parameters(),lr=5e-5)

# One optimizer step per training batch, for every epoch.
num_training_steps= num_epochs*(len(train_dataloader))

# Linear decay of the learning rate over the whole run, without warm-up.
lr_scheduler = get_scheduler(
    "linear",
    optimizer=optimizer,
    num_warmup_steps=0,
    num_training_steps=num_training_steps
)
print(num_training_steps,num_epochs,batch_size)


2815 5 16




**Shifting the model to CUDA**

In [None]:
import torch

# Use the GPU when one is available, otherwise stay on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
bert_model.to(device)
print(device)

cuda


**This is the training loop. Here we take the prepared, batched inputs, process them through the model, calculate the loss, backpropagate the gradients, and save the loss and accuracy for future visualization. After each epoch the model is evaluated on the held-out split.**

In [None]:




# Fine-tune `bert_model` for `num_epochs` epochs and evaluate it on
# `eval_dataloader` after every epoch. Per-step loss and running accuracy are
# collected in `losses` / `accs` for later plotting.
f1_metric = load_metric('f1')
accuracy_metric = load_metric('accuracy')

# Make sure the checkpoint directory exists before training starts, so the
# first save (at step 1000) cannot fail on a missing folder.
checkpoint_dir = '/content/drive/MyDrive/computationSS/Language_model'
os.makedirs(checkpoint_dir, exist_ok=True)

step=0
running_corrects = 0
samples_seen = 0
logs = {}
losses = []
accs =[]
progress_bar = tqdm(range(num_training_steps))


for _ in range(num_epochs):
  bert_model.train()
  for batch in train_dataloader:
      step+=1
      batch = {k: v.to(device) for k, v in batch.items()}
      outputs = bert_model(**batch)
      loss,logits = outputs.loss,outputs.logits
      loss.backward()
      optimizer.step()
      lr_scheduler.step()
      optimizer.zero_grad()

      preds=  torch.argmax(logits,dim=-1)
      running_corrects += torch.sum(preds == batch['labels']).item()
      # Count the samples actually processed: the last batch of an epoch can
      # be smaller than `batch_size`, so `step*batch_size` would overcount.
      samples_seen += batch['labels'].size(0)
      accuracy = running_corrects/samples_seen
      logs['step']=step
      logs['log loss'] = loss.item()
      logs['accuracy'] = accuracy

      losses.append(loss.item())
      accs.append(accuracy)
      # Show the latest metrics on the progress bar instead of printing one
      # line per step.
      progress_bar.set_postfix(logs)
      if(step%1000==0):
        torch.save(bert_model.state_dict(),os.path.join(checkpoint_dir,f'model_{step}.pt'))
      progress_bar.update(1)

  # Switch the model being trained to eval mode so dropout is disabled while
  # computing validation metrics.
  bert_model.eval()
  val_loss_epoch = []
  print("performing eval...>")
  for ebatch in eval_dataloader:
      batch = {k: v.to(device) for k, v in ebatch.items()}
      with torch.no_grad():
          outputs = bert_model(**batch)

      val_loss,logits = outputs.loss,outputs.logits
      predictions = torch.argmax(logits, dim=-1)
      f1_metric.add_batch(predictions=predictions, references=batch["labels"])
      accuracy_metric.add_batch(predictions=predictions, references=batch["labels"])

      val_loss_epoch.append(val_loss.item())
  print("AVERAGE VAL loss ", np.mean(val_loss_epoch))
  print("F1 ",f1_metric.compute())
  print("Accuracy ",accuracy_metric.compute())

print("training complete")
torch.save(bert_model.state_dict(),os.path.join(checkpoint_dir,'model_final.pt'))

  0%|          | 0/2815 [00:00<?, ?it/s]

{'step': 1, 'log loss': 0.7203626036643982, 'accuracy': 0.625}
{'step': 2, 'log loss': 0.6688414216041565, 'accuracy': 0.625}
{'step': 3, 'log loss': 0.6895124316215515, 'accuracy': 0.5833333333333334}
{'step': 4, 'log loss': 0.6526419520378113, 'accuracy': 0.5625}
{'step': 5, 'log loss': 0.7005087733268738, 'accuracy': 0.5625}
{'step': 6, 'log loss': 0.6375906467437744, 'accuracy': 0.59375}
{'step': 7, 'log loss': 0.72061687707901, 'accuracy': 0.5625}
{'step': 8, 'log loss': 0.7041677236557007, 'accuracy': 0.546875}
{'step': 9, 'log loss': 0.6917385458946228, 'accuracy': 0.5416666666666666}
{'step': 10, 'log loss': 0.6098635792732239, 'accuracy': 0.55625}
{'step': 11, 'log loss': 0.759232223033905, 'accuracy': 0.5454545454545454}
{'step': 12, 'log loss': 0.7121027708053589, 'accuracy': 0.53125}
{'step': 13, 'log loss': 0.621692419052124, 'accuracy': 0.5480769230769231}
{'step': 14, 'log loss': 0.6414461135864258, 'accuracy': 0.5491071428571429}
{'step': 15, 'log loss': 0.6489278674125

In [None]:
# Reload the final checkpoint written by the training loop.
# `map_location` remaps the stored tensors onto the active `device`, so a
# checkpoint saved from a CUDA run can also be restored on a CPU-only runtime.
final_state = torch.load('/content/drive/MyDrive/computationSS/Language_model/model_final.pt', map_location=device)
bert_model.load_state_dict(final_state)
# Trailing semicolon suppresses the multi-page module repr in the cell output.
bert_model.to(device);

BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(28996, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, element