<a href="https://colab.research.google.com/github/asmita-mukherjee/Learning_scratchPad/blob/main/Inferencing_from_bert_fine_tuned.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from google.colab import drive
drive.mount("/content/drive/")

Mounted at /content/drive/


## Imports

In [48]:
import numpy as np

In [36]:
from tqdm.notebook import tqdm

In [2]:
import pandas as pd

In [3]:
%%capture
pip install transformers

In [4]:
from transformers import BertTokenizer

In [5]:
from transformers import BertForSequenceClassification

In [6]:
import torch

In [7]:
from torch.utils.data import Dataset
from torch.utils.data import DataLoader

set the device

In [8]:
device = "cuda" if torch.cuda.is_available() else "cpu"

### Get the test data for which to predict the label

In [9]:
%%capture
pip install wget

In [10]:
import wget

In [11]:
import os

In [12]:
url = 'https://nyu-mll.github.io/CoLA/cola_public_1.1.zip'

if not os.path.exists('./cola_public_1.1.zip'):
  wget.download(url,'./cola_public_1.1.zip')




In [13]:
if not os.path.exists('./cola_public_1.1'):
  !unzip "./cola_public_1.1.zip"


Archive:  ./cola_public_1.1.zip
   creating: cola_public/
  inflating: cola_public/README      
   creating: cola_public/tokenized/
  inflating: cola_public/tokenized/in_domain_dev.tsv  
  inflating: cola_public/tokenized/in_domain_train.tsv  
  inflating: cola_public/tokenized/out_of_domain_dev.tsv  
   creating: cola_public/raw/
  inflating: cola_public/raw/in_domain_dev.tsv  
  inflating: cola_public/raw/in_domain_train.tsv  
  inflating: cola_public/raw/out_of_domain_dev.tsv  


### check the test data

In [14]:
df = pd.read_csv("./cola_public/raw/out_of_domain_dev.tsv", delimiter='\t', header=None, names=['sentence_source', 'label', 'label_notes', 'sentence'])


In [15]:
df

Unnamed: 0,sentence_source,label,label_notes,sentence
0,clc95,1,,Somebody just left - guess who.
1,clc95,1,,"They claimed they had settled on something, bu..."
2,clc95,1,,"If Sam was going, Sally would know where."
3,clc95,1,,"They're going to serve the guests something, b..."
4,clc95,1,,She's reading. I can't imagine what.
...,...,...,...,...
511,w_80,1,,John considers Bill silly.
512,w_80,1,,John considers Bill to be silly.
513,w_80,0,*,John bought a dog for himself to play with.
514,w_80,1,,John arranged for himself to get the prize.


In [16]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 516 entries, 0 to 515
Data columns (total 4 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   sentence_source  516 non-null    object
 1   label            516 non-null    int64 
 2   label_notes      163 non-null    object
 3   sentence         516 non-null    object
dtypes: int64(1), object(3)
memory usage: 16.2+ KB


In [17]:
test_sentences = list(df.sentence)

test_labels = list(df.label)

## Tokenize it to convert to tensors

In [18]:
bert_tokenizer = BertTokenizer.from_pretrained('bert-base-uncased',do_lower_case=True)

Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

In [19]:
input_ids = []
attn_mask = []

#note we had already decided on the max length in the training notebook

for sen in test_sentences:
  tokenized_dict = bert_tokenizer.encode_plus(sen,max_length=64,padding="max_length",return_tensors="pt")

  input_ids.append(tokenized_dict["input_ids"])
  attn_mask.append(tokenized_dict["attention_mask"])




In [20]:
input_ids = torch.cat(input_ids,dim=0)
attn_mask = torch.cat(attn_mask,dim=0)

input_ids.shape,attn_mask.shape

(torch.Size([516, 64]), torch.Size([516, 64]))

In [21]:
input_ids[0]

tensor([ 101, 8307, 2074, 2187, 1011, 3984, 2040, 1012,  102,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0])

In [22]:
labels = torch.tensor(test_labels)

### Setting up the dataset and the dataloaders

In [29]:
class CustomDataset(Dataset):
  def __init__(self,input_ids,attn_mask,labels):
    self.input_ids = input_ids
    self.attn_mask = attn_mask
    self.labels = labels
  
  def __len__(self):
    return len(self.input_ids)
  
  def __getitem__(self,idx):
    return self.input_ids[idx],self.attn_mask[idx],self.labels[idx]

In [30]:
dataset = CustomDataset(input_ids,attn_mask,labels)
dataloader = DataLoader(dataset,shuffle=True,batch_size=32)

#### Load the saved model(That we had trained in the prev notebook)
> Since we had only saved the weights of the model, we will have to first initialize the model and load the weights

In [37]:
model = BertForSequenceClassification.from_pretrained('bert-base-uncased',num_labels=2)

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

In [38]:
#load the model weights from the fine tuned model
model.load_state_dict(torch.load("/content/drive/MyDrive/Learning Scratch pad/bert_12_freezed_20_epoch"))

<All keys matched successfully>

##Inferencing

In [39]:
dataloader

<torch.utils.data.dataloader.DataLoader at 0x7fe9fc642770>

In [40]:
model.eval()
total_test_loss = 0
prediction_logits = []
true_labels = []

for idx,data in tqdm(enumerate(dataloader)):
  input_ids,attn_mask,labels = data

  outputs = model(input_ids,attention_mask=attn_mask,labels=labels)

  loss = outputs.loss
  logits = outputs.logits

  total_test_loss += loss.item()

  logits= logits.detach().to('cpu').numpy()
  prediction_logits.append(logits)

  labels = labels.to('cpu').numpy()
  true_labels.append(labels)

  print(f"The loss for the batch {idx+1} is {loss.item()}")


print(f"The average test loss {total_test_loss/len(dataloader)}")


0it [00:00, ?it/s]

The loss for the batch 1 is 0.8980472087860107
The loss for the batch 2 is 1.675600528717041
The loss for the batch 3 is 0.7622557878494263
The loss for the batch 4 is 1.171164870262146
The loss for the batch 5 is 1.775207757949829
The loss for the batch 6 is 0.7393551468849182
The loss for the batch 7 is 1.6986857652664185
The loss for the batch 8 is 2.028644323348999
The loss for the batch 9 is 1.255449891090393
The loss for the batch 10 is 1.5988820791244507
The loss for the batch 11 is 0.5810948610305786
The loss for the batch 12 is 0.5100498199462891
The loss for the batch 13 is 2.2062032222747803
The loss for the batch 14 is 1.5207111835479736
The loss for the batch 15 is 1.17192542552948
The loss for the batch 16 is 1.6130702495574951
The loss for the batch 17 is 3.894242286682129
The average test loss 1.4765053181087269


### let us have a look at the predictions of the model

In [43]:
sample = prediction_logits[0]

print(f"Length of sample {len(sample)}")

print(f"Contents of the sample {sample}")

print(f"Shape of the sample {sample.shape}")

Length of sample 32
Contents of the sample [[-3.9342012  4.7295575]
 [-3.675071   4.8194575]
 [-3.6696155  4.462717 ]
 [-4.043334   4.821196 ]
 [-4.0668693  4.836847 ]
 [ 2.3301413 -3.0210426]
 [-3.8355951  5.022082 ]
 [-3.62613    4.979614 ]
 [-4.055984   4.943551 ]
 [-3.778499   4.5666165]
 [-3.8672206  4.8226757]
 [ 2.268795  -2.959724 ]
 [-3.7606564  4.92035  ]
 [ 2.5932093 -3.2660134]
 [ 1.9275935 -2.3262722]
 [-3.8400836  4.7004766]
 [-3.8915827  4.740195 ]
 [-2.5887759  2.969937 ]
 [-1.1010731  1.8267429]
 [-3.7930403  4.987639 ]
 [-3.7698095  4.5965004]
 [-3.9344232  4.860271 ]
 [-3.801191   5.0446544]
 [-3.9910476  4.9413333]
 [-3.803753   5.0068254]
 [-3.6488123  4.454935 ]
 [-4.0386763  5.0022616]
 [-3.9780102  4.8152175]
 [ 1.2371112 -1.1843915]
 [-4.007166   4.8660927]
 [-3.5710814  5.035742 ]
 [ 3.1770377 -3.9925895]]
Shape of the sample (32, 2)


> hence each idx of the prediction contains the logits of the batch i.e it gives the unnormalized likelihood of one class over the other 

labels

In [44]:
sample = true_labels[0]

print(f"Length of sample {len(sample)}")

print(f"Contents of the sample {sample}")

print(f"Shape of the sample {sample.shape}")

Length of sample 32
Contents of the sample [1 1 1 1 1 0 1 1 1 1 1 0 1 0 0 1 1 0 0 1 1 1 0 1 0 1 1 1 1 1 1 0]
Shape of the sample (32,)


## Evaluate the performance of the model

In [47]:
from sklearn.metrics import f1_score
from sklearn.metrics import accuracy_score

from sklearn.metrics import matthews_corrcoef

In [51]:
total_acc = 0
total_f1 = 0
total_m_coref = 0

predicted = []
true = []

for idx in tqdm(range(len(true_labels))):
  
  pred_logits_batch = prediction_logits[idx]
  labels_batch = true_labels[idx]

  pred_labels = np.argmax(pred_logits_batch,axis=1) 


  acc = accuracy_score(labels_batch,pred_labels)
  f1 = f1_score(labels_batch,pred_labels)
  m_coref = matthews_corrcoef(labels_batch,pred_labels) #finds the correlation between the predicted and the true labels, +1 score= exact match,0=avg random prediction,-1=complete inverse prediction


  predicted.extend(pred_labels)
  true.extend(labels_batch)
  
  print(f"Accuracy for batch {idx+1} is {acc}")
  print(f"F1 Score for batch {idx+1} is {f1}")
  print(f"Matthews coreff for batch {idx+1} is {m_coref} ")

  total_acc += acc
  total_f1 += f1
  total_m_coref += m_coref


avg_acc = total_acc/len(true_labels)
avg_f1 = total_f1/len(true_labels)
avg_m_coreff = total_m_coref/len(true_labels)

print("\n")
print("******************************************************")
print(f"Avg accuracy for the test data {avg_acc:5f}")
print(f"Avg F1 Score for the test data {avg_f1:5f}")
print(f"Avg Matthews coreff for the test data {avg_m_coreff:5f}")




  0%|          | 0/17 [00:00<?, ?it/s]

Accuracy for batch 1 is 0.84375
F1 Score for batch 1 is 0.8979591836734695
Matthews coreff for batch 1 is 0.589872830909857 
Accuracy for batch 2 is 0.78125
F1 Score for batch 2 is 0.8679245283018867
Matthews coreff for batch 2 is 0.3094922302950865 
Accuracy for batch 3 is 0.875
F1 Score for batch 3 is 0.92
Matthews coreff for batch 3 is 0.6342857142857142 
Accuracy for batch 4 is 0.84375
F1 Score for batch 4 is 0.8780487804878048
Matthews coreff for batch 4 is 0.6625413488689132 
Accuracy for batch 5 is 0.75
F1 Score for batch 5 is 0.8095238095238095
Matthews coreff for batch 5 is 0.4732058754737091 
Accuracy for batch 6 is 0.90625
F1 Score for batch 6 is 0.9361702127659574
Matthews coreff for batch 6 is 0.7624437362098716 
Accuracy for batch 7 is 0.75
F1 Score for batch 7 is 0.8333333333333334
Matthews coreff for batch 7 is 0.4133804997216296 
Accuracy for batch 8 is 0.6875
F1 Score for batch 8 is 0.8076923076923077
Matthews coreff for batch 8 is -0.019518001458970664 
Accuracy for 

In [56]:
from sklearn.metrics import classification_report

In [58]:
print(classification_report(true,predicted))

              precision    recall  f1-score   support

           0       0.79      0.53      0.63       162
           1       0.81      0.94      0.87       354

    accuracy                           0.81       516
   macro avg       0.80      0.73      0.75       516
weighted avg       0.81      0.81      0.80       516

