## Loading Saved BERT Model

In [1]:
from transformers import AutoModelForTokenClassification, AutoTokenizer
modelTest = "./model_bert" # contains model saved files
model_trained_Bert = AutoModelForTokenClassification.from_pretrained(modelTest)
tokenizerB = AutoTokenizer.from_pretrained("ai4bharat/indic-bert")


## Loading saved INDIC_NER model

In [2]:
model_NER = './model_ner'
model_trained_NER = AutoModelForTokenClassification.from_pretrained(model_NER)
tokenizerN = AutoTokenizer.from_pretrained("ai4bharat/IndicNER")

## Loading Q1 File

In [3]:
text = []
with open ("./cs689_assignment.txt",'r',encoding='utf-8') as file: #loading sentences
    data = file.readlines()
    for x in data:
        x = x[3:]
        x = x.strip()
        text.append([x])

    #remove empty from text
    text = [x for x in text if x != ['']]
    #split every index of text into words
    # text = [x[0].split() for x in text]
    #get prediction for each element of text

## Loading manually annotated Sentences

In [4]:
with open('./Q1_truth_manually.txt','r',encoding='utf-8') as file:
    data = file.readlines()
    truth = []
    for x in data:
        x = x.strip()
        x = x.split()
        
        truth.append(x)
        #remove empty []
        truth = [x for x in truth if x != []]

## Predicting using NER and BERT


In [5]:
import torch
def get_predictions( sentence, tokenizer, model ):
  # Let us first tokenize the sentence - split words into subwords
  tok_sentence = tokenizer(sentence, return_tensors='pt')

  with torch.no_grad():
    # we will send the tokenized sentence to the model to get predictions
    logits = model(**tok_sentence).logits.argmax(-1)
    
    # We will map the maximum predicted class id with the class label
    predicted_tokens_classes = [model.config.id2label[t.item()] for t in logits[0]]
    
    predicted_labels = []
    
    previous_token_id = 0
    # we need to assign the named entity label to the head word and not the following sub-words
    word_ids = tok_sentence.word_ids()
    for word_index in range(len(word_ids)):
        if word_ids[word_index] == None:
            previous_token_id = word_ids[word_index]
        elif word_ids[word_index] == previous_token_id:
            previous_token_id = word_ids[word_index]
        else:
            predicted_labels.append( predicted_tokens_classes[ word_index ] )
            previous_token_id = word_ids[word_index]
    
    return predicted_labels

In [56]:
s = "अभिनेत्री सोहा अली खान से उनकी मां अभिनेत्री शर्मिला टैगोर खासी नाराज़ हैं."
print(get_predictions(s, tokenizerN, model_trained_NER))

['O', 'B-PER', 'I-PER', 'I-PER', 'O', 'O', 'O', 'O', 'B-PER', 'I-PER', 'O', 'O', 'O']


In [6]:
resultB = []
resultN = []
for x in text:
    outputB = get_predictions(x[0], tokenizerB, model_trained_Bert)
    outputN = get_predictions(x[0], tokenizerN, model_trained_NER)
    resultB.append(outputB)
    resultN.append(outputN)
    print(outputB)
    print(outputN)




    

['O', 'O', 'O', 'O', 'O', 'B-LOC', 'I-LOC', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']
['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']
['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']
['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-PER', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']
['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']
['O', 'O', 'O', 'O', 'O', 'B-PER', 'I-PER', 'I-PER', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']
['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']
['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-LOC', 'O', 'O', 'O', 'O', 'O', 'O']
['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O'

## Loading CHATGPT results

In [8]:
with open('./Q3_ChatGPT_ner_tags.txt','r',encoding='utf-8') as file:
    gpt = file.readlines()
    truth_gpt= []
    for x in gpt:
        x = x.strip()
        x = x.split()
        
        truth_gpt.append(x)
        #remove empty []
        truth_gpt = [x for x in truth_gpt if x != []]
    print(truth_gpt)

[['O', 'O', 'O', 'O', 'O', 'B-ORG', 'I-ORG', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O'], ['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O'], ['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O'], ['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O'], ['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O'], ['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O'], ['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O'], ['O', 'O', 'B-PER', 'I-PER', 'I-PER', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O',

## Comparing F1_Score of truth manually with CHatGPT, BERT and NER model

In [9]:
from sklearn.metrics import precision_score, recall_score, f1_score
def calculate_metrics(true_labels, predicted_labels):
    
    true_labels_flat = [label for sublist in true_labels for label in sublist]
    predicted_labels_flat = [label for sublist in predicted_labels for label in sublist]
    
    #because manual labelling and different tokenizer can can generate different length token lists 
    minlen = min(len(true_labels_flat),len(predicted_labels_flat))
    true_labels_flat = true_labels_flat[:minlen]
    predicted_labels_flat = predicted_labels_flat[:minlen]
    
    precision = precision_score(true_labels_flat, predicted_labels_flat, average='weighted')
    recall = recall_score(true_labels_flat, predicted_labels_flat, average='weighted')
    f1 = f1_score(true_labels_flat, predicted_labels_flat, average='weighted')

    return precision, recall, f1

### High score I have trimmed the length to min length because of that many tags were omitted.

In [10]:
prec_gpt,recall_gpt,f1_gpt = calculate_metrics(truth_gpt,resultB)
print(f"Precision(GPT) is : {prec_gpt}")
print(f"Recall(GPT) is : {recall_gpt}")
print(f"F1(GPT) is : {f1_gpt}")

prec_B,recall_B,f1_B = calculate_metrics(truth,resultB)
print(f"Precision(Bert) is : {prec_B}")
print(f"Recall(Bert) is : {recall_B}")
print(f"F1(Bert) is : {f1_B}")

prec_N,recall_N,f1_N = calculate_metrics(truth,resultN)
print(f"Precision(NER) is : {prec_N}")
print(f"Recall(NER) is : {recall_N}")
print(f"F1(NER) is : {f1_N}")



Precision(GPT) is : 0.9504640734174574
Recall(GPT) is : 0.9417040358744395
F1(GPT) is : 0.9460637767812656
Precision(Bert) is : 0.9231532658949492
Recall(Bert) is : 0.9506726457399103
F1(Bert) is : 0.9364344481445991
Precision(NER) is : 0.8702655265044896
Recall(NER) is : 0.874439461883408
F1(NER) is : 0.8723475014482803


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
