In [1]:
import numpy as np
import pandas as pd

In [2]:
# Read File From chat_gpt
with open('chat_gpt_output.txt', 'r',encoding = 'utf-8') as file:
    # Read all lines from the file
    lines = file.readlines()

In [3]:
chatgpt_sentence_list = [] 
chatgpt_ner_unfiltered_list = []

i = 0
length = len(lines)
print(length)
while i < length:
    chatgpt_sentence_list.append(lines[i])
    i+=1
    chatgpt_ner_unfiltered_list.append(lines[i])
    i+=2
print(len(chatgpt_ner_unfiltered_list))

74
25


In [4]:
# Refining ner_list
chatgpt_ner_list = []
for line in chatgpt_ner_unfiltered_list:
    word_list = line.split(',')
    ner_word_tags = []
    for word in word_list:
        ner_word_tags.append(word.split(':')[1].strip())
    chatgpt_ner_list.append(ner_word_tags)
print(len(chatgpt_ner_list))

25


In [5]:
# Reading Manual Dataset
with open('manual_input.txt', 'r',encoding = 'utf-8') as file:
    # Read all lines from the file
    lines = file.readlines()


In [6]:
manual_sentence_list = [] 
manual_ner_unfiltered_list = []

i = 0
length = len(lines)
while i < length:
    manual_sentence_list.append(lines[i])
    i+=1
    manual_ner_unfiltered_list.append(lines[i])
    i+=2

In [7]:
print(manual_sentence_list[0])

अब आर-पार की लड़ाई लड़ने का ऐलान करते हुए दूसरे विभागों के कर्मचारी भी समर्थन में आ गए हैं।



In [8]:
# Refining manual_ner_list
manual_ner_list = []
manual_ner_dict = []
for line in manual_ner_unfiltered_list:
    word_list = line.split(',')
    ner_word_tags = []
    ner_word_dict = {}
    for word in word_list:
        ner_word_tags.append(word.split(':')[1].strip())
        ner_word_dict[word.split(':')[0].strip()] = word.split(':')[1].strip()
    manual_ner_dict.append(ner_word_dict)
    manual_ner_list.append(ner_word_tags)
print(len(manual_ner_list))

25


In [11]:
from sklearn.metrics import precision_recall_fscore_support

In [12]:
# Change String Tag to ID
ner_number = {'O': 0, 'B-PER': 1, 'I-PER': 2, 'B-ORG': 3, 'I-ORG': 4, 'B-LOC': 5, 'I-LOC': 6 , 'B-MISC' : 7 , 'I-MISC' : 8}
label_list = ner_number
def change_ner_to_id(ner_list):
    id_list = []
    for tag in ner_list:
        if tag.replace(" ","") in ner_number:
            id_list.append(ner_number[tag.replace(" ","")])
        else:
            id_list.append(0)
        
    return id_list

In [13]:
def getFlattenList(nested_list):
    flattened_list = [item for sublist in nested_list for item in sublist]
    return flattened_list

In [14]:
def getScoreHelper(y_pred, y_true,positive_label = 1):
    # Calculate precision, recall, and F1-score
    precision, recall, f1_score, _ = precision_recall_fscore_support(y_true, y_pred, average='binary')
    return precision, recall, f1_score

In [15]:
def getScore(true_list,predicted_list,curr_tag):
    true_list_zero_one = []
    for tag in true_list:
        true_list_zero_one.append(1 if tag==curr_tag else 0)
    predicted_list_zero_one = []
    for tag in predicted_list:
        predicted_list_zero_one.append(1 if tag==curr_tag else 0)
    return getScoreHelper(predicted_list_zero_one,true_list_zero_one)

In [16]:
def evaluateModel(true_list,predicted_list):
    # Flatten List
    true_list_flatten = getFlattenList(true_list)
    predicted_list_flatten = getFlattenList(predicted_list)
    
    #Change String Tag to ID
    true_id_list = change_ner_to_id(true_list_flatten)
    predicted_id_list = change_ner_to_id(predicted_list_flatten)
    
    precision_list = [] 
    recall_list = []
    f1_score_list = []
    
    for i in range(0,9):
        precision, recall, f1_score = getScore(true_id_list,predicted_id_list,i)
        precision_list.append(precision)
        recall_list.append(recall)
        f1_score_list.append(f1_score)
    
    return precision_list,recall_list,f1_score_list

# Comparing ChatGPT against Ground Truth

In [17]:
precision_list, recall_list , f1_score_list = evaluateModel(manual_ner_list,chatgpt_ner_list)

In [18]:
# Computing Macro Score
def average(lst):
    if not lst:
        return 0  # return 0 if the list is empty to avoid division by zero
    return sum(lst) / len(lst)

print("Precision "  ,average(precision_list))
print("Recall " ,average(recall_list))
print("Macro-F1-Score " , average(f1_score_list))

Precision  0.6384709629992648
Recall  0.5559982983628303
Macro-F1-Score  0.520102990691226


In [19]:
def getDataframe(labels,precision_list,recall_list,f1_score_list):
    data = {'Label': list(labels.keys()),
        'Precision': precision_list,
        'Recall': recall_list,
        'F1-Score': f1_score_list}
    return pd.DataFrame(data)
    

In [20]:
# Evaluation of Indic NER
print("-------------------------------------------------------------------------------------")
print("-----------------------------------ChatGPT------------------------------------------")
print("-------------------------------------------------------------------------------------")
indic_ner_dataframe = getDataframe(label_list,precision_list, recall_list , f1_score_list)
print(indic_ner_dataframe)

# Macro F1 - Score
print("Macro-F1-Score : " , sum(f1_score_list) / len(f1_score_list))

-------------------------------------------------------------------------------------
-----------------------------------ChatGPT------------------------------------------
-------------------------------------------------------------------------------------
    Label  Precision    Recall  F1-Score
0       O   0.917667  0.992579  0.953654
1   B-PER   1.000000  0.500000  0.666667
2   I-PER   0.750000  1.000000  0.857143
3   B-ORG   0.400000  0.400000  0.400000
4   I-ORG   0.000000  0.000000  0.000000
5   B-LOC   0.428571  1.000000  0.600000
6   I-LOC   1.000000  1.000000  1.000000
7  B-MISC   0.250000  0.034483  0.060606
8  I-MISC   1.000000  0.076923  0.142857
Macro-F1-Score :  0.520102990691226


# Comparing IndicNER and Ground Truth

In [21]:
!pip3 install transformers
!pip install transformers[torch]



In [22]:
from transformers import AutoModelForTokenClassification, AutoConfig, AutoTokenizer
import torch

In [24]:
import json
config_indic_ner = json.load(open("./model_fine_tune_indicner/config.json"))

In [25]:
label_list_config = {'O': 0, 'B-PER': 1, 'I-PER': 2, 'B-ORG': 3, 'I-ORG': 4, 'B-LOC': 5, 'I-LOC': 6}

In [26]:
label2id = {
    str(i): label for i,label in label_list_config.items()
}
id2label = {
    label: str(i) for i,label in label_list_config.items()
}
print(label2id)
print(id2label)

{'O': 0, 'B-PER': 1, 'I-PER': 2, 'B-ORG': 3, 'I-ORG': 4, 'B-LOC': 5, 'I-LOC': 6}
{0: 'O', 1: 'B-PER', 2: 'I-PER', 3: 'B-ORG', 4: 'I-ORG', 5: 'B-LOC', 6: 'I-LOC'}


In [27]:
config_indic_ner["id2label"] = id2label
config_indic_ner["label2id"] = label2id

In [28]:
json.dump(config_indic_ner, open("./model_fine_tune_indicner/config.json","w"))

In [29]:
model_indic_ner = AutoModelForTokenClassification.from_pretrained('./model_fine_tune_indicner')
tokenizer_indic_ner = AutoTokenizer.from_pretrained("./model_fine_tune_indicner")

  torch.utils._pytree._register_pytree_node(
  torch.utils._pytree._register_pytree_node(


In [30]:
model_indic_ner.config

BertConfig {
  "_name_or_path": "./model_fine_tune_indicner",
  "architectures": [
    "BertForTokenClassification"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "directionality": "bidi",
  "finetuning_task": "ner",
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "O",
    "1": "B-PER",
    "2": "I-PER",
    "3": "B-ORG",
    "4": "I-ORG",
    "5": "B-LOC",
    "6": "I-LOC"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "B-LOC": 5,
    "B-ORG": 3,
    "B-PER": 1,
    "I-LOC": 6,
    "I-ORG": 4,
    "I-PER": 2,
    "O": 0
  },
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "pooler_fc_size": 768,
  "pooler_num_attention_heads": 12,
  "pooler_num_fc_layers": 3,
  "pooler_size_per_head": 128,
  "pooler_type": "first_token_tr

In [31]:
def get_ner(sentence,model,tokenizer):
    tok_sentence = tokenizer(sentence, return_tensors='pt')

    with torch.no_grad():
        logits = model(**tok_sentence).logits.argmax(-1)
#         print(logits)
        predicted_tokens_classes = [
            model.config.id2label[t.item()] for t in logits[0]]
#         print(predicted_tokens_classes)
        predicted_labels = []

        previous_token_id = 0
        word_ids = tok_sentence.word_ids()
        for word_index in range(len(word_ids)):
            if word_ids[word_index] == None:
                previous_token_id = word_ids[word_index]
            elif word_ids[word_index] == previous_token_id:
                previous_token_id = word_ids[word_index]
            else:
                predicted_labels.append(predicted_tokens_classes[word_index])
                previous_token_id = word_ids[word_index]

        ner_output = []
        for index in range(len(sentence.split(' '))):
            if(index<len(predicted_labels)):
                ner_output.append((sentence.split(' ')[index], predicted_labels[index]))
            else:
                ner_output.append((sentence.split(' ')[index], 'O'))
        return ner_output

In [32]:
print(get_ner(manual_sentence_list[0],model_indic_ner,tokenizer_indic_ner))

[('अब', 'O'), ('आर-पार', 'B-LOC'), ('की', 'I-LOC'), ('लड़ाई', 'I-LOC'), ('लड़ने', 'O'), ('का', 'O'), ('ऐलान', 'O'), ('करते', 'O'), ('हुए', 'O'), ('दूसरे', 'O'), ('विभागों', 'O'), ('के', 'O'), ('कर्मचारी', 'O'), ('भी', 'O'), ('समर्थन', 'O'), ('में', 'O'), ('आ', 'O'), ('गए', 'O'), ('हैं।\n', 'O')]


In [33]:
def getDataframe(labels,precision_list,recall_list,f1_score_list):
    data = {'Label': list(label_list.keys()),
        'Precision': precision_list,
        'Recall': recall_list,
        'F1-Score': f1_score_list}
    return pd.DataFrame(data)
    

In [34]:
# Finding NER Tags for IndicNER
IndicNER_ner_list_irregular = []
for i in range(25):
    temp_list = get_ner(manual_sentence_list[i],model_indic_ner,tokenizer_indic_ner)
    line_ner_list = []
    for tuple_pair in temp_list:
        line_ner_list.append(tuple_pair[1])
    IndicNER_ner_list_irregular.append(line_ner_list)

In [35]:
count = {}
for i in range(25):
    for tag in IndicNER_ner_list_irregular[i]:
        if tag in count:
            count[tag]+=1
        else:
            count[tag]=1
print(count)

{'O': 577, 'B-LOC': 6, 'I-LOC': 3, 'B-ORG': 6, 'B-PER': 13, 'I-PER': 8, 'I-ORG': 8}


In [36]:
# Handling Irregular Shape of IndicNER
IndicNER_ner_list = []
for i in range(25):
    if len(IndicNER_ner_list_irregular[i]) > len(manual_ner_list[i]):
        difference = len(IndicNER_ner_list_irregular[i]) - len(manual_ner_list[i])
        IndicNER_ner_list.append(IndicNER_ner_list_irregular[i][:-difference])
    elif len(IndicNER_ner_list_irregular[i]) < len(manual_ner_list[i]):
        difference = len(manual_ner_list[i]) - len(IndicNER_ner_list_irregular[i])
        IndicNER_ner_list_irregular[i] += ['O'] * difference
        IndicNER_ner_list.append(IndicNER_ner_list_irregular[i])
    else:
        IndicNER_ner_list.append(IndicNER_ner_list_irregular[i])

In [37]:
precision_list, recall_list , f1_score_list = evaluateModel(manual_ner_list,IndicNER_ner_list)

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [38]:
# Evaluation of Indic NER
print("-------------------------------------------------------------------------------------")
print("-----------------------------------IndicNER------------------------------------------")
print("-------------------------------------------------------------------------------------")
indic_ner_dataframe = getDataframe(label_list,precision_list, recall_list , f1_score_list)
print(indic_ner_dataframe)

# Macro F1 - Score
print("Macro-F1-Score : " , sum(f1_score_list) / len(f1_score_list))

-------------------------------------------------------------------------------------
-----------------------------------IndicNER------------------------------------------
-------------------------------------------------------------------------------------
    Label  Precision    Recall  F1-Score
0       O   0.912892  0.972171  0.941599
1   B-PER   0.615385  0.500000  0.551724
2   I-PER   0.500000  0.666667  0.571429
3   B-ORG   0.500000  0.600000  0.545455
4   I-ORG   0.625000  0.833333  0.714286
5   B-LOC   0.500000  1.000000  0.666667
6   I-LOC   0.333333  1.000000  0.500000
7  B-MISC   0.000000  0.000000  0.000000
8  I-MISC   0.000000  0.000000  0.000000
Macro-F1-Score :  0.49901765744316173


# Comparing IndicBERT and Ground Truth

In [39]:
import json
config_indicbert = json.load(open("./model_fine_tune_indicbert/config.json"))
config_indicbert["id2label"] = id2label
config_indicbert["label2id"] = label2id
json.dump(config_indicbert, open("./model_fine_tune_indicbert/config.json","w"))

model_indicbert = AutoModelForTokenClassification.from_pretrained('./model_fine_tune_indicbert')
tokenizer_indicbert = AutoTokenizer.from_pretrained("./model_fine_tune_indicbert")

  torch.utils._pytree._register_pytree_node(


In [40]:
# Finding Bert Tags for IndicBERT
Indicbert_ner_list_irregular = []
for i in range(25):
    temp_list = get_ner(manual_sentence_list[i],model_indicbert,tokenizer_indicbert)
    line_ner_list = []
    for tuple_pair in temp_list:
        line_ner_list.append(tuple_pair[1])
    Indicbert_ner_list_irregular.append(line_ner_list)

In [41]:
# Handling Irregular Shape of IndicBERT
IndicBert_ner_list = []
for i in range(25):
    if len(Indicbert_ner_list_irregular[i]) > len(manual_ner_list[i]):
        difference = len(Indicbert_ner_list_irregular[i]) - len(manual_ner_list[i])
        IndicBert_ner_list.append(Indicbert_ner_list_irregular[i][:-difference])
    elif len(Indicbert_ner_list_irregular[i]) < len(manual_ner_list[i]):
        difference = len(manual_ner_list[i]) - len(Indicbert_ner_list_irregular[i])
        Indicbert_ner_list_irregular[i] += ['O'] * difference
        IndicBert_ner_list.append(Indicbert_ner_list_irregular[i])
    else:
        IndicBert_ner_list.append(Indicbert_ner_list_irregular[i])

In [42]:
precision_list, recall_list , f1_score_list = evaluateModel(manual_ner_list,IndicBert_ner_list)

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [43]:
# Evaluation of Indic IndicBERT
indic_bert_dataframe = getDataframe(label_list,precision_list, recall_list , f1_score_list)
print(indic_bert_dataframe)

# Macro F1 - Score
print("Macro-F1-Score : " , sum(f1_score_list) / len(f1_score_list))

    Label  Precision    Recall  F1-Score
0       O   0.906412  0.970315  0.937276
1   B-PER   0.400000  0.375000  0.387097
2   I-PER   0.714286  0.833333  0.769231
3   B-ORG   0.571429  0.800000  0.666667
4   I-ORG   0.750000  0.500000  0.600000
5   B-LOC   0.285714  0.666667  0.400000
6   I-LOC   1.000000  1.000000  1.000000
7  B-MISC   0.000000  0.000000  0.000000
8  I-MISC   0.000000  0.000000  0.000000
Macro-F1-Score :  0.5289189106393407
