In [None]:
import yaml
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import langid

In [None]:
from src.idsp_kedro.pipelines.disease_ner.disease_ner_nodes import DiseaseTagger
from src.idsp_kedro.pipelines.relevancy_classifier.nodes import RelevanceTagger
from src.idsp_kedro.pipelines.translate.nodes import Translator
from src.idsp_kedro.pipelines.event_extraction.extraction_nodes import EventExtractor
from src.idsp_kedro.pipelines.location_date_ner.nodes import LocationDateTagger
from src.idsp_kedro.pipelines.event_extraction.filtering_nodes import EventFilterer
from src.idsp_kedro.pipelines.noevents_extraction.extraction_nodes import NoEventExtractorNLI

# Dataset

In [None]:
path = './750_annotated_articles.csv'
main_df = pd.read_csv(path, encoding='utf-8', lineterminator='\n')  
print('len of main_df:', main_df.shape[0])

main_df['GT_Events'] = main_df['GT_Events'].apply(eval)
main_df = main_df[main_df['Article'].notnull()]
main_df.reset_index(drop=True, inplace=True)
print('Number of Articles after removing empty articles:', main_df.shape[0])

N_no_events = sum(main_df['GT_Events'].apply(lambda x: 0 if len(x) else 1))
N_events = main_df.shape[0] - N_no_events

main_df['complete_article'] = main_df['Article'].copy()
main_df['lang'] = 'en'
print('Articles with atleast one Event:', N_events)
print('Number of no event Articles:', N_no_events)


In [None]:
histogram = main_df['GT_Events'].apply(len).value_counts().sort_index()
sns.set(style="whitegrid")
plt.figure(figsize=(10, 6))
ax = sns.barplot(x=histogram.index, y=histogram.values, palette="Blues_d")
ax.set(xlabel='Number of Events', ylabel='Number of Articles')
# write value on top of each bar
for i in range(len(histogram)):
    ax.text(i, histogram.values[i], histogram.values[i], ha = 'center')
plt.title('Number of Events per Article')
plt.show()

In [None]:
class Pipeline:
    def __init__(self):
        self.params = yaml.safe_load(open("/home/ubuntu/devesh/Prod_change_12th_jan/idsp_5th_feb/idsp-score/conf/local/parameters.yml"))
        self.params["common_params"]["lang_dict"] = "/home/ubuntu/devesh/Prod_change_12th_jan/idsp_5th_feb/idsp-score/data/05_model_input/lang_dict.json"
        self.device_params = self.params["device_params"]
        self.common_params = self.params["common_params"]
        
        
    def language_classifier(self, df):
        df['lang'] = df['Original_Article'].apply(lambda x: langid.classify(x)[0])
        return df
    
    def relevancy_classifier(self, df):
        
        print('Relevancy Classifier...')
        relevance_params = self.params["relevance_params"]
        relevance_tagger_obj = RelevanceTagger(
                 relevance_params, self.device_params, self.common_params
         )        
        # we don't have title and description separate, we perform it on the whole article one by one
        def get_relevance_per_article(article, lang, threshold=0.5):
            pred_article, confidence = relevance_tagger_obj.get_relevance_per_article(article=article, lang=lang)
            return pred_article, confidence

        relevancy_preds = []
        confidence_preds = []
        for itr, article in enumerate(df['Original_Article']):
            relevancy, confidence = get_relevance_per_article(article, df['lang'][itr])
            relevancy_preds.append(relevancy)
            confidence_preds.append(confidence)

        df['relevant'] = [1 if i == 1 else 0 for i in relevancy_preds]
  
        return df, confidence_preds
    
    def translate(self, df):
        translator_obj = Translator(common_params=self.common_params)
        translated_articles, avg_time_per_lang_per_word = translator_obj.run_translate_to_en(articles=df['Original_Article'])
        df['complete_article'] = translated_articles
        df['lang'] = 'en'

        return df, avg_time_per_lang_per_word
    
    def disease_ner(self, df):
        
        print('Disease NER...')
        disease_tagger_obj = DiseaseTagger(
        disease_ner_params=self.params["disease_ner_params"],
        device_params=self.params["device_params"],
        common_params=self.params["common_params"])
        disease_pred = []
        articles = df['complete_article']
        
        df = disease_tagger_obj.perform_disease_ner_on_df(df)
        
        return df
      
    def location_ner(self, df):
        print('Location NER...')
        location_tagger_obj = LocationDateTagger(
            ner_params=self.params["location_date_ner_params"],
            common_params=self.params["common_params"])
        
        df = location_tagger_obj.get_location_date_for_df(df)    
        
        return df
        
    def qa(self, df):
        print('QA...')
        event_extractor_obj = EventExtractor(
            event_ext_params=self.params["event_extraction_params"],
            device_params=self.device_params,
            common_params=self.common_params,
        )
        event_filter_obj = EventFilterer(common_params=self.common_params)
        total_events_list = event_extractor_obj.perform_event_extraction(df, preprocess=False)
        df = event_filter_obj.filter_events_df(df, total_events_list)
        return df

        
    def nli(self, df):
        print('Numberless Event Extraction...')
        noevent_extraction_params=self.params["noevent_extraction_params"]
        noevent_ext_obj = NoEventExtractorNLI(noevent_extraction_params,
                                              self.device_params,
                                              self.common_params)
        
        total_events_list = noevent_ext_obj.perform_noevent_extraction_and_filter(df, preprocess=False)
        df["noevents_events"] = total_events_list
        
        return df
        


In [None]:
# Translation only
pipeline = Pipeline()
df, avg_time_per_lang_per_word = pipeline.translate(main_df)
# rename complete article to indictransv2_article
df.rename(columns={'complete_article': 'indictransv2_article'}, inplace=True)
df.to_csv('750_translated_articles_indictransv2.csv', index=False)

In [None]:

pipeline = Pipeline()
# Relevancy Classifier

df, _ = pipeline.relevancy_classifier(main_df)

# Disease NER
df = pipeline.disease_ner(df)
df['disease_pred'] = df['disease_pred'].apply(lambda x: ', '.join([i.lower() for i in x]))
df['disease_by_keyword_spotting'] = df['disease_by_keyword_spotting'].apply(lambda x: ', '.join([i.lower() for i in x]))
df['disease_by_ner'] = df['disease_by_ner'].apply(lambda x: ', '.join([i.lower() for i in x]))
# Copy diseases from GT for QA and NLI
df['diseases'] = df['GT_Events'].apply(lambda x: [i['Disease'] for i in x])
df['diseases'] = df['diseases'].apply(lambda x: ', '.join([i for i in x]))
df['diseases'] = df['diseases'].apply(lambda x: ', '.join(set(x.split(', '))))

# Location NER
df = pipeline.location_ner(df)
df.reset_index(drop=True, inplace=True)
df = df.rename(columns={'location': 'locations_pred'})
df = df.rename(columns={'dates': 'dates_pred'})
df['date'] = ''
# Copy locations from GT for QA and NLI
df['location'] = df['GT_Events'].apply(lambda x: [i['Location'] for i in x])
df['location'] = df['location'].apply(lambda x: ', '.join([i.lower() for i in x]))
df['location'] = df['location'].apply(lambda x: ', '.join(set(x.split(', '))))
# for each location copy it to a tuple, for example: india, kerala - [(india, india), (kerala, kerala)]
df['location'] = df['location'].apply(lambda x: [(i, i) for i in x.split(', ') if len(i)])

# QA
df = pipeline.qa(df)
df.loc[:,"old_data_cluster_idx"] = ''

# NLI
df = pipeline.nli(df)

# Relevancy Classification

In [None]:
# Evaluate Relevancy Classifer  

def evaluate_relevancy(df):
    print('Total number of articles:', len(df))
    df['gt'] = df['GT_Events'].apply(lambda x: 1 if len(x) > 0 else 0)
    tp = sum((df['gt'] == 1) & (df['relevant'] == 1))
    fp = sum((df['gt'] == 0) & (df['relevant'] == 1))
    fn = sum((df['gt'] == 1) & (df['relevant'] == 0))
    tn = sum((df['gt'] == 0) & (df['relevant'] == 0))
    
    
    precision = tp / (tp + fp)
    recall = tp / (tp + fn)
    f1 = 2 * (precision * recall) / (precision + recall)
    accuracy = (tp + tn)/(tp + fp + tn + fn)
    
    tpr = recall
    fpr = fp / (fp + tn)
    
    return precision, recall, f1, accuracy, tpr, fpr
    

# Relevancy Classifier
pipeline = Pipeline()
precision_list = []; recall_list = []; f1_list = []; accuracy_list = []
df = pipeline.language_classifier(main_df)
df, confidence_preds = pipeline.relevancy_classifier(df)
precision, recall, f1, accuracy, tpr, fpr = evaluate_relevancy(df)
precision_list.append(precision)
recall_list.append(recall)
f1_list.append(f1)
accuracy_list.append(accuracy)


In [None]:
# compute precision, recall, f1, accuracy based on language df['lang'] for each language
for lang in ['en', 'hi', 'te']:
    df_lang = df[df['lang'] == lang]
    precision, recall, f1, accuracy, tpr, fpr = evaluate_relevancy(df_lang)
    print(f'Lang: {lang}, Precision: {precision}, Recall: {recall}, F1: {f1}, Accuracy: {accuracy}, TPR: {tpr}, FPR: {fpr}')

In [None]:
import numpy as np
from sklearn import metrics
# confidence_preds = np.array(confidence_preds)[:, 1:].squeeze()
gt = np.array(df['gt'])
fpr, tpr, thresholds = metrics.roc_curve(gt, confidence_preds)
roc_auc = metrics.auc(fpr, tpr) 
plt.figure(figsize=(10, 6))
plt.plot(fpr, tpr, label='ROC curve (area = %0.2f)' % roc_auc)
plt.plot([0, 1], [0, 1], 'k--')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver Operating Characteristic')
plt.legend(loc="lower right")
plt.show()
print('AUC:', roc_auc)



In [None]:
# get confusion matrix at 0.5 threshold
threshold = 0.5
pred = [1 if i > threshold else 0 for i in confidence_preds]
confusion_matrix = metrics.confusion_matrix(gt, pred)
print('Confusion Matrix at 0.5 threshold:', confusion_matrix)
print('Precision:', precision)
print('Recall:', recall)
print('F1:', f1)
print('Accuracy:', accuracy)

# plot confusion matrix
import seaborn as sns
import matplotlib.pyplot as plt
sns.heatmap(confusion_matrix, annot=True, fmt='g')
plt.xlabel('Predicted')
plt.ylabel('True')
plt.title('Confusion Matrix')
plt.show()

# QA 

In [None]:
# Copy diseases from GT for QA and NLI
df = main_df.copy()
# keep only those articles where events are present
df = df[df['GT_Events'].apply(len) > 0]
df = df.reset_index(drop=True)
print('Total number of articles:', len(df))
df['diseases'] = df['GT_Events'].apply(lambda x: [i['Disease'] for i in x])
df['diseases'] = df['diseases'].apply(lambda x: ', '.join([i for i in x]))
df['diseases'] = df['diseases'].apply(lambda x: ', '.join(set(x.split(', '))))

df['date'] = ''
# Copy locations from GT for QA and NLI
df['location'] = df['GT_Events'].apply(lambda x: [i['Location'] for i in x])
df['location'] = df['location'].apply(lambda x: ', '.join([i.lower() for i in x]))
df['location'] = df['location'].apply(lambda x: ', '.join(set(x.split(', '))))

print('Total no of non empty GT events:', df['GT_Events'].apply(len).sum())


In [None]:
params = yaml.safe_load(open("/home/ubuntu/devesh/Prod_change_12th_jan/idsp_5th_feb/idsp-score/conf/local/parameters.yml"))
params["common_params"]["lang_dict"] = "/home/ubuntu/devesh/Prod_change_12th_jan/idsp_5th_feb/idsp-score/data/05_model_input/lang_dict.json"
device_params = params["device_params"]
common_params = params["common_params"]

event_extractor_obj = EventExtractor(
                event_ext_params=params["event_extraction_params"],
                device_params=device_params,
                common_params=common_params,
                )
         
tp = 0; fp = 0; fn = 0; tn = 0
excact_match = 0; total_numbered_events = 0; no_ans = 0; incorrect_ans = 0
for i in range(df.shape[0]):
    article = df['complete_article'][i]
    events = df['GT_Events'][i]
    for event in events:
        if event['Number'] == '': # consider only numbered events
            continue
        
        incident_type = event['Incident Type (new or total)']
        case_or_death = event['Incident (case or death)']
        row = pd.Series()
        row["complete_article"] = article
        row["location"] = [(event['Location'], event['Location'])]
        row["date"] = None
        row["diseases"] = event['Disease']
        row["lang"] = 'en'
        total_numbered_events += 1
        events_list = event_extractor_obj.perform_event_extraction_per_article(row, incident_type, case_or_death)
        if len(events_list) > 1:
            print('More than one event:', events_list)
        
        
# Print results
# print('Total Numbered Events:', total_numbered_events)
# print('Exact Match:', excact_match)
# print('No Answer:', no_ans)
# print('Incorrect Answer:', incorrect_ans)
# print('Accuracy:', excact_match/total_numbered_events)
# print('No Answer Percentage:', no_ans/total_numbered_events)
# print('Incorrect Answer Percentage:', incorrect_ans/total_numbered_events)
        
        

# NLI

In [None]:

df = main_df.copy()
df = df[df['GT_Events'].apply(len) > 0]
df = df.reset_index(drop=True)
# print total number of events
print('Total Number of Events:', df['GT_Events'].apply(len).sum())


In [None]:
params = yaml.safe_load(open("/home/ubuntu/devesh/Prod_change_12th_jan/idsp_5th_feb/idsp-score/conf/local/parameters.yml"))
params["common_params"]["lang_dict"] = "/home/ubuntu/devesh/Prod_change_12th_jan/idsp_5th_feb/idsp-score/data/05_model_input/lang_dict.json"
device_params = params["device_params"]
common_params = params["common_params"]
noevent_extraction_params=params["noevent_extraction_params"]
noevent_ext_obj = NoEventExtractorNLI(noevent_extraction_params,
                                device_params,
                                common_params)
         
correct = 0; total = 0
print(len(df))
for i in range(len(df)):
    article = df['complete_article'][i]
    events = df['GT_Events'][i]
    for event in events:        
        if event['Number'] != '': # consider only no numbered events
                continue
        case_or_death = event['Incident (case or death)']
        row = pd.Series()
        row["complete_article"] = article
        row["location"] = [(event['Location'], event['Location'])]
        row["diseases"] = event['Disease']
        row["lang"] = 'en'
        events_list = noevent_ext_obj.perform_noevent_extraction_per_article(row, case_or_death)
        if len(events_list) > 1:
                print('More than one event:', events_list)
        
        if len(events_list) == 1:
            correct += 1
            
        total += 1
        
# Print results
print('Total Numberless Events:', total)
print('Correct:', correct)
print('Accuracy:', correct/total)



In [None]:
df.to_csv('final_output_each_phase.csv', index=False)

# Evaluation

In [None]:

import pandas as pd
orig_df = pd.read_csv('final_output_each_phase.csv')
df = orig_df.copy()

# Preprocess data
def rename_keys(d):
    return {'incident' if k == 'Incident (case or death)' else 'incident_type' if k == 'Incident Type (new or total)' else k: v for k, v in d.items()}

def lower_case_keys(d):
    return {k.lower(): v.lower() if isinstance(v, str) else v for k, v in d.items()}

df['GT_Events'] = df['GT_Events'].apply(eval)
df['events'] = df['events'].apply(eval)
df['noevents_events'] = df['noevents_events'].apply(eval)

df['GT_Events'] = df['GT_Events'].apply(lambda x: [rename_keys(i) for i in x])
df['GT_Events'] = df['GT_Events'].apply(lambda x: [lower_case_keys(i) for i in x])
df['events'] = df['events'].apply(lambda x: [lower_case_keys(i) for i in x])


df['disease_by_keyword_spotting'] = df['disease_by_keyword_spotting'].apply(lambda x: ', '.join([i.lower() for i in eval(x)]))
df['disease_by_ner'] = df['disease_by_ner'].apply(lambda x: ', '.join([i.lower() for i in eval(x)]))

# remove rows with no events
df = df[df['GT_Events'].apply(len) > 0]

print('Total Articles:', df.shape[0])

In [None]:
import json
class EventEvaluation:
    def __init__(self, df):
        self.df = df
        self.synonyms = json.load(open("/home/ubuntu/devesh/Prod_change_12th_jan/idsp_5th_feb/idsp-score/data/05_model_input/disease_synonyms.json"))

    def group_diseases(self, events):
        new_events = []
        for event in events:
            disease = event["disease"]
            flag = 0
            for key in self.synonyms.keys():
                synonyms_for_disease = [x.lower() for x in self.synonyms[key]]
                if disease.lower().lstrip().rstrip() in synonyms_for_disease:
                    event["disease"] = key.lower()
                    event["original_disease"] = disease
                    flag = 1
                    break  
            if flag == 0:
                event["disease"] = disease
                event["original_disease"] = disease
                
            new_events.append(event)
        return new_events

    def precision_recall_method_1(self, pred, gt):
        gt = set(tuple(sorted(d.items())) for d in gt)
        pred = set(tuple(sorted(d.items())) for d in pred)
        
        tp = len(pred.intersection(gt))
        fp = len(pred.difference(gt))
        fn = len(gt.difference(pred))
        
    
        if tp == 0 and fp == 0 and fn == 0:
            precision = 1.0; recall = 1.0; f1 = 1.0
        elif tp == 0 and (fp > 0 or fn > 0):
            precision = 0.0; recall = 0.0; f1 = 0.0
        else:
            precision = tp / (tp + fp)
            recall = tp / (tp + fn)
            f1 = 2 * (precision * recall) / (precision + recall)
        exact_match = 1.0 if pred == gt else 0.0
        
        return precision, recall, f1, exact_match
    
    def precision_recall_method_2(self, pred, gt):
        if len(pred) and isinstance(pred[0], dict):
            gt = set(tuple(sorted(d.items())) for d in gt)
            pred = set(tuple(sorted(d.items())) for d in pred)
        elif isinstance(pred, list):
            # hand list of list of strings, d.items won't work here
            gt = set(gt)
            pred = set(pred)
            
        print(pred, gt)
        tp = len(pred.intersection(gt))
        fp = len(pred.difference(gt))
        fn = len(gt.difference(pred))
            
        if tp == 0 and fp == 0 and fn == 0:
            precision = 1.0; recall = 1.0; f1 = 1.0
        elif tp == 0 and (fp > 0 or fn > 0):
            precision = 0.0; recall = 0.0; f1 = 0.0
        else:
            precision = tp / (tp + fp)
            recall = tp / (tp + fn)
            f1 = 2 * (precision * recall) / (precision + recall)
        exact_match = 1.0 if pred == gt else 0.0
        
        return precision, recall, f1, exact_match

    def jaccard_index(self, pred, gt):
        # intersection over union
        gt = set(tuple(sorted(d.items())) for d in gt)
        pred = set(tuple(sorted(d.items())) for d in pred)
        if len(pred) == 0 and len(gt) == 0: return 1
        intersection = len(pred.intersection(gt))
        union = len(pred.union(gt))
        return intersection / union
    

    def subset_accuracy(self, pred, gt):
        gt = set(tuple(sorted(d.items())) for d in gt)
        pred = set(tuple(sorted(d.items())) for d in pred)
        return float(pred.issubset(gt))

    def evaluate_event_extraction(self):
        # Group diseases using synonyms list
        self.df['GT_Events'] = self.df['GT_Events'].apply(lambda events: self.group_diseases(events))
        self.df['events'] = self.df['events'].apply(lambda events: self.group_diseases(events))
        
        # keep only keys disease, location, incident, incident_type, number for comparison

        def keys_to_keep(d):
            return {k: v for k, v in d.items() if k in ['disease', 'location', 'incident', 'incident_type', 'number']}
        self.df['GT_Events'] = self.df['GT_Events'].apply(lambda x: [keys_to_keep(i) for i in x])
        self.df['events'] = self.df['events'].apply(lambda x: [keys_to_keep(i) for i in x])
        
        # apply precision_recall_method_2 to each row
        self.df['precision'], self.df['recall'], self.df['f1'], self.df['exact_match'] = zip(*self.df.apply(lambda row: self.precision_recall_method_2(row['events'], row['GT_Events']), axis=1))
        self.df['jaccard'] = self.df.apply(lambda row: self.jaccard_index(row['events'], row['GT_Events']), axis=1)
        self.df['subset'] = self.df.apply(lambda row: self.subset_accuracy(row['events'], row['GT_Events']), axis=1)
        
        metrics = {
            "precision": self.df['precision'].mean(),
            "recall": self.df['recall'].mean(),
            "f1": self.df['f1'].mean(),
            "exact_match": self.df['exact_match'].mean(),
            "jaccard": self.df['jaccard'].mean(),
            "subset": self.df['subset'].mean()
        }
        
        return metrics     

    def evaluate_relevancy(self):
        print('Total number of articles:', len(self.df))
        self.df['gt'] = self.df['GT_Events'].apply(lambda x: 1 if len(x) > 0 else 0)
        tp = sum((self.df['gt'] == 1) & (self.df['relevant'] == 1))
        fp = sum((self.df['gt'] == 0) & (self.df['relevant'] == 1))
        fn = sum((self.df['gt'] == 1) & (self.df['relevant'] == 0))
        tn = sum((self.df['gt'] == 0) & (self.df['relevant'] == 0))
        
        
        precision = tp / (tp + fp)
        recall = tp / (tp + fn)
        f1 = 2 * (precision * recall) / (precision + recall)
        accuracy = (tp + tn)/(tp + fp + tn + fn)
        
        return precision, recall, f1, accuracy
  
    def get_disease_ner_metrics(self, gt, pred):
        gt = [[i.lower() for i in x] for x in gt]
        pred = [[i.lower() for i in x] for x in pred]
        precision_list = []; recall_list = []; f1_list = []
        for i in range(len(gt)):
            tp = 0; fp = 0; fn = 0
            
            gt_set = set(gt[i])
            pred_set = set(pred[i])
            
            # print(gt_set, pred_set)
            
            tp += len(gt_set.intersection(pred_set))
            fp += len(pred_set.difference(gt_set))
            fn += len(gt_set.difference(pred_set))
            
            if tp == 0 and fp == 0 and fn == 0:
                precision = 1
                recall = 1
                f1 = 0
            elif tp == 0 and (fp != 0 or fn != 0):
                precision = 0
                recall = 0
                f1 = 0
            else:
                precision = tp / (tp + fp)
                recall = tp / (tp + fn)
                f1 = 2 * (precision * recall) / (precision + recall)
            
            precision_list.append(precision)
            recall_list.append(recall)
            f1_list.append(f1)
            
        precision = sum(precision_list) / len(precision_list)
        recall = sum(recall_list) / len(recall_list)
        f1 = sum(f1_list) / len(f1_list)
        
        return precision, recall, f1
  
    
    def evaluate_disease_ner(self):
        
        metrics = {}
        new_df = self.df.copy()
        # remove rows where 'GT_Events is empty
        new_df = new_df[new_df['GT_Events'].apply(len) > 0]
        new_df['GT_Events'] = new_df['GT_Events'].apply(lambda events: event_eval.group_diseases(events))

        new_df['diseases_gt'] = [[event['disease'] for event in events] for events in new_df['GT_Events']]
        new_df = new_df.reset_index(drop=True)
   
        disease_pred = new_df['disease_by_keyword_spotting'].fillna('').apply(lambda x: x.split(', '))
        
        new_disease_pred = []
        for disease_list in disease_pred:
            grouped_disease_list = []
            for disease in disease_list:
                flag = 0
                for key in self.synonyms.keys():
                    synonyms_for_disease = [x.lower() for x in self.synonyms[key]]
                    if disease.lower().lstrip().rstrip() in synonyms_for_disease:
                        grouped_disease_list.append(key)
                        flag = 1
                        break
                if flag == 0:
                    grouped_disease_list.append(disease)
                    
            new_disease_pred.append(grouped_disease_list)
        
                    
        precision, recall, f1 = self.get_disease_ner_metrics(new_df['diseases_gt'], new_disease_pred)
        metrics['disease_by_keyword_spotting'] = {'precision': precision, 'recall': recall, 'f1': f1}
        
        disease_pred = new_df['disease_by_ner'].fillna('').apply(lambda x: x.split(', '))
        
        new_disease_pred = []
        for disease_list in disease_pred:
            grouped_disease_list = []
            for disease in disease_list:
                flag = 0
                for key in self.synonyms.keys():
                    synonyms_for_disease = [x.lower() for x in self.synonyms[key]]
                    if disease.lower().lstrip().rstrip() in synonyms_for_disease:
                        grouped_disease_list.append(key)
                        flag = 1
                        break
                if flag == 0:
                    grouped_disease_list.append(disease)
                    
            new_disease_pred.append(grouped_disease_list)
            
        
        precision, recall, f1 = self.get_disease_ner_metrics(new_df['diseases_gt'], new_disease_pred)
        metrics['disease_by_ner'] = {'precision': precision, 'recall': recall, 'f1': f1}
        
        
        disease_pred = new_df['disease_pred'].fillna('').apply(lambda x: x.split(', '))
        
        new_disease_pred = []
        for disease_list in disease_pred:
            grouped_disease_list = []
            for disease in disease_list:
                flag = 0
                for key in self.synonyms.keys():
                    synonyms_for_disease = [x.lower() for x in self.synonyms[key]]
                    if disease.lower().lstrip().rstrip() in synonyms_for_disease:
                        grouped_disease_list.append(key)
                        flag = 1
                        break
                if flag == 0:
                    grouped_disease_list.append(disease)
            new_disease_pred.append(grouped_disease_list)

        precision, recall, f1 = self.get_disease_ner_metrics(new_df['diseases_gt'], new_disease_pred)
        metrics['diseases'] = {'precision': precision, 'recall': recall, 'f1': f1}
        
        
        return metrics

    def location_ner_metrics(self, predictions, ground_truth):
            tp = 0; fp = 0; fn = 0
            ground_truth_set = list(set([i['location'].lower() for i in ground_truth]))

            for pred_text, pred_entity in predictions:
                pred_text = pred_text.lower()
                pred_entity = pred_entity.lower()

                if pred_text in ground_truth_set or pred_entity in ground_truth_set:
                    tp += 1
                else:
                    fp += 1

            fn = len(ground_truth_set) - tp

            if tp == 0 and fp == 0 and fn == 0:
                precision = 1; recall = 1; f1 = 1
            elif tp == 0 and (fp != 0 or fn != 0):
                precision = 0; recall = 0; f1 = 0
            else:
                precision = tp / (tp + fp)
                recall = tp / (tp + fn)
                f1 = 2 * (precision * recall) / (precision + recall)

            return precision, recall, f1
        
    def evaluate_location_ner(self):   
        
        df['precision'], df['recall'], df['f1'] = zip(*df.apply(
            lambda row: self.location_ner_metrics(eval(row['locations_pred']), row['GT_Events']),
            axis=1
        ))

        avg_precision = df['precision'].mean()
        avg_recall = df['recall'].mean()
        avg_f1 = df['f1'].mean()
        
        return {'precision': avg_precision, 'recall': avg_recall, 'f1': avg_f1}

In [None]:
event_eval = EventEvaluation(df)

# Relevancy Classifier

In [None]:
precision, recall, acc, f1,  = event_eval.evaluate_relevancy()
print('Relevancy Classifier:')
print('Precision:', precision, 'Recall:', recall, 'Accuracy:', acc, 'F1:', f1)

# Disease NER

In [None]:
metrics = event_eval.evaluate_disease_ner()
metrics

# Location NER

In [None]:
metrics = event_eval.evaluate_location_ner()
metrics

# QA

In [None]:
thresholds = [0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
qa_results = {}
df = df[df['GT_Events'].apply(len) > 0]

for threshold in thresholds:
    df_copy = df.copy()
    df_copy['events'] = df_copy.apply(lambda x: [i for i in x['events'] if i['score']>=threshold], axis=1)    
    event_eval = EventEvaluation(df_copy.copy())
    metrics = event_eval.evaluate_event_extraction()
    qa_results[threshold] = metrics

In [None]:
# write results to csv
import csv
fieldnames = ['threshold', 'precision', 'recall', 'f1', 'exact_match', 'jaccard', 'subset']

# Write data to CSV file
with open('results_each_phase_qa.csv', mode='w', newline='') as file:
    writer = csv.DictWriter(file, fieldnames=fieldnames)
    writer.writeheader()
    
    # Write rows
    for threshold, scores in qa_results.items():
        row = {'threshold': threshold}
        for key, value in scores.items():
            row[key] = round(value, 3) 
        writer.writerow(row)
out = pd.read_csv('results_each_phase_qa.csv')
out

# NLI

In [None]:
def filter_events(row): # keep only those rows where number is ''
    filtered_events = []
    for event in row['GT_Events']:
        if event.get('number') == '':
            filtered_events.append(event)
    row['GT_Events'] = filtered_events
    return row

nli_results = {}
nli_df = df.copy()
nli_df = nli_df.apply(filter_events, axis=1)
nli_df = nli_df[nli_df['GT_Events'].apply(len) > 0]

print('Total number of numberless events in GT: ', sum(nli_df['GT_Events'].apply(len)))
print('Total number of articles:', len(nli_df))
nli_df['noevents_events'] = nli_df.apply(lambda x: [{**i, 'number': '', 'incident_type': ''} for i in x['noevents_events']], axis=1) # add number and incident_type to each event

for threshold in thresholds:
    nli_df['events'] = None
    nli_df['events'] = nli_df.apply(lambda x: [i for i in x['noevents_events'] if 'score' in i and i['score'] >= threshold], axis=1)
    event_eval = EventEvaluation(nli_df)
    metrics = event_eval.evaluate_event_extraction()
    nli_results[threshold] = metrics
    

In [None]:
# write results to csv
import csv
fieldnames = ['threshold', 'precision', 'recall', 'f1', 'exact_match', 'jaccard', 'subset']

# Write data to CSV file
with open('results_each_phase_nli.csv', mode='w', newline='') as file:
    writer = csv.DictWriter(file, fieldnames=fieldnames)
    writer.writeheader()
    
    # Write rows
    for threshold, scores in nli_results.items():
        row = {'threshold': threshold}
        for key, value in scores.items():
            row[key] = round(value, 3) 
        writer.writerow(row)
out = pd.read_csv('results_each_phase_nli.csv')
out

# LLM Phase wise


In [None]:
df = pd.read_csv("llm_output.csv")
print(df.columns)
# Preprocess data

def postprocess_number(x):
    try:
        if type(x) == int:
            return x
        number = w2n.word_to_num(x)
    except:
        number = ''
        
    return number
        
    
def rename_keys(d):
    return {'incident' if k == 'Incident (case or death)' else 'incident_type' if k == 'Incident Type (new or total)' else k: v for k, v in d.items()}

def lower_case_keys(d):
    return {k.lower(): v.lower() if isinstance(v, str) else v for k, v in d.items()}


# Convert string to list
df['events'] = df['filtered_gpt-3.5-turbo_events']
# convert N/A to empty list
df['events'] = df['events'].apply(lambda x: '[]' if pd.isna(x) else x)
df['events'] = df['events'].apply(eval)


# count total no of non empty events
print('Total no of non empty events:', df['events'].apply(len).sum())

df['GT_Events'] = df['GT_Events'].apply(eval)
print('Total no of non empty GT events:', df['GT_Events'].apply(len).sum())

# remove articles with no events
# df = df[df['GT_Events'].apply(len) > 0]
df = df.reset_index(drop=True)

print('Total Articles:', df.shape[0])

# Disease NER

In [None]:

event_eval = EventEvaluation(df)
# remove rows where 'GT_Events is empty
new_df = df[df['GT_Events'].apply(len) > 0]
new_df = new_df.reset_index(drop=True)
new_df['GT_Events'] = new_df['GT_Events'].apply(lambda events: event_eval.group_diseases(events))
new_df['diseases_gt'] = [[event['Disease'] for event in events] for events in new_df['GT_Events']]
new_df = new_df.reset_index(drop=True)

# in new_df['events'] rename 'disease' key to 'Disease'
def rename_keys(d):
    return {'Disease' if k == 'disease' else k: v for k, v in d.items()}
new_df['events'] = new_df['events'].apply(lambda x: [rename_keys(i) for i in x])
new_df['events'] = new_df['events'].apply(lambda events: event_eval.group_diseases(events))
new_df['diseases_pred'] = new_df['events'].apply(lambda x: [i['Disease'] for i in x])
# rename 'disease' key to 'Disease' 
print(new_df['diseases_pred'][:5])
disease_pred = new_df['diseases_pred']
precision, recall, f1 = event_eval.get_disease_ner_metrics(new_df['diseases_gt'], disease_pred)
print('Disease NER:')
print('Precision:', precision, 'Recall:', recall, 'F1:', f1)

# Location NER

In [28]:
new_df['locations_gt'] = [[event['Location'] for event in events] for events in new_df['GT_Events']]
new_df['locations_pred'] = new_df['events'].apply(lambda x: [i['location'] for i in x])
print(new_df.shape)
avg_precision = 0; avg_recall = 0; avg_f1 = 0
for i in range(len(new_df)):
    print(new_df['locations_pred'][i], new_df['locations_gt'][i])
    precision, recall, f1, exact_match = event_eval.precision_recall_method_2(new_df['locations_pred'][i], new_df['locations_gt'][i])
    avg_precision += precision
    avg_recall += recall
    avg_f1 += f1
    
avg_precision /= len(new_df)
avg_recall /= len(new_df)
avg_f1 /= len(new_df)

print('Location NER:')
print('Precision:', avg_precision, 'Recall:', avg_recall, 'F1:', avg_f1)

(394, 12)
['Gwalior', 'Gwalior', 'Gwalior'] ['Gwalior', 'Gwalior']
{'Gwalior'} {'Gwalior'}
[] ['Uttarakhand', 'Uttarkhand']
set() {'Uttarkhand', 'Uttarakhand'}
['Ambikapur'] ['Ambikapur']
{'Ambikapur'} {'Ambikapur'}
['Bilaspur'] ['Bilashpur']
{'Bilaspur'} {'Bilashpur'}
['India'] ['India', 'India', 'India']
{'India'} {'India'}
['Chhattisgarh', 'Chhattisgarh'] ['Chhattisgarh', 'Chhattisgarh']
{'Chhattisgarh'} {'Chhattisgarh'}
['Nagpur'] ['Nagpur']
{'Nagpur'} {'Nagpur'}
['Karnal'] ['Karnal', 'Karnal']
{'Karnal'} {'Karnal'}
['Chhattisgarh'] ['Chhattisgarh']
{'Chhattisgarh'} {'Chhattisgarh'}
[] ['Nagaur']
set() {'Nagaur'}
['Delhi', 'Delhi', 'Delhi'] ['Delhi', 'Delhi']
{'Delhi'} {'Delhi'}
['Delhi', 'Delhi'] ['Delhi', 'Delhi', 'Delhi ']
{'Delhi'} {'Delhi ', 'Delhi'}
['Kaithal'] ['India']
{'Kaithal'} {'India'}
['Kaithal'] ['']
{'Kaithal'} {''}
['Muzaffarpur', 'Muzaffarpur', 'Muzaffarpur', 'Muzaffarpur'] ['Bihar']
{'Muzaffarpur'} {'Bihar'}
['Muzaffarpur', 'Muzaffarpur', 'Muzaffarpur', 'Muzaffar