In [None]:
import pandas as pd
import re
import csv
import copy
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
import ast
import getpass
import os
from openai import OpenAI

In [None]:
# Resolve the OpenAI API key without storing it in the notebook: use the
# OPENAI_API_KEY environment variable when it is set and non-empty, otherwise
# ask for it interactively (getpass does not echo the typed value).
gpt_api_key = os.environ.get("OPENAI_API_KEY") or getpass.getpass("OpenAI API key: ")
# Shared client used by gpt_get_completion.
llm = OpenAI(api_key=gpt_api_key)
def gpt_get_completion(prompt, target, model="gpt-4o-mini"):
    """Ask the chat model to extract named entities from one sentence.

    Args:
        prompt: Text sent as the user message.
        target: Accepted for caller compatibility; it is not used in the request.
        model: Name of the chat model passed to the API.

    Returns:
        The `content` of the message in the first returned choice.
    """
    system_message = {"role": "system", "content": "You are an expert in named entity recognition. Your task is to identify all named entities in the given sentence. Only output the entity and nothing else."}
    user_message = {"role": "user", "content": prompt}
    # `llm` is the module-level OpenAI client.
    response = llm.chat.completions.create(
        model=model,
        messages=[system_message, user_message],
    )
    first_choice = response.choices[0]
    return first_choice.message.content

In [106]:
def read_csv_to_list_of_dicts(file_path):
    """Load a tab-separated file into a list of row dicts.

    The first line of the file supplies the column names. Every row also gets
    a "label" entry holding its 0-based position in the file; a column that is
    already named "label" is overwritten by that position.

    Args:
        file_path: Path of the tab-separated file to read.

    Returns:
        A list with one dict per data row, in file order.
    """
    list_of_dicts = []
    # Explicit UTF-8 keeps non-ASCII text independent of the platform's default
    # encoding; newline='' is what the csv module requires so that newlines
    # inside quoted fields are passed through unchanged.
    with open(file_path, 'r', encoding='utf-8', newline='') as csv_file:
        csv_reader = csv.DictReader(csv_file, delimiter="\t")
        for count, row in enumerate(csv_reader):
            row["label"] = count
            list_of_dicts.append(row)
    return list_of_dicts

# Load the sample TSV once; `file_path` is reused later when the predictions
# are written back, and every row in `samples` carries a positional "label".
file_path = 'data/SemEval2025_EAMT_Samples.tsv'
samples = read_csv_to_list_of_dicts(file_path)

In [107]:
# Lookup table from language/locale code to English language name.
# prompt_runner indexes it with each sample's "target_locale" value.
# NOTE(review): the code set (e.g. 'iw' for Hebrew, 'jw' for Javanese) looks
# like a translation-service language list rather than strict ISO 639 — confirm
# it covers every locale that appears in the dataset.
language_dict = {
    'ab': 'Abkhaz',
    'ace': 'Acehnese',
    'ach': 'Acholi',
    'aa': 'Afar',
    'af': 'Afrikaans',
    'sq': 'Albanian',
    'alz': 'Alur',
    'am': 'Amharic',
    'ar': 'Arabic',
    'hy': 'Armenian',
    'as': 'Assamese',
    'av': 'Avar',
    'awa': 'Awadhi',
    'ay': 'Aymara',
    'az': 'Azerbaijani',
    'ban': 'Balinese',
    'bal': 'Baluchi',
    'bm': 'Bambara',
    'bci': 'Baoulé',
    'ba': 'Bashkir',
    'eu': 'Basque',
    'btx': 'Batak Karo',
    'bts': 'Batak Simalungun',
    'bbc': 'Batak Toba',
    'be': 'Belarusian',
    'bem': 'Bemba',
    'bn': 'Bengali',
    'bew': 'Betawi',
    'bho': 'Bhojpuri',
    'bik': 'Bikol',
    'bs': 'Bosnian',
    'br': 'Breton',
    'bg': 'Bulgarian',
    'bua': 'Buryat',
    'yue': 'Cantonese',
    'ca': 'Catalan',
    'ceb': 'Cebuano',
    'ch': 'Chamorro',
    'ce': 'Chechen',
    'ny': 'Chichewa',
    'zh': 'Chinese (Simplified)',
    'zh-TW': 'Chinese (Traditional)',
    'chk': 'Chuukese',
    'cv': 'Chuvash',
    'co': 'Corsican',
    'crh': 'Crimean Tatar',
    'hr': 'Croatian',
    'cs': 'Czech',
    'da': 'Danish',
    'fa-AF': 'Dari',
    'din': 'Dinka',
    'dv': 'Divehi',
    'doi': 'Dogri',
    'dov': 'Dombe',
    'nl': 'Dutch',
    'dyu': 'Dyula',
    'dz': 'Dzongkha',
    'en': 'English',
    'eo': 'Esperanto',
    'et': 'Estonian',
    'ee': 'Ewe',
    'fo': 'Faroese',
    'fj': 'Fijian',
    'tl': 'Filipino',
    'fi': 'Finnish',
    'fon': 'Fon',
    'fr': 'French',
    'fy': 'Frisian',
    'fur': 'Friulian',
    'ff': 'Fulfulde',
    'gaa': 'Ga',
    'gl': 'Galician',
    'lg': 'Ganda',
    'ka': 'Georgian',
    'de': 'German',
    'el': 'Greek',
    'gn': 'Guarani',
    'gu': 'Gujarati',
    'ht': 'Haitian Creole',
    'cnh': 'Hakha Chin',
    'ha': 'Hausa',
    'haw': 'Hawaiian',
    'iw': 'Hebrew',
    'hil': 'Hiligaynon',
    'hi': 'Hindi',
    'hmn': 'Hmong',
    'hu': 'Hungarian',
    'hrx': 'Hunsrik',
    'iba': 'Iban',
    'is': 'Icelandic',
    'ig': 'Igbo',
    'ilo': 'Iloko',
    'id': 'Indonesian',
    'ga': 'Irish Gaelic',
    'it': 'Italian',
    'jam': 'Jamaican Patois',
    'ja': 'Japanese',
    'jw': 'Javanese',
    'kac': 'Jingpo',
    'kl': 'Kalaallisut',
    'kn': 'Kannada',
    'kr': 'Kanuri',
    'pam': 'Kapampangan',
    'kk': 'Kazakh',
    'kha': 'Khasi',
    'km': 'Khmer',
    'cgg': 'Kiga',
    'kg': 'Kikongo',
    'rw': 'Kinyarwanda',
    'ktu': 'Kituba',
    'trp': 'Kokborok',
    'kv': 'Komi',
    'gom': 'Konkani',
    'ko': 'Korean',
    'kri': 'Krio',
    'ku': 'Kurdish (Kurmanji)',
    'ckb': 'Kurdish (Sorani)',
    'ky': 'Kyrgyz',
    'lo': 'Lao',
    'ltg': 'Latgalian',
    'la': 'Latin',
    'lv': 'Latvian',
    'lij': 'Ligurian',
    'li': 'Limburgan',
    'ln': 'Lingala',
    'lt': 'Lithuanian',
    'lmo': 'Lombard',
    'luo': 'Luo',
    'lb': 'Luxembourgish',
    'mk': 'Macedonian',
    'mad': 'Madurese',
    'mai': 'Maithili',
    'mak': 'Makassar',
    'mg': 'Malagasy',
    'ms': 'Malay',
    'ms-Arab': 'Malay (Jawi)',
    'ml': 'Malayalam',
    'mt': 'Maltese',
    'mam': 'Mam',
    'gv': 'Manx',
    'mi': 'Maori',
    'mr': 'Marathi',
    'mh': 'Marshallese',
    'mwr': 'Marwadi',
    'mfe': 'Mauritian Creole',
    'chm': 'Meadow Mari',
    'mni-Mtei': 'Meiteilon (Manipuri)',
    'min': 'Minang',
    'lus': 'Mizo',
    'mn': 'Mongolian',
    'my': 'Myanmar (Burmese)',
    'nhe': 'Nahuatl (Eastern Huasteca)',
    'ndc-ZW': 'Ndau',
    'nr': 'Ndebele (South)',
    'new': 'Nepalbhasa (Newari)',
    'ne': 'Nepali',
    'bm-Nkoo': 'NKo',
    'nso': 'Northern Sotho',
    'no': 'Norwegian',
    'nus': 'Nuer',
    'oc': 'Occitan',
    'or': 'Odia (Oriya)',
    'om': 'Oromo',
    'os': 'Ossetian',
    'pag': 'Pangasinan',
    'pap': 'Papiamento',
    'ps': 'Pashto',
    'fa': 'Persian',
    'pl': 'Polish',
    'pt': 'Portuguese',
    'pt-PT': 'Portuguese (Portugal)',
    'pa': 'Punjabi',
    'pa-Arab': 'Punjabi (Shahmukhi)',
    'kek': "Q'eqchi'",
    'qu': 'Quechua',
    'rom': 'Romani',
    'ro': 'Romanian',
    'rn': 'Rundi',
    'ru': 'Russian',
    'se': 'Sami (North)',
    'sm': 'Samoan',
    'sg': 'Sango',
    'sa': 'Sanskrit',
    'sat-Latn': 'Santali',
    'gd': 'Scots Gaelic',
    'sr': 'Serbian',
    'st': 'Sesotho',
    'crs': 'Seychellois Creole',
    'shn': 'Shan',
    'sn': 'Shona',
    'scn': 'Sicilian',
    'szl': 'Silesian',
    'sd': 'Sindhi',
    'si': 'Sinhala',
    'sk': 'Slovak',
    'sl': 'Slovenian',
    'so': 'Somali',
    'es': 'Spanish',
    'su': 'Sundanese',
    'sus': 'Susu',
    'sw': 'Swahili',
    'ss': 'Swati',
    'sv': 'Swedish',
    'ty': 'Tahitian',
    'tg': 'Tajik',
    'ber-Latn': 'Tamazight',
    'ber': 'Tamazight (Tifinagh)',
    'ta': 'Tamil',
    'tt': 'Tatar',
    'te': 'Telugu',
    'tet': 'Tetum',
    'th': 'Thai',
    'bo': 'Tibetan',
    'ti': 'Tigrinya',
    'tiv': 'Tiv',
    'tpi': 'Tok Pisin',
    'to': 'Tongan',
    'ts': 'Tsonga',
    'tn': 'Tswana',
    'tcy': 'Tulu',
    'tum': 'Tumbuka',
    'tr': 'Turkish',
    'tk': 'Turkmen',
    'tyv': 'Tuvan',
    'ak': 'Twi',
    'udm': 'Udmurt',
    'uk': 'Ukrainian',
    'ur': 'Urdu',
    'ug': 'Uyghur',
    'uz': 'Uzbek',
    've': 'Venda',
    'vi': 'Vietnamese',
    'vls': 'West Flemish',
    'cy': 'Welsh',
    'wo': 'Wolof',
    'xh': 'Xhosa',
    'yi': 'Yiddish',
    'yo': 'Yoruba',
    'za': 'Zhuang',
    'zu': 'Zulu'
}


In [108]:
def prompt_runner(prompt_base, dataset, model="GPT"):
    """Run the prompt over every sample and collect the model's answers.

    Args:
        prompt_base: Format string with one positional `{}` placeholder that is
            filled with the sample's "source" text.
        dataset: Iterable of dicts providing "source", "target_locale" and
            "label".
        model: Backend selector. Only 'GPT' is implemented.

    Returns:
        A list of {"label": ..., "prediction": ...} dicts, one per sample, in
        dataset order.

    Raises:
        ValueError: If `model` is not 'GPT'. Previously any other value left
            `answer` unassigned and the loop crashed with a NameError on the
            first sample.
    """
    if model != 'GPT':
        raise ValueError("Unsupported model {!r}; only 'GPT' is available.".format(model))

    output_list = []
    for sample in dataset:
        prompt = prompt_base.format(sample['source'])
        locale = sample["target_locale"]
        # Fall back to the raw locale code so an unmapped locale does not abort
        # a run part-way through after earlier API calls were already made.
        target_language = language_dict.get(locale, locale)
        answer = gpt_get_completion(prompt, target_language)

        # Progress trace: the source sentence next to what the model returned.
        print(sample['source'], answer)
        output_list.append({"label": sample["label"], 'prediction': answer})

    return output_list

In [109]:
def output_processing(org_output):
    """Return a cleaned deep copy of the raw prediction records.

    Records with fewer than two keys are dropped, and the "prediction" string
    of every remaining record has leading/trailing whitespace removed. The
    input list and its dicts are left unmodified.
    """
    snapshot = copy.deepcopy(org_output)
    # Keep only records that carry at least two keys.
    kept = list(filter(lambda record: len(record.keys()) >= 2, snapshot))
    for record in kept:
        record['prediction'] = record['prediction'].strip()
    return kept

In [110]:
def gold_samples_generator(dataset):
    """Build a lookup from each sample's "label" to its "target" text.

    If two samples share a label, the later one wins.
    """
    return {entry['label']: entry['target'] for entry in dataset}

In [111]:
def metric_calculation(labels, predictions, event_labels):
    """Print and return micro-averaged precision, recall and F1 as percentages.

    Both sequences are lower-cased element by element before scoring, so every
    element must provide `.lower()`. Scoring is restricted to `event_labels`.

    Rationale carried over from the original notes: a prediction that has no
    counterpart in the ground truth lowers precision, and a ground-truth item
    with no prediction lowers recall, which is why the two lists are kept
    aligned with filler entries where one side is missing.

    Returns:
        (micro_recall, micro_precision, micro_f1); note that recall comes first.
    """
    gold = [text.lower() for text in labels]
    guessed = [text.lower() for text in predictions]

    shared = dict(labels=event_labels, average='micro')
    scorers = (("p", precision_score), ("r", recall_score), ("f1", f1_score))
    # Scores come back as fractions; scale them to percentages.
    scores = {key: scorer(gold, guessed, **shared) * 100.0 for key, scorer in scorers}

    print("Micro_F1:", scores["f1"])
    print("Micro_Precision:", scores["p"])
    print("Micro_Recall:", scores["r"])
    return scores["r"], scores["p"], scores["f1"]


In [112]:
import pandas as pd
def clean_output(label, prediction):
    """Align gold targets and predictions by sample label.

    Args:
        label: Gold lookup indexed by sample label (label -> gold text).
        prediction: List of {"label": ..., "prediction": ...} records.

    Returns:
        (labels, predictions): two lists of length `len(label)`. Position i
        holds the gold text / predicted text for the sample labelled i, or
        False when no prediction record exists for that label.
    """
    # Size BOTH lists by the gold set. The predictions list used to be sized by
    # the number of prediction records, so a missing record made a higher label
    # index fall outside the list and raise IndexError.
    size = len(label)
    labels = [False] * size
    predictions = [False] * size
    for item in prediction:
        position = int(item['label'])
        predictions[position] = item['prediction']
        labels[position] = label[item['label']]
    return labels, predictions

In [113]:
# Gold reference lookup: sample label -> "target" text from the loaded samples.
gold_samples = gold_samples_generator(samples)


In [114]:
def add_output(filename, predictions):
    """Store `predictions` in a "GPT4 Entities" column of a tab-separated file.

    The file is read, the column is added (or replaced if it already exists),
    and the result is written back to the same path without the index, so the
    original file is overwritten. `predictions` must have one entry per row.
    """
    table = pd.read_csv(filename, delimiter='\t').assign(**{"GPT4 Entities": predictions})
    # Write back in place, keeping the tab-separated layout.
    table.to_csv(filename, sep='\t', index=False)

    print("New column added and CSV file updated successfully.")

In [115]:
# Two prompt templates; each has one `{}` slot for the source sentence.
# The second assignment overrides the first, so the template actually used is
# the bare sentence with no instruction text around it.
prompt = """Translate the following sentence: {}"""
prompt = """{}"""

In [None]:
# End-to-end run: query the model for every sample, clean the raw answers,
# align them with the gold targets by label, then write the predictions into
# the input TSV (add_output overwrites the file at `file_path`).
output = prompt_runner(prompt, samples,"GPT")
processed_output = output_processing(output)
labels,preds = clean_output(gold_samples,processed_output)
add_output(file_path,preds)

What genre does The Invisible Man belong to? The Invisible Man
When was the TV series Mulheres Apaixonadas first aired? Mulheres Apaixonadas
How many seasons of The Ambassador's Daughter are there? The Ambassador's Daughter
What is the main focus of the TV series Don't F**k with Cats: Hunting an Internet Killer? Don't F**k with Cats: Hunting an Internet Killer
How many movements are there in Symphony No. 9? Symphony No. 9
Can you give a brief description of Descartes' Discourse on the Method? Descartes
What is the genre of The Three Musketeers? The Three Musketeers
When was St. Basil's Cathedral built? St. Basil's Cathedral
In which country did the events of Planet of the Apes take place? Planet of the Apes
Were there any rumors or controversies surrounding The Turk's ability to play chess? The Turk
How would you describe the Berlin Cathedral? Berlin Cathedral
How is the President of Russia elected? President of Russia
What were the responsibilities of Anatoly Dyatlov as the deputy chi