In [None]:
import os
def load_annotations(annotations_file):
    """Parse a tab-separated annotation file into annotations and character spans.

    Each usable line is split on tabs. The first field equal to "Protagonist",
    "Antagonist" or "Innocent" marks the main role: everything up to and
    including it is kept flat, and every field after it is grouped into one
    trailing sub-list.

    Args:
        annotations_file: Path of the annotation file (read as UTF-8).

    Returns:
        A tuple ``(annotations, position)`` of two lists of equal length:
        ``annotations[k]`` is ``fields_up_to_role + [fields_after_role]`` and
        ``position[k]`` is ``[parts[2], parts[3]]`` (as strings) of the same
        line. Lines with fewer than four fields or without a main role are
        skipped in both lists so the indices always correspond.
    """
    main_roles = ("Protagonist", "Antagonist", "Innocent")
    annotations = []
    position = []
    with open(annotations_file, 'r', encoding='utf-8') as file:
        for line in file:
            parts = line.strip().split('\t')
            # Blank or truncated lines have no span fields to read.
            if len(parts) < 4:
                continue
            # Index of the first field that names a main role, if any.
            role_index = next(
                (i for i, part in enumerate(parts) if part in main_roles),
                None,
            )
            if role_index is None:
                continue
            main_info = parts[:role_index + 1]     # up to and including the role
            grouped_info = parts[role_index + 1:]  # everything after the role
            annotations.append(main_info + [grouped_info])
            # Record the span only for lines that produced an annotation,
            # keeping both lists index-aligned.
            position.append([parts[2], parts[3]])
    return annotations, position

def load_documents(raw_documents_folder):
    """Read every regular file in a folder into memory.

    Args:
        raw_documents_folder: Directory whose files are read as UTF-8 text.

    Returns:
        Dict mapping each file name (not the full path) to the file's text.
        Sub-directories (for example Jupyter's ``.ipynb_checkpoints``) are
        skipped instead of crashing ``open()``.
    """
    documents = {}
    for filename in os.listdir(raw_documents_folder):
        file_path = os.path.join(raw_documents_folder, filename)
        # os.listdir also yields directories; only regular files can be read.
        if not os.path.isfile(file_path):
            continue
        with open(file_path, 'r', encoding='utf-8') as file:
            documents[filename] = file.read()
    return documents

def process_documents(annotations_file, raw_documents_folder):
    """Join annotations with their document text, keeping spans aligned.

    Args:
        annotations_file: Path passed to ``load_annotations``.
        raw_documents_folder: Folder passed to ``load_documents``.

    Returns:
        A tuple ``(result, positions)`` of equal length. ``result[k]`` is
        ``[document_text] + annotation`` for each annotation whose first field
        (the file name) is a key of the loaded documents; ``positions[k]`` is
        the span that was paired with that annotation. Annotations without a
        matching document are dropped together with their span, so callers can
        index both lists with the same ``k``.
    """
    annotations, position_array = load_annotations(annotations_file)
    documents = load_documents(raw_documents_folder)

    result = []
    kept_positions = []

    # NOTE(review): pairing by index assumes load_annotations returns the two
    # lists index-aligned — confirm against that function.
    for annotation, position in zip(annotations, position_array):
        filename = annotation[0]
        if filename in documents:
            result.append([documents[filename]] + annotation)
            kept_positions.append(position)

    return result, kept_positions


# Build the evaluation rows and their character spans from the annotation
# file and the raw article folder (paths are relative to the working directory).
annotations_file = 'subtask-1-annotations.txt'
raw_documents_folder = 'raw-documents'
result_array, position_array = process_documents(
    annotations_file=annotations_file,
    raw_documents_folder=raw_documents_folder,
)

In [None]:
!pip install --upgrade openai


In [None]:
def exact_match2(gold_main_role, pred_main_role, gold_sub_roles, pred_sub_roles):
    """Score one prediction with case-insensitive exact matching.

    Args:
        gold_main_role: Reference main role.
        pred_main_role: Predicted main role.
        gold_sub_roles: Iterable of reference fine-grained roles.
        pred_sub_roles: Iterable of predicted fine-grained roles.

    Returns:
        A tuple ``(main_match, sub_match)`` of 0/1 integers. ``main_match`` is
        1 when the main roles are equal ignoring case. ``sub_match`` is 1 when
        both sub-role collections contain the same items (ignoring case and
        order, respecting multiplicity). The input lists are not modified.
    """
    main_match = int(str(gold_main_role).lower() == str(pred_main_role).lower())

    # Lower-case *before* sorting so that mixed-case inputs line up, and call
    # .lower() (comparing the bare method objects never matches).
    gold_normalised = sorted(str(role).lower() for role in gold_sub_roles)
    pred_normalised = sorted(str(role).lower() for role in pred_sub_roles)
    sub_match = int(gold_normalised == pred_normalised)

    return main_match, sub_match

In [None]:
from openai import OpenAI
from difflib import SequenceMatcher

def similar(a, b):
    """Return difflib's SequenceMatcher similarity ratio of ``a`` and ``b``."""
    matcher = SequenceMatcher(isjunk=None, a=a, b=b)
    return matcher.ratio()

def mark_target_word(text, position_range, start_token="[TARGET]", end_token="[/TARGET]", max_tokens=512):
    """Surround a character span of ``text[0]`` with marker tokens.

    Args:
        text: Indexable whose first element is the document string.
        position_range: Pair ``(start, end)``; each value is passed to ``int``
            and used as a slice bound on the document.
        start_token: Marker inserted before the span.
        end_token: Marker inserted after the span.
        max_tokens: Accepted but not used by this function.

    Returns:
        The document with ``start_token`` inserted at ``start`` and
        ``end_token`` inserted at ``end``.
    """
    raw_start, raw_end = position_range
    document = text[0]
    begin, finish = int(raw_start), int(raw_end)
    pieces = (
        document[:begin],
        start_token,
        document[begin:finish],
        end_token,
        document[finish:],
    )
    return "".join(pieces)

# Wrap the annotated entity of every example in [TARGET] ... [/TARGET] markers.
# These marked texts (not the raw documents) are what the prompts must contain,
# because every prompt refers to the markers.
marked_articles = [
    mark_target_word(row, span)
    for row, span in zip(result_array, position_array)
]

# Main roles (order is the tie-break order) and the fine-grained roles offered
# for each of them. Each fine-grained role is listed exactly once so a single
# mention in a reply cannot yield a duplicated prediction.
MAIN_ROLES = ("protagonist", "antagonist", "innocent")
SUB_ROLES = {
    "protagonist": ["Guardian", "Martyr", "Peacemaker", "Rebel", "Underdog", "Virtuous"],
    "antagonist": ["Instigator", "Conspirator", "Tyrant", "Foreign Adversary", "Traitor", "Spy",
                   "Saboteur", "Corrupt", "Incompetent", "Terrorist", "Deceiver", "Bigot"],
    "innocent": ["Forgotten", "Exploited", "Victim", "Scapegoat"],
}
TARGET_HINT = "Looking at the target word marked by [TARGET] and [/TARGET] tokens, "


def _ask(client, model, prompt):
    """Send one user prompt to ``model`` and return the reply text ('' if it has no content)."""
    completion = client.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": "You are a helpful assistant"},
            {"role": "user", "content": prompt},
        ],
    )
    return completion.choices[0].message.content or ""


def _main_role_prompt(marked_text):
    """Build the prompt asking for the main role of the marked entity."""
    return (
        "This is an article:\n" + marked_text + "\n" + TARGET_HINT +
        "is it a protagonists, antagonists, or innocent article?"
    )


def _sub_role_prompt(main_role, marked_text):
    """Build the prompt asking for the fine-grained roles under ``main_role``."""
    return (
        "This is a " + main_role + " article:\n" + marked_text + "\n" + TARGET_HINT +
        "which of the following catgeories would it go under (it can be multiple) " +
        ", ".join(SUB_ROLES[main_role]) + ". Respond with just the list of categories."
    )


def _pick_main_role(reply):
    """Map a free-text reply to one of MAIN_ROLES.

    If the reply names exactly one role, that role wins. Otherwise the role
    with the highest similarity to the lower-cased reply is returned (the
    first role in MAIN_ROLES wins ties).
    """
    reply = reply.lower()
    mentioned = [role for role in MAIN_ROLES if role in reply]
    if len(mentioned) == 1:
        return mentioned[0]
    return max(MAIN_ROLES, key=lambda role: similar(role, reply))


def _pick_sub_roles(main_role, reply):
    """Return the lower-cased fine-grained roles of ``main_role`` mentioned in ``reply``."""
    reply = reply.lower()
    return [name.lower() for name in SUB_ROLES[main_role] if name.lower() in reply]


# A list (not a set) so the models are evaluated in a reproducible order.
models = ["gpt-3.5-turbo-1106", "gpt-4o", "gpt-3.5-turbo"]
# The client reads the key from the OPENAI_API_KEY environment variable;
# never hard-code credentials in the notebook.
client = OpenAI()

for model in models:
    output = []          # [main_category, sub_category] per example, for this model
    main_correct = 0
    sub_correct = 0
    both_correct = 0
    n = 0
    for row, marked_text in zip(result_array, marked_articles):
        # Step 1: coarse role of the marked entity.
        main_category = _pick_main_role(_ask(client, model, _main_role_prompt(marked_text)))

        # Step 2: fine-grained roles, restricted to those of the predicted main role.
        sub_reply = _ask(client, model, _sub_role_prompt(main_category, marked_text))
        sub_category = _pick_sub_roles(main_category, sub_reply)

        # row[-2] is the gold main role, row[-1] the list of gold fine-grained roles.
        main_hit, sub_hit = exact_match2(row[-2], main_category, row[-1], sub_category)
        if main_hit == 1 and sub_hit == 1:
            both_correct += 1
        if main_hit == 1:
            main_correct += 1
        if sub_hit == 1:
            sub_correct += 1
        n += 1
        output.append([main_category, sub_category])
    print(model, both_correct, main_correct, sub_correct, n)



gpt-3.5-turbo-1106 0 161 0 686
gpt-4o 0 131 0 686
gpt-3.5-turbo 0 152 0 686


In [None]:
# Sanity check: show the second-to-last field of the first row, i.e. the value
# the evaluation loop passes to exact_match2 as the gold main role.
print(result_array[0][-2])