In [47]:
from tqdm import tqdm

In [12]:
import numpy as np
import pandas as pd
from together import Together

emphasis_task = pd.read_csv('Hyperbaton examples - Emphasis Identification Task.csv')
print(emphasis_task.iloc[69])
emphasis_task = emphasis_task.drop(index=69)
print(emphasis_task.iloc[69])

Ground Truth Backtranslations                                                  NaN
Example:                         ABOVE THIS ROW ARE EXAMPLES FROM THE INTERNET,...
Source:                                                                        NaN
Emphasis (Normal)                                                              NaN
Emphasis (Hyperbaton)                                                          NaN
Name: 69, dtype: object
Ground Truth Backtranslations      "There was no light in the room."
Example:                         "Light there was none in the room."
Source:                                                          LLM
Emphasis (Normal)                                         There, the
Emphasis (Hyperbaton)                                     Light, the
Name: 70, dtype: object


In [None]:
client = Together(api_key="APIKEY")

response = client.chat.completions.create(
    model="meta-llama/Llama-3.3-70B-Instruct-Turbo",
    messages=[{"role": "user", "content": "What are the top 3 things to do in New York?"}],
    stream=False
)

In [23]:
def get_model_response(model, prompt):
    response = client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
        stream=False
    )
    return response.choices[0].message.content

In [27]:
emphasis_task

Unnamed: 0,Ground Truth Backtranslations,Example:,Source:,Emphasis (Normal),Emphasis (Hyperbaton)
0,There was no object. There was no passion. I l...,"""Object there was none. Passion there was none...",ThoughtCo,"object, passion","Object, passion"
1,"A butterfly emerged from a cocoon, like a lady...","""From Cocoon forth a Butterfly\nAs Lady from h...",ThoughtCo,"butterfly, emerged","From, Lady"
2,"Some rise by sin, and some fall by virtue","""Some rise by sin, and some by virtue fall.""",ThoughtCo,"Some, rise","Some, rise"
3,And a small cabin made of clay and wattles was...,"""And a small cabin build there, of clay and wa...",ThoughtCo,"small, cabin","And, small"
4,Mankind do not pity this busy monster,"""pity this busy monster mankind not""",ThoughtCo,"Mankind, do","pity, busy"
...,...,...,...,...,...
2579,"""The snow fell gently from the sky, covering t...","""Gently fell the snow from the sky, covering t...",LLM,"The, white","Gently, white"
2580,"""The distant mountains rose above the valley, ...","""Above the valley rose the distant mountains, ...",LLM,"The, with","Above, with"
2581,"""The clouds parted, revealing a brilliant blue...","""Parted were the clouds, revealing a brilliant...",LLM,"The, blue","Parted, blue"
2582,"""The world seemed still, the air heavy with th...","""Seemed still was the world, the air heavy wit...",LLM,"The, of","Seemed, of"


In [26]:
#for each model
#generate emphasis words for ground truth backtranslations and example column
#log them into df {model_name}+{normal or hyperbaton}+{prediction}
#precision, recall, f1

In [28]:
def get_emphasis_words(model, sentence):
    prompt = f"Output the word or words you think that are the emphasis in the following sentence. {sentence} If there is more than one word, separate them with commas. Only output the emphasis words, not anything else."
    return get_model_response(model, prompt)

In [34]:
def calculate_metrics(ground_truth, predictions):
    true_positives = set(ground_truth) & set(predictions) 
    false_positives = set(predictions) - set(ground_truth)
    false_negatives = set(ground_truth) - set(predictions) 

    precision = len(true_positives) / (len(true_positives) + len(false_positives)) if len(predictions) > 0 else 0
    recall = len(true_positives) / (len(true_positives) + len(false_negatives)) if len(ground_truth) > 0 else 0
    f1_score = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0

    return precision, recall, f1_score


In [54]:
def get_words(input_string):
    return [word.strip().lower() for word in input_string.split(",")]

In [49]:
llama_70b_normal_emphasis_words = []
llama_70b_hyperbaton_emphasis_words = []
llama_70b_normal_metrics = []
llama_70b_hyperbaton_metrics = []
for index, row in tqdm(emphasis_task.iterrows()):
    normal_emphasis_words = get_emphasis_words(model="meta-llama/Llama-3.3-70B-Instruct-Turbo", sentence=row["Ground Truth Backtranslations"])
    llama_70b_normal_emphasis_words.append(normal_emphasis_words)
    hyperbaton_emphasis_words = get_emphasis_words(model="meta-llama/Llama-3.3-70B-Instruct-Turbo", sentence=row["Example:"])
    llama_70b_hyperbaton_emphasis_words.append(hyperbaton_emphasis_words)
    llama_70b_normal_metrics.append(calculate_metrics(get_words(row["Emphasis (Normal)"]), get_words(normal_emphasis_words)))
    llama_70b_hyperbaton_metrics.append(calculate_metrics(get_words(row["Example:"]), get_words(hyperbaton_emphasis_words)))

2583it [2:03:03,  2.86s/it]


In [32]:
normal_emphasis_words = get_emphasis_words(model="meta-llama/Llama-3.3-70B-Instruct-Turbo", sentence=emphasis_task.iloc[0]["Ground Truth Backtranslations"])

In [50]:
llama_70b_normal_precision, llama_70b_normal_recall, llama_70b_normal_f1 = zip(*llama_70b_normal_metrics)

In [57]:
llama_70b_hyperbaton_precision, llama_70b_hyperbaton_recall, llama_70b_hyperbaton_f1 = zip(*llama_70b_hyperbaton_metrics)

In [58]:
np.average(llama_70b_normal_f1[0:70]), np.average(llama_70b_hyperbaton_f1[0:70])

(np.float64(0.20329004329004327), np.float64(0.011111111111111112))

In [None]:
np.average(llama_70b_hyperbaton_f1[0:70]), np.average(llama[0:70])

In [59]:
np.average(llama_70b_normal_f1), np.average(llama_70b_hyperbaton_f1)

(np.float64(0.07691842935745374), np.float64(0.00404291798020021))

In [38]:
calculate_metrics(get_words(emphasis_task.iloc[0]["Emphasis (Normal)"]), get_words(normal_emphasis_words))

(0.6666666666666666, 1.0, 0.8)

In [63]:
import json
llama_outputs = {
    "llama_70b_normal_emphasis_words": llama_70b_normal_emphasis_words,
    "llama_70b_hyperbaton_emphasis_words": llama_70b_hyperbaton_emphasis_words,
    "llama_70b_normal_metrics": llama_70b_normal_metrics,
    "llama_70b_hyperbaton_metrics": llama_70b_hyperbaton_metrics
}

In [64]:
file_path = "llama_outputs.json"

# Writing to the JSON file
with open(file_path, "w") as json_file:
    json.dump(llama_outputs, json_file, indent=4)  # `indent=4` formats the JSON for better readability