## STEU Analysis

In [1]:
import pandas as pd 
# Gold-standard STEU items together with their emotion-category labels.
gold = pd.read_csv("data/steu-categories.csv")

# Per-model STEU result frames, keyed by the short model name used in this notebook.
llm_map = {
    "gemma": pd.read_csv("results/gemma-2-27b-it/steu-results.csv"),
    "gpt4": pd.read_csv("results/gpt4o-mini/steu-results.csv"),
    "llama": pd.read_csv("results/Meta-Llama-3.1-70B-Instruct/steu-results.csv"),
    "mixtral": pd.read_csv("results/Mixtral-8x7B-Instruct-v0.1/steu-results.csv"),
}
llms = list(llm_map)

# Keep the individual frames available under their short names as well.
gemma, gpt4, llama, mixtral = (llm_map[name] for name in llms)

# Four hard-coded factor scores per model.
# NOTE(review): presumably the TEIQue factor scores computed elsewhere - confirm the source.
llm_factors = {
    "gemma": {
        "wellbeing": 4.67,
        "self_control": 4.5,
        "emotionality": 4.62,
        "sociability": 4.67,
    },
    "gpt4": {
        "wellbeing": 4.33,
        "self_control": 4.33,
        "emotionality": 4.0,
        "sociability": 4.83,
    },
    "llama": {
        "wellbeing": 4.83,
        "self_control": 3.33,
        "emotionality": 3.0,
        "sociability": 3.67,
    },
    "mixtral": {
        "wellbeing": 4.83,
        "self_control": 3.83,
        "emotionality": 3.5,
        "sociability": 3.67,
    },
}

In [37]:
# Long-format regression table: one record per (gold item, model) pair.
# The gold row with index label 0 is skipped. Correctness is looked up in each
# model's result frame by the same row label as the gold item, and the model's
# four factor scores are repeated on every one of its records.
FACTOR_NAMES = ("wellbeing", "self_control", "emotionality", "sociability")

reg_rows = [
    {
        "situation": item["situation"],
        "model": name,
        "is_correct": llm_map[name].loc[label, "is_correct"],
        "ekman": item["ekman"],
        "complex": item["complex"],
        **{factor: llm_factors[name][factor] for factor in FACTOR_NAMES},
    }
    for label, item in gold.iterrows()
    if label != 0
    for name in llms
]

reg_df = pd.DataFrame(reg_rows)


In [38]:
# Show the long-format regression table (pandas elides the middle rows in the rendered output).
reg_df

Unnamed: 0,situation,model,is_correct,ekman,complex,wellbeing,self_control,emotionality,sociability
0,A pleasant experience ceases unexpectedly and ...,gemma,0,sadness,Sad,4.67,4.50,4.62,4.67
1,A pleasant experience ceases unexpectedly and ...,gpt4,1,sadness,Sad,4.33,4.33,4.00,4.83
2,A pleasant experience ceases unexpectedly and ...,llama,0,sadness,Sad,4.83,3.33,3.00,3.67
3,A pleasant experience ceases unexpectedly and ...,mixtral,0,sadness,Sad,4.83,3.83,3.50,3.67
4,Xavier completes a difficult task on time and ...,gemma,1,happiness,Pride,4.67,4.50,4.62,4.67
...,...,...,...,...,...,...,...,...,...
163,Matthew has been at his current job for six mo...,mixtral,1,sadness,Regret,4.83,3.83,3.50,3.67
164,Penny's hockey team trained hard and won the c...,gemma,0,happiness,Pride,4.67,4.50,4.62,4.67
165,Penny's hockey team trained hard and won the c...,gpt4,0,happiness,Pride,4.33,4.33,4.00,4.83
166,Penny's hockey team trained hard and won the c...,llama,1,happiness,Pride,4.83,3.33,3.00,3.67


In [39]:
# Write the regression table to disk; index=False leaves the row index out of the CSV.
reg_df.to_csv("results/regression-data.csv", index=False)

In [24]:
# Wide-format STEU table: each gold item is extended with every model's
# prediction, correctness flag and explanation, plus two agreement flags.
merged_rows = []
for i, gold_row in gold.iterrows():

    # The gold row with index label 0 is excluded, as in the regression-table cell.
    # NOTE(review): presumably a worked example rather than a scored item - confirm.
    if i == 0:
        continue

    # Work on a plain dict copy so the loop variable itself is never rebound.
    record = gold_row.to_dict()
    for llm in llms:
        results = llm_map[llm]
        record[f"{llm}-pred"] = results.loc[i, "pred"]
        record[f"{llm}-is-correct"] = results.loc[i, "is_correct"]
        record[f"{llm}-explanation"] = results.loc[i, "explanation"]

    # Derive the agreement flags from `llms` instead of hard-coding model names,
    # so they stay correct when the set of models changes.
    correct_flags = [record[f"{llm}-is-correct"] for llm in llms]
    record["all_correct"] = int(all(correct_flags))
    record["all_incorrect"] = int(all(flag == 0 for flag in correct_flags))
    merged_rows.append(record)

results_merged = pd.DataFrame(merged_rows)

In [28]:
# Mean of every numeric column: per-model accuracy plus the share of items that
# all models got right / all got wrong.
# numeric_only=True is required because the frame also holds text columns;
# without it older pandas emits a FutureWarning and pandas >= 2.0 raises TypeError.
results_merged.mean(numeric_only=True)

  results_merged.mean()


gemma-is-correct      0.571429
gpt4-is-correct       0.785714
llama-is-correct      0.619048
mixtral-is-correct    0.547619
all_correct           0.404762
all_incorrect         0.142857
dtype: float64

In [36]:
# Mean of the numeric columns within each "ekman" category.
# numeric_only=True keeps the text columns out of the reduction; without it
# older pandas emits a FutureWarning and pandas >= 2.0 raises TypeError.
results_merged.groupby("ekman").mean(numeric_only=True)

  results_merged.groupby("ekman").mean()


Unnamed: 0_level_0,gemma-is-correct,gpt4-is-correct,llama-is-correct,mixtral-is-correct
ekman,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
anger,0.3,0.7,0.4,0.3
disgust,0.0,1.0,0.0,1.0
fear,0.333333,0.5,0.5,0.333333
happiness,0.866667,0.933333,0.866667,0.8
sadness,0.571429,0.857143,0.571429,0.428571
surprise,0.666667,0.666667,0.666667,0.666667


In [37]:
# Mean of the numeric columns within each "complex" emotion label.
# numeric_only=True keeps the text columns out of the reduction; without it
# older pandas emits a FutureWarning and pandas >= 2.0 raises TypeError.
results_merged.groupby("complex").mean(numeric_only=True)

  results_merged.groupby("complex").mean()


Unnamed: 0_level_0,gemma-is-correct,gpt4-is-correct,llama-is-correct,mixtral-is-correct
complex,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Angry,0.5,1.0,0.0,0.5
Contempt,0.0,0.75,0.25,0.25
Dislike,0.5,0.5,0.5,0.0
Distressed,0.75,0.75,0.75,0.25
Frustrated,0.333333,1.0,1.0,1.0
Gratitude,1.0,1.0,1.0,1.0
Hope,1.0,1.0,0.333333,0.333333
Joy,0.666667,1.0,1.0,1.0
Pride,0.666667,0.666667,1.0,0.666667
Regret,0.666667,0.666667,0.666667,1.0


In [29]:
# Write the wide-format STEU table to disk; index=False leaves the row index out of the CSV.
results_merged.to_csv("results/steu-results-merged.csv", index=False)

## TEIQue

In [13]:
import pandas as pd
import numpy as np

# Load each model's TEIQue results.
# NOTE: this rebinds gemma / gpt4 / llama / mixtral / llm_map / llms, which held
# the STEU result frames earlier in the notebook.
gemma = pd.read_csv("results/gemma-2-27b-it/teique-results.csv")
gpt4 = pd.read_csv("results/gpt4o-mini/teique-results.csv")
llama = pd.read_csv("results/Meta-Llama-3.1-70B-Instruct/teique-results.csv")
mixtral = pd.read_csv("results/Mixtral-8x7B-Instruct-v0.1/teique-results.csv")

llm_map = dict(zip(("gemma", "gpt4", "llama", "mixtral"), (gemma, gpt4, llama, mixtral)))
llms = list(llm_map)

# One merged record per statement. The gemma frame drives the iteration; the
# other models are looked up by the same row label. "std" is the spread of the
# four models' scores for that statement (np.std with its default arguments).
merged_rows = []
for label, gemma_item in gemma.iterrows():
    gemma_score = gemma_item["score"]
    gpt4_score = gpt4.loc[label, "score"]
    llama_score = llama.loc[label, "score"]
    mixtral_score = mixtral.loc[label, "score"]
    score_spread = np.std([gemma_score, gpt4_score, llama_score, mixtral_score])

    merged_rows.append({
        "tqn": gemma_item["tqn"],
        "statement": gemma_item["statement"],
        "gemma_score": gemma_score,
        "gemma_response": gemma_item["response"],
        # The gpt4 frame is written out under a "gpt4o" column prefix.
        "gpt4o_score": gpt4_score,
        "gpt4o_response": gpt4.loc[label, "response"],
        "llama_score": llama_score,
        "llama_response": llama.loc[label, "response"],
        "mixtral_score": mixtral_score,
        "mixtral_response": mixtral.loc[label, "response"],
        "std": score_spread,
    })

merged = pd.DataFrame(merged_rows)

In [14]:
# Write the merged TEIQue table to disk; index=False leaves the row index out of the CSV.
merged.to_csv("results/teique-results-merged.csv", index=False)

In [12]:
import numpy as np

# Sanity check for the "std" column of the merged TEIQue table: np.std defaults
# to the population standard deviation (ddof=0), so the scores [1, 6, 6, 3]
# (mean 4, squared deviations 9 + 4 + 4 + 1 = 18) give sqrt(18 / 4) ~= 2.1213.
score = [1,6,6,3]
score_std = np.std(score)
score_std


2.1213203435596424