In [25]:
import json
import pandas as pd

In [26]:
# Generation files to process, given as '<model_dir>/<file stem>'.
# The directory part doubles as the model's short name further down.
file_paths = [
    'gemma/gemma-2-9b-it_temp1',
    'mistral/Mistral-7B-Instruct-v0.3_temp1',
    'llama/Llama-3.1-8B-Instruct_temp1',
    'phi/Phi-3-small-8k-instruct_temp1',
    'qwen/Qwen2.5-7B-Instruct_temp1',
]

# Name of the CSV that receives the combined valence table.
output_file = 'gen_valences.csv'

In [27]:
def get_valence(model_output, max_chars=40):
    """Map a model response to a valence label.

    Only the first ``max_chars`` characters are inspected, case-insensitively.
    "positive" is checked before "negative", so a prefix containing both
    words is labelled 1.

    Args:
        model_output: Text produced by the model. Anything that is not a
            ``str`` (for example ``None`` coming from a JSON ``null``) is
            treated as unclassifiable instead of raising.
        max_chars: Length of the leading window that is searched.
            Defaults to 40.

    Returns:
        1 if "positive" occurs in the window, -1 if "negative" occurs,
        otherwise ``None``.
    """
    # Guard against null / non-text outputs so one bad record does not
    # abort the whole aggregation.
    if not isinstance(model_output, str):
        return None

    prefix = model_output[:max_chars].lower()
    if "positive" in prefix:
        return 1
    if "negative" in prefix:
        return -1
    return None  # Neither keyword found in the inspected window

In [28]:
# Build one row per file_name with one valence column per model.
# Rows are indexed by file_name so that merging a model's result into an
# existing row is a dict lookup instead of a linear scan over all rows.
rows_by_file = {}

for file_path in file_paths:
    # The leading directory of the path (e.g. 'gemma') names the column.
    model_name = file_path.split('/')[0]
    valence_column = f'valence_{model_name}'

    # Forward slashes work on Windows and POSIX alike, and avoid the invalid
    # escape sequences that backslashes produce in a non-raw string literal.
    with open(f'../../data/generations/{file_path}.json', 'r', encoding='utf-8') as file:
        data = json.load(file)

    for entry in data:
        file_name = entry.get("file_name", "")
        valence = get_valence(entry.get("model_output", ""))

        # Create the row on first sight of this file_name, then attach the
        # current model's valence to it.
        row = rows_by_file.setdefault(file_name, {"file_name": file_name})
        row[valence_column] = valence

# dicts preserve insertion order, so rows appear in first-seen order.
dataset = list(rows_by_file.values())

df = pd.DataFrame(dataset)

In [29]:
# Summary: for every model, tally how often each valence label occurs.
model_names = [path.split('/')[0] for path in file_paths]

for name in model_names:
    print(f"\nValence counts for {name}:")
    print(df[f'valence_{name}'].value_counts())


Valence counts for gemma:
 1    196
-1      8
Name: valence_gemma, dtype: int64

Valence counts for mistral:
 1    146
-1     58
Name: valence_mistral, dtype: int64

Valence counts for llama:
 1    160
-1     44
Name: valence_llama, dtype: int64

Valence counts for phi:
 1    195
-1      9
Name: valence_phi, dtype: int64

Valence counts for qwen:
 1    197
-1      7
Name: valence_qwen, dtype: int64


In [30]:
# Show the combined table: a file_name column plus one valence_<model> column per model.
df

Unnamed: 0,file_name,valence_gemma,valence_mistral,valence_llama,valence_phi,valence_qwen
0,Chrono_SNES_Chrono Trigger_600 AD.txt,1,1,1,1,1
1,Indiana Jones_PC_Indiana Jones and the Last Cr...,1,1,1,1,1
2,Final Fantasy_PS1_Final Fantasy VII_Great Warr...,1,1,1,1,1
3,Chrono_SNES_Chrono Trigger_Ocean Palace.txt,1,1,1,1,1
4,Final Fantasy_PS1_Final Fantasy VII_Fortress o...,1,-1,1,1,1
...,...,...,...,...,...,...
199,Other_PC_Aion Tower of Eternity_Song of Moonli...,1,-1,1,1,1
200,Final Fantasy_PS1_Final Fantasy VII_Waltz de C...,1,1,1,1,1
201,The Legend of Zelda_N64_The Legend of Zelda Oc...,1,-1,1,1,1
202,Banjo-Kazooie_N64_Banjo-Kazooie_Rusty Bucket B...,1,1,1,1,1


In [32]:
# Write the combined valence table to the results folder.
# index=False keeps the DataFrame's row index out of the CSV.
df.to_csv("../../data/results/" + output_file, index=False)