### Create the file with true labels and generated (model-predicted) classes

In [39]:
import pandas as pd

In [40]:
# Forward slashes resolve on Windows and POSIX alike, and avoid the invalid
# "\.", "\d" and "\g" escape sequences the backslash-separated literals contained.
df_valence = pd.read_csv('../../data/results/gen_valences.csv')
df_midi = pd.read_csv('../../data/vgmidi_labelled.csv')

In [41]:
# Helper that finds the valence row whose file name (without ".txt") occurs inside a row's midi value
def match_file_name(row, df_valence):
    """Return the row of ``df_valence`` that best matches ``row['midi']``.

    A valence row matches when its ``file_name``, with a trailing ``.txt``
    removed, is a substring of ``row['midi']``. When several rows match, the
    one with the longest stem wins so that a name which is merely a prefix of
    another (e.g. "Theme" vs. "Theme 2") cannot shadow the more specific one;
    ties keep the earliest row.

    Parameters
    ----------
    row : mapping-like (e.g. pandas.Series)
        Must provide a ``'midi'`` entry.
    df_valence : pandas.DataFrame
        Must provide a ``'file_name'`` column.

    Returns
    -------
    pandas.Series or None
        The matching valence row, or ``None`` when nothing matches or the
        ``'midi'`` value is not a string (e.g. NaN).
    """
    suffix = '.txt'
    midi_name = row['midi']
    # Missing values come through as float NaN; substring tests on them would raise.
    if not isinstance(midi_name, str):
        return None

    best_row = None
    best_len = 0
    for _, valence_row in df_valence.iterrows():
        file_name = valence_row['file_name']
        if not isinstance(file_name, str):
            continue
        # Strip only a trailing extension, never an interior ".txt".
        stem = file_name[:-len(suffix)] if file_name.endswith(suffix) else file_name
        # An empty stem is a substring of everything, so it can never be a real match.
        if stem and stem in midi_name and len(stem) > best_len:
            best_row = valence_row
            best_len = len(stem)
    return best_row

In [42]:
# For every MIDI row, look up its matching valence row (row-wise apply;
# df_valence is forwarded to the helper as its second positional argument)
matched_rows = df_midi.apply(match_file_name, axis=1, args=(df_valence,))

matched_rows

Unnamed: 0,file_name,valence_gemma,valence_mistral,valence_llama,valence_phi,valence_qwen,valence_gpt
0,Banjo-Kazooie_N64_Banjo-Kazooie_Boggys Igloo H...,1,1,1,1,1,1
1,Banjo-Kazooie_N64_Banjo-Kazooie_Boggys Igloo S...,1,1,-1,1,1,1
2,Banjo-Kazooie_N64_Banjo-Kazooie_Bubblegloop Sw...,1,-1,1,1,1,1
3,Banjo-Kazooie_N64_Banjo-Kazooie_Click Clock Wo...,1,-1,1,1,1,1
4,Banjo-Kazooie_N64_Banjo-Kazooie_Ending.txt,1,1,1,1,1,1
...,...,...,...,...,...,...,...
199,Xeno_PS1_Xenogears_Tears of the Stars Hearts o...,1,-1,-1,1,1,-1
200,Xeno_PS1_Xenogears_The Wounded Shall Advance I...,1,1,1,1,1,1
201,Xeno_PS1_Xenogears_Treasure Which Cannot Be St...,1,1,1,1,1,1
202,Xeno_PS1_Xenogears_Two Pieces.txt,1,-1,-1,1,-1,-1


In [43]:
# Put the matched valence rows side by side with the MIDI metadata (aligned on the index)
merged_df = pd.concat(objs=[df_midi, matched_rows], axis="columns")

In [44]:
# Per-model prediction columns, as named in the valence results file
valence_columns = [name for name in df_valence.columns if name.startswith('valence_')]

# Make sure every prediction column exists on the merged frame (None when absent)
for col in valence_columns:
    if col in merged_df.columns:
        continue
    merged_df[col] = None

# Keep the piece metadata, the true label and the model predictions
final_df = merged_df[
    ['series', 'console', 'game', 'piece', 'valence']
    + valence_columns
]

final_df

Unnamed: 0,series,console,game,piece,valence,valence_gemma,valence_mistral,valence_llama,valence_phi,valence_qwen,valence_gpt
0,Banjo-Kazooie,N64,Banjo-Kazooie,Boggys Igloo Happy,1,1,1,1,1,1,1
1,Banjo-Kazooie,N64,Banjo-Kazooie,Boggys Igloo Sad,1,1,1,-1,1,1,1
2,Banjo-Kazooie,N64,Banjo-Kazooie,Bubblegloop Swamp,1,1,-1,1,1,1,1
3,Banjo-Kazooie,N64,Banjo-Kazooie,Click Clock Wood,1,1,-1,1,1,1,1
4,Banjo-Kazooie,N64,Banjo-Kazooie,Ending,1,1,1,1,1,1,1
...,...,...,...,...,...,...,...,...,...,...,...
199,Xeno,PS1,Xenogears,Tears of the Stars Hearts of the People,-1,1,-1,-1,1,1,-1
200,Xeno,PS1,Xenogears,The Wounded Shall Advance Into the Light,1,1,1,1,1,1,1
201,Xeno,PS1,Xenogears,Treasure Which Cannot Be Stolen,1,1,1,1,1,1,1
202,Xeno,PS1,Xenogears,Two Pieces,1,1,-1,-1,1,-1,-1


In [45]:
# Forward slashes resolve on Windows and POSIX alike, and avoid the invalid
# "\." and "\d" escape sequences the backslash-separated literal contained.
final_df.to_csv('../../data/results/results.csv', index=False)

### Metrics

In [46]:
from sklearn.metrics import classification_report, confusion_matrix

# Prediction columns: every per-model "valence_*" column (plain "valence" holds the true labels)
model_valence_columns = [
    name
    for name in final_df.columns
    if name.startswith('valence_') and name != 'valence'
]

# Evaluate each model in turn against the true labels
for model_valence in model_valence_columns:
    print(f"\nClassification report for {model_valence} compared to valence (true labels):")

    # Ground truth and this model's predictions
    true_labels, predicted_labels = final_df['valence'], final_df[model_valence]

    report = classification_report(
        true_labels,
        predicted_labels,
        labels=[1, -1],
        zero_division=0,
    )
    print(report)

    # Rows are true classes, columns are predicted classes, both ordered [1, -1]
    print(f"\nConfusion matrix for {model_valence} compared to valence (true labels):")
    cm = confusion_matrix(
        true_labels,
        predicted_labels,
        labels=[1, -1],
    )
    print(cm)



Classification report for valence_gemma compared to valence (true labels):
              precision    recall  f1-score   support

           1       0.66      0.94      0.78       138
          -1       0.00      0.00      0.00        66

    accuracy                           0.64       204
   macro avg       0.33      0.47      0.39       204
weighted avg       0.45      0.64      0.53       204


Confusion matrix for valence_gemma compared to valence (true labels):
[[130   8]
 [ 66   0]]

Classification report for valence_mistral compared to valence (true labels):
              precision    recall  f1-score   support

           1       0.69      0.74      0.72       138
          -1       0.37      0.32      0.34        66

    accuracy                           0.60       204
   macro avg       0.53      0.53      0.53       204
weighted avg       0.59      0.60      0.59       204


Confusion matrix for valence_mistral compared to valence (true labels):
[[102  36]
 [ 45  21]]

C