In [None]:
import pandas as pd
import numpy as np
from sklearn.metrics import accuracy_score, f1_score, recall_score, precision_score
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import KFold
from sklearn.utils import resample

def evaluate(file_path):
    """
    Evaluates the performance of a model based on true and predicted labels from a CSV file.
    
    Parameters:
    - file_path: str, path to the CSV file containing 'true_label_str' and 'predicted_label' columns.
    
    Returns:
    - Prints accuracy, F1 score, recall, and precision metrics with standard deviations.
    """
    # Load the CSV file
    try:
        df = pd.read_csv(file_path)
    except FileNotFoundError:
        print(f"Error: The file '{file_path}' was not found.")
        exit()
    except Exception as e:
        print(f"Error loading CSV file: {e}")
        exit()

    # Extract true and predicted labels
    # Convert to lowercase to handle inconsistencies like 'Behavior' vs 'behavior'
    try:
        true_labels = df['true_label_str'].astype(str).str.lower()
        predicted_labels = df['predicted_label'].astype(str).str.lower()
    except KeyError as e:
        print(f"Error: Column {e} not found in the CSV. Please check column names.")
        exit()

    # Number of rows
    num_rows = len(df)

    # Ensure there are labels to process
    if num_rows == 0:
        print("The CSV file is empty or has no data rows.")
        exit()

    # --- Metric Calculations ---
    le = LabelEncoder()
    all_labels = pd.concat([true_labels, predicted_labels]).unique()
    le.fit(all_labels)

    true_labels_encoded = le.transform(true_labels)
    predicted_labels_encoded = le.transform(predicted_labels)

    # Get the unique labels present in the true labels
    unique_true_labels_encoded = pd.Series(true_labels_encoded).unique()

    # Calculate metrics on full dataset
    accuracy = accuracy_score(true_labels_encoded, predicted_labels_encoded)*100
    weighted_f1 = f1_score(true_labels_encoded, predicted_labels_encoded, average='weighted', zero_division=0)*100
    macro_f1 = f1_score(true_labels_encoded, predicted_labels_encoded, average='macro', labels=unique_true_labels_encoded, zero_division=0)*100

    # Calculate standard deviations using bootstrap resampling
    n_bootstrap = 1000
    random_state = 42
    
    accuracies = []
    weighted_f1s = []
    macro_f1s = []
    
    # Create a DataFrame with true and predicted labels for easy resampling
    bootstrap_data = pd.DataFrame({'true': true_labels_encoded, 'pred': predicted_labels_encoded})
    
    for i in range(n_bootstrap):
        # Resample with replacement
        boot_sample = resample(bootstrap_data, replace=True, n_samples=len(bootstrap_data), random_state=random_state+i)
        
        boot_true = boot_sample['true'].values
        boot_pred = boot_sample['pred'].values
        
        boot_unique_true_labels = pd.Series(boot_true).unique()
        
        accuracies.append(accuracy_score(boot_true, boot_pred))
        weighted_f1s.append(f1_score(boot_true, boot_pred, average='weighted', zero_division=0))
        macro_f1s.append(f1_score(boot_true, boot_pred, average='macro', labels=boot_unique_true_labels, zero_division=0))
    
    accuracy_std = np.std(accuracies)*100
    weighted_f1_std = np.std(weighted_f1s)*100
    macro_f1_std = np.std(macro_f1s)*100

    # Print the results
    print(f"--- Metrics for {file_path} ---")
    print(f"Number of rows: {num_rows}")
    print(f"Agreement: {(num_rows/500)*100:.1f}%")
    print(f"Accuracy: {accuracy:.1f} \pm {accuracy_std:.1f}")
    print(f"Weighted F1-score: {weighted_f1:.1f} \pm {weighted_f1_std:.1f}")
    print(f"Macro-averaged F1-score: {macro_f1:.1f} \pm {macro_f1_std:.1f}")

    # Print class proportions
    class_proportions = df['true_label_str'].value_counts(normalize=True)
    print("\nClass Proportions: ", class_proportions)
    print("\nClass Proportions:")
    for label, proportion in class_proportions.items():
        print(f"{label}: {proportion:.4f}")

  print(f"Accuracy: {accuracy:.1f} \pm {accuracy_std:.1f}")
  print(f"Weighted F1-score: {weighted_f1:.1f} \pm {weighted_f1_std:.1f}")
  print(f"Macro-averaged F1-score: {macro_f1:.1f} \pm {macro_f1_std:.1f}")


In [2]:
file_path = r'train_comb_deepT+grokN.csv'
evaluate(file_path)

--- Metrics for train_comb_deepT+grokN.csv ---
Number of rows: 398
Agreement: 79.6%
Accuracy: 84.2 \pm 1.8
Weighted F1-score: 84.1 \pm 1.8
Macro-averaged F1-score: 83.5 \pm 2.1

Class Proportions:  true_label_str
ideation     0.419598
behavior     0.271357
indicator    0.228643
attempt      0.080402
Name: proportion, dtype: float64

Class Proportions:
ideation: 0.4196
behavior: 0.2714
indicator: 0.2286
attempt: 0.0804


In [3]:
file_path = r'train_comb_deepT+grokT+grokN.csv'
evaluate(file_path)

--- Metrics for train_comb_deepT+grokT+grokN.csv ---
Number of rows: 373
Agreement: 74.6%
Accuracy: 86.6 \pm 1.8
Weighted F1-score: 86.5 \pm 1.8
Macro-averaged F1-score: 85.7 \pm 2.0

Class Proportions:  true_label_str
ideation     0.426273
behavior     0.265416
indicator    0.227882
attempt      0.080429
Name: proportion, dtype: float64

Class Proportions:
ideation: 0.4263
behavior: 0.2654
indicator: 0.2279
attempt: 0.0804


In [4]:
file_path = r'train_comb_deepT+geminiN+grokT+grokN.csv'
evaluate(file_path)

--- Metrics for train_comb_deepT+geminiN+grokT+grokN.csv ---
Number of rows: 342
Agreement: 68.4%
Accuracy: 88.3 \pm 1.7
Weighted F1-score: 88.2 \pm 1.7
Macro-averaged F1-score: 86.8 \pm 2.2

Class Proportions:  true_label_str
ideation     0.435673
behavior     0.269006
indicator    0.210526
attempt      0.084795
Name: proportion, dtype: float64

Class Proportions:
ideation: 0.4357
behavior: 0.2690
indicator: 0.2105
attempt: 0.0848


In [None]:
file_paths = [
    r'/home/noxiusk/Desktop/data_science/dissertation/main/NO-fold/validation_split_comb_fold1+deepT+geminiN+grokT+grokN.csv',
    r'/home/noxiusk/Desktop/data_science/dissertation/main/NO-fold/validation_split_comb_fold2+deepT+geminiN+grokT+grokN.csv',
    r'/home/noxiusk/Desktop/data_science/dissertation/main/NO-fold/validation_split_comb_fold3+deepT+geminiN+grokT+grokN.csv',
    r'/home/noxiusk/Desktop/data_science/dissertation/main/NO-fold/validation_split_comb_fold4+deepT+geminiN+grokT+grokN.csv',
    r'/home/noxiusk/Desktop/data_science/dissertation/main/NO-fold/validation_split_comb_fold5+deepT+geminiN+grokT+grokN.csv',
]

for file_path in file_paths:
    evaluate(file_path)

Error: The file 'f:\VERO UTENTE\Desktop\Uni\dissertation\main\NO-fold\validation_split_comb_fold1+deepT+geminiN+grokT+grokN.csv' was not found.


  print(f"Accuracy: {accuracy:.1f} \pm {accuracy_std:.1f}")
  print(f"Weighted F1-score: {weighted_f1:.1f} \pm {weighted_f1_std:.1f}")
  print(f"Macro-averaged F1-score: {macro_f1:.1f} \pm {macro_f1_std:.1f}")


UnboundLocalError: cannot access local variable 'df' where it is not associated with a value

: 

In [3]:
indicator = [0.1754, 0.2273, 0.2295, 0.2295, 0.2459]
ideation = [0.4561,  0.4091, 0.5246, 0.4918, 0.4590]
behavior = [0.2632, 0.2576, 0.1803, 0.2295, 0.2623, 0.1061]
attempt = [0.0328, 0.0492, 0.0656, 0.1061, 0.1053]

avg_indicator = sum(indicator) / len(indicator)
avg_ideation = sum(ideation) / len(ideation)
avg_behavior = sum(behavior) / len(behavior)
avg_attempt = sum(attempt) / len(attempt)

print(f"\nAverage Indicator: {avg_indicator:.4f}")
print(f"Average Ideation: {avg_ideation:.4f}")
print(f"Average Behavior: {avg_behavior:.4f}")
print(f"Average Attempt: {avg_attempt:.4f}")



Average Indicator: 0.2215
Average Ideation: 0.4681
Average Behavior: 0.2165
Average Attempt: 0.0718


AVG RESULTS:

Number of rows: 61
accuracy: 0.8931
Weighted F1: 0.8891


*****TEST CHECK*****

In [None]:
#deep-seek
file_path = r'f:\VERO UTENTE\Desktop\Uni\dissertation\main\NO-fold\validation_split_comb_fold1+deepT+geminiN+grokT+grokN.csv'
evaluate(file_path)

Error: The file 'f:\VERO UTENTE\Desktop\Uni\dissertation\main\NO-fold\validation_split_comb_fold1+deepT+geminiN+grokT+grokN.csv' was not found.


  print(f"Accuracy: {accuracy:.1f} \pm {accuracy_std:.1f}")
  print(f"Weighted F1-score: {weighted_f1:.1f} \pm {weighted_f1_std:.1f}")
  print(f"Macro-averaged F1-score: {macro_f1:.1f} \pm {macro_f1_std:.1f}")


UnboundLocalError: cannot access local variable 'df' where it is not associated with a value

: 