# LOPOCV Results

This notebook loads the `LOPOCV_results.csv` generated by the training script and produces:

1. Bar charts comparing precision, recall, F1-score, and AUC across projects.
2. A grid of confusion matrix heatmaps, one per project.
3. A summary table of the metrics, sorted by F1-score.

In [None]:
import ast
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

## 1. Load Results

Load the results CSV and display the first few rows.

In [None]:
# Location of the LOPOCV results, relative to the notebook's working directory.
results_path = Path('outputs/train/LOPOCV_results.csv')
df = pd.read_csv(results_path)

# Fail early, with a readable message, if the CSV lacks a column that the
# plotting and summary cells below read.
required_columns = {'project', 'precision', 'recall', 'f1', 'auc', 'confusion_matrix'}
missing_columns = required_columns - set(df.columns)
assert not missing_columns, (
    f"{results_path} is missing expected columns: {sorted(missing_columns)}"
)

df.head()

## 2. Bar Chart of Metrics

Plot precision, recall, F1-score, and AUC for each project.

In [None]:
# Grouped bar chart: one cluster per project, one bar per metric.
# Bars are used rather than connected lines because the x-axis holds project
# labels (categories); line segments between them would suggest a trend.
metrics = ['precision', 'recall', 'f1', 'auc']
fig, ax = plt.subplots(figsize=(12, 6))
# Indexing by project makes pandas use the project labels as x tick labels;
# the legend (one entry per metric column) is added by pandas automatically.
df.set_index('project')[metrics].plot.bar(ax=ax, rot=45)
ax.set_xlabel('Project')
ax.set_ylabel('Score')
ax.set_title('LOPOCV Metrics by Project')
fig.tight_layout()
plt.show()

## 3. Confusion Matrices

Visualize the confusion matrix for each project in a grid.

In [None]:
# Determine grid size
n_projects = len(df)
if n_projects == 0:
    # plt.subplots would otherwise fail with an opaque "rows must be positive" error.
    raise ValueError('No LOPOCV results to plot: the results table is empty')
cols = 3
rows = int(np.ceil(n_projects / cols))

# squeeze=False keeps `axes` two-dimensional for every rows/cols combination,
# so flattening it below is always valid.
fig, axes = plt.subplots(rows, cols, figsize=(cols*4, rows*4), squeeze=False)
axes = axes.flatten()

# Pair subplots with rows by position rather than by DataFrame index label,
# so the grid is still filled correctly if `df` has a non-default index.
for ax, (_, row) in zip(axes, df.iterrows()):
    # NOTE(review): assumes the CSV stores each matrix as a Python list
    # literal such as "[[5, 1], [2, 7]]" — confirm against the training script.
    cm = np.array(ast.literal_eval(row['confusion_matrix']))
    im = ax.imshow(cm, cmap='Blues')
    ax.set_title(row['project'])
    ax.set_xlabel('Predicted')
    ax.set_ylabel('True')
    # Annotate counts (imshow puts columns on x and rows on y, hence k, j)
    for (j, k), val in np.ndenumerate(cm):
        ax.text(k, j, val, ha='center', va='center')
    # Every heatmap is colour-scaled to its own matrix, so each one needs its
    # own colorbar; a single shared bar would only describe the last panel.
    # fraction/pad keep the bar roughly as tall as the square image.
    fig.colorbar(im, ax=ax, fraction=0.046, pad=0.04)

# Hide unused subplots
for unused_ax in axes[n_projects:]:
    fig.delaxes(unused_ax)

plt.tight_layout()
plt.show()

## 4. Summary Table

Display the per-project summary metrics sorted by F1-score, highest first.

In [None]:
# Rank the projects from highest to lowest F1, then show only the label and
# the four headline metric columns.
df_sorted = df.sort_values('f1', ascending=False)
df_sorted.loc[:, ['project', 'precision', 'recall', 'f1', 'auc']]