# 3. Results Visualization and Analysis

**Objective:** Load the final evaluation results generated by `evaluate.py`, create comparative visualizations, and perform statistical analysis to validate the thesis hypothesis.

In [None]:
import os
import re
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from PIL import Image

# Use seaborn's 'whitegrid' style for the figures in this notebook.
sns.set_style('whitegrid')

### Step 1: Parse Evaluation Results from Log Files

We read every `evaluation_results_*.txt` file in the `../results` directory and extract the key performance metrics from each one.

In [None]:
# First numeric token inside a value string: an optional sign (honoured only
# when it is not glued to a preceding word character, so "exp-3" reads as 3),
# digits with an optional fraction, and an optional exponent.
_NUMBER_RE = re.compile(
    r"(?:(?<!\w)[-+])?(?:\d+(?:\.\d*)?|\.\d+)(?:[eE][-+]?\d+)?"
)


def parse_eval_file(filepath):
    """Parse one evaluation results text file into a dict of metrics.

    Each line is expected to look like ``<key>: <value>``. A line containing
    ``Model:`` stores the text after the first colon under the ``'Model'``
    key. For every other line with a colon, the first numeric token of the
    value is stored as a float under the stripped key. Lines without a colon
    or without any number are skipped.

    Args:
        filepath: Path of the text file to read.

    Returns:
        dict mapping ``'Model'`` to a string and each metric name to a float.
    """
    metrics = {}
    with open(filepath, 'r') as f:
        for line in f:
            if 'Model:' in line:
                metrics['Model'] = line.split(':', 1)[1].strip()
                continue

            key, sep, val = line.partition(':')
            if not sep:
                continue

            # A strict number pattern avoids float() failing on stray dots
            # (e.g. "n.a.") and keeps signs/exponents such as "-1.5e-3".
            match = _NUMBER_RE.search(val)
            if match is None:
                continue
            metrics[key.strip()] = float(match.group())
    return metrics

# Collect every evaluation log in the results directory and build one
# DataFrame row per file.
results_dir = '../results/'
# os.listdir returns entries in arbitrary order; sort for a reproducible table.
eval_files = sorted(
    os.path.join(results_dir, f)
    for f in os.listdir(results_dir)
    if f.startswith('evaluation_results_') and f.endswith('.txt')
)

results_data = [parse_eval_file(f) for f in eval_files]
df_results = pd.DataFrame(results_data)

# With no files (or no parsed F1-Score) the column does not exist and
# sort_values would raise KeyError, so only rank the models when it is present.
if 'F1-Score' in df_results.columns:
    df_results = df_results.sort_values(by='F1-Score', ascending=False)
else:
    print(
        f"Warning: no 'F1-Score' metric found in {len(eval_files)} "
        f"evaluation file(s) under {results_dir!r}; results left unsorted."
    )

print("--- Final Model Performance Summary ---")
display(df_results)

### Step 2: Visualize Performance Comparison

Create bar charts to visually compare the key performance metrics across all models. This is a crucial visualization for Chapter 4.

In [None]:
def plot_metrics(df, metrics_to_plot):
    """Draw one bar chart per metric, comparing all models side by side.

    Args:
        df: DataFrame with a 'Model' column and one column per metric.
        metrics_to_plot: Sequence of column names in ``df`` to plot, one
            subplot each. Nothing is drawn when it is empty.

    Returns:
        None. The figure is shown with ``plt.show()``.
    """
    # plt.subplots cannot create zero columns; there is nothing to draw anyway.
    if len(metrics_to_plot) == 0:
        return

    # squeeze=False always yields a 2-D array of axes, so a single metric
    # works the same way as several.
    fig, axes_grid = plt.subplots(
        1, len(metrics_to_plot), figsize=(18, 6), sharey=True, squeeze=False
    )
    axes = axes_grid[0]
    fig.suptitle('Model Performance Comparison on Test Set', fontsize=16)

    for ax, metric in zip(axes, metrics_to_plot):
        # NOTE(review): newer seaborn releases may warn when `palette` is
        # passed without `hue` — confirm against the installed version.
        sns.barplot(ax=ax, data=df, x='Model', y=metric, palette='viridis')
        ax.set_title(metric)
        ax.set_xlabel('')
        ax.set_ylabel('')
        ax.tick_params(axis='x', rotation=45)
        # Annotate each bar with its value.
        for container in ax.containers:
            ax.bar_label(container, fmt='%.4f')

    # The y-axis is shared, so only the left-most subplot is labelled.
    axes[0].set_ylabel('Score')
    # Leave room at the top for the figure title.
    plt.tight_layout(rect=[0, 0, 1, 0.96])
    plt.show()

# Plot only the metrics that were actually parsed: asking for a column that
# is missing from df_results would make the plotting call fail.
if not df_results.empty:
    requested_metrics = ['Accuracy', 'Precision', 'Recall', 'F1-Score']
    available_metrics = [m for m in requested_metrics if m in df_results.columns]
    missing_metrics = [m for m in requested_metrics if m not in df_results.columns]
    if missing_metrics:
        print(f"Skipping metrics missing from the results: {missing_metrics}")
    # The charts use 'Model' on the x-axis, so that column is required too.
    if available_metrics and 'Model' in df_results.columns:
        plot_metrics(df_results, available_metrics)

### Step 3: Display Confusion Matrices

Load and display the confusion matrix images generated by `evaluate.py` for a side-by-side qualitative comparison of error patterns.

In [None]:
# Find the confusion-matrix images in the results directory and show them
# in a single row, one subplot per image.
cm_prefix = 'confusion_matrix_'
cm_suffix = '.png'
cm_files = sorted(
    os.path.join(results_dir, f)
    for f in os.listdir(results_dir)
    if f.startswith(cm_prefix) and f.endswith(cm_suffix)
)

if cm_files:
    num_files = len(cm_files)
    # squeeze=False always returns a 2-D array of axes, so one image needs
    # no special-casing.
    fig, axes = plt.subplots(1, num_files, figsize=(5 * num_files, 5), squeeze=False)
    fig.suptitle('Confusion Matrices on Test Set', fontsize=16)

    for ax, file in zip(axes[0], cm_files):
        # Strip the prefix/extension only at the ends of the file name, so a
        # model name that happens to contain them is left intact.
        base_name = os.path.basename(file)
        model_name = base_name[len(cm_prefix):-len(cm_suffix)]
        # Close the image file as soon as it has been drawn.
        with Image.open(file) as img:
            ax.imshow(img)
        ax.set_title(model_name.replace('_', ' ').title())
        ax.axis('off')

    # Leave room at the top for the figure title.
    plt.tight_layout(rect=[0, 0, 1, 0.95])
    plt.show()