# Updated Model Comparison Notebook

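This notebook reads the metric CSV files for the different models and compares their performance across feature sets. The earlier index-reading issue is fixed by passing `index_col=0` to `pd.read_csv`, so the first CSV column (the feature set) becomes the DataFrame index.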

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Load every model's metrics CSV the same way: the first CSV column becomes
# the index (index_col=0), the index is named 'feature_set', and a 'model'
# column records which model the rows belong to.
metric_sources = (
    ('Neural Network', 'nn_comparison_metrics.csv'),
    ('SVC', 'svc_comparison_metrics.csv'),
    ('KNN', 'knn_comparison_metrics.csv'),
    ('Random Forest', 'rf_comparison_metrics.csv'),
)

loaded_frames = []
for model_name, csv_path in metric_sources:
    metrics = pd.read_csv(csv_path, index_col=0)
    metrics.index.name = 'feature_set'
    metrics['model'] = model_name
    loaded_frames.append(metrics)

# Keep the individual per-model DataFrames available under their usual names.
df_nn, df_svc, df_knn, df_rf = loaded_frames

# Stack the per-model tables; reset_index turns 'feature_set' back into a
# regular column of the consolidated DataFrame.
df_all = pd.concat(loaded_frames).reset_index()

# Show the consolidated table.
print("Combined Metrics Table:")
display(df_all)

# Line chart of mean cross-validation accuracy: one line per model, with the
# feature sets along the x-axis.
fig, ax = plt.subplots(figsize=(10, 6))

# sort=False keeps the models in order of first appearance in df_all.
for model_name, model_rows in df_all.groupby('model', sort=False):
    ax.plot(
        model_rows['feature_set'],
        model_rows['mean_cv_accuracy'],
        marker='o',
        label=model_name,
    )

ax.set_title('Mean CV Accuracy by Feature Set and Model')
ax.set_xlabel('Feature Set')
ax.set_ylabel('Mean CV Accuracy')
ax.legend()
ax.grid(True)
plt.show()

# Plot final performance for each model across feature sets.
#
# Each model is plotted with 'final_accuracy' when it actually has values for
# it, and with 'mean_f1_weighted' otherwise.
#
# Why the check looks at values and not just column names: df_all is built
# with pd.concat, so it contains the union of all per-model columns. A model
# whose CSV had no 'final_accuracy' column still *has* that column in df_all,
# filled with NaN. Testing only `'final_accuracy' in subset.columns` would
# therefore never select the fallback and would draw an empty (all-NaN) line.
plt.figure(figsize=(10, 6))

series_to_plot = []  # (model, x values, y values, metric label) per model
for model in df_all['model'].unique():
    subset = df_all[df_all['model'] == model]
    has_final_accuracy = (
        'final_accuracy' in subset.columns
        and subset['final_accuracy'].notna().any()
    )
    if has_final_accuracy:
        y_vals = subset['final_accuracy']
        metric_label = 'Final Accuracy'
    else:
        # Raises KeyError if no model provided 'mean_f1_weighted' either.
        y_vals = subset['mean_f1_weighted']
        metric_label = 'Mean F1 Weighted'
    series_to_plot.append((model, subset['feature_set'], y_vals, metric_label))

# Distinct metrics actually used, in order of first appearance.
metric_labels = list(dict.fromkeys(entry[3] for entry in series_to_plot))
mixed_metrics = len(metric_labels) > 1

for model, x_vals, y_vals, metric_label in series_to_plot:
    # When lines show different metrics, say so in the legend so the chart
    # is not misread as a like-for-like comparison.
    legend_label = f'{model} ({metric_label})' if mixed_metrics else model
    plt.plot(x_vals, y_vals, marker='s', label=legend_label)

# The axis label names every metric on the chart instead of whichever one the
# last model happened to use; the default covers an empty df_all.
ylabel = ' / '.join(metric_labels) if metric_labels else 'Final Accuracy'

plt.title('Final Performance by Feature Set and Model')
plt.xlabel('Feature Set')
plt.ylabel(ylabel)
plt.legend()
plt.grid(True)
plt.show()
