# Week 4 & 5: Model Results and Comparison
This notebook evaluates the performance of the Random Forest baseline and the CNN-LSTM deep learning model.

In [None]:
import os
import pickle
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import classification_report, confusion_matrix, roc_curve, auc
import tensorflow as tf

# Use project root for imports if needed
import sys
sys.path.append('..')
from src.models.random_forest_model import SleepApneaRFModel
from src.data.loader import APNEADataLoader

## 1. Random Forest Evaluation

In [None]:
# Evaluate the saved Random Forest baseline on the extracted-feature table.

# Load RF Model
# SECURITY: pickle.load can execute arbitrary code while deserialising. Only
# open model files that were produced by this project's own training run.
rf_path = '../models/rf_baseline.pkl'
with open(rf_path, 'rb') as f:
    rf_data = pickle.load(f)
rf_model = rf_data['model']
scaler = rf_data['scaler']

# Load Features
df = pd.read_csv('../processed_data/extracted_features.csv')
# Everything except the identifier and the target column is treated as a feature.
X = df.drop(columns=['filename', 'label'])
y = df['label']

# Predict
# NOTE(review): this scores every row of the CSV. If the forest was fitted on
# some of these rows the report below is optimistic -- confirm against the
# training script and restrict to the held-out split if so.
X_scaled = scaler.transform(X)
y_pred = rf_model.predict(X_scaled)

print("Random Forest Classification Report:")
print(classification_report(y, y_pred))

# Confusion Matrix
# Use the sorted union of true and predicted labels (the same ordering
# confusion_matrix applies by default) so the heatmap ticks show real class
# labels instead of positional indices.
class_labels = np.unique(np.concatenate([np.asarray(y), np.asarray(y_pred)]))
cm = confusion_matrix(y, y_pred, labels=class_labels)

fig, ax = plt.subplots(figsize=(6, 4))
sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=class_labels,
    yticklabels=class_labels,
    ax=ax,
)
ax.set_title('RF Confusion Matrix')
ax.set_ylabel('Actual')
ax.set_xlabel('Predicted')
plt.show()

## 2. CNN-LSTM Evaluation

In [None]:
# Evaluate the saved CNN-LSTM model on a small sample of segmented records.

# Tunable evaluation settings, named so they are not buried in the calls below.
N_EVAL_RECORDS = 10        # how many records from the list are scored
SEGMENT_SECONDS = 60       # segment length passed to the loader
DECISION_THRESHOLD = 0.5   # probability above which a segment is labelled 1

# Load CNN-LSTM Model
cnn_path = '../models/cnn_lstm_baseline.h5'
cnn_model = tf.keras.models.load_model(cnn_path)

# Load and Segment Data for verification
loader = APNEADataLoader('../APNEA HRV+SPO2 DATASET/HuGCDN2014-OXI')
recs = loader.get_record_list()
X_seg, y_seg = loader.get_segmented_dataset(
    recs[:N_EVAL_RECORDS], segment_seconds=SEGMENT_SECONDS
)
# Append a trailing channel axis of size 1.
# NOTE(review): assumes the loader returns a 2-D (segments, samples) array -- confirm.
X_seg = X_seg.reshape((X_seg.shape[0], X_seg.shape[1], 1))

# Predict
y_prob = cnn_model.predict(X_seg)
y_pred_cnn = (y_prob > DECISION_THRESHOLD).astype(int)

# The metric functions work on 1-D label/score vectors. Flatten explicitly
# rather than relying on implicit column-vector handling; y_prob and
# y_pred_cnn keep their original shapes.
y_true_flat = np.asarray(y_seg).ravel()
y_score_flat = np.asarray(y_prob).ravel()
y_pred_flat = y_pred_cnn.ravel()

# NOTE(review): "Overflow" in the heading below looks like a typo (perhaps
# "Overview"); the string is left unchanged here.
print("CNN-LSTM Classification Report (Sample Overflow):")
# zero_division=0 reports the same numbers as the default setting but without
# an UndefinedMetricWarning when a class is never predicted.
print(classification_report(y_true_flat, y_pred_flat, zero_division=0))

# ROC Curve
fpr, tpr, _ = roc_curve(y_true_flat, y_score_flat)
roc_auc = auc(fpr, tpr)

fig, ax = plt.subplots()
ax.plot(fpr, tpr, color='darkorange', lw=2, label=f'ROC curve (area = {roc_auc:.2f})')
# Diagonal reference line: the ROC of a random classifier.
ax.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
ax.set_xlabel('False Positive Rate')
ax.set_ylabel('True Positive Rate')
ax.set_title('CNN-LSTM ROC Curve')
ax.legend(loc="lower right")
plt.show()

## Why did training stop early?

The CNN-LSTM model training utilized an **EarlyStopping** callback with `patience=5`. 

The callback monitors the validation loss. If the validation loss does not improve for 5 consecutive epochs, training is stopped to prevent **overfitting**.

In our run:
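- Epoch 2 had the best validation loss (0.6936). Note that this is close to ln 2 ≈ 0.693, the binary cross-entropy of a model that always outputs 0.5, so (assuming that loss was used) the network had barely moved beyond chance level.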
- Epochs 3, 4, 5, 6, and 7 failed to improve upon that value.
- Training stopped at Epoch 7 as expected.