# Model Training and Evaluation
This notebook picks up where the [sequence models](sequence_modeling.ipynb) notebook left off: it loads the previously trained LSTM and RNN models, evaluates their performance on the test split, and documents the results.


In [1]:
from pathlib import Path

import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import (
    confusion_matrix, 
    classification_report, 
    accuracy_score, 
    precision_score, 
    recall_score, 
    f1_score
)
from tensorflow.keras.models import load_model
from data_preparation import prepare_data_for_models

## Load Data and Models

In [2]:
# Fetch the train/test splits from the project's data_preparation module.
# NOTE(review): the evaluation below compares y_test directly against argmax
# class indices, so the labels are presumably integer-encoded (not one-hot) --
# confirm against prepare_data_for_models.
X_train, X_test, y_train, y_test = prepare_data_for_models()

# Load pre-trained models
# Both paths are relative, so they resolve against the kernel's working directory.
lstm_model = load_model('../models/lstm_model.h5')
rnn_model = load_model('../models/rnn_model.h5')

## Model Evaluation

In [14]:
def evaluate_model(model, X_test, y_test, model_name):
    """Score a classifier on the test split, print a summary and save its confusion matrix.

    Parameters
    ----------
    model : object with a ``predict(X)`` method
        ``predict`` must return one row of per-class scores per sample; the
        predicted class is the ``argmax`` along axis 1.
    X_test : array-like
        Test features, handed to ``model.predict`` unchanged.
    y_test : array-like
        True class labels. They are compared directly with the argmax indices,
        so they are presumably integer-encoded rather than one-hot (confirm
        against the data preparation step).
    model_name : str
        Label used in the printed summary and plot title; its lower-cased form
        prefixes the saved figure's file name.

    Returns
    -------
    dict
        ``accuracy``, ``precision``, ``recall`` and ``f1_score`` (the last
        three support-weighted), the text ``classification_report``, the
        ``confusion_matrix`` array, plus ``predictions``, ``probabilities``
        and ``true_labels`` for downstream analysis.

    Side effects
    ------------
    Prints the metrics and writes
    ``../results/<model_name.lower()>_confusion_matrix.png`` (the directory is
    created if it does not exist yet).
    """
    # Predict probabilities and reduce them to a single class index per sample
    y_pred_proba = model.predict(X_test)
    y_pred = np.argmax(y_pred_proba, axis=1)

    # Basic metrics; zero_division=0 scores never-predicted classes as 0
    # instead of emitting an UndefinedMetricWarning.
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, average='weighted', zero_division=0)
    recall = recall_score(y_test, y_pred, average='weighted', zero_division=0)
    f1 = f1_score(y_test, y_pred, average='weighted', zero_division=0)

    # Detailed classification report
    class_report = classification_report(y_test, y_pred, zero_division=0)

    # Confusion matrix. The label order is pinned explicitly (sorted union of
    # true and predicted labels, i.e. what confusion_matrix uses by default)
    # so the heatmap ticks below can show the real class labels rather than
    # positional indices.
    labels = np.unique(np.concatenate([np.ravel(y_test), np.ravel(y_pred)]))
    cm = confusion_matrix(y_test, y_pred, labels=labels)

    # Print results
    print(f"\n{model_name} Model Evaluation:")
    print(f"Accuracy: {accuracy:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"F1-Score: {f1:.4f}")
    print("\nDetailed Classification Report:")
    print(class_report)

    # Make sure the output directory exists so savefig cannot fail after all
    # the evaluation work has already been done.
    results_dir = Path('../results')
    results_dir.mkdir(parents=True, exist_ok=True)

    # Visualize the confusion matrix on an explicit figure/axes pair; the
    # finally-block guarantees the figure is released even if saving fails.
    fig, ax = plt.subplots(figsize=(10, 8))
    try:
        sns.heatmap(
            cm,
            annot=True,
            fmt='d',
            cmap='Blues',
            xticklabels=labels,
            yticklabels=labels,
            ax=ax,
        )
        ax.set_title(f'{model_name} Confusion Matrix')
        ax.set_xlabel('Predicted Label')
        ax.set_ylabel('True Label')
        fig.tight_layout()
        fig.savefig(results_dir / f'{model_name.lower()}_confusion_matrix.png')
    finally:
        plt.close(fig)

    return {
        'accuracy': accuracy,
        'precision': precision,
        'recall': recall,
        'f1_score': f1,
        'classification_report': class_report,
        'confusion_matrix': cm,
        'predictions': y_pred,
        'probabilities': y_pred_proba,
        'true_labels': y_test
    }

In [15]:
# Evaluate both models on the same test split. Each call prints a metric
# summary, saves a confusion-matrix PNG under ../results, and returns a dict of
# metrics, predictions and probabilities that the cells below reuse.
lstm_results = evaluate_model(lstm_model, X_test, y_test, 'LSTM')
rnn_results = evaluate_model(rnn_model, X_test, y_test, 'RNN')


LSTM Model Evaluation:
Accuracy: 0.9554
Precision: 0.9565
Recall: 0.9554
F1-Score: 0.9552

Detailed Classification Report:
              precision    recall  f1-score   support

           0       0.82      0.93      0.87       236
           1       0.98      0.98      0.98       851
           2       0.97      0.96      0.96       153
           3       0.97      0.96      0.96       123
           4       0.93      0.82      0.87       294
           5       0.97      0.98      0.98      1098

    accuracy                           0.96      2755
   macro avg       0.94      0.94      0.94      2755
weighted avg       0.96      0.96      0.96      2755


RNN Model Evaluation:
Accuracy: 0.6083
Precision: 0.4838
Recall: 0.6083
F1-Score: 0.5282

Detailed Classification Report:
              precision    recall  f1-score   support

           0       0.00      0.00      0.00       236
           1       0.71      0.68      0.70       851
           2       0.84      0.69      0.76    

## Visualization of Results

In [16]:
def _misclassification_rates(cm):
    """Row-normalise a confusion matrix and zero its diagonal, leaving only error rates."""
    counts = np.asarray(cm, dtype=float)
    row_totals = counts.sum(axis=1, keepdims=True)
    # A row with no true samples would be 0/0 = NaN; keep it at 0 instead.
    rates = np.divide(
        counts,
        row_totals,
        out=np.zeros_like(counts),
        where=row_totals != 0,
    )
    np.fill_diagonal(rates, 0)
    return rates


def _grouped_bars(ax, tick_labels, lstm_values, rnn_values, title, ylabel):
    """Draw side-by-side LSTM/RNN bars for each tick label on ``ax``."""
    x = np.arange(len(tick_labels))
    width = 0.35
    ax.bar(x - width/2, lstm_values, width, label='LSTM', color='blue', alpha=0.7)
    ax.bar(x + width/2, rnn_values, width, label='RNN', color='green', alpha=0.7)
    ax.set_title(title)
    ax.set_ylabel(ylabel)
    ax.set_xticks(x)
    ax.set_xticklabels(tick_labels, rotation=45)
    ax.legend()


def advanced_visualizations(lstm_results, rnn_results, y_test):
    """Save a five-panel figure comparing the LSTM and RNN evaluation results.

    Panels: (1) overall accuracy/precision/recall/F1, (2) per-class precision,
    (3) histogram of each model's maximum predicted probability, and (4, 5) a
    heatmap of row-normalised misclassification rates per model.

    Parameters
    ----------
    lstm_results, rnn_results : dict
        Result dicts providing the keys ``accuracy``, ``precision``,
        ``recall``, ``f1_score``, ``predictions``, ``probabilities`` and
        ``confusion_matrix``.
    y_test : array-like
        True labels used to recompute the per-class precision of both models.

    Returns
    -------
    None
        The figure is written to ``../results/model_performance_analysis.png``
        (the directory is created if needed) and then closed, so nothing is
        displayed inline.
    """
    fig = plt.figure(figsize=(20, 15))
    try:
        fig.subplots_adjust(hspace=0.4, wspace=0.3)

        # 1. Performance Metrics Comparison
        score_keys = ['accuracy', 'precision', 'recall', 'f1_score']
        _grouped_bars(
            fig.add_subplot(2, 3, 1),
            ['Accuracy', 'Precision', 'Recall', 'F1-Score'],
            [lstm_results[key] for key in score_keys],
            [rnn_results[key] for key in score_keys],
            'Model Performance Metrics Comparison',
            'Score',
        )

        # 2. Per-Class Precision Comparison
        class_names = np.unique(y_test)
        # zero_division=0 matches the calls in evaluate_model: classes that a
        # model never predicts get precision 0 without UndefinedMetricWarning.
        lstm_report = classification_report(
            y_test,
            lstm_results['predictions'],
            output_dict=True,
            zero_division=0,
        )
        rnn_report = classification_report(
            y_test,
            rnn_results['predictions'],
            output_dict=True,
            zero_division=0,
        )
        # The report dict is keyed by the string form of each class label.
        _grouped_bars(
            fig.add_subplot(2, 3, 2),
            class_names,
            [lstm_report[str(cls)]['precision'] for cls in class_names],
            [rnn_report[str(cls)]['precision'] for cls in class_names],
            'Per-Class Precision Comparison',
            'Precision',
        )

        # 3. Prediction Probability Distribution
        ax = fig.add_subplot(2, 3, 3)
        ax.hist(np.max(lstm_results['probabilities'], axis=1),
                bins=50, alpha=0.5, label='LSTM', color='blue')
        ax.hist(np.max(rnn_results['probabilities'], axis=1),
                bins=50, alpha=0.5, label='RNN', color='green')
        ax.set_title('Prediction Probability Distribution')
        ax.set_xlabel('Maximum Prediction Probability')
        ax.set_ylabel('Frequency')
        ax.legend()

        # 4. / 5. Misclassification heatmaps (LSTM in slot 4, RNN in slot 5)
        panels = (('LSTM', lstm_results), ('RNN', rnn_results))
        for slot, (name, results) in enumerate(panels, start=4):
            ax = fig.add_subplot(2, 3, slot)
            sns.heatmap(
                _misclassification_rates(results['confusion_matrix']),
                cmap='YlOrRd',
                annot=True,
                fmt='.2f',
                ax=ax,
            )
            ax.set_title(f'{name} Misclassification Heatmap')
            ax.set_xlabel('Predicted Label')
            ax.set_ylabel('True Label')

        fig.suptitle('Comprehensive Model Performance Analysis', fontsize=16)
        fig.tight_layout(rect=[0, 0.03, 1, 0.95])

        # Ensure the target directory exists before the (slow, 300 dpi) save.
        results_dir = Path('../results')
        results_dir.mkdir(parents=True, exist_ok=True)
        fig.savefig(results_dir / 'model_performance_analysis.png', dpi=300)
    finally:
        # Release the figure even if plotting or saving raised.
        plt.close(fig)

In [17]:
# Build the multi-panel comparison figure. It is saved to
# ../results/model_performance_analysis.png and closed, so no plot appears inline.
advanced_visualizations(lstm_results, rnn_results, y_test)

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


## Compare Models

In [18]:
def compare_models(lstm_results, rnn_results):
    """Print a tab-separated table of headline metrics for the LSTM and the RNN.

    Both arguments are result dicts that must provide the keys ``'accuracy'``,
    ``'precision'``, ``'recall'`` and ``'f1_score'``; each value is printed
    with four decimal places. Nothing is returned.
    """
    # Row heading (with the tab padding that aligns the columns) -> dict key.
    table_rows = (
        ("Accuracy\t", 'accuracy'),
        ("Precision\t", 'precision'),
        ("Recall\t\t", 'recall'),
        ("F1-Score\t", 'f1_score'),
    )

    print("\nModel Comparison:")
    print("Metric\t\tLSTM\t\tRNN")
    for heading, key in table_rows:
        print(f"{heading}{lstm_results[key]:.4f}\t\t{rnn_results[key]:.4f}")

# Print the side-by-side metric table for the two evaluated models.
compare_models(lstm_results, rnn_results)


Model Comparison:
Metric		LSTM		RNN
Accuracy	0.9554		0.6083
Precision	0.9565		0.4838
Recall		0.9554		0.6083
F1-Score	0.9552		0.5282


In [19]:
# Persist the headline metrics as a CSV with one row per model.
results_df = pd.DataFrame(
    [
        {
            'Model': label,
            'Accuracy': scores['accuracy'],
            'Precision': scores['precision'],
            'Recall': scores['recall'],
            'F1-Score': scores['f1_score'],
        }
        for label, scores in (('LSTM', lstm_results), ('RNN', rnn_results))
    ],
    # Spell out the column order so the file layout does not depend on dict ordering.
    columns=['Model', 'Accuracy', 'Precision', 'Recall', 'F1-Score'],
)
results_df.to_csv('../results/model_comparison_results.csv', index=False)

## Conclusion
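This notebook loaded the previously trained LSTM and RNN models, evaluated their performance on the test split, and documented the results. The models were assessed using accuracy, weighted precision, recall, and F1-score, together with per-class classification reports and confusion matrices. The LSTM clearly outperforms the RNN (accuracy 0.9554 vs. 0.6083, weighted F1 0.9552 vs. 0.5282); notably, the RNN never correctly identifies class 0 (precision and recall of 0.00). The results were visualized and saved under `../results` to provide a clear understanding of the models' performance.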