# Model Training & Evaluation

In this notebook we train a logistic regression classifier to predict periods of elevated market risk using the engineered features. We evaluate the model's performance using standard classification metrics and visualise the confusion matrix.

In [None]:
import pandas as pd
from pathlib import Path
from src.model_training import train_logistic_model
from src.visualization import plot_return_distribution, plot_confusion_matrix

# Load the processed dataset; the first CSV column becomes the index and
# pandas attempts to parse it as dates.
analytical_df = pd.read_csv('data/processed/analytical_data.csv', index_col=0, parse_dates=True)

# Features passed to the model. 'Sentiment' is optional and only included
# when the processed data actually contains that column.
feature_cols = ['Log_Return', 'Rolling_Vol']
if 'Sentiment' in analytical_df.columns:
    feature_cols.append('Sentiment')

# Train the model and report its evaluation metrics. The returned object is
# read here for: accuracy, precision, recall, f1, roc_auc and confusion.
results = train_logistic_model(analytical_df, feature_cols)

# NOTE: line breaks inside the message must be written as "\n" escapes; a raw
# newline inside a single-quoted (f-)string literal is a SyntaxError.
print(
    f"Accuracy: {results.accuracy:.3f}\n"
    f"Precision: {results.precision:.3f}\n"
    f"Recall: {results.recall:.3f}\n"
    f"F1-Score: {results.f1:.3f}\n"
    f"ROC-AUC: {results.roc_auc:.3f}"
)
print("Confusion Matrix:\n", results.confusion)

# Plot the confusion matrix, passing the target image path to the helper.
plot_confusion_matrix(results.confusion, path=Path('visualizations/confusion_matrix.png'))