In [3]:
# ============================================================================
# 03_modeling.ipynb
# Model Training and Evaluation for Multi-Class Diabetes Classification
# Dataset: BRFSS 2015 - Diabetes Health Indicators (3 Classes)
# 
# OPTIMIZATION GOAL: HIGH RECALL
# Medical Context: In diabetes screening, it is more important to identify
# all potential diabetes cases (high recall) even if it means some false 
# positives. Missing a diabetes case (false negative) has more serious 
# health consequences than a false alarm (false positive).
# ============================================================================

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
import sys
import warnings
# Install a blanket "ignore" filter so that no warning is shown from this
# point on, keeping the notebook's cell output compact.
# NOTE(review): this presumably also hides library deprecation and solver
# convergence warnings raised during model training — confirm that is
# acceptable, or narrow the filter by category/module.
warnings.filterwarnings('ignore')

# Add src directory to path to import custom modules
# (feature_engineering, modeling). The entry is relative, so it is resolved
# against the notebook's current working directory at import time.
# The membership check keeps this cell idempotent: re-running it does not
# pile up duplicate entries in sys.path.
_src_core_dir = '../src/core'
if _src_core_dir not in sys.path:
    sys.path.append(_src_core_dir)

# Import custom modules for feature engineering and modeling
from feature_engineering import apply_all_feature_engineering
from modeling import (
    train_logistic_regression,
    train_random_forest,
    train_xgboost,
    train_svm,
    evaluate_model,
    plot_confusion_matrix,
    plot_classification_report,
    plot_roc_curves,
    compare_models,
    save_model
)

# Scikit-learn imports for scaling
from sklearn.preprocessing import StandardScaler

# Shared look for every figure drawn in this notebook: seaborn grid style and
# colour-blind-friendly palette first, then the default figure size and base
# font size applied to matplotlib's rcParams in one call.
sns.set_style("whitegrid")
sns.set_palette("colorblind")
plt.rcParams.update({
    'figure.figsize': (12, 6),
    'font.size': 10,
})

def _ensure_dir(path):
    """Create *path* (including parents) if it is missing and return it."""
    os.makedirs(path, exist_ok=True)
    return path


# Destination for the figures saved by this notebook.
output_dir = _ensure_dir("../outputs/figures/modeling")
print(f"Output directory created: {output_dir}")

# Destination for the serialized trained models.
models_dir = _ensure_dir("../outputs/models")
print(f"Models directory created: {models_dir}")


Output directory created: ../outputs/figures/modeling
Models directory created: ../outputs/models
