# In [None]:
from typing import Tuple

import joblib
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.model_selection import GridSearchCV, train_test_split
from xgboost import XGBClassifier

def load_processed_data(filepath: str) -> pd.DataFrame:
    """
    Read the processed dataset from a CSV file.

    The file is parsed by ``pd.read_csv`` using its default options.

    Args:
        filepath (str): Location of the processed CSV file.

    Returns:
        pd.DataFrame: Contents of the CSV file.
    """
    frame = pd.read_csv(filepath)
    return frame

def split_features_target(df: pd.DataFrame, target_col: str) -> Tuple[pd.DataFrame, pd.Series]:
    """
    Separate the target column from the remaining feature columns.

    The input dataframe is left untouched: the features come back as a new
    dataframe with ``target_col`` dropped.

    Args:
        df (pd.DataFrame): Dataframe holding both features and target.
        target_col (str): Name of the column to use as the target.

    Returns:
        Tuple[pd.DataFrame, pd.Series]: ``(features, target)`` in that order.
    """
    # The tuple is evaluated left to right: features first, then the target.
    return df.drop(columns=[target_col]), df[target_col]

def train_random_forest(X_train: pd.DataFrame, y_train: pd.Series) -> RandomForestClassifier:
    """
    Fit a Random Forest classifier on the training data.

    The estimator is created with ``random_state=42`` and ``n_jobs=-1``;
    every other hyperparameter is left at the library default.

    Args:
        X_train (pd.DataFrame): Training features.
        y_train (pd.Series): Training target.

    Returns:
        RandomForestClassifier: The fitted estimator.
    """
    forest = RandomForestClassifier(random_state=42, n_jobs=-1)
    # ``fit`` returns the estimator itself, so its result is the trained model.
    return forest.fit(X_train, y_train)

def train_xgboost(X_train: pd.DataFrame, y_train: pd.Series) -> XGBClassifier:
    """
    Fit an XGBoost classifier on the training data.

    The estimator is created with ``use_label_encoder=False``,
    ``eval_metric='logloss'`` and ``random_state=42``; every other
    hyperparameter is left at the library default.

    Args:
        X_train (pd.DataFrame): Training features.
        y_train (pd.Series): Training target.

    Returns:
        XGBClassifier: The fitted estimator.
    """
    # NOTE(review): ``use_label_encoder`` is reported as deprecated in newer
    # xgboost releases -- confirm against the pinned xgboost version.
    settings = {
        "use_label_encoder": False,
        "eval_metric": 'logloss',
        "random_state": 42,
    }
    booster = XGBClassifier(**settings)
    booster.fit(X_train, y_train)
    return booster

def evaluate_model(model, X_test: pd.DataFrame, y_test: pd.Series) -> None:
    """
    Print accuracy, a classification report and a confusion matrix.

    Predictions are obtained from ``model.predict(X_test)`` and compared
    against ``y_test``. Nothing is returned; all results go to stdout.

    Args:
        model: Trained classification model exposing ``predict``.
        X_test (pd.DataFrame): Test features.
        y_test (pd.Series): Test target.
    """
    predictions = model.predict(X_test)

    # Each metric is computed immediately before it is printed.
    accuracy = accuracy_score(y_test, predictions)
    print("Accuracy:", accuracy)

    report = classification_report(y_test, predictions)
    print("\nClassification Report:\n", report)

    print("Confusion Matrix:\n", confusion_matrix(y_test, predictions))

def save_model(model, filepath: str) -> None:
    """
    Persist a trained model to disk with joblib and report the location.

    Args:
        model: Trained model to serialize.
        filepath (str): Destination path of the model file.
    """
    joblib.dump(model, filepath)
    confirmation = f"Model saved to {filepath}"
    print(confirmation)

if __name__ == "__main__":
    # End-to-end training script: load the processed data, train both
    # classifiers, print their evaluation and persist one model for the app.
    processed_data_path = "../data/processed/processed_data.csv"

    # Load processed data
    df = load_processed_data(processed_data_path)

    # Split features and target
    X, y = split_features_target(df, target_col='Churn')

    # Hold out 20% of the rows for testing. ``stratify=y`` asks scikit-learn
    # to stratify the split on the target, and the fixed seed makes the
    # split reproducible.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42, stratify=y
    )

    # Train Random Forest
    print("Training Random Forest...")
    rf_model = train_random_forest(X_train, y_train)
    print("Evaluating Random Forest...")
    evaluate_model(rf_model, X_test, y_test)

    # Train XGBoost
    print("\nTraining XGBoost...")
    xgb_model = train_xgboost(X_train, y_train)
    print("Evaluating XGBoost...")
    evaluate_model(xgb_model, X_test, y_test)

    # The XGBoost model is saved unconditionally; the two evaluations above
    # are printed for manual comparison only and do not drive this choice.
    save_model(xgb_model, "../app/model.pkl")
