In [None]:
# Import necessary modules
import pandas as pd
from scripts.data_prep import load_data, preprocess_data, split_data
from scripts.statistical_modeling import train_linear_regression, train_random_forest, train_xgboost, evaluate_model
from scripts.utils import plot_feature_importance, shap_analysis

In [None]:
# Load the insurance CSV and pass it through the project's preprocessing helper.
# The raw frame gets its own name so that `data` is only ever bound to the
# preprocessed result: if preprocessing fails, later cells cannot silently
# pick up the unprocessed frame under the name `data`.
try:
    raw_data = load_data('data/insurance_data.csv')
    data = preprocess_data(raw_data)
    print("Data loaded and preprocessed successfully.")
except Exception as e:
    print(f"Error loading or preprocessing data: {e}")
    # Re-raise so the cell is marked as failed and "Run All" stops here,
    # instead of continuing and failing later with an unrelated NameError.
    raise

In [None]:
# Split the preprocessed frame into train/test features and targets.
# `split_data` is a project helper; it is expected to return four objects,
# unpacked here in the order (X_train, X_test, y_train, y_test).
# 'TotalClaims' is the column used as the regression target.
try:
    X_train, X_test, y_train, y_test = split_data(data, target_column='TotalClaims')
    print("Data split successfully.")
except Exception as e:
    print(f"Error splitting data: {e}")
    # Re-raise: without the split, every later cell would fail on undefined
    # names (or reuse stale splits from a previous run), so stop here with
    # the real traceback.
    raise

In [None]:
# Train three regressors on the training split and score each on the test split.
# `evaluate_model` is a project helper; its two return values are unpacked and
# reported as RMSE and R² (presumably in that order — verify against
# scripts.statistical_modeling).
try:
    # Linear Regression
    lin_reg = train_linear_regression(X_train, y_train)
    lin_rmse, lin_r2 = evaluate_model(lin_reg, X_test, y_test)
    print(f"Linear Regression -> RMSE: {lin_rmse}, R²: {lin_r2}")

    # Random Forest
    rf_model = train_random_forest(X_train, y_train)
    rf_rmse, rf_r2 = evaluate_model(rf_model, X_test, y_test)
    print(f"Random Forest -> RMSE: {rf_rmse}, R²: {rf_r2}")

    # XGBoost
    xgb_model = train_xgboost(X_train, y_train)
    xgb_rmse, xgb_r2 = evaluate_model(xgb_model, X_test, y_test)
    print(f"XGBoost -> RMSE: {xgb_rmse}, R²: {xgb_r2}")
except Exception as e:
    print(f"Error training or evaluating models: {e}")
    # Re-raise: a failure part-way through leaves the later models undefined
    # (or stale from an earlier run). Surface the traceback instead of letting
    # downstream cells run against missing or outdated models.
    raise

In [None]:
# Feature importance and SHAP analysis for the Random Forest model.
# Only `rf_model` is analysed here; the other trained models are not touched.
# `plot_feature_importance` and `shap_analysis` are project helpers from
# scripts.utils; importances are paired with the training column names.
try:
    print("Performing feature importance and SHAP analysis for Random Forest...")
    plot_feature_importance(rf_model.feature_importances_, X_train.columns, title="Random Forest Feature Importance")
    shap_analysis(rf_model, X_test)
except Exception as e:
    print(f"Error analyzing feature importance or SHAP: {e}")
    # Re-raise so a failed analysis is visible as a failed cell with a full
    # traceback rather than a single printed line.
    raise