# Machine Learning Benchmarking for Claim Frequency

**Project:** PRISM – Predictive & Research-based Insurance Statistical Modeling

## Objective
Benchmark the claim-frequency GLM against a machine learning model (XGBoost) and evaluate the trade-off between predictive accuracy and interpretability.


In [None]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_poisson_deviance

from xgboost import XGBRegressor


In [None]:
# Mount Google Drive at /content/drive so files under that path can be read.
# This import is only available inside a Google Colab runtime.
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [None]:
# Read the policy-level claims file and map its source headers to snake_case
# names in one chained expression. Columns not listed keep their original name.
freq = pd.read_csv("/content/drive/MyDrive/freMTPL2freq.csv").rename(
    columns={
        "IDpol": "policy_id",
        "ClaimNb": "claim_count",
        "Exposure": "exposure",
        "Area": "area",
        "VehPower": "vehicle_power",
        "VehAge": "vehicle_age",
        "DrivAge": "driver_age",
        "BonusMalus": "bonus_malus",
        "VehBrand": "vehicle_brand",
        "VehGas": "vehicle_gas",
    }
)


In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from xgboost import XGBRegressor
from sklearn.metrics import mean_poisson_deviance


In [None]:
# Rating factors used as model features.
X = freq[[
    "area", "vehicle_power", "vehicle_age", "driver_age",
    "bonus_malus", "vehicle_brand", "vehicle_gas"
]]

exposure = freq["exposure"]

# Dividing by exposure below requires every policy to have a positive exposure;
# fail loudly instead of propagating inf/NaN targets into training and scoring.
if not (exposure > 0).all():
    raise ValueError("exposure must be strictly positive to compute claim frequency")

# Target is claim FREQUENCY (claims per unit of exposure), not the raw count.
# Policies are observed for different lengths of time, so a Poisson model must
# either use an exposure offset or, equivalently, model count / exposure with
# exposure as the sample weight. Downstream cells already pass exposure as the
# weight for fitting and for the deviance, so only the target needs rescaling.
# This also puts `y` on the same scale as a predicted frequency.
y = (freq["claim_count"] / exposure).rename("claim_frequency")


In [None]:
# Hold out 25% of the rows for evaluation. Features, target and exposure are
# split in a single call so the three pieces stay row-aligned; the fixed seed
# makes the partition reproducible.
(
    X_train, X_test,
    y_train, y_test,
    exp_train, exp_test,
) = train_test_split(X, y, exposure, test_size=0.25, random_state=42)


In [None]:
# Column groups: string-valued factors are one-hot encoded, numeric ones are
# forwarded unchanged.
categorical = ["area", "vehicle_brand", "vehicle_gas"]
numerical = ["vehicle_power", "vehicle_age", "driver_age", "bonus_malus"]

preprocess = ColumnTransformer(
    transformers=[
        # handle_unknown="ignore": a category unseen during fit encodes as all
        # zeros at predict time instead of raising.
        ("cat", OneHotEncoder(handle_unknown="ignore"), categorical),
        ("num", "passthrough", numerical),
    ]
)


In [None]:
# Encoding and the booster live in one pipeline so the same preprocessing is
# applied at fit and predict time.
xgb_model = Pipeline(steps=[
    ("prep", preprocess),
    ("model", XGBRegressor(
        # Poisson objective for a non-negative claims target.
        objective="count:poisson",
        # Tree ensemble size and shape.
        n_estimators=300,
        max_depth=4,
        learning_rate=0.05,
        # Row / column subsampling per tree.
        subsample=0.8,
        colsample_bytree=0.8,
        random_state=42,
    )),
])


In [None]:
# Fit the preprocessing and the booster together. The `model__` prefix routes
# the exposure weights to the pipeline step named "model".
xgb_model.fit(
    X_train,
    y_train,
    model__sample_weight=exp_train,
)


In [None]:
# Score the held-out rows; predictions are on the same scale as the target the
# pipeline was fitted on (`y_train`).
y_pred = xgb_model.predict(X_test)


In [None]:
# Exposure-weighted mean Poisson deviance of the XGBoost predictions on the
# held-out rows (lower is better).
mean_poisson_deviance(
    y_test,
    y_pred,
    sample_weight=exp_test,
)


0.34510019639716993

In [None]:
from sklearn.metrics import mean_poisson_deviance

# Load expected frequency from GLM
glm_pred = pd.read_csv(
    "/content/drive/MyDrive/expected_frequency.csv"
)

# The lookup below matches GLM predictions to test rows purely by row label, so
# it is only valid when this file has one row per row of `freq`, in the same
# order. A different row count means the labels cannot refer to the same
# policies, so stop here rather than score the GLM against the wrong rows.
# NOTE(review): joining on a shared policy key would be safer - confirm whether
# the file carries one.
if len(glm_pred) != len(freq):
    raise ValueError(
        f"GLM prediction file has {len(glm_pred)} rows but the policy table has "
        f"{len(freq)}; cannot align predictions to the test set by row position"
    )

# Align with test set
glm_test = glm_pred.loc[X_test.index, "expected_frequency"]

# Same metric, target and weights as used for the XGBoost score.
# NOTE(review): the comparison is like-for-like only if these GLM predictions
# are out-of-sample for the test rows and on the same scale as `y_test` - confirm.
mean_poisson_deviance(y_test, glm_test, sample_weight=exp_test)


0.3579508408841741

## ML vs GLM Comparison

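XGBoost achieves a slightly lower exposure-weighted mean Poisson deviance than the GLM on the held-out test set, indicating marginally better predictive performance.  
This comparison is like-for-like only if the GLM predictions are out-of-sample for the same test policies and on the same scale as the target.  
However, the GLM is retained as the production model due to its interpretability, stability, and regulatory suitability.  
This demonstrates the trade-off between accuracy and explainability in insurance pricing.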
