In [3]:
# Import libraries
import pandas as pd
from src.preprocessing import load_data, prepare_frequency_data, prepare_severity_data
from src.models import fit_poisson_glm, fit_gamma_glm
from src.evaluation import regression_metrics

In [6]:
# Load dataset
df = load_data("../Data/raw/GLM_example.csv")

In [7]:
# Prepare data
df_freq = prepare_frequency_data(df)
df_sev = prepare_severity_data(df)

FEATURES = ["VehPower", "VehAge", "DrivAge", "BonusMalus", "Density"]

X_freq = df_freq[FEATURES]
y_freq = df_freq["ClaimNb"]
offset = df_freq["log_exposure"]

X_sev = df_sev[FEATURES]
y_sev = df_sev["ClaimAmount"]

In [8]:

# Fit models
model_pois = fit_poisson_glm(X_freq, y_freq, offset)
model_gamma = fit_gamma_glm(X_sev, y_sev)



In [12]:
import statsmodels.api as sm

# Predictions
X_freq_const = sm.add_constant(X_freq)
X_sev_const = sm.add_constant(X_sev)

freq_pred = model_pois.predict(X_freq_const, offset=offset)
sev_pred = model_gamma.predict(X_sev_const)

pure_premium_pred = freq_pred * sev_pred


In [13]:
# Evaluate
metrics_freq = regression_metrics(y_freq, freq_pred)
metrics_sev = regression_metrics(y_sev, sev_pred)

print("Frequency model metrics:", metrics_freq)
print("Severity model metrics:", metrics_sev)

Frequency model metrics: {'MSE': np.float64(0.04121005904052894), 'RMSE': np.float64(0.20300260845745047), 'R2': 0.019738990434141623}
Severity model metrics: {'MSE': np.float64(66295705.449217476), 'RMSE': np.float64(8142.217477396282), 'R2': 0.0006444577731502532}


In [14]:
# Combine results
results_df = df.copy()
results_df["Predicted_ClaimNb"] = freq_pred
results_df["Predicted_ClaimAmount"] = 0
results_df.loc[results_df["ClaimNb"] > 0, "Predicted_ClaimAmount"] = sev_pred
results_df["Predicted_PurePremium"] = pure_premium_pred
results_df.head()

 1893.97756837]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  results_df.loc[results_df["ClaimNb"] > 0, "Predicted_ClaimAmount"] = sev_pred


Unnamed: 0,IDpol,ClaimNb,Exposure,Area,VehPower,VehAge,DrivAge,BonusMalus,VehBrand,VehGas,...,Region[T.R91],Region[T.R93],Region[T.R94],VehPower.1,VehAge.1,DrivAge.1,BonusMalus.1,Predicted_ClaimNb,Predicted_ClaimAmount,Predicted_PurePremium
0,1,0,0.1,D,5,0,55,50,B12,Regular,...,0.0,0.0,0.0,5.0,0.0,55.0,50.0,0.005954,0.0,
1,3,0,0.77,D,5,0,55,50,B12,Regular,...,0.0,0.0,0.0,5.0,0.0,55.0,50.0,0.045845,0.0,
2,5,0,0.75,B,6,2,52,50,B12,Diesel,...,0.0,0.0,0.0,6.0,2.0,52.0,50.0,0.043771,0.0,
3,10,0,0.09,B,7,0,46,50,B12,Diesel,...,0.0,0.0,0.0,7.0,0.0,46.0,50.0,0.005418,0.0,
4,11,0,0.84,B,7,0,46,50,B12,Diesel,...,0.0,0.0,0.0,7.0,0.0,46.0,50.0,0.050564,0.0,
