#### Setting up the Gamma GLM

In [143]:
# Imports:
import pandas as pd  # For setting up the data frame
from sklearn.preprocessing import StandardScaler  # For normalizing the numerical values
from sklearn.linear_model import GammaRegressor  # For the Gamma GLM
from sklearn.model_selection import train_test_split  # For setting up the train-test modularization
from sklearn.metrics import root_mean_squared_error
import joblib  # Persistent model
import numpy as np

# Constants:
DATASET_FILE_NAME = 'training-dataset.csv'

INDEP_VAR = ['year', 'pol_no_claims_discount', 'pol_duration', 'pol_pay_freq', 'pol_payd', 'pol_usage', 'drv_sex1', 'drv_age1', 'drv_age_lic1', 'drv_drv2', 'drv_sex2', 'drv_age2', 'drv_age_lic2', 'vh_age', 'vh_fuel', 'vh_type', 'vh_speed', 'vh_value', 'vh_weight', 'population', 'town_surface_area']
DEP_VAR = 'claim_amount'
 
CAT_VAR = ['pol_pay_freq', 'pol_payd', 'pol_usage', 'drv_sex1', 'drv_drv2', 'drv_sex2', 'vh_fuel', 'vh_type']
NUM_VAR = ['year', 'pol_no_claims_discount', 'pol_duration', 'drv_age1', 'drv_age_lic1', 'drv_age2', 'drv_age_lic2', 'vh_age', 'vh_speed', 'vh_value', 'vh_weight', 'population', 'town_surface_area']

MODEL_NAME = 'gamma-glm.joblib'

***
#### Making and cleaning the data frame

In [144]:
# Data frame and input set:
X, y = pd.read_csv(DATASET_FILE_NAME, usecols=INDEP_VAR), pd.read_csv(DATASET_FILE_NAME)[DEP_VAR]

# Cleaning the data frame:
X = X.fillna(0)  # Replaces NA entries with 0 for drv_age2, drv_age_lic2, vh_speed, vh_value, and vh_weight
X = pd.get_dummies(X, columns=CAT_VAR, drop_first=True)  # Converts CAT_VAR into boolean variables

scaler = StandardScaler()
scaled_var = pd.DataFrame(scaler.fit_transform((X[NUM_VAR])), columns=NUM_VAR)  # Normalizing the numerical variables
X = X.drop(columns=NUM_VAR)  # Dropping the old numerical variables
X = X.join(scaled_var)  # Joining the new numerical variables

***
#### Setting up the train-test modularization and Gamma GLM

In [145]:
# Training and testing sets:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

# Gamma GLM:
gamma_glm = GammaRegressor()
# gamma_glm.fit(X_train, y_train)  # Parameters are supposed to be X, y
gamma_glm.fit(X, y)

***
#### Predicting the test set and calculating the accuracy

In [146]:
# Predictions:
predictions = gamma_glm.predict(X_test)

# RMSE:
RMSE = root_mean_squared_error(y_test, predictions)
print("RMSE:", round(RMSE, 3))
print("Pred Mean:", round(predictions.mean(), 3))
print("Actual Mean:", round(y_test.mean(), 3))
print("RMSE / Actual Mean:", round(RMSE / y_test.mean(), 3))

RMSE: 743.778
Pred Mean: 906.373
Actual Mean: 878.249
RMSE / Actual Mean: 0.847


***
#### Exporting the model

In [147]:
# Persistent model:
joblib.dump(gamma_glm, MODEL_NAME)

['gamma-glm.joblib']