#### Setting up the Gamma GLM

In [24]:
# Imports:
import pandas as pd  # For setting up the data frame
from sklearn.preprocessing import StandardScaler  # For normalizing the numerical values
import joblib  # Persistent model

# Constants:
# File names (relative to the notebook's working directory).
MODEL_NAME = 'gamma-glm.joblib'  # Persisted model, loaded with joblib
SCORE_DATASET_FILE_NAME = 'scoring-dataset.csv'  # Input data to score
SUBMISSION_FILE_NAME = 'submission-1.csv'  # Output file with the predictions

# Every column read from the scoring file as a model input.
INDEP_VAR = [
    'year',
    'pol_no_claims_discount', 'pol_duration', 'pol_pay_freq', 'pol_payd', 'pol_usage',
    'drv_sex1', 'drv_age1', 'drv_age_lic1',
    'drv_drv2', 'drv_sex2', 'drv_age2', 'drv_age_lic2',
    'vh_age', 'vh_fuel', 'vh_type', 'vh_speed', 'vh_value', 'vh_weight',
    'population', 'town_surface_area',
]

# Subset of INDEP_VAR that gets one-hot encoded.
CAT_VAR = [
    'pol_pay_freq', 'pol_payd', 'pol_usage',
    'drv_sex1', 'drv_drv2', 'drv_sex2',
    'vh_fuel', 'vh_type',
]

# Subset of INDEP_VAR that gets standardized (everything not in CAT_VAR, same order).
NUM_VAR = [
    'year',
    'pol_no_claims_discount', 'pol_duration',
    'drv_age1', 'drv_age_lic1', 'drv_age2', 'drv_age_lic2',
    'vh_age', 'vh_speed', 'vh_value', 'vh_weight',
    'population', 'town_surface_area',
]

***
#### Making and cleaning the data frame

In [25]:
# Load only the model's input columns from the scoring file.
raw_score_df = pd.read_csv(SCORE_DATASET_FILE_NAME, usecols=INDEP_VAR)

# Cleaning: every missing value (in any column) becomes 0, then each CAT_VAR
# column is expanded into indicator columns with its first level dropped.
# The original note lists drv_age2, drv_age_lic2, vh_speed, vh_value and
# vh_weight as the columns expected to contain NAs.
encoded_df = pd.get_dummies(raw_score_df.fillna(0), columns=CAT_VAR, drop_first=True)

# Standardizing the numerical variables.
# NOTE(review): the scaler is fitted on the scoring data itself, so the
# mean/variance used here are those of this file -- confirm this matches the
# preprocessing the saved model was trained with.
scaler = StandardScaler()
scaled_values = scaler.fit_transform(encoded_df[NUM_VAR])
scaled_var = pd.DataFrame(scaled_values, columns=NUM_VAR)

# Swap the raw numerical columns for their standardized versions; the scaled
# columns are appended after the remaining (indicator) columns. The join
# aligns on the row index, which is the default 0..n-1 range on both sides.
score_df = encoded_df.drop(columns=NUM_VAR).join(scaled_var)

***
#### Predicting and exporting the predictions

In [26]:
# Restore the persisted Gamma GLM from disk.
# joblib.load is pickle-based, so only load model files from a trusted source.
gamma_glm = joblib.load(MODEL_NAME)

# Run the model on the prepared features and wrap the result in a
# single-column frame named 'claim_amount'.
predicted_claims = gamma_glm.predict(score_df)
pred_df = pd.DataFrame(predicted_claims, columns=['claim_amount'])

# Policy identifiers, read from the same scoring file as the features.
id_policy_df = pd.read_csv(SCORE_DATASET_FILE_NAME, usecols=['id_policy'])

# Put id_policy next to claim_amount (aligned on the row index) and write the
# result as a csv file without the index column.
submission_df = id_policy_df.join(pred_df)
submission_df.to_csv(SUBMISSION_FILE_NAME, index=False)