### Import dataset

In [8]:
import pandas as pd
import seaborn as sns
from sklearn.model_selection import train_test_split
from interpret import show
from interpret.data import Marginal
from interpret.perf import RegressionPerf
from interpret.glassbox import ExplainableBoostingRegressor, LinearRegression, RegressionTree

# Load the raw training data from the CSV file.
df = pd.read_csv("new_train.csv")

# Normalise the headers: each space in a column name becomes an underscore.
df.columns = df.columns.str.replace(' ', '_')

# Discard columns that are repeated or could lead to bias (Gender).
df = df.drop(
    columns=['State_Code', 'Country', 'Gender', 'Customer', "Effective_To_Date"]
)

# Separate the regression target from the remaining predictor columns.
y = df['Total_Claim_Amount']
X = df.drop(columns=['Total_Claim_Amount'])

# Hold out 20% of the rows as a test set; the fixed seed keeps the split
# reproducible and is reused when the model is constructed.
seed = 42
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.20,
    random_state=seed,
)

### Explainable Boosting Machine (EBM)

In [2]:
# Build a marginal data explanation from the training split and render it.
marginal = Marginal().explain_data(
    X_train,
    y_train,
    name='Train Data',
)
show(marginal)

## Global Explanations: What the model learned overall

In [4]:
# Construct the explainable boosting regressor with the shared seed,
# then train it on the training split.
ebm = ExplainableBoostingRegressor(
    n_jobs=-1,
    random_state=seed,
)
ebm.fit(X_train, y_train)

# Ask the fitted model for its global explanation and render it.
ebm_global = ebm.explain_global(name='EBM')
show(ebm_global)

## Local Explanations: How an individual prediction was made

In [5]:
# Explain the model's predictions for the first five test rows.
# `.iloc` makes the positional intent explicit: after the shuffled split the
# frame and series carry non-contiguous integer labels, so a bare `[:5]`
# reads ambiguously as either label-based or position-based slicing.
ebm_local = ebm.explain_local(X_test.iloc[:5], y_test.iloc[:5], name='EBM')
show(ebm_local)

## Evaluate EBM performance

In [7]:
# Evaluate the fitted model's predictions against the held-out test split.
ebm_perf = RegressionPerf(ebm.predict).explain_perf(
    X_test,
    y_test,
    name='EBM',
)

# Pull the 'mae' entry out of the performance explanation's data and print it.
mae = ebm_perf.data()['mae']
print(mae)

# Render the performance explanation (residual plot).
show(ebm_perf)

77.53809856748711
