# Load Libraries and dataset

### Libraries

In [1]:
import pandas as pd
import numpy as np

# model building libraries
from sklearn.model_selection import train_test_split

from sklearn.metrics import mean_absolute_error

import statsmodels.api as sm
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import precision_recall_fscore_support as score

import matplotlib.pyplot as plt


  import pandas.util.testing as tm


In [2]:
# Mount Google Drive into the Colab runtime; the data-loading cell reads its
# CSV from under /content/drive. This cell only works inside Google Colab.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


### Prepare Data and create dummy variables

In [3]:
# Load the cleaned training data from the mounted Drive folder.
# NOTE(review): absolute Colab path - adjust when running outside this Drive layout.
train = pd.read_csv('/content/drive/My Drive/DS_Projects/claims_severity/train_clean.csv')
# Loading of the separate test file is left disabled.
#test = pd.read_csv('/content/drive/My Drive/DS_Projects/claims_severity/test.csv', error_bad_lines=False)

In [4]:
# Columns to exclude from modelling, chosen by position in the raw frame:
# positions 119-121, positions 123 up to (but not including) the last column,
# and finally positions 116 and 87.
all_columns = list(train.columns)
del_variables = [*all_columns[119:122], *all_columns[123:-1]]
del_variables += [all_columns[116], all_columns[87]]
print(del_variables)

['cont4', 'cont5', 'cont6', 'cont8', 'cont9', 'cont10', 'cont11', 'cont12', 'cont13', 'cont14', 'cont1', 'cat88']


In [5]:
# Columns to one-hot encode: the first 116 columns by position, skipping
# position 87 (that column is put on the drop list instead of being encoded).
dummy_variables = [
    name for position, name in enumerate(train.columns[:116]) if position != 87
]
print(dummy_variables)

['cat1', 'cat2', 'cat3', 'cat4', 'cat5', 'cat6', 'cat7', 'cat8', 'cat9', 'cat10', 'cat11', 'cat12', 'cat13', 'cat14', 'cat15', 'cat16', 'cat17', 'cat18', 'cat19', 'cat20', 'cat21', 'cat22', 'cat23', 'cat24', 'cat25', 'cat26', 'cat27', 'cat28', 'cat29', 'cat30', 'cat31', 'cat32', 'cat33', 'cat34', 'cat35', 'cat36', 'cat37', 'cat38', 'cat39', 'cat40', 'cat41', 'cat42', 'cat43', 'cat44', 'cat45', 'cat46', 'cat47', 'cat48', 'cat49', 'cat50', 'cat51', 'cat52', 'cat53', 'cat54', 'cat55', 'cat56', 'cat57', 'cat58', 'cat59', 'cat60', 'cat61', 'cat62', 'cat63', 'cat64', 'cat65', 'cat66', 'cat67', 'cat68', 'cat69', 'cat70', 'cat71', 'cat72', 'cat73', 'cat74', 'cat75', 'cat76', 'cat77', 'cat78', 'cat79', 'cat80', 'cat81', 'cat82', 'cat83', 'cat84', 'cat85', 'cat86', 'cat87', 'cat89', 'cat90', 'cat91', 'cat92', 'cat93', 'cat94', 'cat95', 'cat96', 'cat97', 'cat98', 'cat99', 'cat100', 'cat101', 'cat102', 'cat103', 'cat104', 'cat105', 'cat106', 'cat107', 'cat108', 'cat109', 'cat110', 'cat111', 'cat11

In [6]:
# One-hot encode the columns listed in `dummy_variables` and remove the
# columns listed in `del_variables`.
#
# The lists built in the previous cells are reused instead of re-deriving the
# same positional slices here, so the selection lives in one place.
# Dropping first and assigning the result once (no inplace mutation) also means
# an accidental re-run of this cell fails immediately with a KeyError on the
# drop, leaving `train` untouched, rather than first re-encoding the
# already-encoded frame.
train = pd.get_dummies(train.drop(columns=del_variables), columns=dummy_variables)

In [7]:
#variables = dummy_variables + ['cont2', 'cont3', 'cont7', 'loss']

In [8]:
# Hold out 20% of the rows for evaluation; `loss` is the regression target and
# every remaining column is a feature. The seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    train.drop(columns='loss'),
    train['loss'],
    test_size=0.2,
    random_state=1234,
)

In [9]:
# Summary statistics of the training features, as a sanity check of the
# encoded design matrix.
X_train.describe()

Unnamed: 0,cont2,cont3,cont7,cat1_A,cat1_B,cat2_A,cat2_B,cat3_A,cat3_B,cat4_A,cat4_B,cat5_A,cat5_B,cat6_A,cat6_B,cat7_A,cat7_B,cat8_A,cat8_B,cat9_A,cat9_B,cat10_A,cat10_B,cat11_A,cat11_B,cat12_A,cat12_B,cat13_A,cat13_B,cat14_A,cat14_B,cat15_A,cat15_B,cat16_A,cat16_B,cat17_A,cat17_B,cat18_A,cat18_B,cat19_A,...,cat116_LU,cat116_LV,cat116_LW,cat116_LX,cat116_LY,cat116_M,cat116_MA,cat116_MB,cat116_MC,cat116_MD,cat116_ME,cat116_MF,cat116_MG,cat116_MH,cat116_MI,cat116_MJ,cat116_MK,cat116_ML,cat116_MM,cat116_MN,cat116_MO,cat116_MP,cat116_MQ,cat116_MR,cat116_MS,cat116_MT,cat116_MU,cat116_MV,cat116_MW,cat116_O,cat116_P,cat116_Q,cat116_R,cat116_S,cat116_T,cat116_U,cat116_V,cat116_W,cat116_X,cat116_Y
count,150654.0,150654.0,150654.0,150654.0,150654.0,150654.0,150654.0,150654.0,150654.0,150654.0,150654.0,150654.0,150654.0,150654.0,150654.0,150654.0,150654.0,150654.0,150654.0,150654.0,150654.0,150654.0,150654.0,150654.0,150654.0,150654.0,150654.0,150654.0,150654.0,150654.0,150654.0,150654.0,150654.0,150654.0,150654.0,150654.0,150654.0,150654.0,150654.0,150654.0,...,150654.0,150654.0,150654.0,150654.0,150654.0,150654.0,150654.0,150654.0,150654.0,150654.0,150654.0,150654.0,150654.0,150654.0,150654.0,150654.0,150654.0,150654.0,150654.0,150654.0,150654.0,150654.0,150654.0,150654.0,150654.0,150654.0,150654.0,150654.0,150654.0,150654.0,150654.0,150654.0,150654.0,150654.0,150654.0,150654.0,150654.0,150654.0,150654.0,150654.0
mean,0.507241,0.498881,0.48472,0.751563,0.248437,0.565826,0.434174,0.944887,0.055113,0.682053,0.317947,0.657108,0.342892,0.699484,0.300516,0.975567,0.024433,0.941243,0.058757,0.599632,0.400368,0.850187,0.149813,0.892807,0.107193,0.848062,0.151938,0.896531,0.103469,0.987707,0.012293,0.999827,0.000173,0.965205,0.034795,0.99323,0.00677,0.994743,0.005257,0.990389,...,7e-06,0.0016,0.003352,0.001805,0.00697,1.3e-05,0.000438,7e-06,0.001865,0.010342,0.0015,0.0,0.001872,0.0,0.000558,0.003764,0.000166,8e-05,5.3e-05,6e-05,0.000186,0.000385,0.000219,9.3e-05,7e-06,0.0,0.000232,7e-06,0.000119,1.3e-05,1.3e-05,1.3e-05,2.7e-05,1.3e-05,7e-06,6.6e-05,7e-06,7e-06,7e-06,6e-05
std,0.20732,0.201752,0.178246,0.432108,0.432108,0.49565,0.49565,0.228202,0.228202,0.46568,0.46568,0.474677,0.474677,0.458484,0.458484,0.154391,0.154391,0.23517,0.23517,0.489974,0.489974,0.35689,0.35689,0.309359,0.309359,0.358962,0.358962,0.304571,0.304571,0.110191,0.110191,0.013136,0.013136,0.183261,0.183261,0.082004,0.082004,0.072315,0.072315,0.097566,...,0.002576,0.039964,0.0578,0.042452,0.083193,0.003644,0.020926,0.002576,0.043148,0.101167,0.038703,0.0,0.043224,0.0,0.023606,0.061233,0.012881,0.008925,0.007287,0.007729,0.013632,0.019617,0.014799,0.00964,0.002576,0.0,0.01524,0.002576,0.01093,0.003644,0.003644,0.003644,0.005153,0.003644,0.002576,0.008147,0.002576,0.002576,0.002576,0.007729
min,0.001149,0.002634,0.069503,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.358319,0.336963,0.350203,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,0.555782,0.527991,0.438285,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,0.681761,0.634224,0.590687,1.0,0.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,0.0,1.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
max,0.862654,0.944251,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [10]:
# Confirm that the continuous feature 'cont2' is still among the training columns.
'cont2' in X_train.columns

True

In [11]:
# (rows, feature columns) of the training design matrix.
X_train.shape

(150654, 1138)

In [12]:
# Length of the training target; should match the row count of X_train.
y_train.shape

(150654,)

# Modeling

### Linear Model

In [None]:
# Ordinary least squares benchmark fitted on the training split.
# No explicit constant is added (the add_constant call below is disabled).
# NOTE(review): every categorical is one-hot encoded with all levels kept, so
# the dummy columns are linearly dependent and presumably already span an
# intercept - confirm before interpreting individual coefficients.
#X = sm.add_constant(X_train)
est = sm.OLS(y_train, X_train).fit()
est.summary()

  return self.params / self.bse
  return (a < x) & (x < b)
  return (a < x) & (x < b)
  cond2 = cond0 & (x <= _a)


0,1,2,3
Dep. Variable:,loss,R-squared:,0.531
Model:,OLS,Adj. R-squared:,0.528
Method:,Least Squares,F-statistic:,182.7
Date:,"Sat, 19 Sep 2020",Prob (F-statistic):,0.0
Time:,13:21:32,Log-Likelihood:,-1356900.0
No. Observations:,150654,AIC:,2716000.0
Df Residuals:,149725,BIC:,2725000.0
Df Model:,928,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
cont2,1353.1317,53.180,25.444,0.000,1248.900,1457.363
cont3,617.4173,99.297,6.218,0.000,422.797,812.038
cont7,684.0058,119.774,5.711,0.000,449.251,918.761
cat1_A,281.1237,18.717,15.020,0.000,244.439,317.808
cat1_B,42.6973,18.768,2.275,0.023,5.913,79.482
cat2_A,530.2458,191.671,2.766,0.006,154.575,905.917
cat2_B,-206.4248,191.405,-1.078,0.281,-581.574,168.724
cat3_A,-118.4105,96.946,-1.221,0.222,-308.423,71.602
cat3_B,442.2315,100.308,4.409,0.000,245.630,638.833

0,1,2,3
Omnibus:,99566.572,Durbin-Watson:,1.988
Prob(Omnibus):,0.0,Jarque-Bera (JB):,5610813.659
Skew:,2.53,Prob(JB):,0.0
Kurtosis:,32.466,Cond. No.,4e+16


R-squared: 0.531 (adjusted R-squared: 0.528)

In [None]:
# Predict the held-out rows with the fitted OLS model.
y_pred = est.predict(X_test)

In [None]:
# Hold-out mean absolute error of the OLS benchmark.
mean_absolute_error(y_test, y_pred)

1304.9119840171602

In [None]:
# Average predicted loss on the held-out rows (quick check of prediction scale).
y_pred.mean()

3025.4123995341824

### Random Forest Regressor

In [None]:
from sklearn.ensemble import RandomForestRegressor

In [None]:
# Random forest 1: default forest, fixed seed, n_jobs=-1 for parallel fitting.
rg1 = RandomForestRegressor(random_state=2525, n_jobs=-1).fit(X_train, y_train)

# Hold-out mean absolute error.
y_pred = rg1.predict(X_test)
mean_absolute_error(y_true=y_test, y_pred=y_pred)

1245.3555110879372

In [None]:
# Random forest 2: same settings as above but with 120 trees.
rg2 = RandomForestRegressor(
    n_estimators=120, random_state=2525, n_jobs=-1
).fit(X_train, y_train)

# Hold-out mean absolute error.
y_pred = rg2.predict(X_test)
mean_absolute_error(y_true=y_test, y_pred=y_pred)

1244.3078535577977

### Gradient Boosting Regressor

In [14]:
from sklearn.ensemble import GradientBoostingRegressor

In [16]:
# GB model 1: default gradient boosting with a fixed seed.
gb1 = GradientBoostingRegressor(random_state=2525).fit(X_train, y_train)
gb1

GradientBoostingRegressor(alpha=0.9, ccp_alpha=0.0, criterion='friedman_mse',
                          init=None, learning_rate=0.1, loss='ls', max_depth=3,
                          max_features=None, max_leaf_nodes=None,
                          min_impurity_decrease=0.0, min_impurity_split=None,
                          min_samples_leaf=1, min_samples_split=2,
                          min_weight_fraction_leaf=0.0, n_estimators=100,
                          n_iter_no_change=None, presort='deprecated',
                          random_state=2525, subsample=1.0, tol=0.0001,
                          validation_fraction=0.1, verbose=0, warm_start=False)

In [17]:
# Hold-out mean absolute error for GB model 1.
y_pred = gb1.predict(X_test)
mean_absolute_error(y_true=y_test, y_pred=y_pred)

1266.7901906644993

GB: Model 2

In [18]:
# GB model 2: 200 boosting stages, everything else as in model 1.
gb2 = GradientBoostingRegressor(n_estimators=200, random_state=2525).fit(X_train, y_train)
gb2

GradientBoostingRegressor(alpha=0.9, ccp_alpha=0.0, criterion='friedman_mse',
                          init=None, learning_rate=0.1, loss='ls', max_depth=3,
                          max_features=None, max_leaf_nodes=None,
                          min_impurity_decrease=0.0, min_impurity_split=None,
                          min_samples_leaf=1, min_samples_split=2,
                          min_weight_fraction_leaf=0.0, n_estimators=200,
                          n_iter_no_change=None, presort='deprecated',
                          random_state=2525, subsample=1.0, tol=0.0001,
                          validation_fraction=0.1, verbose=0, warm_start=False)

In [19]:
# Hold-out mean absolute error for GB model 2.
y_pred = gb2.predict(X_test)
mean_absolute_error(y_true=y_test, y_pred=y_pred)

1237.6168790854867

GB: Model 3

In [20]:
# GB model 3: 300 boosting stages.
gb3 = GradientBoostingRegressor(n_estimators=300, random_state=2525).fit(X_train, y_train)
gb3

GradientBoostingRegressor(alpha=0.9, ccp_alpha=0.0, criterion='friedman_mse',
                          init=None, learning_rate=0.1, loss='ls', max_depth=3,
                          max_features=None, max_leaf_nodes=None,
                          min_impurity_decrease=0.0, min_impurity_split=None,
                          min_samples_leaf=1, min_samples_split=2,
                          min_weight_fraction_leaf=0.0, n_estimators=300,
                          n_iter_no_change=None, presort='deprecated',
                          random_state=2525, subsample=1.0, tol=0.0001,
                          validation_fraction=0.1, verbose=0, warm_start=False)

In [21]:
# Hold-out mean absolute error for GB model 3.
y_pred = gb3.predict(X_test)
mean_absolute_error(y_true=y_test, y_pred=y_pred)

1230.1001412172816

GB: Model 4

In [22]:
# GB model 4: 400 boosting stages.
gb4 = GradientBoostingRegressor(n_estimators=400, random_state=2525).fit(X_train, y_train)
gb4

GradientBoostingRegressor(alpha=0.9, ccp_alpha=0.0, criterion='friedman_mse',
                          init=None, learning_rate=0.1, loss='ls', max_depth=3,
                          max_features=None, max_leaf_nodes=None,
                          min_impurity_decrease=0.0, min_impurity_split=None,
                          min_samples_leaf=1, min_samples_split=2,
                          min_weight_fraction_leaf=0.0, n_estimators=400,
                          n_iter_no_change=None, presort='deprecated',
                          random_state=2525, subsample=1.0, tol=0.0001,
                          validation_fraction=0.1, verbose=0, warm_start=False)

In [23]:
# Hold-out mean absolute error for GB model 4.
y_pred = gb4.predict(X_test)
mean_absolute_error(y_true=y_test, y_pred=y_pred)

1226.1763225726934

GB: Model 5

In [24]:
# GB model 5: 500 boosting stages.
gb5 = GradientBoostingRegressor(n_estimators=500, random_state=2525).fit(X_train, y_train)
gb5

GradientBoostingRegressor(alpha=0.9, ccp_alpha=0.0, criterion='friedman_mse',
                          init=None, learning_rate=0.1, loss='ls', max_depth=3,
                          max_features=None, max_leaf_nodes=None,
                          min_impurity_decrease=0.0, min_impurity_split=None,
                          min_samples_leaf=1, min_samples_split=2,
                          min_weight_fraction_leaf=0.0, n_estimators=500,
                          n_iter_no_change=None, presort='deprecated',
                          random_state=2525, subsample=1.0, tol=0.0001,
                          validation_fraction=0.1, verbose=0, warm_start=False)

In [26]:
# Hold-out mean absolute error for GB model 5.
y_pred = gb5.predict(X_test)
mean_absolute_error(y_true=y_test, y_pred=y_pred)

1223.5805546267857

GB: Model 6

In [27]:
# GB model 6: 500 boosting stages with the learning rate halved to 0.05.
gb6 = GradientBoostingRegressor(
    learning_rate=0.05, n_estimators=500, random_state=2525
).fit(X_train, y_train)
gb6

GradientBoostingRegressor(alpha=0.9, ccp_alpha=0.0, criterion='friedman_mse',
                          init=None, learning_rate=0.05, loss='ls', max_depth=3,
                          max_features=None, max_leaf_nodes=None,
                          min_impurity_decrease=0.0, min_impurity_split=None,
                          min_samples_leaf=1, min_samples_split=2,
                          min_weight_fraction_leaf=0.0, n_estimators=500,
                          n_iter_no_change=None, presort='deprecated',
                          random_state=2525, subsample=1.0, tol=0.0001,
                          validation_fraction=0.1, verbose=0, warm_start=False)

In [28]:
# Hold-out mean absolute error for GB model 6.
y_pred = gb6.predict(X_test)
mean_absolute_error(y_true=y_test, y_pred=y_pred)

1231.8781595516214

In [33]:
# scikit-optimize is not preinstalled on Colab. Use %pip so the package is
# installed into the running kernel's environment, and pin the version this
# notebook was run with so the search code below keeps working.
%pip install -q scikit-optimize==0.8.1

Collecting scikit-optimize
[?25l  Downloading https://files.pythonhosted.org/packages/8b/03/be33e89f55866065a02e515c5b319304a801a9f1027a9b311a9b1d1f8dc7/scikit_optimize-0.8.1-py2.py3-none-any.whl (101kB)
[K     |███▎                            | 10kB 12.5MB/s eta 0:00:01[K     |██████▌                         | 20kB 1.8MB/s eta 0:00:01[K     |█████████▊                      | 30kB 2.2MB/s eta 0:00:01[K     |█████████████                   | 40kB 2.5MB/s eta 0:00:01[K     |████████████████▏               | 51kB 2.0MB/s eta 0:00:01[K     |███████████████████▍            | 61kB 2.3MB/s eta 0:00:01[K     |██████████████████████▊         | 71kB 2.5MB/s eta 0:00:01[K     |██████████████████████████      | 81kB 2.7MB/s eta 0:00:01[K     |█████████████████████████████▏  | 92kB 2.8MB/s eta 0:00:01[K     |████████████████████████████████| 102kB 2.3MB/s 
Collecting pyaml>=16.9
  Downloading https://files.pythonhosted.org/packages/15/c4/1310a054d33abc318426a956e7d6df0df76a6ddf

#### Hyper parameter estimation for Gradient Boosting

In [43]:
from sklearn.base import clone
from sklearn.model_selection import cross_val_score
from skopt.space import Real, Integer
from skopt.utils import use_named_args

n_features = X_train.shape[1]

# Search space for the gradient-boosting hyper-parameters. Each dimension is
# named after the estimator parameter it controls, so a sampled point can be
# passed straight to `set_params`.
space  = [Integer(1, 5, name='max_depth'),
          Real(10**-5, 10**0, "log-uniform", name='learning_rate'),
          Integer(1, n_features, name='max_features'),
          Integer(2, 100, name='min_samples_split'),
          Integer(1, 100, name='min_samples_leaf')]


# This decorator lets the objective receive the sampled point as keyword
# arguments, which is convenient for setting scikit-learn estimator
# parameters.
@use_named_args(space)
def objective(**params):
    """Return the 5-fold cross-validated MAE on the training split.

    Parameters
    ----------
    **params
        One value per dimension of `space`, keyed by the dimension name
        (max_depth, learning_rate, max_features, min_samples_split,
        min_samples_leaf).

    Returns
    -------
    float
        Mean absolute error averaged over the 5 folds (positive; lower is
        better), suitable for minimisation.
    """
    # Work on an unfitted copy so the already-fitted `gb1`, whose hold-out
    # score is reported above, is not left carrying the last-tried parameters.
    candidate = clone(gb1).set_params(**params)

    # scikit-learn reports the negated MAE; flip the sign to minimise it.
    return -np.mean(cross_val_score(candidate, X_train, y_train, cv=5, n_jobs=-1,
                                    scoring="neg_mean_absolute_error"))


In [44]:
from sklearn.model_selection import cross_val_score
from skopt import gp_minimize

# Gaussian-process based search over `space`, 50 evaluations of `objective`.
# random_state seeds the optimiser so that Restart & Run All repeats the same
# search. The originally recorded run was unseeded, so a seeded re-run need
# not land on exactly the same score.
res_gp = gp_minimize(objective, space, n_calls=50, random_state=2525)

"Best score=%.4f" % res_gp.fun


'Best score=1214.5313'

In [45]:
# Report the best point found by the search, in the order the dimensions were
# declared in `space`.
best_point = tuple(res_gp.x[index] for index in range(5))
report_template = """Best parameters:
- max_depth=%d
- learning_rate=%.6f
- max_features=%d
- min_samples_split=%d
- min_samples_leaf=%d"""
print(report_template % best_point)

Best parameters:
- max_depth=5
- learning_rate=0.358839
- max_features=173
- min_samples_split=25
- min_samples_leaf=91


### Multi-layer Perceptron regressor (Neural Network)


In [13]:
from sklearn.neural_network import MLPRegressor

MLPR Model 1

In [None]:
# MLPR model 1: default network with a fixed seed.
nn1 = MLPRegressor(random_state=1).fit(X_train, y_train)
nn1



MLPRegressor(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
             beta_2=0.999, early_stopping=False, epsilon=1e-08,
             hidden_layer_sizes=(100,), learning_rate='constant',
             learning_rate_init=0.001, max_fun=15000, max_iter=200,
             momentum=0.9, n_iter_no_change=10, nesterovs_momentum=True,
             power_t=0.5, random_state=1, shuffle=True, solver='adam',
             tol=0.0001, validation_fraction=0.1, verbose=False,
             warm_start=False)

In [None]:
# Hold-out mean absolute error for MLPR model 1.
y_pred_nn1 = nn1.predict(X_test)
nn1_result = mean_absolute_error(y_true=y_test, y_pred=y_pred_nn1)
print(nn1_result)

1202.3596050580013


MLPR Model 2

In [15]:
# MLPR model 2: add a second hidden layer (two layers of 100 units each).
nn2 = MLPRegressor(random_state=1, hidden_layer_sizes=(100, 100)).fit(X_train, y_train)
nn2



MLPRegressor(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
             beta_2=0.999, early_stopping=False, epsilon=1e-08,
             hidden_layer_sizes=(100, 100), learning_rate='constant',
             learning_rate_init=0.001, max_fun=15000, max_iter=200,
             momentum=0.9, n_iter_no_change=10, nesterovs_momentum=True,
             power_t=0.5, random_state=1, shuffle=True, solver='adam',
             tol=0.0001, validation_fraction=0.1, verbose=False,
             warm_start=False)

In [16]:
# Hold-out mean absolute error for MLPR model 2.
y_pred_nn2 = nn2.predict(X_test)
nn2_result = mean_absolute_error(y_true=y_test, y_pred=y_pred_nn2)
print(nn2_result)

1309.0007252635398


Adding a second hidden layer actually made the result worse; the larger network may have overfit the training data.

MLPR Model 3

In [15]:
# MLPR model 3: two smaller hidden layers (30 units each) with a larger
# L2 penalty (alpha=0.001).
nn3 = MLPRegressor(
    random_state=1, alpha=0.001, hidden_layer_sizes=(30, 30)
).fit(X_train, y_train)
nn3



MLPRegressor(activation='relu', alpha=0.001, batch_size='auto', beta_1=0.9,
             beta_2=0.999, early_stopping=False, epsilon=1e-08,
             hidden_layer_sizes=(30, 30), learning_rate='constant',
             learning_rate_init=0.001, max_fun=15000, max_iter=200,
             momentum=0.9, n_iter_no_change=10, nesterovs_momentum=True,
             power_t=0.5, random_state=1, shuffle=True, solver='adam',
             tol=0.0001, validation_fraction=0.1, verbose=False,
             warm_start=False)

In [19]:
# Hold-out mean absolute error for MLPR model 3.
y_pred_nn3 = nn3.predict(X_test)
nn3_result = mean_absolute_error(y_true=y_test, y_pred=y_pred_nn3)
print(nn3_result)

1219.7420347587358


In [14]:
# MLPR model 4: a single hidden layer of 80 units.
nn4 = MLPRegressor(hidden_layer_sizes=(80,), random_state=1).fit(X_train, y_train)
nn4



MLPRegressor(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
             beta_2=0.999, early_stopping=False, epsilon=1e-08,
             hidden_layer_sizes=(80,), learning_rate='constant',
             learning_rate_init=0.001, max_fun=15000, max_iter=200,
             momentum=0.9, n_iter_no_change=10, nesterovs_momentum=True,
             power_t=0.5, random_state=1, shuffle=True, solver='adam',
             tol=0.0001, validation_fraction=0.1, verbose=False,
             warm_start=False)

In [15]:
# Hold-out mean absolute error for MLPR model 4.
y_pred_nn4 = nn4.predict(X_test)
nn4_result = mean_absolute_error(y_true=y_test, y_pred=y_pred_nn4)
print(nn4_result)

1193.0783006516408


Let's keep reducing the size of the hidden layer.

In [16]:
# MLPR model 5: a single hidden layer of 70 units.
nn5 = MLPRegressor(hidden_layer_sizes=(70,), random_state=1).fit(X_train, y_train)
nn5



MLPRegressor(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
             beta_2=0.999, early_stopping=False, epsilon=1e-08,
             hidden_layer_sizes=(70,), learning_rate='constant',
             learning_rate_init=0.001, max_fun=15000, max_iter=200,
             momentum=0.9, n_iter_no_change=10, nesterovs_momentum=True,
             power_t=0.5, random_state=1, shuffle=True, solver='adam',
             tol=0.0001, validation_fraction=0.1, verbose=False,
             warm_start=False)

In [17]:
# Hold-out mean absolute error for MLPR model 5.
y_pred_nn5 = nn5.predict(X_test)
nn5_result = mean_absolute_error(y_true=y_test, y_pred=y_pred_nn5)
print(nn5_result)

1193.1677359163489


In [19]:
# MLPR model 6: 80 hidden units with tanh activation.
nn6 = MLPRegressor(
    activation='tanh', hidden_layer_sizes=(80,), random_state=1
).fit(X_train, y_train)
nn6



MLPRegressor(activation='tanh', alpha=0.0001, batch_size='auto', beta_1=0.9,
             beta_2=0.999, early_stopping=False, epsilon=1e-08,
             hidden_layer_sizes=(80,), learning_rate='constant',
             learning_rate_init=0.001, max_fun=15000, max_iter=200,
             momentum=0.9, n_iter_no_change=10, nesterovs_momentum=True,
             power_t=0.5, random_state=1, shuffle=True, solver='adam',
             tol=0.0001, validation_fraction=0.1, verbose=False,
             warm_start=False)

In [20]:
# Hold-out mean absolute error for MLPR model 6.
y_pred_nn6 = nn6.predict(X_test)
nn6_result = mean_absolute_error(y_true=y_test, y_pred=y_pred_nn6)
print(nn6_result)

1241.6729772426604


In [21]:
# MLPR model 7: 80 hidden units with a larger L2 penalty (alpha=0.001).
nn7 = MLPRegressor(
    alpha=0.001, hidden_layer_sizes=(80,), random_state=1
).fit(X_train, y_train)
nn7



MLPRegressor(activation='relu', alpha=0.001, batch_size='auto', beta_1=0.9,
             beta_2=0.999, early_stopping=False, epsilon=1e-08,
             hidden_layer_sizes=(80,), learning_rate='constant',
             learning_rate_init=0.001, max_fun=15000, max_iter=200,
             momentum=0.9, n_iter_no_change=10, nesterovs_momentum=True,
             power_t=0.5, random_state=1, shuffle=True, solver='adam',
             tol=0.0001, validation_fraction=0.1, verbose=False,
             warm_start=False)

In [22]:
# Hold-out mean absolute error for MLPR model 7.
y_pred_nn7 = nn7.predict(X_test)
nn7_result = mean_absolute_error(y_true=y_test, y_pred=y_pred_nn7)
print(nn7_result)

1196.2222961903192


In [23]:
# MLPR model 8: 80 hidden units with an even larger L2 penalty (alpha=0.01).
nn8 = MLPRegressor(
    alpha=0.01, hidden_layer_sizes=(80,), random_state=1
).fit(X_train, y_train)
nn8



MLPRegressor(activation='relu', alpha=0.01, batch_size='auto', beta_1=0.9,
             beta_2=0.999, early_stopping=False, epsilon=1e-08,
             hidden_layer_sizes=(80,), learning_rate='constant',
             learning_rate_init=0.001, max_fun=15000, max_iter=200,
             momentum=0.9, n_iter_no_change=10, nesterovs_momentum=True,
             power_t=0.5, random_state=1, shuffle=True, solver='adam',
             tol=0.0001, validation_fraction=0.1, verbose=False,
             warm_start=False)

In [24]:
# Hold-out mean absolute error for MLPR model 8.
y_pred_nn8 = nn8.predict(X_test)
nn8_result = mean_absolute_error(y_true=y_test, y_pred=y_pred_nn8)
print(nn8_result)

1196.0794979140849


# Summary

We evaluate each model by its mean absolute error (MAE); lower is better. <br>
Linear Regression Model - as a benchmark
- Score: 1304.912 

Random Forest Regressor
-  Best Score: 1244.31
-  It is better than linear regression

Gradient Boosting Regressor
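- Best score: 1214.53 (5-fold cross-validated MAE on the training split, from the hyper-parameter search; the best hold-out MAE was 1223.58 with 500 estimators)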
- Better than Linear Regression and Random Forest

Multi-layer Perceptron Regressor
- Best score: 1193.08
- With one hidden layer of 80 neurons