In [1]:
# import libraries 
import numpy as np
import plotly.express as px
import pandas as pd
import statistics

import warnings
warnings.filterwarnings("ignore")

In [2]:
# Load the insurance dataset; the relative path is resolved against the
# kernel's current working directory.
insurance = pd.read_csv('insurance.csv')

In [3]:
def smokerToBinary(cond):
    """Encode a yes/no smoker flag as an integer.

    Parameters
    ----------
    cond : object
        Raw flag value. Strings are compared after stripping surrounding
        whitespace and lower-casing, so ``"Yes"`` and ``" no "`` are accepted
        alongside the exact labels ``"yes"`` and ``"no"``.

    Returns
    -------
    int or None
        ``1`` for "yes", ``0`` for "no", and ``None`` for any other value
        (including non-string values such as a missing entry).
    """
    # Non-strings can never match a label; bail out before using str methods.
    if not isinstance(cond, str):
        return None
    return {"yes": 1, "no": 0}.get(cond.strip().lower())

# Element-wise encoding of the "smoker" column into a new numeric column.
insurance["binary_smoker"] = insurance["smoker"].map(smokerToBinary)

In [4]:
def sexToBinary(sex):
    """Encode a male/female label as an integer.

    Parameters
    ----------
    sex : object
        Raw label. Strings are compared after stripping surrounding
        whitespace and lower-casing, so ``"Male"`` and ``" female "`` are
        accepted alongside the exact labels ``"male"`` and ``"female"``.

    Returns
    -------
    int or None
        ``1`` for "male", ``0`` for "female", and ``None`` for any other
        value (including non-string values such as a missing entry).
    """
    # Non-strings can never match a label; bail out before using str methods.
    if not isinstance(sex, str):
        return None
    return {"male": 1, "female": 0}.get(sex.strip().lower())

# Element-wise encoding of the "sex" column into a new numeric column.
insurance["binary_sex"] = insurance["sex"].map(sexToBinary)

In [5]:
# One indicator column per distinct "region" value, appended to the right of
# the original frame (rows are aligned on the shared index).
enc_region = pd.get_dummies(insurance["region"])
insurance_cleaned = pd.concat([insurance, enc_region], axis="columns")

In [6]:
# Remove the original text columns. A single non-inplace drop removes all
# three at once, so a failure (e.g. a missing column) cannot leave the frame
# half-modified the way three separate in-place drops could.
insurance_cleaned = insurance_cleaned.drop(columns=["smoker", "sex", "region"])

In [7]:
# Preview the first row of the cleaned frame.
insurance_cleaned.head(1)

Unnamed: 0,age,bmi,children,charges,binary_smoker,binary_sex,northeast,northwest,southeast,southwest
0,19,27.9,0,16884.924,1,0,0,0,0,1


In [8]:
# Regression target: the "charges" column.
target = insurance_cleaned["charges"]

In [9]:
# Feature matrix: every column of the cleaned frame except "charges".
input_columns = insurance_cleaned.drop(columns="charges")

In [10]:
import sklearn
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPRegressor

In [11]:
# Hold out 20% of the rows for evaluation. A fixed random_state makes the
# split, and every metric computed from it, repeatable across kernel restarts.
x_train, x_test, y_train, y_test = train_test_split(
    input_columns, target, test_size=0.2, random_state=42
)

In [22]:
# random_state pins the initial weights so repeated fits give the same model.
# NOTE(review): the features are passed in unscaled; scikit-learn recommends
# standardising inputs for MLPs — consider a StandardScaler pipeline.
mlpr = MLPRegressor(
    activation='relu',
    alpha=0.0008,
    max_iter=8000,
    solver='lbfgs',
    random_state=42,
)

In [30]:
# Train the network on the training split.
# NOTE(review): this cell's execution count (30) is higher than those of the
# prediction/MAE/MSE cells that follow it (24-26), so the recorded metrics may
# come from an earlier fit — re-run the notebook top to bottom to confirm.
mlpr.fit(x_train, y_train)

In [24]:
# Predict the target for the held-out rows.
y_hat = mlpr.predict(x_test)

In [25]:
# Mean absolute error between the held-out targets and the predictions.
MAE = (y_test - y_hat).abs().mean()
MAE

3096.2791974734364

In [26]:
from sklearn.metrics import mean_squared_error

# Mean squared error of the held-out predictions.
mean_squared_error(y_test, y_hat)

26402271.280600812

In [31]:
# Score the fitted model on the held-out rows (for scikit-learn regressors,
# `score` is documented as the coefficient of determination, R^2).
mlpr.score(x_test, y_test)

0.7673052945026386

In [32]:
from sklearn.model_selection import GridSearchCV

# Hyper-parameter grid: 4 activations x 3 solvers x 5 alphas x 3 iteration
# caps = 180 candidate settings, each fitted once per cross-validation fold.
parameters = {
    "activation": ["identity", "logistic", "tanh", "relu"],
    "solver": ["lbfgs", "sgd", "adam"],
    "alpha": [0.0006, 0.0007, 0.0008, 0.0009, 0.001],
    "max_iter": [8000, 16000, 32000]
}

# random_state fixes each candidate's weight initialisation so the selected
# parameters do not vary between runs for that reason; n_jobs=-1 runs the
# independent fits on all available cores instead of one after another.
optimizer = GridSearchCV(
    MLPRegressor(random_state=42),
    param_grid=parameters,
    n_jobs=-1,
)
optimizer.fit(x_train, y_train)

In [20]:
# Show the parameter combination selected by the grid search.
# NOTE(review): this cell's execution count (20) is lower than the search
# cell's (32), so the recorded output may predate the latest search — re-run
# in order to confirm.
optimizer.best_params_

{'activation': 'relu', 'alpha': 0.0008, 'max_iter': 8000, 'solver': 'lbfgs'}

In [19]:
# Score the grid-search result on the held-out rows.
optimizer.score(x_test, y_test)

0.7673035039821849

In [21]:
# Predict the held-out rows with the grid-search result.
# NOTE(review): this rebinds y_hat, replacing the predictions made earlier
# with mlpr. An assignment displays nothing, so the value recorded under this
# cell presumably comes from a different revision of the cell — re-run to
# confirm.
y_hat = optimizer.predict(x_test)

9430.217615838765