In [1]:
import numpy as np
import pandas as pd
import statistics
import sklearn
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import mean_squared_error

In [2]:
insurance = pd.read_csv('insurance.csv')

In [7]:
def smokerToBinary(cond):
    if cond == "yes":
        return 1
    elif cond == "no":
        return 0

insurance["smoker"] = insurance["smoker"].apply(smokerToBinary)

In [8]:
def sexToBinary(sex):
    if sex == "male":
        return 1
    elif sex == "female":
        return 0

insurance["sex"] = insurance["sex"].apply(sexToBinary)

In [9]:
insurance.head(1)

Unnamed: 0,age,sex,bmi,children,smoker,region,charges,binary_smoker,binary_sex
0,19,0,27.9,0,1,southwest,16884.924,1,0


In [10]:
insurance.drop("binary_smoker", axis=1, inplace=True)
insurance.drop("binary_sex", axis=1, inplace=True)

In [12]:
insurance.head(1)

Unnamed: 0,age,sex,bmi,children,smoker,region,charges
0,19,0,27.9,0,1,southwest,16884.924


In [14]:
def change_region(val):
    if val == 'southwest':
        return 0
    elif val == 'southeast':
        return 1
    elif val == 'northwest':
        return 2
    elif val == 'northeast':
        return 3

insurance['region'] = insurance['region'].apply(change_region)

In [15]:
insurance.head()

Unnamed: 0,age,sex,bmi,children,smoker,region,charges
0,19,0,27.9,0,1,0,16884.924
1,18,1,33.77,1,0,1,1725.5523
2,28,1,33.0,3,0,1,4449.462
3,33,1,22.705,0,0,2,21984.47061
4,32,1,28.88,0,0,2,3866.8552


In [16]:
target = insurance["charges"]

In [17]:
input_columns = insurance.loc[:, insurance.columns != "charges"]

In [18]:
x_train, x_test, y_train, y_test = train_test_split(input_columns, target, test_size=0.2)

In [19]:
mlpr = MLPRegressor(activation='relu', alpha=0.0008, solver='lbfgs', max_iter=8000, max_fun=30000, random_state=3)

In [20]:
mlpr.fit(x_train, y_train)

In [21]:
y_hat = mlpr.predict(x_test)

In [22]:
mlpr.score(x_test, y_test)

0.8764023191876411

In [23]:
MAE = np.mean(abs(y_test - y_hat))
MAE

2902.761355994566

In [24]:
mean_squared_error(y_test, y_hat)

20427604.48741529

In [19]:
type(x_test)

pandas.core.frame.DataFrame

In [20]:
x_test

Unnamed: 0,age,bmi,children,binary_smoker,binary_sex,northeast,northwest,southeast,southwest
251,63,32.200,2,1,0,0,0,0,1
477,25,35.625,0,0,1,0,1,0,0
1050,44,36.955,1,0,0,0,1,0,0
582,39,45.430,2,0,1,0,0,1,0
391,19,37.430,0,0,0,0,1,0,0
...,...,...,...,...,...,...,...,...,...
510,56,32.110,1,0,1,1,0,0,0
819,33,35.530,0,1,0,0,1,0,0
601,51,31.635,0,0,1,0,1,0,0
1144,50,32.300,2,0,1,0,0,0,1


In [21]:
y_hat

array([39254.61250269,  6007.33221805, 11903.51912566, 13209.36971546,
        5131.13740594, 23837.08610721, 29797.88562197,  5633.4313995 ,
       14137.24007966,  8515.12120721,  9133.65982567,  7681.92566073,
       16972.59646873,  8231.08276618, 16750.24776151,  7398.28641202,
       13793.9277007 ,  2922.26970976,  3278.7317642 ,  3059.10602887,
        9695.32765671,  4385.57411538, 18116.49853646, 28715.68072407,
       27716.49984804, 12942.64626257, 39204.73769485,  4196.87052036,
       37335.28614261, 13331.05139787, 27541.87480755, 11690.53359399,
        1853.70970976, 11539.90659769, 12709.75801006,  9365.98362623,
        8663.84309389,  4546.09019959, 11374.36732941,  1823.8287788 ,
       30454.47514046, 35579.04398789,  2322.40562755, 13286.261121  ,
       12995.7937012 , 13256.26857031,  3089.11307822, 14390.42112415,
       31000.8418126 , 14187.95077633,  7325.6839391 ,  2088.54563484,
       10974.9321449 ,  2720.55132709, 25393.42100404, 31101.79987169,
      

In [30]:
user_data = {"age": 25, "bmi": 28.302, "children": 3, "binary_smoker": 0, "binary_sex": 1, "northeast": 0, "northwest": 0, "southeast": 0, "southwest": 1}
user_df = pd.DataFrame(data=user_data, index = [0])

In [32]:
user_df

Unnamed: 0,age,bmi,children,binary_smoker,binary_sex,northeast,northwest,southeast,southwest
0,25,28.302,3,0,1,0,0,0,1


In [33]:
user_predicted = mlpr.predict(user_df)

In [35]:
user_predicted

array([4877.01980886])