In [52]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns


In [53]:
df=pd.read_csv("insurance.csv")
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1338 entries, 0 to 1337
Data columns (total 7 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   age       1338 non-null   int64  
 1   sex       1338 non-null   object 
 2   bmi       1338 non-null   float64
 3   children  1338 non-null   int64  
 4   smoker    1338 non-null   object 
 5   region    1338 non-null   object 
 6   charges   1338 non-null   float64
dtypes: float64(2), int64(2), object(3)
memory usage: 73.3+ KB


In [54]:
df.head()

Unnamed: 0,age,sex,bmi,children,smoker,region,charges
0,19,female,27.9,0,yes,southwest,16884.924
1,18,male,33.77,1,no,southeast,1725.5523
2,28,male,33.0,3,no,southeast,4449.462
3,33,male,22.705,0,no,northwest,21984.47061
4,32,male,28.88,0,no,northwest,3866.8552


In [55]:
cat_col=df.select_dtypes(include='object')
print(cat_col)
cat_col['region'].unique()

         sex smoker     region
0     female    yes  southwest
1       male     no  southeast
2       male     no  southeast
3       male     no  northwest
4       male     no  northwest
...      ...    ...        ...
1333    male     no  northwest
1334  female     no  northeast
1335  female     no  southeast
1336  female     no  southwest
1337  female    yes  northwest

[1338 rows x 3 columns]


array(['southwest', 'southeast', 'northwest', 'northeast'], dtype=object)

In [56]:
num_col=df.select_dtypes(exclude='object')
num_col

Unnamed: 0,age,bmi,children,charges
0,19,27.900,0,16884.92400
1,18,33.770,1,1725.55230
2,28,33.000,3,4449.46200
3,33,22.705,0,21984.47061
4,32,28.880,0,3866.85520
...,...,...,...,...
1333,50,30.970,3,10600.54830
1334,18,31.920,0,2205.98080
1335,18,36.850,0,1629.83350
1336,21,25.800,0,2007.94500


In [57]:
from sklearn.preprocessing import OneHotEncoder
ohe=OneHotEncoder()
cat_col_encoded=ohe.fit_transform(cat_col).toarray()
cat_col_encoded

array([[1., 0., 0., ..., 0., 0., 1.],
       [0., 1., 1., ..., 0., 1., 0.],
       [0., 1., 1., ..., 0., 1., 0.],
       ...,
       [1., 0., 1., ..., 0., 1., 0.],
       [1., 0., 1., ..., 0., 0., 1.],
       [1., 0., 0., ..., 1., 0., 0.]])

In [58]:
column_names=list(ohe.get_feature_names_out())
column_names

['sex_female',
 'sex_male',
 'smoker_no',
 'smoker_yes',
 'region_northeast',
 'region_northwest',
 'region_southeast',
 'region_southwest']

In [59]:
one_hot=pd.DataFrame(cat_col_encoded,columns=column_names)
one_hot.head()


Unnamed: 0,sex_female,sex_male,smoker_no,smoker_yes,region_northeast,region_northwest,region_southeast,region_southwest
0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0
1,0.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0
2,0.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0
3,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0
4,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0


In [60]:
one_hot=one_hot.reset_index(drop=True)# to prevent from giving null values after concating 
num_col=num_col.reset_index(drop=True)

In [61]:
final_df=pd.concat([one_hot,num_col],axis=1)
final_df.head()

Unnamed: 0,sex_female,sex_male,smoker_no,smoker_yes,region_northeast,region_northwest,region_southeast,region_southwest,age,bmi,children,charges
0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,19,27.9,0,16884.924
1,0.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,18,33.77,1,1725.5523
2,0.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,28,33.0,3,4449.462
3,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,33,22.705,0,21984.47061
4,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,32,28.88,0,3866.8552


In [62]:
x=final_df.drop("charges",axis=1)
y=final_df["charges"]

In [63]:
from sklearn.model_selection import train_test_split
x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.2,random_state=42)

In [64]:
from sklearn.preprocessing import StandardScaler
sc=StandardScaler()
x_train_transform=sc.fit_transform(x_train)
x_test_transform=sc.transform(x_test)

In [65]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

In [66]:
x_train.shape[1]

11

In [67]:
#initialising Sequential
model=Sequential()
# Initialize the model
# Input layer + first hidden layer
#model.add(Dense(    -->no of neurons -> activation -->input dimension ))

model.add(Dense(68, activation="relu", input_dim=x_train_transform.shape[1]))

# Hidden layers
model.add(Dense(32, activation="relu"))
model.add(Dense(24, activation="relu"))
model.add(Dense(12, activation="relu"))

# Output layer
model.add(Dense(1 , activation="linear"))

import tensorflow as tf
import tensorflow.keras.backend as K


def r2_metric(y_true, y_pred):
    SS_res =  K.sum(K.square(y_true - y_pred))
    SS_tot = K.sum(K.square(y_true - K.mean(y_true)))
    return 1 - SS_res/(SS_tot + K.epsilon())

# Compile the model
model.compile(optimizer="adam", loss="mean_squared_error", metrics=["mae","mse",r2_metric])

# Display model summary
model.summary()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [68]:
history=model.fit(x_train_transform,
                  y_train,
                  epochs=10,
                  validation_data=(x_test_transform,y_test),
                  )

Epoch 1/10
[1m34/34[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 12ms/step - loss: 333350080.0000 - mae: 13513.5938 - mse: 333350080.0000 - r2_metric: -1.3091 - val_loss: 323345152.0000 - val_mae: 12965.5195 - val_mse: 323345152.0000 - val_r2_metric: -1.1511
Epoch 2/10
[1m34/34[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 320569312.0000 - mae: 13338.5596 - mse: 320569312.0000 - r2_metric: -1.3623 - val_loss: 322683968.0000 - val_mae: 12944.4062 - val_mse: 322683968.0000 - val_r2_metric: -1.1466
Epoch 3/10
[1m34/34[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 295478176.0000 - mae: 12833.7939 - mse: 295478176.0000 - r2_metric: -1.3709 - val_loss: 318112448.0000 - val_mae: 12810.8486 - val_mse: 318112448.0000 - val_r2_metric: -1.1158
Epoch 4/10
[1m34/34[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 302941312.0000 - mae: 12831.7402 - mse: 302941312.0000 - r2_metric: -1.2417 - val_loss: 297609280.0000 

In [69]:
## prediction
y_pred=model.predict(x_test_transform)
prediction_label=(y_pred>0.5).astype(int).ravel()
prediction_label

[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step


array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1])

In [70]:
# to save model
model.save("Insurance_model.keras")