# Health Insurance Cost Analysis and ML

![/kaggle/input/health-insurance-image/health-insurance-29195078.webp](https://thumbs.dreamstime.com/z/health-insurance-29195078.jpg?w=768)

# 1. Import Packages and Data

In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Load the insurance dataset from the Kaggle input directory into a DataFrame.
data= pd.read_csv('/kaggle/input/data-visualizatiion/insurance.csv', sep =',',encoding ='utf-8')

# 2.Explore Data

 Display Top 5 Rows of The Dataset

In [None]:
data.head()

Check Last 5 Rows of The Dataset


In [None]:
data.tail()

Find Shape of Our Dataset (Number of Rows And Number of Columns)


In [None]:
data.shape

 Get Information About Our Dataset Like Total Number Rows, Total Number of Columns, Datatypes of Each Column And Memory Requirement


In [None]:
data.info()

Check Null Values In The Dataset


In [None]:
data.isnull().sum()

Get Overall Statistics About The Dataset


In [None]:
data.describe(include='all')

Convert Columns From String ['sex', 'smoker', 'region'] To Numerical Values


In [None]:
data['sex'].unique()


In [None]:
data['smoker'].unique()


In [None]:
data['region'].unique()

# 3.Data Visualization

In [None]:
data.columns

In [None]:
# Bar chart of how many rows fall into each 'smoker' category.
sns.countplot(x='smoker',data=data)

plt.show()

In [None]:
# Show how many rows there are for each value of 'sex'. The counts are displayed
# instead of being stored in `X`, a name this notebook uses later for the
# feature matrix.
data['sex'].value_counts()

In [None]:
# Bar chart of how many rows fall into each 'sex' category.
sns.countplot(x='sex',data=data)
plt.show()

In [None]:
# Histogram of the 'age' column; his_age keeps the value returned by plt.hist.
his_age=plt.hist(data['age'])
plt.title('Histogram age dist.')
plt.show()

In [None]:
# Charges against age, with points coloured by the 'smoker' column.
plt.figure(figsize=(8,5))
sns.scatterplot(x="age",y="charges",hue="smoker",data=data)
# Render explicitly, as the other plotting cells do, instead of relying on
# the notebook echoing the returned Axes object.
plt.show()


In [None]:
# Charges for each value of the 'smoker' column.
plt.figure(figsize=(8,5))
sns.scatterplot(x="smoker",y="charges",data=data)
# Render explicitly, as the other plotting cells do, instead of relying on
# the notebook echoing the returned Axes object.
plt.show()

In [None]:
# Bar chart of how many rows there are for each value of 'children'.
sns.countplot(x='children',data=data)
plt.show()

In [None]:
# Charges against number of children, with points coloured by 'smoker'.
plt.figure(figsize=(8,5))
sns.scatterplot(x="children",y="charges",hue="smoker",data=data)
# Render explicitly, as the other plotting cells do, instead of relying on
# the notebook echoing the returned Axes object.
plt.show()

In [None]:
# Bar chart of how many rows fall into each 'region' category.
sns.countplot(x='region',data=data)
plt.show()

In [None]:
# Replace the string categories with integer codes, one lookup table per
# column. Series.map turns any value missing from its table into NaN, so this
# cell is meant to be run once on the freshly loaded data.
category_codes = {
    'sex': {'female': 0, 'male': 1},
    'smoker': {'yes': 1, 'no': 0},
    'region': {'southwest': 1, 'southeast': 2, 'northwest': 3, 'northeast': 4},
}
for column, codes in category_codes.items():
    data[column] = data[column].map(codes)

In [None]:
data.head()

# 8. Store Feature Matrix In X and Response(Target) In Vector y

In [None]:
data.columns

In [None]:
# Target vector: the 'charges' column. Feature matrix: every other column.
y = data['charges']
X = data.drop(columns=['charges'])

# 9. Train/Test split
          1. Split the data into two parts: a training set and a testing set
          2. Train the model(s) on the training set
          3. Test the model(s) on the testing set



In [None]:
from sklearn.model_selection import train_test_split 


In [None]:
# Hold out 20% of the rows for testing; random_state=42 seeds the split so it is repeatable.
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.2,random_state=42)

# 10. Import the models


In [None]:
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import GradientBoostingRegressor

# 11. Model Training


In [None]:
# Fit a linear regression model on the training split.
lr = LinearRegression()
lr.fit(X_train,y_train)


In [None]:
# Fit a support vector regressor, constructed with its default arguments, on the training split.
svm = SVR()
svm.fit(X_train,y_train)


In [None]:
# Fit a random forest on the training split. random_state is pinned, like the
# seed used for the train/test split, so repeated runs build the same forest
# and report the same scores.
rf = RandomForestRegressor(random_state=42)
rf.fit(X_train,y_train)


In [None]:
# Fit a gradient-boosting regressor on the training split. random_state is
# pinned, like the seed used for the train/test split, so the model that is
# later saved and used for predictions is the same on every run.
gr = GradientBoostingRegressor(random_state=42)
gr.fit(X_train,y_train)

# 12. Prediction on Test Data


In [None]:
# Predict the test-set charges with each fitted model
# (1 = linear regression, 2 = SVR, 3 = random forest, 4 = gradient boosting).
y_pred1 = lr.predict(X_test)
y_pred2 = svm.predict(X_test)
y_pred3 = rf.predict(X_test)
y_pred4 = gr.predict(X_test)

# Put the actual charges next to every model's predictions for comparison.
df1 = pd.DataFrame({'Actual':y_test,'Lr':y_pred1,
                  'svm':y_pred2,'rf':y_pred3,'gr':y_pred4})

In [None]:
df1


# 13. Compare Performance Visually 


In [None]:
import matplotlib.pyplot as plt

In [None]:
# Compare each model's predictions with the actual charges on the first 11
# test rows, one subplot per model.
# The test rows keep their original (shuffled) row labels as index; resetting
# the index makes the x-axis run 0..10 so the lines do not zig-zag.
first_rows = df1.iloc[0:11].reset_index(drop=True)

# (column in df1, legend label) for each of the four subplots.
model_series = [('Lr', 'Lr'), ('svm', 'svr'), ('rf', 'rf'), ('gr', 'gr')]
for position, (column, label) in enumerate(model_series, start=1):
    plt.subplot(2, 2, position)
    plt.plot(first_rows['Actual'], label='Actual')
    plt.plot(first_rows[column], label=label)
    plt.legend()

plt.tight_layout()
plt.show()

# 14. Evaluating the Algorithm


In [None]:
from sklearn import metrics

In [None]:
# R^2 score of each model's test-set predictions
# (1 = linear regression, 2 = SVR, 3 = random forest, 4 = gradient boosting).
score1 = metrics.r2_score(y_test,y_pred1)
score2 = metrics.r2_score(y_test,y_pred2)
score3 = metrics.r2_score(y_test,y_pred3)
score4 = metrics.r2_score(y_test,y_pred4)

In [None]:
print(score1,score2,score3,score4)

In [None]:
# Mean absolute error of each model's test-set predictions, in the same order as the R^2 scores.
s1 = metrics.mean_absolute_error(y_test,y_pred1)
s2 = metrics.mean_absolute_error(y_test,y_pred2)
s3 = metrics.mean_absolute_error(y_test,y_pred3)
s4 = metrics.mean_absolute_error(y_test,y_pred4)

In [None]:
print(s1,s2,s3,s4)

# 15. Predict Charges For New Customer

In [None]:
# Feature values for one new customer, using the same integer encodings as
# the training data. Stored under its own name so the `data` DataFrame holding
# the dataset is not overwritten.
new_customer = {'age' : 40,
                'sex' : 1,
                'bmi' : 40.30,
                'children' : 4,
                'smoker' : 1,
                'region' : 2}

# One-row DataFrame, so the model receives named feature columns.
df = pd.DataFrame(new_customer,index=[0])
df

In [None]:
# Predict the charges for the one-row frame with the gradient-boosting model.
new_pred = gr.predict(df)
print("Medical Insurance cost for New Customer is : ",new_pred[0])

# 16. Save Model Using Joblib


In [None]:
import joblib 
# Write the fitted gradient-boosting model to the file 'model_joblib_gr'.
joblib.dump(gr,'model_joblib_gr')

In [None]:
# Load the saved model back from 'model_joblib_gr' (note the name ends in the digit 1: mode1).
mode1 =joblib.load('model_joblib_gr')

In [None]:
mode1.predict(df)

# 17. GUI

from tkinter import *
import joblib

def show_entry_fields():
    """Predict the insurance cost from the six entry fields and display it.

    Reads the module-level entries e1..e6 (age, gender, BMI, number of
    children, smoker, region, in that order), converts each to float, loads
    the model saved as 'model_joblib_gr' and shows its prediction below the
    form. If any field is empty or not a number, a message is shown instead
    of letting the ValueError escape into the Tk callback.
    """
    # Rows 8 and 9 are reserved for the output. Clear whatever a previous
    # click left there so labels do not pile up on top of each other.
    for stale in master.grid_slaves(row=8) + master.grid_slaves(row=9):
        stale.destroy()

    try:
        features = [float(entry.get()) for entry in (e1, e2, e3, e4, e5, e6)]
    except ValueError:
        Label(master, text='Please enter a number in every field').grid(
            row=8, columnspan=2)
        return

    model = joblib.load('model_joblib_gr')
    result = model.predict([features])

    # The Predict button sits in row 7, so the output starts one row lower.
    Label(master, text='Insurance cost is = ').grid(row=8)
    # predict returns a one-element array; show the number, not the array.
    Label(master, text='{:.2f}'.format(result[0])).grid(row=9)




# Build the main window: a heading, six labelled entry fields and a button
# that runs show_entry_fields.
master = Tk()
master.title("Insurance cost")
# grid() returns None, so the heading widget is not bound to a name.
Label(master, text="Insurance cost", bg='black', fg='white').grid(row=0, columnspan=2)

# One prompt per model feature, in the order the model was trained on.
# The codes spelled out here are the ones used when the training data was
# encoded (female=0/male=1, no=0/yes=1, regions 1-4).
field_prompts = [
    "Enter your age ",
    "Enter Gender (0 = female, 1 = male)",
    "Enter BMI",
    "Enter the Number of Children",
    "Smoker (0 = no, 1 = yes)",
    "Region (1 = southwest, 2 = southeast, 3 = northwest, 4 = northeast)",
]
for row, prompt in enumerate(field_prompts, start=1):
    Label(master, text=prompt).grid(row=row)

# show_entry_fields reads these entries by name, so the names e1..e6 are kept.
e1, e2, e3, e4, e5, e6 = (Entry(master) for _ in range(6))
for row, entry in enumerate((e1, e2, e3, e4, e5, e6), start=1):
    entry.grid(row=row, column=1)

# Explicit cell under the entry column, so labels placed in column 0 of the
# rows below the form cannot cover the button.
Button(master, text='Predict', command=show_entry_fields).grid(row=7, column=1)
mainloop()
