In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn import metrics

In [2]:
# Load the insurance CSV into a DataFrame.
# NOTE(review): this is an absolute path on a local D: drive, so the notebook
# will not run on another machine as-is — consider a path relative to the notebook.
insurance_dataset = pd.read_csv(r'D:\Harshith\Medi_insu/insurance.csv')
# Bare last expression so the notebook renders the frame.
insurance_dataset


Unnamed: 0,age,sex,bmi,children,smoker,region,charges
0,19,female,27.900,0,yes,southwest,16884.92400
1,18,male,33.770,1,no,southeast,1725.55230
2,28,male,33.000,3,no,southeast,4449.46200
3,33,male,22.705,0,no,northwest,21984.47061
4,32,male,28.880,0,no,northwest,3866.85520
...,...,...,...,...,...,...,...
1333,50,male,30.970,3,no,northwest,10600.54830
1334,18,female,31.920,0,no,northeast,2205.98080
1335,18,female,36.850,0,no,southeast,1629.83350
1336,21,female,25.800,0,no,southwest,2007.94500


In [3]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
insurance_dataset.describe()

Unnamed: 0,age,bmi,children,charges
count,1338.0,1338.0,1338.0,1338.0
mean,39.207025,30.663397,1.094918,13270.422265
std,14.04996,6.098187,1.205493,12110.011237
min,18.0,15.96,0.0,1121.8739
25%,27.0,26.29625,0.0,4740.28715
50%,39.0,30.4,1.0,9382.033
75%,51.0,34.69375,2.0,16639.912515
max,64.0,53.13,5.0,63770.42801


In [4]:

# Encode the three text columns as integer codes, modifying the frame in place.
# A nested dict maps column name -> {old value: new value}, so all three
# columns are handled by one replace() call.
category_codes = {
    'sex': {'male': 0, 'female': 1},
    'smoker': {'yes': 0, 'no': 1},
    'region': {'southeast': 0, 'southwest': 1, 'northeast': 2, 'northwest': 3},
}
insurance_dataset.replace(category_codes, inplace=True)

In [5]:
# Feature matrix: every column except the last one.
X = insurance_dataset.iloc[:,:-1]
# Bare last expression so the notebook renders the features.
X

Unnamed: 0,age,sex,bmi,children,smoker,region
0,19,1,27.900,0,0,1
1,18,0,33.770,1,1,0
2,28,0,33.000,3,1,0
3,33,0,22.705,0,1,3
4,32,0,28.880,0,1,3
...,...,...,...,...,...,...
1333,50,0,30.970,3,1,3
1334,18,1,31.920,0,1,2
1335,18,1,36.850,0,1,0
1336,21,1,25.800,0,1,1


In [6]:
# Target: the last column only. The `-1:` slice (rather than `-1`) keeps Y as a
# one-column DataFrame instead of a Series, so the fitted model's predictions
# come back as 2-D arrays.
Y = insurance_dataset.iloc[:,-1:]
# Bare last expression so the notebook renders the target.
Y

Unnamed: 0,charges
0,16884.92400
1,1725.55230
2,4449.46200
3,21984.47061
4,3866.85520
...,...
1333,10600.54830
1334,2205.98080
1335,1629.83350
1336,2007.94500


In [7]:
# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# split repeatable between runs.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=2,
)
# Shapes of the full, training and test feature matrices, as a sanity check.
(X.shape, X_train.shape, X_test.shape)

((1338, 6), (1070, 6), (268, 6))

In [8]:
# Fit a linear regression on the training split; fit() returns the estimator
# itself, so construction and fitting can be chained.
model = LinearRegression().fit(X_train, Y_train)
# Predictions on the same rows the model was trained on.
training_data_prediction = model.predict(X_train)

In [9]:
# R squared value on the training split: the share of variance in Y_train
# that the model's predictions account for.
r2_train = metrics.r2_score(Y_train, training_data_prediction)
# Label spelling fixed ("vale" -> "value") so it matches the test-set printout.
print('R squared value : ', r2_train)

R squared vale :  0.751505643411174


In [10]:
# Predictions for the held-out test rows.
test_data_prediction =model.predict(X_test)

In [11]:
# R squared value on the held-out test split.
r2_test = metrics.r2_score(y_true=Y_test, y_pred=test_data_prediction)
r2_label = 'R squared value : '
print(r2_label, r2_test)

R squared value :  0.7447273869684077


In [12]:
# One record to score, in the same column order as X:
# age, sex (male=0, female=1), bmi, children, smoker (yes=0, no=1),
# region (southeast=0, southwest=1, northeast=2, northwest=3).
input_data = (21, 1, 25.74, 0, 1, 0)

# changing input_data to a numpy array
input_data_as_numpy_array = np.asarray(input_data)

# reshape the array to a single row: (1, n_features)
input_data_reshaped = input_data_as_numpy_array.reshape(1, -1)

# The model was fitted on a DataFrame, so give it the same column names back;
# passing a bare ndarray makes scikit-learn warn about missing feature names.
input_frame = pd.DataFrame(input_data_reshaped, columns=X.columns)

prediction = model.predict(input_frame)
print(prediction)

# Y was a one-column DataFrame, so predict() returns a nested (1, 1) array.
# Flatten it and take the scalar so a number is printed, not an array repr.
predicted_cost = float(np.ravel(prediction)[0])
print('The insurance cost is USD ', predicted_cost)

[[1246.0293569]]
The insurance cost is USD  [1246.0293569]




In [13]:
import joblib

# Save the fitted model to 'model.pkl' in the current working directory so the
# GUI cell can load it back with joblib.load.
joblib.dump(model, 'model.pkl')


['model.pkl']

In [14]:
# Turn on IPython's Tk event-loop integration for the GUI built below.
# NOTE(review): the GUI cell also calls root.mainloop(), which blocks that cell
# until the window is closed — confirm whether both are needed.
%gui tk


In [None]:
import tkinter as tk
from tkinter import ttk
import joblib

def predict():
    """Read the six feature fields, run the model and display the result.

    Reads the module-level widgets ``entry1`` .. ``entry6`` (in training column
    order), scores them with the module-level ``model`` and writes the outcome
    to ``result_label``. Returns nothing; the label text is the only output.

    If any field is blank or not a number, a message is shown in the label and
    no prediction is made.
    """
    entries = (entry1, entry2, entry3, entry4, entry5, entry6)

    try:
        features = [float(entry.get()) for entry in entries]
    except ValueError:
        # Without this, the error surfaces inside the Tk callback and the
        # window gives the user no feedback at all.
        result_label.config(text="Prediction: please enter a number in every field")
        return

    # Make a prediction using the model (a single-row batch).
    prediction = model.predict([features])

    # predict() can return a nested array (e.g. shape (1, 1) when the model was
    # fitted on a 2-D target); flatten it so a plain number is displayed
    # instead of an array repr such as "[1246.03]".
    cost = float(np.ravel(prediction)[0])

    # Display the result
    result_label.config(text=f"Prediction: {cost:.2f}")

# Load the fitted model that an earlier cell saved to 'model.pkl'.
# joblib.load unpickles the file, so only load model files you produced yourself.
model = joblib.load('model.pkl')

# Create the GUI
root = tk.Tk()
root.title("Model Prediction")

# One labelled entry per feature, in the column order the model was trained on.
# The captions spell out the integer codes used when the categorical columns
# were encoded, because the model only accepts those numbers.
field_captions = (
    "Age:",
    "Sex (0 = male, 1 = female):",
    "BMI:",
    "Children:",
    "Smoker (0 = yes, 1 = no):",
    "Region (0 = southeast, 1 = southwest, 2 = northeast, 3 = northwest):",
)
feature_entries = []
for caption in field_captions:
    ttk.Label(root, text=caption).pack()
    field = ttk.Entry(root)
    field.pack()
    feature_entries.append(field)

# predict() looks the widgets up by these module-level names.
entry1, entry2, entry3, entry4, entry5, entry6 = feature_entries

# Create a button to trigger prediction
predict_button = ttk.Button(root, text="Predict", command=predict)
predict_button.pack()

# Label to show the prediction result
result_label = ttk.Label(root, text="Prediction:")
result_label.pack()

# Start the Tkinter event loop; this call blocks until the window is closed.
root.mainloop()


