In [1]:
#Importing the Libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [2]:
#Reading csv file using pandas.read_csv and saved in data variable
data = pd.read_csv("insurance_pre.csv")

In [3]:
data

Unnamed: 0,age,sex,bmi,children,smoker,charges
0,19,female,27.900,0,yes,16884.92400
1,18,male,33.770,1,no,1725.55230
2,28,male,33.000,3,no,4449.46200
3,33,male,22.705,0,no,21984.47061
4,32,male,28.880,0,no,3866.85520
...,...,...,...,...,...,...
1333,50,male,30.970,3,no,10600.54830
1334,18,female,31.920,0,no,2205.98080
1335,18,female,36.850,0,no,1629.83350
1336,21,female,25.800,0,no,2007.94500


In [4]:
data=pd.get_dummies(data,drop_first=True)

In [5]:
data

Unnamed: 0,age,bmi,children,charges,sex_male,smoker_yes
0,19,27.900,0,16884.92400,0,1
1,18,33.770,1,1725.55230,1,0
2,28,33.000,3,4449.46200,1,0
3,33,22.705,0,21984.47061,1,0
4,32,28.880,0,3866.85520,1,0
...,...,...,...,...,...,...
1333,50,30.970,3,10600.54830,1,0
1334,18,31.920,0,2205.98080,0,0
1335,18,36.850,0,1629.83350,0,0
1336,21,25.800,0,2007.94500,0,0


In [6]:
#independent variable is an input(age,bmi,children)
independent = data[["age","bmi","children","sex_male","smoker_yes"]]
#Depedent variable is output(charges)
dependent = data[["charges"]]

In [7]:
#Creating a model for train set
#from sklearn library call model_selection function and import train_test_split
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(independent,dependent, test_size = 1/3, random_state = 0)

In [8]:
#Standardization is changing every colume into same range of value
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

In [9]:
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVR
param_grid = {"kernel":["rbf","poly","sigmoid","linear"],
             "C":[10,100,1000,2000,3000],"gamma":["auto","scale"]}

grid = GridSearchCV(SVR(), param_grid, refit=True, verbose = 3,n_jobs=-1)
grid.fit(X_train, y_train)

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.


Fitting 3 folds for each of 40 candidates, totalling 120 fits


[Parallel(n_jobs=-1)]: Done  28 tasks      | elapsed:    9.1s
[Parallel(n_jobs=-1)]: Done 117 out of 120 | elapsed:   14.1s remaining:    0.3s
[Parallel(n_jobs=-1)]: Done 120 out of 120 | elapsed:   14.3s finished
  y = column_or_1d(y, warn=True)


GridSearchCV(cv='warn', error_score='raise-deprecating',
             estimator=SVR(C=1.0, cache_size=200, coef0=0.0, degree=3,
                           epsilon=0.1, gamma='auto_deprecated', kernel='rbf',
                           max_iter=-1, shrinking=True, tol=0.001,
                           verbose=False),
             iid='warn', n_jobs=-1,
             param_grid={'C': [10, 100, 1000, 2000, 3000],
                         'gamma': ['auto', 'scale'],
                         'kernel': ['rbf', 'poly', 'sigmoid', 'linear']},
             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
             scoring=None, verbose=3)

In [10]:
re=grid.cv_results_
grid_predictions=grid.predict(X_test)
from sklearn.metrics import r2_score
r_score=r2_score(y_test,grid_predictions)

print("The R_score value for best parameter{}:".format(grid.best_params_),r_score)

The R_score value for best parameter{'C': 3000, 'gamma': 'auto', 'kernel': 'poly'}: 0.8577898390283567


In [11]:
table=pd.DataFrame.from_dict(re)

In [12]:
table

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_C,param_gamma,param_kernel,params,split0_test_score,split1_test_score,split2_test_score,mean_test_score,std_test_score,rank_test_score
0,0.085155,0.05188526,0.02617,0.0146433,10,auto,rbf,"{'C': 10, 'gamma': 'auto', 'kernel': 'rbf'}",0.029133,-0.129874,-0.116121,-0.072174,0.071974,35
1,0.03125,3.893359e-07,0.015626,7.867412e-07,10,auto,poly,"{'C': 10, 'gamma': 'auto', 'kernel': 'poly'}",0.076672,-0.075966,-0.064307,-0.021091,0.069408,32
2,0.046877,2.346805e-06,0.015629,2.922181e-06,10,auto,sigmoid,"{'C': 10, 'gamma': 'auto', 'kernel': 'sigmoid'}",0.079325,-0.093756,-0.075898,-0.029987,0.077767,33
3,0.031253,1.784161e-06,0.005208,0.007364909,10,auto,linear,"{'C': 10, 'gamma': 'auto', 'kernel': 'linear'}",0.429231,0.216643,0.223417,0.28992,0.098712,27
4,0.046869,1.423385e-05,0.015636,1.373341e-05,10,scale,rbf,"{'C': 10, 'gamma': 'scale', 'kernel': 'rbf'}",0.029317,-0.129978,-0.11623,-0.072183,0.072111,36
5,0.040023,0.006223123,0.013219,0.003689482,10,scale,poly,"{'C': 10, 'gamma': 'scale', 'kernel': 'poly'}",0.073471,-0.074221,-0.062481,-0.020971,0.067064,31
6,0.049573,0.0038103,0.015626,3.371748e-07,10,scale,sigmoid,"{'C': 10, 'gamma': 'scale', 'kernel': 'sigmoid'}",0.078551,-0.093412,-0.075585,-0.030027,0.077248,34
7,0.031253,1.173403e-06,0.010419,0.007367101,10,scale,linear,"{'C': 10, 'gamma': 'scale', 'kernel': 'linear'}",0.429231,0.216643,0.223417,0.28992,0.098712,27
8,0.036461,0.007366089,0.010419,0.007367663,100,auto,rbf,"{'C': 100, 'gamma': 'auto', 'kernel': 'rbf'}",0.32137,0.110819,0.106686,0.179784,0.100299,29
9,0.036462,0.007368393,0.010416,0.007365246,100,auto,poly,"{'C': 100, 'gamma': 'auto', 'kernel': 'poly'}",0.548034,0.409799,0.393006,0.450389,0.0695,22


In [17]:
age_input,bmi_input,childern_input,sex_male,smoker_yes=int(input("Enter the Age: ")),int(input("Enter the BMI: ")),int(input("Enter the Children: ")),int(input("Enter the Sex Male 0 or Female 1: ")),int(input("Enter Smoker YES 0 or NO 1: "))

Enter the Age: 24
Enter the BMI: 24
Enter the Children: 2
Enter the Sex Male 0 or Female 1: 0
Enter Smoker YES 0 or NO 1: 1


In [18]:
#Calculating prediction value
Future_Prediction=regressor.predict([[age_input,bmi_input,childern_input,sex_male,smoker_yes]])
print("Future_Prediction={}".format(Future_Prediction))

Future_Prediction=[16390.90461731]
