In [1]:
#importing the Libraies
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [2]:
# Reading the Dataset
dataset = pd.read_csv('insurance_pre.csv')

In [3]:
dataset

Unnamed: 0,age,sex,bmi,children,smoker,charges
0,19,female,27.900,0,yes,16884.92400
1,18,male,33.770,1,no,1725.55230
2,28,male,33.000,3,no,4449.46200
3,33,male,22.705,0,no,21984.47061
4,32,male,28.880,0,no,3866.85520
...,...,...,...,...,...,...
1333,50,male,30.970,3,no,10600.54830
1334,18,female,31.920,0,no,2205.98080
1335,18,female,36.850,0,no,1629.83350
1336,21,female,25.800,0,no,2007.94500


In [4]:
dataset=pd.get_dummies(dataset,drop_first=True)

In [5]:
dataset

Unnamed: 0,age,bmi,children,charges,sex_male,smoker_yes
0,19,27.900,0,16884.92400,0,1
1,18,33.770,1,1725.55230,1,0
2,28,33.000,3,4449.46200,1,0
3,33,22.705,0,21984.47061,1,0
4,32,28.880,0,3866.85520,1,0
...,...,...,...,...,...,...
1333,50,30.970,3,10600.54830,1,0
1334,18,31.920,0,2205.98080,0,0
1335,18,36.850,0,1629.83350,0,0
1336,21,25.800,0,2007.94500,0,0


In [6]:
indep=dataset[['age', 'bmi', 'children','sex_male', 'smoker_yes']]
dep=dataset['charges']

In [7]:
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVR
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error


# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(indep, dep, test_size=0.2, random_state=42)

# Define the parameter grid with a smaller set of values
param_grid = {'kernel': ['rbf', 'linear'],
              'C': [10, 100, 1000],
              'gamma': ['auto', 'scale']}

# Create the SVR model
svr = SVR()

# Create the GridSearchCV object with parallelization
grid = GridSearchCV(estimator=svr, param_grid=param_grid, refit=True, verbose=3, n_jobs=-1, cv=5)

# Fit the model for grid search
grid.fit(X_train, y_train)

# Print the best parameters found
print("Best Parameters: ", grid.best_params_)

# Make predictions on the test set using the best model
y_pred = grid.predict(X_test)

# Evaluate the performance of the best model
mse = mean_squared_error(y_test, y_pred)
print("Mean Squared Error on Test Set: ", mse)


Fitting 5 folds for each of 12 candidates, totalling 60 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done  24 tasks      | elapsed:    8.5s
[Parallel(n_jobs=-1)]: Done  60 out of  60 | elapsed:   15.4s finished


Best Parameters:  {'C': 1000, 'gamma': 'auto', 'kernel': 'linear'}
Mean Squared Error on Test Set:  44138323.05860212


In [8]:
# print best parameter after tuning 
#print(grid.best_params_) 
re=grid.cv_results_

print("The R_score value for best parameter {}:".format(grid.best_params_))

The R_score value for best parameter {'C': 1000, 'gamma': 'auto', 'kernel': 'linear'}:


In [9]:
table=pd.DataFrame.from_dict(re)

In [10]:
table

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_C,param_gamma,param_kernel,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.160101,0.010754,0.022587,0.004756,10,auto,rbf,"{'C': 10, 'gamma': 'auto', 'kernel': 'rbf'}",-0.109884,-0.166779,-0.070752,-0.102686,-0.052192,-0.100459,0.039254,9
1,0.12792,0.008434,0.010595,0.00102,10,auto,linear,"{'C': 10, 'gamma': 'auto', 'kernel': 'linear'}",-0.080873,-0.094871,0.015562,-0.118474,0.021941,-0.051343,0.058515,5
2,0.125122,0.003486,0.018389,0.00049,10,scale,rbf,"{'C': 10, 'gamma': 'scale', 'kernel': 'rbf'}",-0.116173,-0.172373,-0.063996,-0.109199,-0.044688,-0.101285,0.044568,10
3,0.13072,0.01528,0.009794,0.001599,10,scale,linear,"{'C': 10, 'gamma': 'scale', 'kernel': 'linear'}",-0.080873,-0.094871,0.015562,-0.118474,0.021941,-0.051343,0.058515,5
4,0.1617,0.021392,0.020588,0.004797,100,auto,rbf,"{'C': 100, 'gamma': 'auto', 'kernel': 'rbf'}",-0.114679,-0.159848,-0.057213,-0.099226,-0.044009,-0.094995,0.041565,8
5,0.429735,0.258605,0.014193,0.00897,100,auto,linear,"{'C': 100, 'gamma': 'auto', 'kernel': 'linear'}",0.474988,0.52887,0.532953,0.449242,0.523419,0.501894,0.033621,3
6,0.119927,0.006128,0.018788,0.001469,100,scale,rbf,"{'C': 100, 'gamma': 'scale', 'kernel': 'rbf'}",-0.196612,-0.220507,-0.094891,-0.210705,-0.076203,-0.159784,0.061374,11
7,0.39176,0.240719,0.009994,0.001095,100,scale,linear,"{'C': 100, 'gamma': 'scale', 'kernel': 'linear'}",0.474988,0.52887,0.532953,0.449242,0.523419,0.501894,0.033621,3
8,0.229259,0.005882,0.017189,0.000748,1000,auto,rbf,"{'C': 1000, 'gamma': 'auto', 'kernel': 'rbf'}",-0.131477,-0.137548,-0.033707,-0.117072,-0.021285,-0.088218,0.050177,7
9,1.940611,0.590057,0.008795,0.0004,1000,auto,linear,"{'C': 1000, 'gamma': 'auto', 'kernel': 'linear'}",0.576909,0.673126,0.663089,0.619667,0.632493,0.633057,0.034175,1


In [11]:
age_input=float(input("Age:"))
bmi_input=float(input("BMI:"))
children_input=float(input("Children:"))
sex_male_input=int(input("Sex Male 0 or 1:"))
smoker_yes_input=int(input("Smoker Yes 0 or 1:"))

Age:40
BMI:41.52
Children:2
Sex Male 0 or 1:1
Smoker Yes 0 or 1:0


In [12]:
Future_Prediction=grid.predict([[age_input,bmi_input,children_input,sex_male_input,smoker_yes_input]])# change the paramter,play with it.
print("Future_Prediction={}".format(Future_Prediction))

Future_Prediction=[7855.35946446]
