In [18]:
import pandas as pd
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVR
from sklearn.preprocessing import StandardScaler
import pickle

In [19]:
dataset = pd.read_csv("insurance_pre.csv")
dataset

Unnamed: 0,age,sex,bmi,children,smoker,charges
0,19,female,27.900,0,yes,16884.92400
1,18,male,33.770,1,no,1725.55230
2,28,male,33.000,3,no,4449.46200
3,33,male,22.705,0,no,21984.47061
4,32,male,28.880,0,no,3866.85520
...,...,...,...,...,...,...
1333,50,male,30.970,3,no,10600.54830
1334,18,female,31.920,0,no,2205.98080
1335,18,female,36.850,0,no,1629.83350
1336,21,female,25.800,0,no,2007.94500


In [20]:
dataset=pd.get_dummies(dataset,drop_first=True)
dataset

Unnamed: 0,age,bmi,children,charges,sex_male,smoker_yes
0,19,27.900,0,16884.92400,False,True
1,18,33.770,1,1725.55230,True,False
2,28,33.000,3,4449.46200,True,False
3,33,22.705,0,21984.47061,True,False
4,32,28.880,0,3866.85520,True,False
...,...,...,...,...,...,...
1333,50,30.970,3,10600.54830,True,False
1334,18,31.920,0,2205.98080,False,False
1335,18,36.850,0,1629.83350,False,False
1336,21,25.800,0,2007.94500,False,False


In [21]:
dataset.columns

Index(['age', 'bmi', 'children', 'charges', 'sex_male', 'smoker_yes'], dtype='object')

In [22]:
independent=dataset[['age', 'bmi', 'children', 'sex_male', 'smoker_yes']]
independent

Unnamed: 0,age,bmi,children,sex_male,smoker_yes
0,19,27.900,0,False,True
1,18,33.770,1,True,False
2,28,33.000,3,True,False
3,33,22.705,0,True,False
4,32,28.880,0,True,False
...,...,...,...,...,...
1333,50,30.970,3,True,False
1334,18,31.920,0,False,False
1335,18,36.850,0,False,False
1336,21,25.800,0,False,False


In [23]:
dependent = dataset[['charges']]
dependent

Unnamed: 0,charges
0,16884.92400
1,1725.55230
2,4449.46200
3,21984.47061
4,3866.85520
...,...
1333,10600.54830
1334,2205.98080
1335,1629.83350
1336,2007.94500


In [24]:
param_grid={
    "C": [0.01, 0.1, 10, 100, 200, 500],
    "kernel": ["linear", "rbf", "poly", "sigmoid"]
}
grid=GridSearchCV(SVR(),param_grid,refit=True,verbose=3,n_jobs=-1)
grid.fit(independent,dependent)

Fitting 5 folds for each of 24 candidates, totalling 120 fits


  y = column_or_1d(y, warn=True)


In [29]:
re=grid.cv_results_
print("The R_Score value for best parameter {}:".format(grid.best_params_))

The R_Score value for best parameter {'C': 500, 'kernel': 'linear'}:


In [27]:
table=pd.DataFrame.from_dict(re)
table

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_C,param_kernel,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.097251,0.002265,0.017082,0.000354,0.01,linear,"{'C': 0.01, 'kernel': 'linear'}",-0.109285,-0.090622,-0.073438,-0.097328,-0.12469,-0.099073,0.017275,5
1,0.113744,0.004043,0.062329,0.002319,0.01,rbf,"{'C': 0.01, 'kernel': 'rbf'}",-0.115328,-0.113522,-0.08159,-0.106046,-0.107206,-0.104738,0.012107,10
2,0.09727,0.002837,0.020216,0.003843,0.01,poly,"{'C': 0.01, 'kernel': 'poly'}",-0.115293,-0.112538,-0.081331,-0.106025,-0.106953,-0.104428,0.012052,8
3,0.196815,0.028417,0.032418,0.00674,0.01,sigmoid,"{'C': 0.01, 'kernel': 'sigmoid'}",-0.115326,-0.113576,-0.081609,-0.106063,-0.107201,-0.104755,0.012108,11
4,0.119343,0.020497,0.018518,0.002929,0.1,linear,"{'C': 0.1, 'kernel': 'linear'}",-0.156586,-0.102518,-0.117799,-0.140213,-0.216269,-0.146677,0.039413,19
5,0.11229,0.003573,0.064345,0.003023,0.1,rbf,"{'C': 0.1, 'kernel': 'rbf'}",-0.115326,-0.113152,-0.081428,-0.105862,-0.107303,-0.104614,0.012116,9
6,0.09786,0.002884,0.018397,0.000949,0.1,poly,"{'C': 0.1, 'kernel': 'poly'}",-0.112857,-0.105099,-0.077609,-0.104969,-0.108333,-0.101773,0.012418,7
7,0.124978,0.002302,0.027201,0.000305,0.1,sigmoid,"{'C': 0.1, 'kernel': 'sigmoid'}",-0.115314,-0.113654,-0.081616,-0.106034,-0.107251,-0.104774,0.012116,12
8,0.115875,0.009054,0.016685,0.000448,10.0,linear,"{'C': 10, 'kernel': 'linear'}",-0.018943,0.029886,0.022167,0.003112,-0.066653,-0.006086,0.034672,4
9,0.112815,0.003521,0.063009,0.001534,10.0,rbf,"{'C': 10, 'kernel': 'rbf'}",-0.110409,-0.097459,-0.074488,-0.099597,-0.124428,-0.101276,0.016459,6


In [28]:
result=grid.predict([[38, 27, 2, True, False]])
result



array([6846.15092953])