In [21]:
import pandas as pd
from sklearn.svm import SVR
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import StandardScaler

In [23]:
dataSet = pd.read_csv("insurance_pre.csv")

In [25]:
dataSet = pd.get_dummies(dataSet, drop_first = True, dtype = int)
dataSet

Unnamed: 0,age,bmi,children,charges,sex_male,smoker_yes
0,19,27.900,0,16884.92400,0,1
1,18,33.770,1,1725.55230,1,0
2,28,33.000,3,4449.46200,1,0
3,33,22.705,0,21984.47061,1,0
4,32,28.880,0,3866.85520,1,0
...,...,...,...,...,...,...
1333,50,30.970,3,10600.54830,1,0
1334,18,31.920,0,2205.98080,0,0
1335,18,36.850,0,1629.83350,0,0
1336,21,25.800,0,2007.94500,0,0


In [27]:
independent = dataSet[['age', 'bmi', 'children','sex_male', 'smoker_yes']]
dependent = dataSet['charges']

In [29]:
#split into training set and test
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(independent, dependent, test_size = 0.3, random_state = 0)
X_train, X_test, y_train, y_test

(      age     bmi  children  sex_male  smoker_yes
 1163   18  28.215         0         0           0
 196    39  32.800         0         0           0
 438    52  46.750         5         0           0
 183    44  26.410         0         0           0
 1298   33  27.455         2         1           0
 ...   ...     ...       ...       ...         ...
 763    27  26.030         0         1           0
 835    42  35.970         2         1           0
 1216   40  25.080         0         1           0
 559    19  35.530         0         1           0
 684    33  18.500         1         0           0
 
 [936 rows x 5 columns],
       age     bmi  children  sex_male  smoker_yes
 578    52  30.200         1         1           0
 610    47  29.370         1         0           0
 569    48  40.565         2         1           1
 1034   61  38.380         0         1           0
 198    51  18.050         0         0           0
 ...   ...     ...       ...       ...         ...
 126

In [31]:
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)
X_train, X_test

(array([[-1.5330973 , -0.40713453, -0.89833872, -0.97676557, -0.50466988],
        [-0.03364163,  0.32855417, -0.89833872, -0.97676557, -0.50466988],
        [ 0.89459283,  2.56690911,  3.25603402, -0.97676557, -0.50466988],
        ...,
        [ 0.03776102, -0.91016269, -0.89833872,  1.02378711, -0.50466988],
        [-1.46169465,  0.76659782, -0.89833872,  1.02378711, -0.50466988],
        [-0.46205754, -1.96596021, -0.06746417, -0.97676557, -0.50466988]]),
 array([[ 0.89459283, -0.08863026, -0.06746417,  1.02378711, -0.50466988],
        [ 0.53757957, -0.22180837, -0.06746417, -0.97676557, -0.50466988],
        [ 0.60898222,  1.57449152,  0.76341038,  1.02378711,  1.98149332],
        ...,
        [ 1.10880078,  1.20785059, -0.89833872,  1.02378711, -0.50466988],
        [ 1.75142463,  1.34905148, -0.06746417,  1.02378711, -0.50466988],
        [ 1.60861933, -0.92299913, -0.89833872, -0.97676557, -0.50466988]]))

In [33]:
# Define parameter grid
param_grid = {
   'C': [10, 100, 1000, 2000, 3000],  # Regularization parameter
   #'epsilon': [0.01, 0.1, 0.2],  # Epsilon parameter
   'kernel': ['linear', 'poly', 'rbf', 'sigmoid'],  # Type of kernel
   #'degree': [2, 3, 4],  # Degree of the polynomial kernel (only for 'poly' kernel)
   'gamma': ['scale', 'auto'],  # Kernel coefficient (only for 'rbf', 'poly', 'sigmoid')   
   #'gamma': ['scale', 'auto', 0.1, 1, 10],  # Kernel coefficient (only for 'rbf', 'poly', 'sigmoid')   
   #'coef0': [0, 0.1, 0.5, 1],  # Independent term in kernel (only for 'poly' and 'sigmoid')
   #'shrinking': [True, False],  # Whether to use the shrinking heuristic
   #'tol': [1e-3, 1e-4],  # Tolerance for stopping criterion
   #'max_iter': [1000, 5000, -1]  # Maximum number of iterations (-1 means no limit)
}

In [35]:
# Set up GridSearchCV
gridSearchCV = GridSearchCV(estimator=SVR(), param_grid=param_grid, refit=True, cv=5, n_jobs=1, verbose=3)

In [37]:
gridSearchCV.fit(X_train, y_train)

Fitting 5 folds for each of 40 candidates, totalling 200 fits
[CV 1/5] END ..C=10, gamma=scale, kernel=linear;, score=0.378 total time=   0.0s
[CV 2/5] END ..C=10, gamma=scale, kernel=linear;, score=0.480 total time=   0.0s
[CV 3/5] END ..C=10, gamma=scale, kernel=linear;, score=0.318 total time=   0.0s
[CV 4/5] END ..C=10, gamma=scale, kernel=linear;, score=0.338 total time=   0.0s
[CV 5/5] END ..C=10, gamma=scale, kernel=linear;, score=0.324 total time=   0.0s
[CV 1/5] END ....C=10, gamma=scale, kernel=poly;, score=0.055 total time=   0.0s
[CV 2/5] END ....C=10, gamma=scale, kernel=poly;, score=0.071 total time=   0.0s
[CV 3/5] END ...C=10, gamma=scale, kernel=poly;, score=-0.047 total time=   0.0s
[CV 4/5] END ...C=10, gamma=scale, kernel=poly;, score=-0.024 total time=   0.0s
[CV 5/5] END ...C=10, gamma=scale, kernel=poly;, score=-0.050 total time=   0.0s
[CV 1/5] END .....C=10, gamma=scale, kernel=rbf;, score=0.004 total time=   0.0s
[CV 2/5] END .....C=10, gamma=scale, kernel=rbf

In [39]:
#print best parameter after tuning
print(gridSearchCV.best_params_)

{'C': 3000, 'gamma': 'scale', 'kernel': 'poly'}


In [41]:
age_input=float(input("Age:"))
bmi_input=float(input("BMI:"))
children_input=float(input("Children:"))
sex_male_input=int(input("Sex Male 0 or 1:"))
smoker_yes_input=int(input("Smoker Yes 0 or 1:"))

Age: 32
BMI: 43
Children: 2
Sex Male 0 or 1: 0
Smoker Yes 0 or 1: 1


In [43]:
Future_Prediction=gridSearchCV.predict([[age_input,bmi_input,children_input,sex_male_input,smoker_yes_input]])
#change the paramter,play with it.
print("Future_Prediction={}".format(Future_Prediction))

Future_Prediction=[2291197.32936186]
