# Support Vector Regression (SVR)

## Importing the libraries

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

## Importing the dataset

In [2]:
# Read the insurance CSV; every column except the last is treated as a
# feature and the last column as the regression target.
dataset = pd.read_csv('../../Datasets/insurance.csv')
features = dataset.iloc[:, :-1]
target = dataset.iloc[:, -1]
X, y = features.values, target.values
X

array([[19, 'female', 27.9, 0, 'yes', 'southwest'],
       [18, 'male', 33.77, 1, 'no', 'southeast'],
       [28, 'male', 33.0, 3, 'no', 'southeast'],
       ...,
       [18, 'female', 36.85, 0, 'no', 'southeast'],
       [21, 'female', 25.8, 0, 'no', 'southwest'],
       [61, 'female', 29.07, 0, 'yes', 'northwest']], dtype=object)

In [3]:
# Turn y into a single-column 2-D array (n_samples, 1); the StandardScaler
# applied to the target further down operates on 2-D input.
y = y.reshape(-1, 1)

In [4]:
# One-hot encoding: replace each string-valued column with one binary (0/1)
# column per category, so every category gets its own indicator column.

# Encoding the categorical independent variables
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder

# Column indices 1, 4 and 5 are the string-valued features (see the X preview
# above). remainder='passthrough' keeps the other columns unchanged; they end
# up AFTER the encoded columns in the output.
# sparse_threshold=0 forces a dense result: with the default (0.3) a
# low-density output is returned as a scipy sparse matrix, which np.array()
# would wrap in a 0-d object array instead of converting to a 2-D array.
ct = ColumnTransformer(
    transformers=[('encoder', OneHotEncoder(), [1, 4, 5])],
    remainder='passthrough',
    sparse_threshold=0,
)

# Fit the encoder and replace X with the encoded feature matrix.
# NOTE: this overwrites X, so the cell must not be re-run without first
# re-running the dataset-loading cell.
X = np.array(ct.fit_transform(X))

print(X)

[[1.0 0.0 0.0 ... 19 27.9 0]
 [0.0 1.0 1.0 ... 18 33.77 1]
 [0.0 1.0 1.0 ... 28 33.0 3]
 ...
 [1.0 0.0 1.0 ... 18 36.85 0]
 [1.0 0.0 1.0 ... 21 25.8 0]
 [1.0 0.0 0.0 ... 61 29.07 0]]


## Splitting the dataset into the Training set and Test set

In [5]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows as the test set; the fixed random_state makes the
# split reproducible across runs.
split = train_test_split(X, y, test_size=0.2, random_state=0)
X_train, X_test, y_train, y_test = split

## Feature Scaling

In [6]:
from sklearn.preprocessing import StandardScaler

# One scaler for the features and one for the target, each fitted on the
# training split only. The fitted scalers are kept so the test features can be
# scaled the same way and predictions mapped back to the original target units.
sc_X, sc_y = StandardScaler(), StandardScaler()
X_train = sc_X.fit(X_train).transform(X_train)
y_train = sc_y.fit(y_train).transform(y_train)

## Training the SVR model on the Training set

In [7]:
from sklearn.svm import SVR

# RBF-kernel SVR trained on the standardized training data.
regressor = SVR(C=1, gamma=0.1, kernel='rbf')
# y_train is a column vector (n_samples, 1) because it was reshaped for the
# StandardScaler, but SVR.fit expects a 1-D target. ravel() flattens it and
# avoids the DataConversionWarning that a 2-D target triggers.
regressor.fit(X_train, y_train.ravel())

  return f(**kwargs)


SVR(C=1, gamma=0.1)

## Predicting the Test set results

In [8]:
# Scale the test features with the scaler fitted on the training set and
# predict in standardized target units.
y_pred_scaled = regressor.predict(sc_X.transform(X_test))
# predict() returns a 1-D array, but StandardScaler.inverse_transform requires
# 2-D input (recent scikit-learn versions raise on 1-D). Reshape to a column
# for the inverse scaling, then ravel() so y_pred stays 1-D for later cells.
y_pred = sc_y.inverse_transform(y_pred_scaled.reshape(-1, 1)).ravel()

# Side by side: predicted value (left column) vs. actual value (right column).
# NOTE: set_printoptions changes NumPy's display precision for the whole kernel.
np.set_printoptions(precision=2)
print(np.concatenate((y_pred.reshape(len(y_pred),1), y_test.reshape(len(y_test),1)),1))

[[10891.15  9724.53]
 [ 9586.05  8547.69]
 [47825.31 45702.02]
 [13632.13 12950.07]
 [11196.61  9644.25]
 [ 5094.72  4500.34]
 [ 3379.69  2198.19]
 [12575.45 11436.74]
 [ 8538.01  7537.16]
 [ 6530.72  5425.02]
 [ 8268.25  6753.04]
 [11639.33 10493.95]
 [ 8454.5   7337.75]
 [ 5270.3   4185.1 ]
 [22042.06 18310.74]
 [12028.55 10702.64]
 [13440.77 12523.6 ]
 [ 4694.08  3490.55]
 [ 7592.39  6457.84]
 [25693.62 33475.82]
 [23256.56 23967.38]
 [13809.11 12643.38]
 [11388.62 23045.57]
 [27979.3  23065.42]
 [ 2738.55  1674.63]
 [ 6040.61  4667.61]
 [ 5254.27  3732.63]
 [ 8784.67  7682.67]
 [ 4753.47  3756.62]
 [ 9699.56  8413.46]
 [ 9038.66  8059.68]
 [49709.91 48970.25]
 [13794.93 12979.36]
 [11359.25 20630.28]
 [14880.   14571.89]
 [ 5243.32  4137.52]
 [ 9454.11  8347.16]
 [40162.4  51194.56]
 [38847.44 40003.33]
 [ 3005.    1880.49]
 [ 6789.12  5458.05]
 [ 3688.99  2867.12]
 [24294.93 20149.32]
 [46291.98 47496.49]
 [33790.39 36149.48]
 [ 4796.8  26018.95]
 [12013.08 19749.38]
 [ 8063.24  6

## Evaluating the Model Performance

In [9]:
from sklearn.metrics import r2_score

# Coefficient of determination (R^2) of the predictions on the held-out test set.
r2_score(y_true=y_test, y_pred=y_pred)

0.8870768047142364

In [10]:
# Apply k-fold cross validation

from sklearn.model_selection import cross_val_score
# No `scoring` is given, so cross_val_score uses the estimator's own score
# method, which for SVR is R^2. The "Accuracy" printed below is therefore the
# mean R^2 over the 10 folds, not a classification accuracy.
# NOTE: X_train / y_train were standardized with statistics from the whole
# training split before this point, so the validation folds are not fully
# independent of the scaling step.
# ravel(): pass the target as 1-D so the per-fold fits do not each emit a
# DataConversionWarning for the (n_samples, 1) column vector.
accuracies = cross_val_score(estimator = regressor, X = X_train, y = y_train.ravel(), cv = 10)
print("Accuracy: {:.2f} %".format(accuracies.mean()*100))
print("Standard Deviation: {:.2f} %".format(accuracies.std()*100))

  return f(**kwargs)
  return f(**kwargs)
  return f(**kwargs)
  return f(**kwargs)
  return f(**kwargs)
  return f(**kwargs)
  return f(**kwargs)
  return f(**kwargs)
  return f(**kwargs)
  return f(**kwargs)


Accuracy: 82.21 %
Standard Deviation: 4.86 %


In [11]:
# Apply Grid Search to find best model and parameters

from sklearn.model_selection import GridSearchCV

# Regularization strengths shared by every kernel's sub-grid.
c_values = [0.25, 0.5, 0.75, 1]
parameters = [
    {'C': c_values, 'kernel': ['linear']},
    {'C': c_values, 'kernel': ['rbf'], 'gamma': [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]},
    # Degrees 1..10; the previous list repeated 3 and skipped 6.
    {'C': c_values, 'kernel': ['poly'], 'degree': list(range(1, 11))},
    # `degree` is only used by the 'poly' kernel and ignored by all others, so
    # sweeping it for 'sigmoid' just refits identical models; only C is varied.
    {'C': c_values, 'kernel': ['sigmoid']},
]

grid_search = GridSearchCV(estimator = regressor,
                           param_grid = parameters,
                           cv = 10,
                           n_jobs = -1
                          )
# ravel(): SVR expects a 1-D target; the (n_samples, 1) column vector would
# trigger a DataConversionWarning on every fit.
grid_search.fit(X_train, y_train.ravel())
# best_score_ is the mean cross-validated score of the best candidate; with the
# default scoring this is R^2 for a regressor, printed under the label "Accuracy".
best_accuracy = grid_search.best_score_
best_parameters = grid_search.best_params_
print("Best Accuracy: {:.2f} %".format(best_accuracy*100))
print("Best Parameters:", best_parameters)

Best Accuracy: 82.21 %
Best Parameters: {'C': 1, 'gamma': 0.1, 'kernel': 'rbf'}


  return f(**kwargs)
