# Support Vector Regression (SVR)

## Importing the libraries

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

## Importing the dataset

In [2]:
# Read the insurance CSV, then split it column-wise: every column except the
# last goes into the feature matrix X, and the last column becomes the target y.
dataset = pd.read_csv('../../Datasets/insurance.csv')
X, y = dataset.iloc[:, :-1].to_numpy(), dataset.iloc[:, -1].to_numpy()
X

array([[19, 'female', 27.9, 0, 'yes', 'southwest'],
       [18, 'male', 33.77, 1, 'no', 'southeast'],
       [28, 'male', 33.0, 3, 'no', 'southeast'],
       ...,
       [18, 'female', 36.85, 0, 'no', 'southeast'],
       [21, 'female', 25.8, 0, 'no', 'southwest'],
       [61, 'female', 29.07, 0, 'yes', 'northwest']], dtype=object)

In [3]:
# Turn the flat target vector into a single-column 2D array (n_samples, 1),
# the layout in which it is later handed to the target scaler.
y = y.reshape(-1, 1)

In [4]:
# One-hot encoding: each string-valued feature column is replaced by one 0/1
# indicator column per category found in that column.

# Encoding the categorical independent variables
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder

# Columns 1, 4 and 5 are the ones holding strings in X (e.g. 'female', 'yes',
# 'southwest'). remainder='passthrough' keeps the remaining columns unchanged;
# they end up after the encoded columns in the result.
# sparse_threshold=0 forces a dense result: np.array() does not densify a SciPy
# sparse matrix, so a sparse return value would silently yield an unusable array.
ct = ColumnTransformer(
    transformers=[('encoder', OneHotEncoder(), [1, 4, 5])],
    remainder='passthrough',
    sparse_threshold=0,
)

# Fit the encoder on X and replace X with the encoded feature matrix.
# NOTE: this overwrites X, so the cell must not be re-run without reloading the data.
X = np.array(ct.fit_transform(X))

print(X)

[[1.0 0.0 0.0 ... 19 27.9 0]
 [0.0 1.0 1.0 ... 18 33.77 1]
 [0.0 1.0 1.0 ... 28 33.0 3]
 ...
 [1.0 0.0 1.0 ... 18 36.85 0]
 [1.0 0.0 1.0 ... 21 25.8 0]
 [1.0 0.0 0.0 ... 61 29.07 0]]


## Splitting the dataset into the Training set and Test set

In [5]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows as the test set; the fixed random_state makes the
# split identical on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

## Feature Scaling

In [6]:
from sklearn.preprocessing import StandardScaler

# One scaler for the features and one for the target, both fitted on the
# training split only. The test features are transformed later with sc_X, and
# sc_y is used to map predictions back to the original target units.
# NOTE: X_train / y_train are overwritten with their scaled versions, so
# re-running this cell would refit both scalers on already-scaled data.
sc_X, sc_y = StandardScaler(), StandardScaler()
X_train, y_train = sc_X.fit_transform(X_train), sc_y.fit_transform(y_train)

## Training the SVR model on the Training set

In [7]:
from sklearn.svm import SVR

# RBF-kernel support vector regressor trained on the scaled training data.
regressor = SVR(kernel = 'rbf')
# y_train is a 2D (n_samples, 1) column at this point (it was reshaped and then
# scaled), whereas SVR.fit expects a 1D target; ravel() flattens it so the fit
# does not emit a DataConversionWarning.
regressor.fit(X_train, y_train.ravel())

  return f(**kwargs)


SVR()

## Predicting the Test set results

In [8]:
# Scale the test features with the scaler fitted on the training split and
# predict; the model was trained on a scaled target, so its output is in
# scaled target units.
y_pred_scaled = regressor.predict(sc_X.transform(X_test))

# StandardScaler.inverse_transform expects a 2D (n_samples, n_features) array,
# while predict() returns a flat vector: reshape to one column for the inverse
# scaling, then ravel() so y_pred keeps the 1D shape it had before.
y_pred = sc_y.inverse_transform(y_pred_scaled.reshape(-1, 1)).ravel()

np.set_printoptions(precision=2)
# Side-by-side comparison: predictions in the left column, actual values in the right.
print(np.concatenate((y_pred.reshape(len(y_pred),1), y_test.reshape(len(y_test),1)),1))

[[10895.13  9724.53]
 [ 9612.78  8547.69]
 [47738.18 45702.02]
 [13664.09 12950.07]
 [11134.56  9644.25]
 [ 5175.91  4500.34]
 [ 3381.74  2198.19]
 [12553.38 11436.74]
 [ 8536.37  7537.16]
 [ 6509.44  5425.02]
 [ 8245.8   6753.04]
 [11623.6  10493.95]
 [ 8443.93  7337.75]
 [ 5246.66  4185.1 ]
 [22047.73 18310.74]
 [12042.47 10702.64]
 [13419.53 12523.6 ]
 [ 4670.84  3490.55]
 [ 7583.27  6457.84]
 [25762.14 33475.82]
 [23096.16 23967.38]
 [13808.53 12643.38]
 [11398.23 23045.57]
 [28063.87 23065.42]
 [ 2733.89  1674.63]
 [ 6012.2   4667.61]
 [ 5209.81  3732.63]
 [ 8817.72  7682.67]
 [ 4740.31  3756.62]
 [ 9691.89  8413.46]
 [ 9034.12  8059.68]
 [49814.18 48970.25]
 [13762.75 12979.36]
 [11346.66 20630.28]
 [14849.61 14571.89]
 [ 5237.19  4137.52]
 [ 9474.36  8347.16]
 [40058.76 51194.56]
 [38674.45 40003.33]
 [ 3022.48  1880.49]
 [ 6751.6   5458.05]
 [ 3714.45  2867.12]
 [24558.93 20149.32]
 [45995.64 47496.49]
 [33842.64 36149.48]
 [ 4781.49 26018.95]
 [12029.33 19749.38]
 [ 8042.07  6

## Evaluating the Model Performance

In [9]:
from sklearn.metrics import r2_score

# Coefficient of determination (R^2) of the predictions against the held-out
# test targets, both in the original (unscaled) target units.
r2_score(y_true=y_test, y_pred=y_pred)

0.8869616746452476