# Load Data

In [1]:
from sklearn.datasets import fetch_california_housing

# Fetch the California housing dataset and pull out the feature matrix and target vector.
housing = fetch_california_housing()
X, y = housing.data, housing.target

# Prepare Data

In [2]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# I would prefer to use a stratified split, but since the focus of this tutorial is the SVM regressor, a plain random split is used.

In [3]:
from sklearn.preprocessing import StandardScaler

# Learn the scaling statistics from the training set only, then apply that
# same transformation to the test set. Fitting the scaler on the test set
# would leak test information into the preprocessing step.
standard = StandardScaler()
X_train_scaled = standard.fit_transform(X_train)
X_test_scaled = standard.transform(X_test)

# Only the last expression of a cell is displayed, so show just the target.
y_train

array([1.03 , 3.821, 1.726, ..., 2.221, 2.835, 3.25 ])

# Train Model

### LinearSVR
(Recall that it tends to be quicker than SVR(kernel='linear').)

With so many kernels to choose from, you should generally:
1. Always try LinearSVR first (to judge the bias, i.e. the error introduced by the choice of model);
2. Then try SVR(kernel='rbf');
3. If you have time, try other kernels, followed by a final cross-validation and grid search.

In [4]:
from sklearn.svm import LinearSVR
from sklearn.svm import SVR

# Baseline model: a linear support-vector regressor with a fixed seed.
linear_reg = LinearSVR(C=1.5, random_state=42)
# fit() returns the estimator itself, so the cell displays its repr.
linear_reg.fit(X=X_train_scaled, y=y_train)



LinearSVR(C=1.5, dual=True, epsilon=0.0, fit_intercept=True,
          intercept_scaling=1.0, loss='epsilon_insensitive', max_iter=1000,
          random_state=42, tol=0.0001, verbose=0)

In [5]:
from sklearn.metrics import mean_squared_error
import numpy as np
# Training-set RMSE of the LinearSVR baseline.
y_pred = linear_reg.predict(X_train_scaled)
MSE = mean_squared_error(y_train, y_pred)
np.sqrt(MSE)  # RMSE, expressed in the same units as the target

0.9745112892669746

#### What does this result mean? 

<b> Think about the RMSE formula </b>

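$$\mathrm{RMSE} = \sqrt{\frac{1}{n}\sum_{i=1}^{n}\left(y_{\mathrm{actual},i} - y_{\mathrm{predict},i}\right)^2}$$

so, roughly, $y_{\mathrm{actual}} \approx y_{\mathrm{predict}} \pm \mathrm{RMSE}$ for a typical sample.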

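Our typical error margin will be about ± 0.9745 × \$100,000 ≈ \$97,000 (the target is expressed in units of \$100,000), which isn't that great. It may be that the underlying relationship is a higher-order polynomial? Perhaps a better approach is to use SVR with a non-linear kernel?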

### SVR

(Use the RBF kernel because we are not sure what polynomial degree the model should have - a benefit of kernelised SVMs!)
<b> Instead of searching by hand for the right values of C and gamma, let the program do the hard work: use RandomizedSearchCV. </b>

NOTE! I spent 1 hour trying to figure out why this was not working, when the whole problem was a typo - I typed SVC, NOT SVR!!!! SVC is a classifier, not a regressor!!!

In [6]:
from sklearn.svm import SVR
from sklearn.model_selection import RandomizedSearchCV
from sklearn.metrics import mean_squared_error
from numpy import reciprocal

# Candidate hyper-parameters for the RBF-kernel SVR. Every entry is a discrete
# list, so the search space holds only 2 x 2 x 1 = 4 combinations.
parameters = [
    {
        "gamma": [0.001, 0.1],
        "C": [1, 10],
        "kernel": ["rbf"],
    }
]

# 3-fold cross-validated search, scored by negative MSE (higher is better).
# NOTE(review): n_iter=10 exceeds the 4 available combinations, so the search
# evaluates just those 4 candidates. Pass continuous distributions instead of
# lists if a genuinely randomized search is wanted.
random_search = RandomizedSearchCV(
    estimator=SVR(),
    param_distributions=parameters,
    n_iter=10,
    cv=3,
    scoring="neg_mean_squared_error",
    random_state=42,
    verbose=3,
)

random_search.fit(X_train_scaled, y_train)

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


Fitting 3 folds for each of 4 candidates, totalling 12 fits
[CV] kernel=rbf, gamma=0.001, C=1 ....................................
[CV] ....... kernel=rbf, gamma=0.001, C=1, score=-0.556, total=   8.0s
[CV] kernel=rbf, gamma=0.001, C=1 ....................................


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    7.9s remaining:    0.0s


[CV] ....... kernel=rbf, gamma=0.001, C=1, score=-0.532, total=   7.5s
[CV] kernel=rbf, gamma=0.001, C=1 ....................................


[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:   15.4s remaining:    0.0s


[CV] ....... kernel=rbf, gamma=0.001, C=1, score=-0.557, total=   7.5s
[CV] kernel=rbf, gamma=0.1, C=1 ......................................
[CV] ......... kernel=rbf, gamma=0.1, C=1, score=-0.365, total=   7.3s
[CV] kernel=rbf, gamma=0.1, C=1 ......................................
[CV] ......... kernel=rbf, gamma=0.1, C=1, score=-0.361, total=   7.2s
[CV] kernel=rbf, gamma=0.1, C=1 ......................................
[CV] ......... kernel=rbf, gamma=0.1, C=1, score=-0.368, total=   7.3s
[CV] kernel=rbf, gamma=0.001, C=10 ...................................
[CV] ...... kernel=rbf, gamma=0.001, C=10, score=-0.548, total=   7.6s
[CV] kernel=rbf, gamma=0.001, C=10 ...................................
[CV] ...... kernel=rbf, gamma=0.001, C=10, score=-0.494, total=   7.5s
[CV] kernel=rbf, gamma=0.001, C=10 ...................................
[CV] ...... kernel=rbf, gamma=0.001, C=10, score=-0.507, total=   7.5s
[CV] kernel=rbf, gamma=0.1, C=10 .....................................
[CV] .

[Parallel(n_jobs=1)]: Done  12 out of  12 | elapsed:  1.7min finished


RandomizedSearchCV(cv=3, error_score=nan,
                   estimator=SVR(C=1.0, cache_size=200, coef0=0.0, degree=3,
                                 epsilon=0.1, gamma='scale', kernel='rbf',
                                 max_iter=-1, shrinking=True, tol=0.001,
                                 verbose=False),
                   iid='deprecated', n_iter=10, n_jobs=None,
                   param_distributions=[{'C': [1, 10], 'gamma': [0.001, 0.1],
                                         'kernel': ['rbf']}],
                   pre_dispatch='2*n_jobs', random_state=42, refit=True,
                   return_train_score=False, scoring='neg_mean_squared_error',
                   verbose=3)

In [7]:
# Display the estimator that achieved the best cross-validation score in the search.
random_search.best_estimator_

SVR(C=10, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma=0.1,
    kernel='rbf', max_iter=-1, shrinking=True, tol=0.001, verbose=False)

#### In reality, I would search again with a wider range of values for gamma and C, because the best gamma and C were both the largest values tried (so the optimum may lie beyond them). But this is very computationally intensive on my 2013 8GB RAM Dell laptop.

# Always FINE TUNE your model!
##### The training RMSE (computed below) is a little over half the value we initially achieved with LinearSVR.


In [8]:
# Training-set RMSE of the tuned SVR. Because the search was run with
# refit=True, calling predict() on the search object delegates to best_estimator_.
y_pred = random_search.predict(X_train_scaled)
mse = mean_squared_error(y_true=y_train, y_pred=y_pred)
np.sqrt(mse)

0.5522988985570297

# Test Model

In [9]:
# Test-set RMSE of the tuned SVR. Because the search was run with
# refit=True, calling predict() on the search object delegates to best_estimator_.
y_pred = random_search.predict(X_test_scaled)
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
np.sqrt(mse)

0.5756623788940652

#####  The test RMSE is close to the training RMSE, so we seem to be generalising reasonably well without obvious overfitting, and the result is clearly better than LinearSVR's.

Definitely recommend going through this yourself. 