In [126]:
import sklearn
import os
import urllib
import tarfile
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OrdinalEncoder, StandardScaler
from sklearn.model_selection import cross_val_score
from sklearn.svm import SVR
from sklearn.model_selection import train_test_split

In [127]:
from sklearn.datasets import fetch_california_housing

housing = fetch_california_housing()
X = housing["data"]
y = housing["target"]

In [128]:
scaler = StandardScaler()
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [130]:
lin_svr = SVR()
scores = cross_val_score(lin_svr, X_train, y_train, scoring = "neg_mean_squared_error",cv = 3)
svr_rmse = np.sqrt(-scores)
print(svr_rmse)

[1.18609925 1.16226223 1.17780844]


In [131]:
from sklearn.model_selection import RandomizedSearchCV
from scipy.stats import reciprocal, uniform

param_distributions = {"gamma": reciprocal(0.001, 0.1), "C": uniform(1, 10)}
rnd_search_cv = RandomizedSearchCV(SVR(), param_distributions, n_iter=10, verbose=2, cv=3, random_state=42)
rnd_search_cv.fit(X_train_scaled, y_train)

Fitting 3 folds for each of 10 candidates, totalling 30 fits
[CV] END .....C=4.745401188473625, gamma=0.07969454818643928; total time=  24.1s
[CV] END .....C=4.745401188473625, gamma=0.07969454818643928; total time=  24.3s
[CV] END .....C=4.745401188473625, gamma=0.07969454818643928; total time=  24.3s
[CV] END .....C=8.31993941811405, gamma=0.015751320499779724; total time=  22.0s
[CV] END .....C=8.31993941811405, gamma=0.015751320499779724; total time=  21.4s
[CV] END .....C=8.31993941811405, gamma=0.015751320499779724; total time=  21.2s
[CV] END ....C=2.560186404424365, gamma=0.002051110418843397; total time=  21.7s
[CV] END ....C=2.560186404424365, gamma=0.002051110418843397; total time=  21.4s
[CV] END ....C=2.560186404424365, gamma=0.002051110418843397; total time=  21.9s
[CV] END ....C=1.5808361216819946, gamma=0.05399484409787431; total time=  19.5s
[CV] END ....C=1.5808361216819946, gamma=0.05399484409787431; total time=  21.5s
[CV] END ....C=1.5808361216819946, gamma=0.05399

RandomizedSearchCV(cv=3, estimator=SVR(),
                   param_distributions={'C': <scipy.stats._distn_infrastructure.rv_frozen object at 0x000001C89C950CD0>,
                                        'gamma': <scipy.stats._distn_infrastructure.rv_frozen object at 0x000001C89CC15C40>},
                   random_state=42, verbose=2)

In [132]:
rnd_search_cv.best_estimator_

SVR(C=4.745401188473625, gamma=0.07969454818643928)

In [133]:
y_pred = rnd_search_cv.best_estimator_.predict(X_train_scaled)
mse = mean_squared_error(y_train, y_pred)
np.sqrt(mse)

0.5727524770785357

In [134]:
y_pred = rnd_search_cv.best_estimator_.predict(X_test_scaled)
mse = mean_squared_error(y_test, y_pred)
np.sqrt(mse)

0.5929168385528746