In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split

# Load the California housing dataset as pandas objects (as_frame=True), then
# hold out 20% of the rows as a test set. The fixed random_state keeps the
# split identical across runs.
housing = datasets.fetch_california_housing(as_frame=True)
X_train, X_test, y_train, y_test = train_test_split(
    housing.data,
    housing.target,
    test_size=0.2,
    random_state=42,
)

In [2]:
from sklearn.svm import SVR, LinearSVR
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import cross_val_score

# Two baseline regressors, each preceded by feature standardisation:
# a linear SVR (seeded, with a raised iteration cap) and a kernel SVR with
# default hyperparameters.
lin_svr_reg = make_pipeline(
    StandardScaler(),
    LinearSVR(max_iter=5000, random_state=42),
)
svr_reg = make_pipeline(
    StandardScaler(),
    SVR(),
)

# Cross-validate both on the training set with cross_val_score's default
# splitter and scorer (no cv= / scoring= given).
lin_svr_scores = cross_val_score(lin_svr_reg, X_train, y_train)
svr_scores = cross_val_score(svr_reg, X_train, y_train)

# Displayed as (kernel SVR mean, linear SVR mean).
svr_scores.mean(), lin_svr_scores.mean()

(np.float64(0.73732884298049), np.float64(-0.4372320028365021))

In [None]:
from sklearn.metrics import root_mean_squared_error

# Fit the linear SVR pipeline on the full training set and score it on the
# same data. This is an in-sample (training) error, so it says nothing about
# generalisation; it is only a sanity check on the fit.
lin_svr_reg.fit(X_train, y_train)
y_pred = lin_svr_reg.predict(X_train)

# root_mean_squared_error returns the RMSE, not the MSE, so the result is
# named accordingly.
train_rmse = root_mean_squared_error(y_train, y_pred)
train_rmse

np.float64(0.9795654478293888)

In [4]:
from sklearn.model_selection import RandomizedSearchCV
from sklearn.model_selection import GridSearchCV
from scipy.stats import uniform, randint, loguniform

# Hyperparameter search space for the 'svr' step of the pipeline.
params = {
    'svr__kernel': ['poly', 'rbf'],
    # scipy's uniform(loc, scale) samples from [loc, loc + scale],
    # so C is drawn from [1, 11], not [1, 10].
    'svr__C': uniform(1, 10),
    # Log-uniform over [0.1, 5].
    'svr__epsilon': loguniform(0.1, 5),
}

# random_state pins the sampled candidates so the search (and therefore the
# selected model) is reproducible on Restart & Run All, matching the seed
# used for the train/test split and the LinearSVR.
svr_rnd_search = RandomizedSearchCV(
    svr_reg,
    param_distributions=params,
    n_iter=100,
    random_state=42,
)

# The search is fitted on the first 2000 training rows only -- presumably to
# keep 100 candidates x CV folds affordable for a kernel SVR (TODO confirm).
# Note that best_estimator_ is therefore refit on this 2000-row subset.
svr_rnd_search.fit(X_train.iloc[:2000, :], y_train.iloc[:2000])
svr_rnd_search.best_estimator_, svr_rnd_search.best_score_

(Pipeline(steps=[('standardscaler', StandardScaler()),
                 ('svr',
                  SVR(C=np.float64(6.380199280679454),
                      epsilon=np.float64(0.18522156381258137)))]),
 np.float64(0.7614711229087457))

In [5]:
# Cross-validate the selected pipeline on the full training set. The scorer
# reports negated RMSE, so flip the sign to display per-fold RMSE.
neg_rmse_per_fold = cross_val_score(
    svr_rnd_search.best_estimator_,
    X_train,
    y_train,
    scoring="neg_root_mean_squared_error",
)
-neg_rmse_per_fold

array([0.57587797, 0.55721378, 0.56630829, 0.55830183, 0.58144002])

In [13]:
# Final check on the held-out test set using the estimator selected by the
# randomized search.
# NOTE(review): best_estimator_ was refit on the 2000-row subset passed to
# svr_rnd_search.fit, not on the full training set -- confirm this is intended.
best_svr = svr_rnd_search.best_estimator_
y_pred = best_svr.predict(X_test)
rmse = root_mean_squared_error(y_test, y_pred)
rmse

np.float64(0.5811237682804774)