In [57]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sbs
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split

# Fetch the California housing dataset as pandas objects (as_frame=True),
# then hold out 20% of the rows as a test set using a fixed seed.
housing = datasets.fetch_california_housing(as_frame=True)
X_train, X_test, y_train, y_test = train_test_split(
    housing.data,
    housing.target,
    test_size=0.2,
    random_state=42,
)

In [58]:
from sklearn.svm import SVR, LinearSVR
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import cross_val_score

# Two regressors that share the same preprocessing (feature standardisation):
# a linear SVM regressor and a kernel SVR left at its default settings.
lin_svr_reg = make_pipeline(
    StandardScaler(),
    LinearSVR(max_iter=5000, random_state=42),
)
svr_reg = make_pipeline(
    StandardScaler(),
    SVR(),
)

# Cross-validate each pipeline on the training set with the default CV
# splitter and the estimators' default scorer.
lin_svr_scores = cross_val_score(estimator=lin_svr_reg, X=X_train, y=y_train)
svr_scores = cross_val_score(estimator=svr_reg, X=X_train, y=y_train)

# Displayed as (kernel SVR mean score, linear SVR mean score).
np.mean(svr_scores), np.mean(lin_svr_scores)

(0.7373248961801384, -0.4372320028365051)

In [59]:
from sklearn.metrics import mean_squared_error

# Fit the linear pipeline on the whole training set and predict on that same
# data, so the value below is a training-set MSE rather than a held-out one.
y_pred = lin_svr_reg.fit(X_train, y_train).predict(X_train)
mse = mean_squared_error(y_true=y_train, y_pred=y_pred)
mse

0.9595484665811848

In [63]:
from sklearn.model_selection import RandomizedSearchCV
from sklearn.model_selection import GridSearchCV
from scipy.stats import uniform, randint, loguniform

# Search space for the 'svr' step of the svr_reg pipeline (make_pipeline names
# each step after its lowercased class name, hence the 'svr__' prefix).
params = {
    'svr__kernel': ['poly', 'rbf'],
    # scipy's uniform(loc, scale) covers [loc, loc + scale], so C is drawn from [1, 11].
    'svr__C': uniform(1, 10),
    # loguniform(a, b) draws epsilon from [0.1, 5], uniformly on a log scale.
    'svr__epsilon': loguniform(0.1, 5),
}

# random_state fixes which 100 candidates are sampled from the distributions
# above; without it best_estimator_ and best_score_ change on every run.
svr_rnd_search = RandomizedSearchCV(
    svr_reg,
    param_distributions=params,
    n_iter=100,
    random_state=42,
)

# The search only sees the first 2000 training rows.
# NOTE(review): presumably a subsample to keep the search fast -- confirm.
svr_rnd_search.fit(X_train.iloc[:2000, :], y_train.iloc[:2000])

# Best pipeline found and its mean cross-validated score (default scorer).
svr_rnd_search.best_estimator_, svr_rnd_search.best_score_

In [45]:
# The scorer returns negated RMSE values (higher is better), so flip the sign
# to display plain per-fold RMSE for the best pipeline on the full training set.
-cross_val_score(
    estimator=svr_rnd_search.best_estimator_,
    X=X_train,
    y=y_train,
    scoring="neg_root_mean_squared_error",
)

(Pipeline(steps=[('standardscaler', StandardScaler()),
                 ('svr',
                  SVR(C=11.950649162962367, degree=7,
                      epsilon=0.25100759101865666))]),
 0.7237567118902904,
 array([3.52661794, 1.39004871, 0.77924066, ..., 3.03816734, 1.58813705,
        1.46836179]))