In [104]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_california_housing

In [105]:
# Load the California housing dataset and split the returned bunch into
# the feature matrix (X) and the regression target (y).
housing = fetch_california_housing()
X, y = housing.data, housing.target

In [110]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for final evaluation; the fixed seed keeps the
# split identical across kernel restarts.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [111]:
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline


# Numeric preprocessing applied to every feature column:
#   1. fill missing values with the column median,
#   2. standardize the result with StandardScaler.
num_pipe = Pipeline(
    steps=[
        ("imputer", SimpleImputer(strategy="median")),
        ("std", StandardScaler()),
    ]
)


In [113]:
# Fit the preprocessing pipeline on the training split only and transform it
# in the same call; the fitted pipeline is reused for the test split later.
X_train_sc = num_pipe.fit_transform(X_train)

In [114]:
# Transform (do not refit) the test split, so it is preprocessed with the
# pipeline that was fitted on the training split.
X_test_sc = num_pipe.transform(X_test)

In [115]:
from sklearn.svm import LinearSVR, SVR

# Linear baseline: a linear support vector regressor on the scaled features.
# random_state is pinned (matching the seed used for the split and the search
# elsewhere in this notebook) because LinearSVR draws pseudo-random numbers
# while fitting; without a seed the fitted model, and the error reported for
# it, can differ between runs.
lsvr = LinearSVR(random_state=42)
lsvr.fit(X_train_sc, y_train)



LinearSVR()

In [118]:
from sklearn.metrics import mean_squared_error

# Mean squared error of the linear baseline on the same training data it was
# fitted on. This is a training-set MSE (not an RMSE, and not a held-out score).
y_pred = lsvr.predict(X_train_sc)
mse = mean_squared_error(y_true=y_train, y_pred=y_pred)
mse

0.9629788644764157

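Let's try a nonlinear kernel instead: `SVR` uses the RBF kernel by default, so we tune its `gamma` and `C` hyperparameters with a randomized search.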

In [120]:
from sklearn.model_selection import RandomizedSearchCV
from scipy.stats import reciprocal, uniform

# Search space for SVR (left at its default kernel).
param_dist = {
    # Log-uniform between 0.001 and 0.1.
    "gamma": reciprocal(0.001, 0.1),
    # scipy's uniform takes (loc, scale): this samples from [1, 11], not [1, 10].
    "C": uniform(1, 10),
}

# 10 sampled candidates x 3 CV folds = 30 fits, run on all available cores.
rnd_cv = RandomizedSearchCV(
    SVR(),
    param_dist,
    n_iter=10,
    cv=3,
    n_jobs=-1,
    random_state=42,
    verbose=2,
)
rnd_cv.fit(X_train_sc, y_train)

Fitting 3 folds for each of 10 candidates, totalling 30 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done  30 out of  30 | elapsed:   47.2s finished


RandomizedSearchCV(cv=3, estimator=SVR(), n_jobs=-1,
                   param_distributions={'C': <scipy.stats._distn_infrastructure.rv_frozen object at 0x000001A16AC7A940>,
                                        'gamma': <scipy.stats._distn_infrastructure.rv_frozen object at 0x000001A16B2DD370>},
                   random_state=42, verbose=2)

In [129]:
# Display the best candidate found by the randomized search (its sampled
# C and gamma values appear in the repr).
rnd_cv.best_estimator_

SVR(C=4.745401188473625, gamma=0.07969454818643928)

In [126]:
# Predict on the held-out test split with the tuned model. With the default
# refit=True the search object forwards predict() to its best_estimator_.
y_pred = rnd_cv.predict(X_test_sc)

In [130]:
# Root mean squared error on the test split; y_pred holds the test-set
# predictions of the best estimator from the randomized search.
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
np.sqrt(mse)

0.5929168385528746