In [37]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.pipeline import make_pipeline
from sklearn.svm import LinearSVC
from sklearn.discriminant_analysis import StandardScaler
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import RandomizedSearchCV
from scipy.stats import loguniform, uniform
from sklearn.svm import SVR


In [3]:
housing = fetch_california_housing(as_frame=True)

In [8]:
X = housing.data
y = housing.target

X.head()

Unnamed: 0,MedInc,HouseAge,AveRooms,AveBedrms,Population,AveOccup,Latitude,Longitude
0,8.3252,41.0,6.984127,1.02381,322.0,2.555556,37.88,-122.23
1,8.3014,21.0,6.238137,0.97188,2401.0,2.109842,37.86,-122.22
2,7.2574,52.0,8.288136,1.073446,496.0,2.80226,37.85,-122.24
3,5.6431,52.0,5.817352,1.073059,558.0,2.547945,37.85,-122.25
4,3.8462,52.0,6.281853,1.081081,565.0,2.181467,37.85,-122.25


In [10]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [14]:
svm_reg = make_pipeline(
  StandardScaler(),
  SVR()
)

cross_val_score(svm_reg, X_train, y_train, scoring="neg_mean_squared_error").mean()

-0.3511224267695225

In [34]:
X_train.reset_index()
y_train.reset_index()

num_small = 2000
np.random.seed = 42
indices = np.random.randint(low=0, high=len(X_train), size=num_small)
X_train_small = X_train.iloc[indices]
y_train_small = y_train.iloc[indices]

param_dist = {
  "svr__C": uniform(1,10),
  "svr__gamma": loguniform(0.001, 0.1),
}

rnd_search = RandomizedSearchCV(svm_reg, param_dist, n_iter=100, cv=5, random_state=42, scoring="neg_mean_squared_error")
rnd_search.fit(X_train_small, y_train_small)


In [35]:
score = pd.DataFrame(rnd_search.cv_results_).sort_values("mean_test_score", ascending=False)
score.head()

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_svr__C,param_svr__gamma,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
69,0.065516,0.000901,0.016062,0.000168,4.636296,0.087814,"{'svr__C': 4.63629602379294, 'svr__gamma': 0.0...",-0.375128,-0.357238,-0.333865,-0.300955,-0.298865,-0.33321,0.030182,1
0,0.06846,0.003978,0.016602,0.000437,4.745401,0.079695,"{'svr__C': 4.745401188473625, 'svr__gamma': 0....",-0.378536,-0.359708,-0.336134,-0.301188,-0.30029,-0.335171,0.031161,2
27,0.070141,0.00221,0.016146,0.000181,6.979,0.069783,"{'svr__C': 6.978999788110851, 'svr__gamma': 0....",-0.38053,-0.359393,-0.334327,-0.302337,-0.301761,-0.33567,0.031106,3
26,0.081243,0.002867,0.016169,0.000133,10.394989,0.06161,"{'svr__C': 10.394989415641891, 'svr__gamma': 0...",-0.380947,-0.359653,-0.332722,-0.30308,-0.302473,-0.335775,0.030977,4
60,0.075306,0.002874,0.015933,0.000102,9.074402,0.06197,"{'svr__C': 9.074401551640625, 'svr__gamma': 0....",-0.382968,-0.360689,-0.335005,-0.304031,-0.303697,-0.337278,0.031221,5


In [36]:
best_estimator = rnd_search.best_estimator_
best_estimator.fit(X_train, y_train)

In [38]:
y_pred = best_estimator.predict(X_test)
mean_squared_error(y_test, y_pred)

0.3474270476011399