In [1]:
import sklearn
import numpy as np
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
houses = fetch_california_housing()
X_train, X_rest, y_train, y_rest = train_test_split(houses.data, houses.target, test_size = 0.30)
X_valid, X_test, y_valid, y_test=train_test_split(X_rest, y_rest, test_size = 0.50)



In [2]:
from sklearn.preprocessing import StandardScaler
scaler=StandardScaler()
X_train_scaled=scaler.fit_transform(X_train.astype(np.float32))
X_valid_scaled=scaler.fit_transform(X_valid.astype(np.float32))
X_test_scaled=scaler.fit_transform(X_test.astype(np.float32))


In [11]:
from sklearn.svm import LinearSVR
svm_reg=LinearSVR(random_state=42)
svm_reg.fit(X_train_scaled,y_train)



LinearSVR(random_state=42)

In [12]:
from sklearn.metrics import mean_squared_error
y_pred=svm_reg.predict(X_train_scaled)
mean_squared_error(y_train,y_pred)

0.6602826610801231

In [13]:
from sklearn.svm import SVR
svm_poly_reg = SVR(kernel="rbf", C=100, epsilon=0.1)
svm_poly_reg.fit(X_train_scaled, y_train)

SVR(C=100)

In [14]:
y_pred_rbf=svm_poly_reg.predict(X_train_scaled)
mean_squared_error(y_train,y_pred_rbf)

0.26789900762441593

In [29]:
from sklearn.model_selection import RandomizedSearchCV
from scipy.stats import reciprocal, uniform
params={"gamma": reciprocal(0.001, 0.1), "C": uniform(1, 10)}
random_search=RandomizedSearchCV(SVR(),param_distributions=params,n_iter=10,verbose=2,cv=3,random_state=42)
random_search.fit(X_train_scaled,y_train)

Fitting 3 folds for each of 10 candidates, totalling 30 fits
[CV] END .....C=4.745401188473625, gamma=0.07969454818643928; total time=   7.5s
[CV] END .....C=4.745401188473625, gamma=0.07969454818643928; total time=   7.6s
[CV] END .....C=4.745401188473625, gamma=0.07969454818643928; total time=   7.4s
[CV] END .....C=8.31993941811405, gamma=0.015751320499779724; total time=   7.3s
[CV] END .....C=8.31993941811405, gamma=0.015751320499779724; total time=   7.8s
[CV] END .....C=8.31993941811405, gamma=0.015751320499779724; total time=   7.3s
[CV] END ....C=2.560186404424365, gamma=0.002051110418843397; total time=   7.4s
[CV] END ....C=2.560186404424365, gamma=0.002051110418843397; total time=   7.3s
[CV] END ....C=2.560186404424365, gamma=0.002051110418843397; total time=   7.0s
[CV] END ....C=1.5808361216819946, gamma=0.05399484409787431; total time=   6.7s
[CV] END ....C=1.5808361216819946, gamma=0.05399484409787431; total time=   6.7s
[CV] END ....C=1.5808361216819946, gamma=0.05399

RandomizedSearchCV(cv=3, estimator=SVR(),
                   param_distributions={'C': <scipy.stats._distn_infrastructure.rv_frozen object at 0x7fd3885c9490>,
                                        'gamma': <scipy.stats._distn_infrastructure.rv_frozen object at 0x7fd3885c9850>},
                   random_state=42, verbose=2)

In [30]:
random_search.best_estimator_


SVR(C=4.745401188473625, gamma=0.07969454818643928)

In [36]:
y_pred=random_search.best_estimator_.predict(X_train_scaled)
print(y_pred)

[3.02377793 2.74272778 3.51699419 ... 2.67156316 2.56558738 2.01982173]


In [35]:
mean_squared_error(y_train,y_pred)

0.3339844021627788

In [39]:
y_pred_test=random_search.best_estimator_.predict(X_test_scaled)
mean_squared_error(y_test,y_pred_test)

4.478340223596296