In [1]:

from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.pipeline import Pipeline, FeatureUnion
from sklearn.decomposition import PCA
from sklearn.feature_selection import SelectKBest
from sklearn.svm import SVR
from sklearn.preprocessing import StandardScaler



In [2]:
# Fetch 
data = fetch_california_housing()
X, y = data.data, data.target

# Split 
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

pca = PCA(n_components=2)

# Univariate 
selection = SelectKBest(k=1)

# Build estimator from PCA and Univariate selection:
combined_features = FeatureUnion([("pca", pca), ("univ_select", selection)])

X_features = combined_features.fit(X_train, y_train).transform(X_train)

svm = SVR()

# Do grid search over k (for feature selection), C and epsilon (for SVM):
pipeline = Pipeline([("features", combined_features), ("svm", svm)])

param_grid = dict(features__pca__n_components=[1, 2, 3],
                  features__univ_select__k=[1, 2],
                  svm__C=[0.1, 1, 10],
                  svm__epsilon=[0.1, 0.2, 0.3])

grid_search = GridSearchCV(pipeline, param_grid=param_grid, verbose=10, n_jobs=-1)
grid_search.fit(X_train, y_train)
print(grid_search.best_estimator_)

Fitting 5 folds for each of 54 candidates, totalling 270 fits
Pipeline(steps=[('features',
                 FeatureUnion(transformer_list=[('pca', PCA(n_components=3)),
                                                ('univ_select',
                                                 SelectKBest(k=1))])),
                ('svm', SVR(C=10, epsilon=0.2))])
