# Custom Ensemble

In [1]:
# Fetch the California housing data and preview its feature table.

import pandas as pd
from sklearn.datasets import fetch_california_housing

# `data` is reused by later cells for the feature matrix and the target.
data = fetch_california_housing()

# DataFrame view of the features, labelled with the dataset's column names;
# the trailing expression renders the first rows as the cell output.
df = pd.DataFrame(data=data.data, columns=data.feature_names)
df.head()

Unnamed: 0,MedInc,HouseAge,AveRooms,AveBedrms,Population,AveOccup,Latitude,Longitude
0,8.3252,41.0,6.984127,1.02381,322.0,2.555556,37.88,-122.23
1,8.3014,21.0,6.238137,0.97188,2401.0,2.109842,37.86,-122.22
2,7.2574,52.0,8.288136,1.073446,496.0,2.80226,37.85,-122.24
3,5.6431,52.0,5.817352,1.073059,558.0,2.547945,37.85,-122.25
4,3.8462,52.0,6.281853,1.081081,565.0,2.181467,37.85,-122.25


In [15]:
#extracting values

from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle

# Feature matrix and regression target taken from the loaded dataset.
X = data.data
y = data.target

# Hold out 10% of the rows for testing. Fixing random_state makes the split,
# and every score computed from it, identical on each Restart & Run All.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.1, random_state=42
)

In [21]:
#creating a custom ensemble model consisting of a custom estimator
from sklearn.base import BaseEstimator, TransformerMixin
import numpy as np

class customEstimator(BaseEstimator, TransformerMixin):
    """Expose a predictor's output as transformer features.

    Wraps any object with ``fit(X, y)`` and ``predict(X)`` so that it can be
    used where a transformer is expected (e.g. inside a ``FeatureUnion``):
    ``transform`` returns the wrapped estimator's predictions.

    Parameters
    ----------
    estimator : object
        The predictor to wrap. It is stored as-is and fitted in place by
        ``fit`` (no copy is made).
    """

    def __init__(self, estimator):
        self.estimator = estimator

    def fit(self, X, y):
        """Fit the wrapped estimator on ``(X, y)`` and return ``self``."""
        self.estimator.fit(X, y)
        return self

    def transform(self, X):
        """Return the wrapped estimator's predictions with one row per sample.

        1-D predictions of length ``n`` become a column of shape ``(n, 1)``.
        Predictions that are already 2-D (multi-output) are returned unchanged;
        transposing them would put outputs on the row axis and break the
        per-sample row alignment that downstream steps rely on.
        """
        predictions = np.asarray(self.estimator.predict(X))
        if predictions.ndim <= 1:
            return predictions.reshape(-1, 1)
        return predictions
        

In [26]:
#creating the model

from sklearn.neighbors import KNeighborsRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.metrics import r2_score
from sklearn.preprocessing import StandardScaler

# k-NN regressor preceded by feature standardization, bundled as one pipeline
# so the scaler is refit together with the regressor.
knn = Pipeline([
    ('scaler', StandardScaler()),
    ('knn', KNeighborsRegressor())
])

# Grid search over the number of neighbours (5, 10, 15, 20, 25) with 5-fold CV.
# Parameters of a pipeline step are addressed as '<step name>__<parameter>'
# (double underscore); a single underscore is rejected as an invalid parameter
# when the search is fitted. The search is only configured here, not fitted.
gs = GridSearchCV(knn,
                  {'knn__n_neighbors': np.arange(5, 26, 5)},
                  cv = 5,
                  n_jobs = 4,
                  scoring = 'neg_mean_squared_error'
                 )


In [24]:
#implementing feature union

from sklearn.pipeline import FeatureUnion
from sklearn.tree import DecisionTreeRegressor
from sklearn.linear_model import LinearRegression

# Each customEstimator turns a regressor's predictions into a feature column;
# FeatureUnion concatenates the columns from both base models.
ensemble = FeatureUnion([
    # NOTE: the step is named 'rf' but wraps a single decision tree; the name
    # is kept so existing parameter paths (ensemble__rf__...) still resolve.
    # random_state is fixed because tie-breaking between equally good splits
    # is otherwise not guaranteed to be deterministic.
    ('rf', customEstimator(DecisionTreeRegressor(max_depth=5, min_samples_split=45, random_state=42))),
    ('knn', customEstimator(KNeighborsRegressor(n_neighbors=10)))
])

# Standardize the inputs, compute the base-model predictions, then blend them
# with a linear model that has no intercept term.
model  = Pipeline([
    ('scaler', StandardScaler()),
    ('ensemble', ensemble),
    ('combine', LinearRegression(fit_intercept= False))
])

model.fit(X_train, y_train)

# R^2 of the blended predictions on the held-out test split.
print(r2_score(y_test, model.predict(X_test)))



0.7121436880014618
