In [103]:
import numpy as np
import pandas as pd

from sklearn import preprocessing
from sklearn.base import BaseEstimator, TransformerMixin, ClassifierMixin
from sklearn.linear_model import LassoLarsCV
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.pipeline import make_pipeline
from sklearn.utils import check_array

In [140]:
class StackingEstimator(BaseEstimator):
    
    def __init__(self, estimator):
        self.estimator = estimator
        
    def fit(self, X, y=None, **fit_params):
        self.estimator.fit(X, y, **fit_params)
        return self
    
    def transform(self, X):
        X = check_array(X)
        X_transformed = np.copy(X)
        X_transformed = np.hstack((np.reshape(self.estimator.predict(X), (-1, 1)), X_transformed))
        print(X_transformed.shape)
        print(X_transformed)
        return X_transformed

In [141]:
train = pd.read_csv("data/train.csv", index_col="ID")
test = pd.read_csv("data/test.csv", index_col="ID")

In [142]:
for c in train.columns:
    if train[c].dtype == "object":
        lbl = preprocessing.LabelEncoder()
        lbl.fit(list(train[c].values) + list(test[c].values))
        train[c] = lbl.transform(list(train[c].values))
        test[c] = lbl.transform(list(test[c].values))
        
y_train = train["y"].values

### Stacked Pipeline - Automatic

In [144]:
stacked_pipeline = make_pipeline(
    StackingEstimator(estimator=LassoLarsCV(normalize=True)),
    StackingEstimator(estimator=GradientBoostingRegressor(learning_rate=0.001, loss="huber", max_depth=3, max_features=0.55, min_samples_leaf=18, min_samples_split=14, subsample=0.7, random_state=1)),
    LassoLarsCV()
)

In [145]:
stacked_pipeline.fit(train.drop("y", axis=1), y_train)

  g1 = arrayfuncs.min_pos((C - Cov) / (AA - corr_eq_dir + tiny))
  g2 = arrayfuncs.min_pos((C + Cov) / (AA + corr_eq_dir + tiny))


(4209, 377)
[[ 104.48433317   37.           23.         ...,    0.            0.            0.        ]
 [  96.12049845   37.           21.         ...,    0.            0.            0.        ]
 [  79.98099226   24.           24.         ...,    0.            0.            0.        ]
 ..., 
 [ 109.14565988   10.           23.         ...,    0.            0.            0.        ]
 [  92.93850915   11.           19.         ...,    0.            0.            0.        ]
 [  95.75150277   52.           19.         ...,    0.            0.            0.        ]]
(4209, 378)
[[  99.88477548  104.48433317   37.         ...,    0.            0.            0.        ]
 [  98.540056     96.12049845   37.         ...,    0.            0.            0.        ]
 [  97.11279097   79.98099226   24.         ...,    0.            0.            0.        ]
 ..., 
 [ 100.37431896  109.14565988   10.         ...,    0.            0.            0.        ]
 [  98.60704572   92.93850915   11.      



Pipeline(steps=[('stackingestimator-1', StackingEstimator(estimator=LassoLarsCV(copy_X=True, cv=None, eps=2.2204460492503131e-16,
      fit_intercept=True, max_iter=500, max_n_alphas=1000, n_jobs=1,
      normalize=True, positive=False, precompute='auto', verbose=False))), ('stackingestimator-2', StackingEst...x_n_alphas=1000, n_jobs=1,
      normalize=True, positive=False, precompute='auto', verbose=False))])

In [154]:
results = stacked_pipeline.predict(train.drop("y", axis=1))

(4209, 377)
[[ 104.48433317   37.           23.         ...,    0.            0.            0.        ]
 [  96.12049845   37.           21.         ...,    0.            0.            0.        ]
 [  79.98099226   24.           24.         ...,    0.            0.            0.        ]
 ..., 
 [ 109.14565988   10.           23.         ...,    0.            0.            0.        ]
 [  92.93850915   11.           19.         ...,    0.            0.            0.        ]
 [  95.75150277   52.           19.         ...,    0.            0.            0.        ]]
(4209, 378)
[[  99.88477548  104.48433317   37.         ...,    0.            0.            0.        ]
 [  98.540056     96.12049845   37.         ...,    0.            0.            0.        ]
 [  97.11279097   79.98099226   24.         ...,    0.            0.            0.        ]
 ..., 
 [ 100.37431896  109.14565988   10.         ...,    0.            0.            0.        ]
 [  98.60704572   92.93850915   11.      

In [155]:
results

array([ 107.02741057,   93.58506525,   79.25021499, ...,  111.93607051,
         94.22134843,   94.55766098])

### Stacked Pipeline - Manual

In [132]:
X = train.drop("y", axis=1)
estimator = LassoLarsCV(normalize=True)
estimator.fit(X, y_train)

  g1 = arrayfuncs.min_pos((C - Cov) / (AA - corr_eq_dir + tiny))
  g2 = arrayfuncs.min_pos((C + Cov) / (AA + corr_eq_dir + tiny))


LassoLarsCV(copy_X=True, cv=None, eps=2.2204460492503131e-16,
      fit_intercept=True, max_iter=500, max_n_alphas=1000, n_jobs=1,
      normalize=True, positive=False, precompute='auto', verbose=False)

In [148]:
X = check_array(X)

In [149]:
X = check_array(X)
X_transformed = np.copy(X)
X_transformed = np.hstack((np.reshape(estimator.predict(X), (-1, 1)), X_transformed))

In [150]:
X_transformed

array([[ 104.48433317,   37.        ,   23.        , ...,    0.        ,
           0.        ,    0.        ],
       [  96.12049845,   37.        ,   21.        , ...,    0.        ,
           0.        ,    0.        ],
       [  79.98099226,   24.        ,   24.        , ...,    0.        ,
           0.        ,    0.        ],
       ..., 
       [ 109.14565988,   10.        ,   23.        , ...,    0.        ,
           0.        ,    0.        ],
       [  92.93850915,   11.        ,   19.        , ...,    0.        ,
           0.        ,    0.        ],
       [  95.75150277,   52.        ,   19.        , ...,    0.        ,
           0.        ,    0.        ]])

In [151]:
X_transformed.shape

(4209, 377)

In [152]:
estimator_2 = GradientBoostingRegressor(learning_rate=0.001, loss="huber", max_depth=3, max_features=0.55, min_samples_leaf=18, min_samples_split=14, subsample=0.7, random_state=1)
estimator_2.fit(X_transformed, y_train)
X_transformed2 = np.copy(X_transformed)
X_transformed2 = np.hstack((np.reshape(estimator_2.predict(X_transformed), (-1, 1)), X_transformed2))

In [153]:
X_transformed2

array([[  99.88477548,  104.48433317,   37.        , ...,    0.        ,
           0.        ,    0.        ],
       [  98.540056  ,   96.12049845,   37.        , ...,    0.        ,
           0.        ,    0.        ],
       [  97.11279097,   79.98099226,   24.        , ...,    0.        ,
           0.        ,    0.        ],
       ..., 
       [ 100.37431896,  109.14565988,   10.        , ...,    0.        ,
           0.        ,    0.        ],
       [  98.60704572,   92.93850915,   11.        , ...,    0.        ,
           0.        ,    0.        ],
       [  98.63825961,   95.75150277,   52.        , ...,    0.        ,
           0.        ,    0.        ]])

In [137]:
X_transformed2.shape

(4209, 378)

In [156]:
estimator_3 = LassoLarsCV()
estimator_3.fit(X_transformed2, y_train)
estimator_3.predict(X_transformed2)

  g1 = arrayfuncs.min_pos((C - Cov) / (AA - corr_eq_dir + tiny))
  g2 = arrayfuncs.min_pos((C + Cov) / (AA + corr_eq_dir + tiny))


array([ 107.02741057,   93.58506525,   79.25021499, ...,  111.93607051,
         94.22134843,   94.55766098])