# End To End

This notebook features examples of advanced end-to-end pipelines that really leverage NumerBlox's power. Consider it a testing ground for how well NumerBlox integrates with scikit-learn and associated libraries.

## 0. Load data

In [1]:
from numerblox.numerframe import create_numerframe

# Load the 5-era parquet fixture (per its filename) from the repository's test assets.
# The path is relative, so it resolves against the kernel's current working directory.
df = create_numerframe("../tests/test_assets/train_int8_5_eras.parquet")

In [6]:
# Model inputs: unpack the (features, target) pair returned for multi_target=False.
X, y = df.get_feature_target_pair(multi_target=False)

# Names of the FNCv3 columns, used further down to build a column selector.
fncv3_cols = list(df.get_fncv3_features.columns)

# Era and feature data, kept around for the (currently disabled) neutralization fit call.
eras, features = df.get_era_data, df.get_feature_data

## 1. Single Target 5-fold weighted XGBoost with feature neutralization.

In [3]:
# %pip install xgboost scikit-lego

In [4]:
from xgboost import XGBRegressor
from sklego.preprocessing import ColumnSelector
from sklearn.model_selection import TimeSeriesSplit
from sklearn.pipeline import make_pipeline, make_union

from numerblox.preprocessing import GroupStatsPreProcessor
from numerblox.meta import CrossValEstimator
from numerblox.ensemble import NumeraiEnsemble
from numerblox.neutralizers import FeatureNeutralizer

# --- Preprocessing -----------------------------------------------------------
# Two branches joined by make_union (a FeatureUnion), so each branch receives
# the same input and their outputs are concatenated:
#   * GroupStatsPreProcessor configured for the 'sunshine' and 'rain' groups
#   * ColumnSelector keeping the columns listed in `fncv3_cols`
gpp = GroupStatsPreProcessor(groups=["sunshine", "rain"])
fncv3_selector = ColumnSelector(fncv3_cols)
preproc_pipe = make_union(gpp, fncv3_selector)

# --- Model -------------------------------------------------------------------
# Default-parameter XGBRegressor wrapped in CrossValEstimator with a 5-split
# TimeSeriesSplit, followed by NumeraiEnsemble(donate_weighted=True) and a
# FeatureNeutralizer at proportion 0.5.
xgb = XGBRegressor()
cve = CrossValEstimator(
    estimator=xgb,
    cv=TimeSeriesSplit(n_splits=5),
)
ens = NumeraiEnsemble(donate_weighted=True)
neut = FeatureNeutralizer(proportion=0.5)
model_pipe = make_pipeline(cve, ens, neut)

# --- Full pipeline -----------------------------------------------------------
# make_pipeline names each step after its lowercased class name, so the two
# top-level steps here are `featureunion` and `pipeline`.
full_pipe = make_pipeline(preproc_pipe, model_pipe)
full_pipe  # bare last expression so the notebook renders the pipeline

In [11]:
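# Train full model
# NOTE(review): left disabled. `full_pipe` nests `model_pipe` under the auto-generated
# step name `pipeline`, so the fit-parameter prefix probably needs to be
# `pipeline__featureneutralizer__...` instead of `featureneutralizer__...` — confirm before enabling.
# full_pipe.fit(X, y, featureneutralizer__features=features, featureneutralizer__eras=eras)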

In [None]:
# full_pipe.predict(X)