# 0. Dependencies

In [None]:
import pandas as pd
from xgboost import XGBRegressor

from numerblox.misc import Key
from numerblox.download import NumeraiClassicDownloader
from numerblox.evaluation import NumeraiClassicEvaluator
from numerblox.numerframe import create_numerframe
from numerblox.prediction_loaders import ExamplePredictions
from numerblox.submission import NumeraiClassicSubmitter

# 1. Download

`NumeraiClassicDownloader` allows you to download training and inference data with a single line of code.

In [None]:
# All downloaded files are stored under the local "data" directory.
downloader = NumeraiClassicDownloader("data")
# Fetch the training and validation files into the "train_val" subfolder.
dataset_version = "5.0"
downloader.download_training_data("train_val", version=dataset_version)

# 2. Train

We use a custom Pandas DataFrame data structure called `NumerFrame`, created here with `create_numerframe`, to easily parse the Numerai data. Using `NumerFrame` is completely optional, but it greatly simplifies building Numerai pipelines and experimenting with Numerai data.

We then fit a simple XGBoost regressor model.

In [None]:
# Model with library-default hyperparameters.
xgb = XGBRegressor()

# Load the training split as a NumerFrame.
df = create_numerframe("data/train_val/train.parquet")

# Draw 100 random rows (presumably to keep this example fast), then split
# them into features and a single target.
train_sample = df.sample(100)
X, y = train_sample.get_feature_target_pair(multi_target=False)

# Fit on the raw `.values` of both objects rather than the frames themselves.
xgb.fit(X.values, y.values)

# 3. Evaluate

`NumeraiClassicEvaluator` will calculate all relevant Numerai metrics. 

`ExamplePredictions` is a NumerBlox class that handles downloading of example predictions for you. This object, like all other NumerBlox processors, can also be used end to end in a scikit-learn pipeline.

In [None]:
# Only the first 100 validation rows are scored in this example.
val_df = create_numerframe("data/train_val/validation.parquet")[:100]

# Model predictions on the validation features.
val_df["prediction"] = xgb.predict(val_df.get_feature_data)

# Example predictions serve as the comparison column for the evaluator.
# NOTE(review): the first 100 values are attached positionally, which assumes
# the example predictions share the row order of validation.parquet - confirm.
example_loader = ExamplePredictions("v5.0/validation_example_preds.parquet")
example_frame = example_loader.fit_transform(None)
val_df["example_preds"] = example_frame["prediction"].values[:100]

evaluator = NumeraiClassicEvaluator()
metrics = evaluator.full_evaluation(
    val_df,
    example_col="example_preds",
    pred_cols=["prediction"],
    target_col="target",
)

# 4. Inference

Here again `NumeraiClassicDownloader` and `NumerFrame` are leveraged to simplify inference.

In [None]:
# Fetch the live data for the current round into data/current_round.
downloader.download_inference_data("current_round", version="5.0")

live_path = "data/current_round/live.parquet"
live_df = create_numerframe(file_path=live_path)

# Same feature/target split as used for training; only the features are
# passed to the model below.
live_pair = live_df.get_feature_target_pair(multi_target=False)
live_X, live_y = live_pair
preds = xgb.predict(live_X)

# 5. Submission

`NumeraiClassicSubmitter` takes care of data integrity checks and submission to Numerai for you. Credentials are conveniently initialized with a `Key` object.

In [None]:
# Placeholder credentials: replace both with your own Numerai API key pair.
NUMERAI_PUBLIC_ID = "YOUR_PUBLIC_ID"
NUMERAI_SECRET_KEY = "YOUR_SECRET_KEY"
key = Key(
    pub_id=NUMERAI_PUBLIC_ID,
    secret_key=NUMERAI_SECRET_KEY,
)
submitter = NumeraiClassicSubmitter(
    directory_path="sub_current_round",
    key=key,
)

# Prediction frame: shares the index of the live data and holds a single
# "prediction" column, which is the column named in `cols` below.
pred_dataf = pd.DataFrame(
    preds,
    index=live_df.index,
    columns=["prediction"],
)

# The call below is left commented out because it only works with valid key
# credentials and an existing model_name.
# submitter.full_submission(dataf=pred_dataf,
#                           cols="prediction",
#                           file_name="submission.csv",
#                           model_name="MY_MODEL_NAME")

# 6. Clean up environment (optional)

All downloaders and submitters have functionality to remove their own base directory. This is especially convenient if you are running a daily inference pipeline on your server or a cloud VM.

In [None]:
# Remove the base directory of the downloader first, then of the submitter.
for io_handler in (downloader, submitter):
    io_handler.remove_base_directory()