In [1]:
%load_ext autoreload
%autoreload 2
%load_ext nb_black
%load_ext lab_black

<IPython.core.display.Javascript object>

In [2]:
# default_exp submission

<IPython.core.display.Javascript object>

# Submission

These objects allow you to easily and reliably submit predictions for Numerai Classic and Numerai Signals.

The main objects are:
1. `NumeraiClassicSubmittor`
2. `NumeraiSignalsSubmittor`

In [3]:
# hide
from nbdev.showdoc import *

<IPython.core.display.Javascript object>

In [4]:
# export
import os
import uuid
import numpy as np
import pandas as pd
from typing import Union
from copy import deepcopy
from random import choices
from tqdm.auto import tqdm
from datetime import datetime
from abc import abstractmethod
from typeguard import typechecked
from string import ascii_uppercase
from rich import print as rich_print
from numerapi import NumerAPI, SignalsAPI
from dateutil.relativedelta import relativedelta, FR

from numerai_blocks.download import BaseIO
from numerai_blocks.key import Key

<IPython.core.display.Javascript object>

## 1. BaseSubmittor

`BaseSubmittor` handles all submission logic common to Numerai Classic and Numerai Signals. Under the hood directory logic is handled by `BaseIO`.
Each submittor should inherit from `BaseSubmittor` and implement the `.save_csv` method.

In [5]:
# export
@typechecked
class BaseSubmittor(BaseIO):
    def __init__(self, directory_path: str, api: Union[NumerAPI, SignalsAPI]):
        super().__init__(directory_path)
        self.api = api

    @abstractmethod
    def save_csv(
        self,
        dataf: pd.DataFrame,
        file_name: str,
        cols: Union[str, list],
        *args,
        **kwargs,
    ):
        """
        For Numerai Classic: Save index column + 'cols' (targets) to CSV.
        For Numerai Signals: Save ticker, friday_date, data_type and signal columns to CSV.
        """
        ...

    def upload_predictions(self, file_name: str, model_name: str, *args, **kwargs):
        """
        Upload CSV file to Numerai for given model name.
        :param file_name: File name/path relative to directory_path.
        :param model_name: Lowercase raw model name (For example, 'integration_test').
        """
        full_path = str(self.dir / file_name)
        model_id = self._get_model_id(model_name=model_name)
        api_type = str(self.api.__class__.__name__)
        rich_print(
            f":airplane: {api_type}: Uploading predictions from '{full_path}' for model [bold blue]'{model_name}'[/bold blue] (model_id='{model_id}') :airplane:"
        )
        self.api.upload_predictions(
            file_path=full_path, model_id=model_id, *args, **kwargs
        )
        rich_print(
            f":thumbs_up: {api_type} submission of '{full_path}' for [bold blue]{model_name}[/bold blue] is successful! :thumbs_up:"
        )

    def full_submission(
        self,
        dataf: pd.DataFrame,
        file_name: str,
        model_name: str,
        cols: Union[str, list],
        *args,
        **kwargs,
    ):
        """
        Save DataFrame to csv and upload predictions through API.
        *args, **kwargs are passed to numerapi API.
        """
        self.save_csv(dataf=dataf, file_name=file_name, cols=cols)
        self.upload_predictions(
            file_name=file_name, model_name=model_name, *args, **kwargs
        )

    def combine_csvs(self, csv_paths: list,
                     aux_cols: list,
                     era_col: str = None,
                     pred_col: str = 'prediction') -> pd.DataFrame:
        """
        Read in csv files and combine all predictions with a rank mean.
        Multi-target predictions will be averaged out.
        :param csv_paths: List of full paths to .csv prediction files.
        :param aux_cols: ['id'] for Numerai Classic and
        For example ['ticker', 'last_friday', 'data_type'] for Numerai Signals.
        All aux_cols will be stored as index.
        :param era_col: Column indicating era ('era' or 'last_friday').
        Will be used for Grouping the rank mean if given. Skip groupby if no era_col provided.
        :param pred_col: 'prediction' for Numerai Classic and 'signal' for Numerai Signals.
        """
        all_datafs = [pd.read_csv(path, index_col=aux_cols) for path in tqdm(csv_paths)]
        final_dataf = pd.concat(all_datafs, axis="columns")
        # Remove issue of duplicate columns
        numeric_cols = final_dataf.select_dtypes(include=np.number).columns
        final_dataf.rename({k: str(v) for k, v in zip(numeric_cols, range(len(numeric_cols)))},
                           axis=1,
                           inplace=True)
        # Combine all numeric columns with rank mean
        num_dataf = final_dataf.select_dtypes(include=np.number)
        num_dataf = num_dataf.groupby(era_col) if era_col else num_dataf
        final_dataf[pred_col] = num_dataf.rank(pct=True, method="first").mean(axis=1)
        return final_dataf[[pred_col]]

    def _get_model_id(self, model_name: str) -> str:
        """
        Get ID needed for prediction uploading.
        :param model_name: Raw lowercase model name
        of Numerai model that you have access to.
        """
        return self.get_model_mapping[model_name]

    @property
    def get_model_mapping(self) -> dict:
        """Mapping between raw model names and model IDs."""
        return self.api.get_models()

    def _check_value_range(self, dataf: pd.DataFrame, cols: Union[str, list]):
        """ Check if all predictions are in range (0...1). """
        cols = [cols] if isinstance(cols, str) else cols
        for col in cols:
            if not dataf[col].between(0, 1).all():
                min_val, max_val = dataf[col].min(), dataf[col].max()
                raise ValueError(
                    f"Values must be between 0 and 1. \
Found min value of '{min_val}' and max value of '{max_val}' for column '{col}'."
                )

    def __call__(
            self,
            dataf: pd.DataFrame,
            file_name: str,
            model_name: str,
            cols: Union[str, list],
            *args,
            **kwargs,
    ):
        """
        The most common use case will be to create a CSV and submit it immediately after that.
        full_submission handles this.
        """
        self.full_submission(
            dataf=dataf,
            file_name=file_name,
            model_name=model_name,
            cols=cols,
            *args,
            **kwargs,
        )


<IPython.core.display.Javascript object>

## 2. Numerai Classic

For Numerai Classic submissions. Uses [NumerAPI](https://numerapi.readthedocs.io/en/latest/_modules/numerapi/numerapi.html) under the hood.

Note that using submittors requires a `Key` object. For more information about this, see section 8.

In [6]:
# export
@typechecked
class NumeraiClassicSubmittor(BaseSubmittor):
    """
    Submit for Numerai Classic.
    :param directory_path: Base directory to save and read prediction files from.
    :param key: Key object (numerai-blocks.key.Key) containing valid credentials for Numerai Classic.
    *args, **kwargs will be passed to NumerAPI initialization.
    """
    def __init__(self, directory_path: str, key: Key, *args, **kwargs):
        api = NumerAPI(public_id=key.pub_id, secret_key=key.secret_key, *args, **kwargs)
        super().__init__(
            directory_path=directory_path, api=api
        )

    def save_csv(
            self,
            dataf: pd.DataFrame,
            file_name: str,
            cols: str = "prediction",
            *args,
            **kwargs,
    ):
        """
        :param dataf: DataFrame which should have at least the following columns:
        1. id (as index column)
        2. cols (for example, 'prediction_mymodel').
        :param file_name: .csv file path.
        :param cols: Prediction column name.
        For example, 'prediction' or 'prediction_mymodel'.
        """
        sub_dataf = deepcopy(dataf)
        self._check_value_range(dataf=sub_dataf, cols=cols)

        full_path = str(self.dir / file_name)
        rich_print(
            f":page_facing_up: Saving predictions CSV to '{full_path}'. :page_facing_up:"
        )
        sub_dataf.loc[:, 'prediction'] = sub_dataf[cols]
        sub_dataf.loc[:, 'prediction'].to_csv(full_path, *args, **kwargs)

<IPython.core.display.Javascript object>

### NumeraiClassicSubmittor tests

In [7]:
# Initialization (Random credentials)
test_dir = "test_sub"
classic_key = Key(pub_id="UFVCTElDX0lE", secret_key="U1VQRVJfU0VDUkVUX0tFWQ==")
num_sub = NumeraiClassicSubmittor(directory_path=test_dir, key=classic_key)
assert num_sub.dir.is_dir()

# Create random dataframe
n_rows, n_columns = 100, 20
targets = [f"prediction_{i}" for i in range(n_columns)]
test_dataf = pd.DataFrame(np.random.uniform(size=(n_rows, n_columns)), columns=targets)
test_dataf["id"] = [uuid.uuid4() for _ in range(n_rows)]
test_dataf = test_dataf.set_index("id")
test_dataf.head(2)

Unnamed: 0_level_0,prediction_0,prediction_1,prediction_2,prediction_3,prediction_4,prediction_5,prediction_6,prediction_7,prediction_8,prediction_9,prediction_10,prediction_11,prediction_12,prediction_13,prediction_14,prediction_15,prediction_16,prediction_17,prediction_18,prediction_19
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
e3319ed0-f6da-4ac3-a6ff-b1dd937d375e,0.958302,0.530125,0.849753,0.594218,0.534401,0.080945,0.064195,0.362969,0.24688,0.777748,0.657266,0.12083,0.319235,0.187126,0.331809,0.355898,0.76003,0.913449,0.658361,0.390551
38ed1b4b-505d-4274-bfdb-58b3e3daad2a,0.679482,0.757334,0.334105,0.640353,0.730148,0.71325,0.032313,0.820604,0.326594,0.227624,0.848242,0.152635,0.068939,0.799661,0.837063,0.656871,0.708721,0.233305,0.792144,0.362249


<IPython.core.display.Javascript object>

In [8]:
file_name = "test.csv"
num_sub.save_csv(dataf=test_dataf, file_name=file_name, cols=targets)
num_sub.save_csv(dataf=test_dataf, file_name="test2.csv", cols=targets)
pd.read_csv(f"{test_dir}/{file_name}").head(2)

2022-02-28 20:56:29,261 INFO numexpr.utils: Note: NumExpr detected 16 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8.
2022-02-28 20:56:29,261 INFO numexpr.utils: NumExpr defaulting to 8 threads.


Unnamed: 0,id,prediction_0,prediction_1,prediction_2,prediction_3,prediction_4,prediction_5,prediction_6,prediction_7,prediction_8,...,prediction_10,prediction_11,prediction_12,prediction_13,prediction_14,prediction_15,prediction_16,prediction_17,prediction_18,prediction_19
0,e3319ed0-f6da-4ac3-a6ff-b1dd937d375e,0.958302,0.530125,0.849753,0.594218,0.534401,0.080945,0.064195,0.362969,0.24688,...,0.657266,0.12083,0.319235,0.187126,0.331809,0.355898,0.76003,0.913449,0.658361,0.390551
1,38ed1b4b-505d-4274-bfdb-58b3e3daad2a,0.679482,0.757334,0.334105,0.640353,0.730148,0.71325,0.032313,0.820604,0.326594,...,0.848242,0.152635,0.068939,0.799661,0.837063,0.656871,0.708721,0.233305,0.792144,0.362249


<IPython.core.display.Javascript object>

In [9]:
combined = num_sub.combine_csvs(["test_sub/test.csv", "test_sub/test2.csv"], aux_cols=['id'])
assert combined.columns == ['prediction']
combined.head(2)

  0%|          | 0/2 [00:00<?, ?it/s]

Unnamed: 0_level_0,prediction
id,Unnamed: 1_level_1
e3319ed0-f6da-4ac3-a6ff-b1dd937d375e,0.498
38ed1b4b-505d-4274-bfdb-58b3e3daad2a,0.5455


<IPython.core.display.Javascript object>

In [10]:
def test_signal_validity(
        submittor: NumeraiClassicSubmittor, dataf: pd.DataFrame
):
    """ Test value range of prediction. """
    try:
        invalid_signal = deepcopy(dataf)
        invalid_signal.iloc[0]["prediction_0"] += 10
        submittor.save_csv(
            invalid_signal,
            file_name="should_not_save.csv",
            cols=list(invalid_signal.columns),
        )
    except ValueError:
        return True
    return False

assert test_signal_validity(num_sub, test_dataf)

<IPython.core.display.Javascript object>

In [11]:
# Uncomment to save CSV and upload predictions
# num_sub.full_submission(dataf=test_dataf, file_name='test.csv', cols=targets, model_name="test")

<IPython.core.display.Javascript object>

In [12]:
# Remove contents after submitting is successful
num_sub.remove_base_directory()
assert not os.path.exists(test_dir)

<IPython.core.display.Javascript object>

## 3. Numerai Signals

For Numerai Signals submissions. Uses [SignalsAPI](https://numerapi.readthedocs.io/en/latest/_modules/numerapi/signalsapi.html) under the hood.

In [13]:
# export
@typechecked
class NumeraiSignalsSubmittor(BaseSubmittor):
    """
    Submit for Numerai Signals
    :param directory_path: Base directory to save and read prediction files from.
    :param key: Key object (numerai-blocks.key.Key) containing valid credentials for Numerai Signals.
    *args, **kwargs will be passed to SignalsAPI initialization.
    """

    def __init__(self, directory_path: str, key: Key, *args, **kwargs):
        api = SignalsAPI(
            public_id=key.pub_id, secret_key=key.secret_key, *args, **kwargs
        )
        super().__init__(
            directory_path=directory_path, api=api
        )
        self.supported_ticker_formats = [
            "cusip",
            "sedol",
            "ticker",
            "numerai_ticker",
            "bloomberg_ticker",
        ]

    def save_csv(
        self, dataf: pd.DataFrame, file_name: str, cols: list = None, *args, **kwargs
    ):
        """
        :param dataf: DataFrame which should have at least the following columns:
         1. One of supported ticker formats (cusip, sedol, ticker, numerai_ticker or bloomberg_ticker)
         2. signal (Values between 0 and 1 (exclusive))
         Additional columns for if you include validation data (optional):
         3. friday_date (YYYYMMDD format date indication)
         4. data_type ('val' and 'live' partitions)

         :param file_name: .csv file path.
         :param cols: All cols that should be passed to CSV. Defaults to 2 standard columns.
          ('bloomberg_ticker', 'signal')
        """
        if not cols:
            cols = ["bloomberg_ticker", "signal"]

        self._check_ticker_format(cols=cols)
        self._check_value_range(dataf=dataf, cols="signal")

        full_path = str(self.dir / file_name)
        rich_print(
            f":page_facing_up: Saving Signals predictions CSV to '{full_path}'. :page_facing_up:"
        )
        dataf.loc[:, cols].reset_index(drop=True).to_csv(
            full_path, index=False, *args, **kwargs
        )

    def _check_ticker_format(self, cols: list):
        """ Check for valid ticker format. """
        valid_tickers = set(cols).intersection(set(self.supported_ticker_formats))
        if not valid_tickers:
            raise NotImplementedError(
                f"No supported ticker format in {cols}). \
Supported: '{self.supported_ticker_formats}'"
            )


<IPython.core.display.Javascript object>

### NumeraiSignalsSubmittor tests

In [14]:
# Initialization (Random credentials)
test_dir_signals = "test_sub_signals"
signals_key = Key(pub_id="UFVCTElDX0lE", secret_key="U1VQRVJfU0VDUkVUX0tFWQ==")
signals_sub = NumeraiSignalsSubmittor(directory_path=test_dir_signals, key=signals_key)
assert signals_sub.dir.is_dir()

<IPython.core.display.Javascript object>

In [15]:
def create_random_signals_dataf(n_rows=5000):
    signals_test_dataf = pd.DataFrame(
        np.random.uniform(size=(n_rows, 1)), columns=["signal"]
    )
    signals_test_dataf["ticker"] = [
        "".join(choices(ascii_uppercase, k=4)) for _ in range(n_rows)
    ]
    last_friday = str((datetime.now() + relativedelta(weekday=FR(-1))).date()).replace("-", "")
    signals_test_dataf['last_friday'] = last_friday
    signals_test_dataf['data_type'] = 'live'
    return signals_test_dataf

signals_test_dataf = create_random_signals_dataf()
signals_test_dataf.head(2)

Unnamed: 0,signal,ticker,last_friday,data_type
0,0.715538,MBAK,20220225,live
1,0.144868,ULIW,20220225,live


<IPython.core.display.Javascript object>

In [16]:
signals_cols = ["signal", "ticker", "data_type", "last_friday"]
file_name = "signals_test.csv"
signals_sub.save_csv(dataf=signals_test_dataf, file_name=file_name, cols=signals_cols)
signals_sub.save_csv(dataf=signals_test_dataf, file_name="signals_test2.csv", cols=signals_cols)
pd.read_csv(f"{test_dir_signals}/{file_name}").head(2)

Unnamed: 0,signal,ticker,data_type,last_friday
0,0.715538,MBAK,live,20220225
1,0.144868,ULIW,live,20220225


<IPython.core.display.Javascript object>

In [17]:
combined_signals = signals_sub.combine_csvs(csv_paths=["test_sub_signals/signals_test.csv",
                                               "test_sub_signals/signals_test2.csv"],
                                    aux_cols=['ticker', 'last_friday', 'data_type'],
                                    era_col='last_friday',
                                    pred_col='signal')
assert combined_signals.columns == ['signal']
combined_signals.head(2)

  0%|          | 0/2 [00:00<?, ?it/s]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,signal
ticker,last_friday,data_type,Unnamed: 3_level_1
MBAK,20220225,live,0.7238
ULIW,20220225,live,0.1454


<IPython.core.display.Javascript object>

Saving Signals CSV should fail if there is no valid ticker column or if `signal` has values outside the range $(0...1)$.

In [18]:
def test_signal_validity(
    submittor: NumeraiSignalsSubmittor, signals_dataf: pd.DataFrame
):
    """ Test value range of signal. """
    try:
        invalid_signal = deepcopy(signals_dataf)
        invalid_signal.loc[0, "signal"] += 10
        submittor.save_csv(
            invalid_signal,
            file_name="should_not_save.csv",
            cols=list(invalid_signal.columns),
        )
    except ValueError:
        return True
    return False


def test_ticker_validity(
    submittor: NumeraiSignalsSubmittor, signals_dataf: pd.DataFrame
):
    """ Test safeguard if ticker column is not valid. """
    try:
        invalid_ticker = deepcopy(signals_dataf)
        invalid_ticker = invalid_ticker.rename(
            {"ticker": "not_a_valid_ticker_format"}, axis=1
        )
        submittor.save_csv(
            invalid_ticker,
            file_name="should_not_save.csv",
            cols=list(invalid_ticker.columns),
        )
    except NotImplementedError:
        return True
    return False

assert test_signal_validity(signals_sub, signals_test_dataf)
assert test_ticker_validity(signals_sub, signals_test_dataf)

<IPython.core.display.Javascript object>

In [19]:
# Uncomment to save CSV and upload predictions
# signals_sub.full_submission(dataf=signals_test_dataf, file_name='signals_test.csv', cols=signals_cols, model_name="test")

<IPython.core.display.Javascript object>

In [20]:
# Remove contents after submission is successful
signals_sub.remove_base_directory()
assert not os.path.exists(test_dir_signals)

<IPython.core.display.Javascript object>

------------------------------------------------------------

In [21]:
# hide
# Run this cell to sync all changes with library
from nbdev.export import notebook2script

notebook2script()

Converted 00_misc.ipynb.
Converted 01_download.ipynb.
Converted 02_numerframe.ipynb.
Converted 03_preprocessing.ipynb.
Converted 04_model.ipynb.
Converted 05_postprocessing.ipynb.
Converted 06_modelpipeline.ipynb.
Converted 07_evaluation.ipynb.
Converted 08_key.ipynb.
Converted 09_submission.ipynb.
Converted 10_staking.ipynb.
Converted index.ipynb.


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>