In [1]:
# autoreload in mode 2, so edits to imported modules are picked up live
%load_ext autoreload
%autoreload 2
# Load the nb_black and lab_black extensions
# NOTE(review): presumably only the one matching the active frontend is needed — confirm
%load_ext nb_black
%load_ext lab_black

<IPython.core.display.Javascript object>

In [2]:
# default_exp submission

<IPython.core.display.Javascript object>

# Submission

In [3]:
# hide
from nbdev.showdoc import *

<IPython.core.display.Javascript object>

In [4]:
# export
import os
import uuid
import numpy as np
import pandas as pd
from typing import Union
from copy import deepcopy
from random import choices
from datetime import datetime
from abc import abstractmethod
from typeguard import typechecked
from string import ascii_uppercase
from rich import print as rich_print
from numerapi import NumerAPI, SignalsAPI
from dateutil.relativedelta import relativedelta, FR

from numerai_blocks.numerframe import NumerFrame
from numerai_blocks.download import BaseIO
from numerai_blocks.key import Key

<IPython.core.display.Javascript object>

## 1. Base

In [5]:
# export
@typechecked
class BaseSubmittor(BaseIO):
    """
    Shared logic for saving prediction CSVs and uploading them through a numerapi API object.
    Subclasses implement `save_csv` for their tournament's CSV layout.

    :param directory_path: Base directory, passed on to BaseIO.
    :param api: NumerAPI or SignalsAPI instance used for model lookup and uploads.
    """

    def __init__(self, directory_path: str, api: Union[NumerAPI, SignalsAPI]):
        super().__init__(directory_path)
        self.api = api

    @abstractmethod
    def save_csv(
        self,
        dataf: Union[pd.DataFrame, NumerFrame],
        file_name: str,
        cols: Union[str, list],
        *args,
        **kwargs,
    ):
        """
        For Numerai Classic: Save index column + 'cols' (targets) to CSV.
        For Numerai Signals: Save ticker, friday_date, data_type and signal columns to CSV.
        """
        ...

    def upload_predictions(self, file_name: str, model_name: str, *args, **kwargs):
        """
        Upload CSV file to Numerai for given model name.
        :param file_name: File name/path relative to directory_path.
        :param model_name: Lowercase raw model name (For example, 'integration_test').
        *args, **kwargs are forwarded to the API's upload_predictions.
        Extra positional arguments follow the file path, so their meaning depends on the API class in use.
        :raises KeyError: If model_name is not among the models returned by the API.
        """
        full_path = str(self.dir / file_name)
        model_id = self._get_model_id(model_name=model_name)
        api_type = str(self.api.__class__.__name__)
        rich_print(
            f":airplane: {api_type}: Uploading predictions from '{full_path}' for model [bold blue]'{model_name}'[/bold blue] (model_id='{model_id}') :airplane:"
        )
        # The file path goes first as a positional argument. Writing it as a keyword
        # before *args would make any extra positional argument collide with it
        # ("got multiple values for argument").
        self.api.upload_predictions(full_path, *args, model_id=model_id, **kwargs)
        rich_print(
            f":thumbs_up: {api_type} submission of '{full_path}' for [bold blue]{model_name}[/bold blue] is successful! :thumbs_up:"
        )

    def full_submission(
        self,
        dataf: Union[pd.DataFrame, NumerFrame],
        file_name: str,
        model_name: str,
        cols: Union[str, list],
        *args,
        **kwargs,
    ):
        """
        Save DataFrame to csv and upload predictions through API.
        *args, **kwargs are passed to numerapi API (not to save_csv).
        """
        self.save_csv(dataf=dataf, file_name=file_name, cols=cols)
        # Named parameters are passed positionally so that *args lands after them
        # instead of overwriting file_name/model_name.
        self.upload_predictions(file_name, model_name, *args, **kwargs)

    def __call__(
        self,
        dataf: Union[pd.DataFrame, NumerFrame],
        file_name: str,
        model_name: str,
        cols: Union[str, list],
        *args,
        **kwargs,
    ):
        """
        The most common use case will be to create a CSV and submit it immediately after that.
        full_submission handles this.
        """
        self.full_submission(dataf, file_name, model_name, cols, *args, **kwargs)

    def _get_model_id(self, model_name: str) -> str:
        """
        Get ID needed for prediction uploading.
        :raises KeyError: If model_name is not a key of the model mapping.
        """
        # Fetch the mapping once; the property calls the API on every access.
        model_mapping = self.get_model_mapping
        if model_name not in model_mapping:
            raise KeyError(
                f"Model name '{model_name}' not found for these credentials. "
                f"Available models: {list(model_mapping)}"
            )
        return model_mapping[model_name]

    @property
    def get_model_mapping(self) -> dict:
        """Mapping between raw model names and model IDs, as returned by api.get_models()."""
        return self.api.get_models()

<IPython.core.display.Javascript object>

## 2. Numerai Classic

In [6]:
# export
@typechecked
class NumeraiClassicSubmittor(BaseSubmittor):
    """
    Submit for Numerai Classic.
    :param directory_path: Base directory to save and read prediction files from.
    :param key: Key object (numerai_blocks.key.Key) containing valid credentials for Numerai Classic.
    *args, **kwargs will be passed to NumerAPI initialization (after the credentials).
    """

    def __init__(self, directory_path: str, key: Key, *args, **kwargs):
        # Credentials are passed positionally. Writing them as keywords before *args
        # would make any extra positional argument collide with public_id
        # ("got multiple values for argument").
        api = NumerAPI(key.pub_id, key.secret_key, *args, **kwargs)
        super().__init__(directory_path=directory_path, api=api)

    def save_csv(
        self,
        dataf: Union[pd.DataFrame, NumerFrame],
        file_name: str,
        cols: Union[str, list],
        *args,
        **kwargs,
    ):
        """
        Save the selected prediction columns (and the index) to a CSV in the base directory.
        :param dataf: DataFrame which should have the following columns:
        1. id (as index column)
        2. cols (for example ['target'] or [20_NUMERAI_TARGETS]).
        :param file_name: File name/path relative to directory_path.
        :param cols: Column name or list of column names to write.
        *args, **kwargs are passed to the to_csv call.
        """
        full_path = str(self.dir / file_name)
        rich_print(
            f":page_facing_up: Saving predictions CSV to '{full_path}'. :page_facing_up:"
        )

        # The index is written as well (to_csv default), which carries the 'id' column.
        dataf.loc[:, cols].to_csv(full_path, *args, **kwargs)

<IPython.core.display.Javascript object>

### NumeraiClassicSubmittor tests

In [7]:
# Classic submittor on a scratch directory (placeholder credentials, not a real account)
test_dir = "test_sub"
classic_key = Key(
    pub_id="UFVCTElDX0lE",
    secret_key="U1VQRVJfU0VDUkVUX0tFWQ==",
)
num_sub = NumeraiClassicSubmittor(directory_path=test_dir, key=classic_key)
# The base directory is expected to exist right after initialization
assert num_sub.dir.is_dir()

# Random predictions: one uniform column per target, indexed by a fresh UUID named 'id'
n_rows, n_columns = 100, 20
targets = [f"target_{i}" for i in range(n_columns)]
random_predictions = np.random.uniform(size=(n_rows, n_columns))
row_ids = pd.Index([uuid.uuid4() for _ in range(n_rows)], name="id")
test_dataf = pd.DataFrame(random_predictions, columns=targets, index=row_ids)
test_dataf.head(2)

Unnamed: 0_level_0,target_0,target_1,target_2,target_3,target_4,target_5,target_6,target_7,target_8,target_9,target_10,target_11,target_12,target_13,target_14,target_15,target_16,target_17,target_18,target_19
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
4b9a7317-c533-4057-ba4d-b23fedf9963b,0.975645,0.505722,0.565854,0.875254,0.208024,0.690705,0.09281,0.403739,0.333202,0.949469,0.88423,0.742207,0.203517,0.879048,0.843503,0.173738,0.486906,0.339642,0.705131,0.945091
ed37dd28-e323-4d24-b2b1-b6b02202d294,0.77628,0.892568,0.817309,0.787299,0.630828,0.402839,0.960256,0.493967,0.018156,0.015503,0.179613,0.55508,0.192776,0.311228,0.003717,0.537301,0.602811,0.260697,0.029461,0.718513


<IPython.core.display.Javascript object>

In [8]:
# Write every target column to CSV, then read the file back to inspect what was saved
file_name = "test.csv"
num_sub.save_csv(dataf=test_dataf, file_name=file_name, cols=targets)
saved_csv_path = f"{test_dir}/{file_name}"
pd.read_csv(saved_csv_path).head(2)

Unnamed: 0,id,target_0,target_1,target_2,target_3,target_4,target_5,target_6,target_7,target_8,...,target_10,target_11,target_12,target_13,target_14,target_15,target_16,target_17,target_18,target_19
0,4b9a7317-c533-4057-ba4d-b23fedf9963b,0.975645,0.505722,0.565854,0.875254,0.208024,0.690705,0.09281,0.403739,0.333202,...,0.88423,0.742207,0.203517,0.879048,0.843503,0.173738,0.486906,0.339642,0.705131,0.945091
1,ed37dd28-e323-4d24-b2b1-b6b02202d294,0.77628,0.892568,0.817309,0.787299,0.630828,0.402839,0.960256,0.493967,0.018156,...,0.179613,0.55508,0.192776,0.311228,0.003717,0.537301,0.602811,0.260697,0.029461,0.718513


<IPython.core.display.Javascript object>

In [9]:
# TODO Test full submission with dummy credentials
# Save CSV and upload
# num_sub.full_submission(dataf=test_dataf, file_name='test.csv', cols=targets, model_name="test")

<IPython.core.display.Javascript object>

In [10]:
# Clean up: delete the scratch directory and confirm it no longer exists
num_sub.remove_base_directory()
assert os.path.exists(test_dir) is False

<IPython.core.display.Javascript object>

## 3. Numerai Signals

In [11]:
# export
@typechecked
class NumeraiSignalsSubmittor(BaseSubmittor):
    """
    Submit for Numerai Signals
    :param directory_path: Base directory to save and read prediction files from.
    :param key: Key object (numerai_blocks.key.Key) containing valid credentials for Numerai Signals.
    *args, **kwargs will be passed to SignalsAPI initialization (after the credentials).
    """

    def __init__(self, directory_path: str, key: Key, *args, **kwargs):
        # Credentials are passed positionally. Writing them as keywords before *args
        # would make any extra positional argument collide with public_id
        # ("got multiple values for argument").
        api = SignalsAPI(key.pub_id, key.secret_key, *args, **kwargs)
        super().__init__(directory_path=directory_path, api=api)
        # At least one of these column names must be among the columns saved by save_csv.
        self.supported_ticker_formats = [
            "cusip",
            "sedol",
            "ticker",
            "numerai_ticker",
            "bloomberg_ticker",
        ]

    def save_csv(
        self,
        dataf: Union[pd.DataFrame, NumerFrame],
        file_name: str,
        cols: Union[str, list, None] = None,
        *args,
        **kwargs,
    ):
        """
        Validate a Signals prediction DataFrame and save the selected columns to CSV (without index).

        :param dataf: DataFrame which should have at least the following columns:
         1. One of supported ticker formats (cusip, sedol, ticker, numerai_ticker or bloomberg_ticker)
         2. signal (Values between 0 and 1 (exclusive))
         Additional columns for if you include validation data (optional):
         3. friday_date (YYYYMMDD format date indication)
         4. data_type ('val' and 'live' partitions)

         :param file_name: For example, 'sub_<model_name>_round<n>.csv'
         :param cols: All cols that should be passed to CSV. Defaults to 2 standard columns.
          ('bloomberg_ticker', 'signal'). A single column name may be given as a string.
         *args, **kwargs are passed to the to_csv call.
         :raises NotImplementedError: If none of the supported ticker formats is in cols.
         :raises ValueError: If any value in the 'signal' column is not strictly between 0 and 1.
        """
        if not cols:
            cols = ["bloomberg_ticker", "signal"]
        elif isinstance(cols, str):
            # Keep a lone column name intact; set("abc") would split it into characters.
            cols = [cols]

        # Check for valid ticker format
        valid_tickers = set(cols).intersection(self.supported_ticker_formats)
        if not valid_tickers:
            raise NotImplementedError(
                f"No supported ticker format in {cols}. "
                f"Supported: '{self.supported_ticker_formats}'"
            )

        # signal must lie strictly inside (0, 1). Series.between is inclusive by default
        # and would accept exactly 0 or 1, so explicit strict comparisons are used.
        # NaN compares False on both sides and is therefore rejected as well.
        signal = dataf["signal"]
        if not ((signal > 0) & (signal < 1)).all():
            min_val, max_val = signal.min(), signal.max()
            raise ValueError(
                f"Values in 'signal' must be between 0 and 1 (exclusive). Found min value of '{min_val}' and max value of '{max_val}'"
            )

        full_path = str(self.dir / file_name)
        rich_print(
            f":page_facing_up: Saving Signals predictions CSV to '{full_path}'. :page_facing_up:"
        )
        # The index is dropped and not written: only the selected columns end up in the file.
        dataf.loc[:, cols].reset_index(drop=True).to_csv(
            full_path, *args, index=False, **kwargs
        )

<IPython.core.display.Javascript object>

### NumeraiSignalsSubmittor tests

In [12]:
# Signals submittor on its own scratch directory (placeholder credentials, not a real account)
test_dir_signals = "test_sub_signals"
signals_key = Key(
    pub_id="UFVCTElDX0lE",
    secret_key="U1VQRVJfU0VDUkVUX0tFWQ==",
)
signals_sub = NumeraiSignalsSubmittor(
    directory_path=test_dir_signals,
    key=signals_key,
)
# The base directory is expected to exist right after initialization
assert signals_sub.dir.is_dir()

<IPython.core.display.Javascript object>

In [13]:
# Synthetic Signals predictions: uniform random signals, random 4-letter tickers,
# the most recent Friday as friday_date, plus one extra column that should not be saved
n_rows, n_columns = 5000, 4
random_signals = pd.DataFrame(
    np.random.uniform(size=(n_rows, 1)), columns=["signal"]
)
random_tickers = ["".join(choices(ascii_uppercase, k=4)) for _ in range(n_rows)]
# FR(-1) resolves to the most recent Friday; it is formatted as a YYYYMMDD integer
most_recent_friday = datetime.now() + relativedelta(weekday=FR(-1))
last_friday = int(most_recent_friday.strftime("%Y%m%d"))
signals_test_dataf = random_signals.assign(
    ticker=random_tickers,
    friday_date=last_friday,
    data_type="live",
    aux_column="metadata",
)
signals_test_dataf.head(2)

Unnamed: 0,signal,ticker,friday_date,data_type,aux_column
0,0.356858,UEUT,20220128,live,metadata
1,0.46633,ZTKX,20220128,live,metadata


<IPython.core.display.Javascript object>

In [14]:
# Save only the submission columns (aux_column is left out), then read the file back
signals_cols = ["signal", "ticker", "friday_date", "data_type"]
file_name = "signals_test.csv"
signals_sub.save_csv(
    dataf=signals_test_dataf,
    file_name=file_name,
    cols=signals_cols,
)
saved_signals_csv = f"{test_dir_signals}/{file_name}"
pd.read_csv(saved_signals_csv).head(2)

Unnamed: 0,signal,ticker,friday_date,data_type
0,0.356858,UEUT,20220128,live
1,0.46633,ZTKX,20220128,live


<IPython.core.display.Javascript object>

Saving a Signals CSV should fail if there is no valid ticker column or if `signal` has values outside the open interval $(0, 1)$ (both bounds exclusive).

In [15]:
def test_signal_validity(
    submittor: NumeraiSignalsSubmittor, signals_dataf: Union[pd.DataFrame, NumerFrame]
):
    """
    Check that save_csv rejects a DataFrame whose 'signal' column is out of range.
    The first row's signal is pushed up by 10 on a deep copy, so the input stays untouched.
    :return: True if a ValueError was raised, False if the call went through.
    """
    rejected = False
    try:
        out_of_range = deepcopy(signals_dataf)
        out_of_range.loc[0, "signal"] = out_of_range.loc[0, "signal"] + 10
        all_columns = list(out_of_range.columns)
        submittor.save_csv(
            out_of_range, file_name="should_not_save.csv", cols=all_columns
        )
    except ValueError:
        rejected = True
    return rejected


def test_ticker_validity(
    submittor: NumeraiSignalsSubmittor, signals_dataf: Union[pd.DataFrame, NumerFrame]
):
    """
    Check that save_csv rejects a DataFrame without a supported ticker column.
    The 'ticker' column is renamed to an unsupported name on a deep copy of the input.
    :return: True if a NotImplementedError was raised, False if the call went through.
    """
    rejected = False
    try:
        renamed = deepcopy(signals_dataf).rename(
            columns={"ticker": "not_a_valid_ticker_format"}
        )
        all_columns = list(renamed.columns)
        submittor.save_csv(renamed, file_name="should_not_save.csv", cols=all_columns)
    except NotImplementedError:
        rejected = True
    return rejected


# Both checks must report that the invalid input was rejected
for validity_check in (test_signal_validity, test_ticker_validity):
    assert validity_check(signals_sub, signals_test_dataf)

<IPython.core.display.Javascript object>

In [16]:
# TODO Test full submission with dummy credentials
# Save CSV and upload
# signals_sub.full_submission(dataf=signals_test_dataf, file_name='signals_test.csv', cols=signals_cols, model_name="test")

<IPython.core.display.Javascript object>

In [17]:
# Remove contents
signals_sub.remove_base_directory()
assert not os.path.exists(test_dir_signals)

<IPython.core.display.Javascript object>

------------------------------------------------------------

In [18]:
# hide
# Run this cell to sync all changes with the library:
# notebook2script() converts the project's notebooks (see the "Converted ..." output below).
from nbdev.export import notebook2script

notebook2script()

Converted 01_download.ipynb.
Converted 02_numerframe.ipynb.
Converted 03_preprocessing.ipynb.
Converted 04_model.ipynb.
Converted 05_postprocessing.ipynb.
Converted 06_modelpipeline.ipynb.
Converted 07_evaluation.ipynb.
Converted 08_key.ipynb.
Converted 09_submission.ipynb.
Converted 10_staking.ipynb.
Converted index.ipynb.


<IPython.core.display.Javascript object>