In [1]:
import os
import sys

sys.path.append(os.path.abspath("../src/"))

In [8]:
import lightgbm as lgb
import numpy as np
import pandas as pd

import utils

In [3]:
_, test = utils.read_current("../data/round_254/")
validation = test.query("data_type == 'validation'").copy()

In [4]:
model = lgb.Booster(model_file="../models/lgb001.txt")

In [5]:
features = [f for f in validation.columns if f.startswith("feature")]
len(features)

310

In [6]:
validation["prediction"] = model.predict(validation[features])

In [10]:
from sklearn.metrics import mean_absolute_error


def spearmanr(target, pred):
    return np.corrcoef(target, pred.rank(pct=True, method="first"))[0, 1]


def sharpe_ratio(corrs: pd.Series) -> np.float32:
    """
        Calculate the Sharpe ratio for Numerai by using grouped per-era data

        :param corrs: A Pandas Series containing the Spearman correlations for each era
        :return: A float denoting the Sharpe ratio of your predictions.
        """
    return corrs.mean() / corrs.std()


# https://parmarsuraj99.medium.com/evaluating-financial-machine-learning-models-on-numerai-3562da8fd90
def calculate_feature_exposure(df, feature_names, prediction_name="prediction") -> list:
    """
    Example:
    -----
    feature_exposure, max_feat_exposure, square_sum_feature_exposure = calculate_feature_exposure(df, feature_names)
    """

    exposures = []
    for feature_name in feature_names:
        exposures.append(spearmanr(df[feature_name], df[prediction_name]))

    max_feat_exposure = np.max(np.abs(exposures))
    square_sum_feature_exposure = np.sum([e ** 2 for e in exposures])
    feature_exposure = np.std(exposures)

    return [feature_exposure, max_feat_exposure, square_sum_feature_exposure]


# Calculating Max Drawdown
def max_drawdown(df, prediction_name="prediction", target_name="target"):
    scores_per_era = df.groupby("era").apply(
        lambda x: spearmanr(x[prediction_name], x[target_name])
    )

    rolling_max = (
        (scores_per_era + 1).cumprod().rolling(window=100, min_periods=1).max()
    )
    daily_value = (scores_per_era + 1).cumprod()
    max_drawdown = (rolling_max - daily_value).max()

    return max_drawdown


def evaluate(df: pd.DataFrame, features) -> tuple:
    """
        Evaluate and display relevant metrics for Numerai 

        :param df: A Pandas DataFrame containing the columns "era", "target" and a column for predictions
        :param pred_col: The column where the predictions are stored
        :return: A tuple of float containing the metrics
        """

    def _score(sub_df: pd.DataFrame) -> np.float32:
        """Calculates Spearman correlation"""
        return spearmanr(sub_df["target"], sub_df["prediction"])

    # Calculate metrics
    corrs = df.groupby("era").apply(_score)
    print(corrs)
    payout_raw = (corrs / 0.2).clip(-1, 1)
    spearman = round(corrs.mean(), 4)

    payout = round(payout_raw.mean(), 4)
    numerai_sharpe = round(sharpe_ratio(corrs), 4)
    mae = mean_absolute_error(df["target"], df["prediction"]).round(4)
    drawdown = max_drawdown(df)
    fe, max_fe, square_sum_fe = calculate_feature_exposure(df, features)

    # Display metrics
    print(f"Spearman Correlation: {spearman}")
    print(f"Average Payout: {payout}")
    print(f"Sharpe Ratio: {numerai_sharpe}")
    print(f"Mean Absolute Error (MAE): {mae}")
    print(f"Max drawdown: {drawdown}")
    print(
        f"Feature exposure: {fe}, Max Feature Exposure: {max_fe}, Square Sum: {square_sum_fe}"
    )
    return spearman, payout, numerai_sharpe, mae


spearman, payout, numerai_sharpe, mae = evaluate(validation, features)

era
era121    0.044188
era122    0.019480
era123    0.041258
era124    0.062225
era125    0.024602
era126    0.043132
era127    0.012770
era128    0.060628
era129   -0.017926
era130    0.067782
era131    0.035525
era132    0.069603
era197    0.031571
era198    0.013926
era199   -0.024592
era200   -0.002004
era201    0.006837
era202    0.037300
era203    0.031323
era204    0.024924
era205    0.003320
era206   -0.012033
era207    0.053210
era208    0.053396
era209    0.041266
era210   -0.018317
era211   -0.024732
era212    0.022163
dtype: float64
Spearman Correlation: 0.025
Average Payout: 0.1251
Sharpe Ratio: 0.8868
Mean Absolute Error (MAE): 0.1542
Max drawdown: 0.08127724091443644
Feature exposure: 0.08414838848691095, Max Feature Exposure: 0.3081406795169704, Square Sum: 2.404067154818816
