### Feature Exposure clipping by gradient descent

This notebook is a TensorFlow port, written by [@jrb](https://numer.ai/jrb), of [@MDO](https://numer.ai/mdo)'s [PyTorch code](https://forum.numer.ai/t/model-diagnostics-feature-exposure/899/12).

Maintainer: [@arbitrage](https://numer.ai/arbitrage)

In [None]:
import os
import pathlib
import numpy as np
import pandas as pd
import scipy.stats
import tensorflow as tf
import joblib
from tqdm.notebook import tqdm

#base URL of Numerai's public S3 bucket; the dataset URLs below are built from it
NUMERAI_S3_BUCKET_URL = "https://numerai-public-datasets.s3-us-west-2.amazonaws.com"

#to read the example predictions from local storage instead, point at a local file:
#EXAMPLE_PREDS = 'tournament_predictions.csv'

#or download the example predictions from Numerai's S3 bucket:
EXAMPLE_PREDS_URL = f"{NUMERAI_S3_BUCKET_URL}/latest_numerai_example_predictions_data.csv.xz"

#location of the latest tournament data file:
TOURNAMENT_DATA_URL = f"{NUMERAI_S3_BUCKET_URL}/latest_numerai_tournament_data.csv.xz"

###IMPORTANT! DELETE THE FILE BELOW IF YOU CHANGE MODELS! OTHERWISE, RENAME THE FILE FOR YOUR VARIOUS MODELS###
#per-era linear-model weights are cached in this file and reused on later runs
LM_CACHE_FILE = pathlib.Path("neutralization.cache.joblib")

@tf.function(experimental_relax_shapes=True, experimental_compile=True)
def exposures(x, y):
    """Return the correlations between the columns of x and the columns of y.

    Each column of both inputs is mean-centred and scaled to unit L2 norm
    along axis 0, so the product ``x^T y`` holds one correlation coefficient
    per (x column, y column) pair.

    Note: a column with zero variance has zero norm after centring, so the
    division yields non-finite values for that column.
    """
    def _unit_columns(t):
        # Centre along axis 0, then divide by the norm of the centred columns.
        centred = t - tf.math.reduce_mean(t, axis=0)
        return centred / tf.norm(centred, axis=0)

    x_unit = _unit_columns(x)
    y_unit = _unit_columns(y)
    return tf.matmul(x_unit, y_unit, transpose_a=True)

@tf.function(experimental_relax_shapes=True)
def train_loop_body(model, feats, pred, target_exps):
    """Run one forward/backward pass and return ``(loss, gradients)``.

    The model output is subtracted from ``pred``; the loss is the summed
    amount by which the exposures of that residual exceed the allowed
    ``target_exps``, penalising the positive and negative sides separately.
    Gradients are taken with respect to ``model.trainable_variables``.
    """
    # The allowed bounds do not depend on the model, so compute them up front.
    upper_bound = tf.nn.relu(target_exps)
    lower_bound = tf.nn.relu(-target_exps)

    with tf.GradientTape() as tape:
        residual = pred[:, None] - model(feats, training=True)
        exps = exposures(feats, residual)
        # Excess above the positive bound / below the negative bound.
        positive_excess = tf.nn.relu(tf.nn.relu(exps) - upper_bound)
        negative_excess = tf.nn.relu(tf.nn.relu(-exps) - lower_bound)
        loss = tf.reduce_sum(positive_excess + negative_excess)

    gradients = tape.gradient(loss, model.trainable_variables)
    return loss, gradients

def train_loop(model, optimizer, feats, pred, target_exps, era,
               max_steps=1000000, tol=1e-7, log_every=10000):
    """Optimise ``model`` until the exposure loss drops below ``tol``.

    Args:
        model: Keras model whose trainable variables are updated in place.
        optimizer: Keras optimizer used to apply the gradients.
        feats: feature tensor passed to ``train_loop_body``.
        pred: prediction tensor passed to ``train_loop_body``.
        target_exps: allowed exposures passed to ``train_loop_body``.
        era: era label; everything after its first three characters is shown
            in the progress message.
        max_steps: upper bound on the number of gradient steps.
        tol: training stops as soon as the loss is below this value.
        log_every: a progress line is written every ``log_every`` steps.

    Returns:
        None. The result of training is the updated state of ``model``.
    """
    for step in range(max_steps):
        loss, grads = train_loop_body(model, feats, pred, target_exps)
        optimizer.apply_gradients(zip(grads, model.trainable_variables))
        if loss < tol:
            break
        if step % log_every == 0:
            # '\r' keeps the progress message on a single, overwritten line.
            tqdm.write(f'era: {era[3:]} loss: {loss:0.7f}', end='\r')
            
def reduce_exposure(prediction, features, max_exp, era, weights=None):
    """Subtract a linear function of the features from one era's predictions.

    A bias-free linear model is either trained (when ``weights`` is None) so
    that the exposures of ``prediction - model(features)`` are clipped to
    ``[-max_exp, max_exp]``, or restored from previously saved ``weights``.

    Args:
        prediction: 1-D array-like of predictions for one era.
        features: 2-D array-like, one row per prediction and one column per
            feature. 0.5 is subtracted before use (presumably to centre
            features scaled to [0, 1] -- confirm against the data).
        max_exp: maximum absolute exposure allowed per feature.
        era: era label, used only for progress output during training.
        weights: optional list of weights, as returned by a previous call,
            to load instead of training.

    Returns:
        A tuple ``(neutralized, weights)``: the adjusted predictions as a
        tensor with a trailing axis of size 1, and the model's weights.
    """
    feats = tf.convert_to_tensor(features - 0.5, dtype=tf.float32)
    pred = tf.convert_to_tensor(prediction, dtype=tf.float32)

    # Size the input layer from the data rather than hard-coding 310, so any
    # number of feature columns (or a custom neutralizer subset) works.
    n_features = feats.shape[-1]
    model = tf.keras.models.Sequential([
        tf.keras.layers.Input(shape=(n_features,)),
        tf.keras.experimental.LinearModel(use_bias=False),
    ])

    if weights is None:
        optimizer = tf.keras.optimizers.Adamax()
        start_exp = exposures(feats, pred[:, None])
        # Exposures already inside the band keep their value; the rest are
        # clipped to +/- max_exp and become the training target.
        target_exps = tf.clip_by_value(start_exp, -max_exp, max_exp)
        train_loop(model, optimizer, feats, pred, target_exps, era)
    else:
        model.set_weights(weights)
    return pred[:, None] - model(feats), model.get_weights()

def reduce_all_exposures(df, column=["prediction"], neutralizers=None,
                         normalize=True,
                         gaussianize=True,
                         era_col="era",
                         max_exp=0.1): ###<-----SELECT YOUR MAXIMUM FEATURE EXPOSURE HERE###
    """Reduce the feature exposure of a prediction column, era by era.

    For every era the predictions are optionally rank-normalized (and
    gaussianized), passed through ``reduce_exposure``, divided by their
    standard deviation and finally min-max scaled to [0, 1]. Trained weights
    are cached per era in ``LM_CACHE_FILE`` and reused on later runs; the
    era named "eraX" is never cached.

    Args:
        df: DataFrame holding the prediction column, the era column and the
            neutralizer (feature) columns.
        column: name of the prediction column, given as a one-element list
            (the default, which is never mutated) or as a plain string.
        neutralizers: feature columns to limit exposure to. Defaults to all
            columns whose name starts with "feature".
        normalize: if True, replace the predictions of each era by their
            ordinal rank scaled into (0, 1).
        gaussianize: if True (and ``normalize`` is True), map the normalized
            ranks through the inverse normal CDF. Ignored when ``normalize``
            is False.
        era_col: name of the column that identifies the era of each row.
        max_exp: maximum absolute exposure allowed per feature.

    Returns:
        A DataFrame with the single prediction column, indexed like ``df``
        and aligned with its rows.

    Raises:
        ValueError: if ``column`` does not name exactly one column.
    """
    columns = [column] if isinstance(column, str) else list(column)
    if len(columns) != 1:
        # Fail before any training instead of at DataFrame construction.
        raise ValueError(
            f"exactly one prediction column is supported, got {columns!r}")
    pred_col = columns[0]

    if neutralizers is None:
        neutralizers = [c for c in df.columns if c.startswith("feature")]

    if LM_CACHE_FILE.is_file():
        # NOTE: joblib.load unpickles the file; only load caches you created.
        cache = joblib.load(LM_CACHE_FILE)
        # Remove weights for eraX if we'd accidentally saved it in the past.
        cache.pop("eraX", None)
    else:
        cache = {}

    era_values = df[era_col].to_numpy()
    # Results are written back through a positional mask, so they stay
    # aligned with df even when the rows of an era are not contiguous.
    result = np.empty((len(df), 1), dtype=np.float32)

    for era in tqdm(df[era_col].unique()):
        tqdm.write(era, end='\r')
        in_era = era_values == era
        df_era = df[in_era]
        # Always hand a 1-D vector to reduce_exposure, with or without
        # normalization.
        scores = df_era[pred_col].to_numpy()
        exposure_values = df_era[neutralizers].values

        if normalize:
            scores = (scipy.stats.rankdata(scores, method='ordinal') - .5) / len(scores)
            if gaussianize:
                scores = scipy.stats.norm.ppf(scores)

        scores, weights = reduce_exposure(scores, exposure_values,
                                          max_exp, era, cache.get(era))
        if era not in cache and era != "eraX":
            cache[era] = weights
            # Persist after every newly trained era so an interrupted run
            # can resume without retraining.
            joblib.dump(cache, LM_CACHE_FILE)

        # Standardize the spread, then min-max scale into [0, 1].
        scores /= tf.math.reduce_std(scores)
        scores -= tf.reduce_min(scores)
        scores /= tf.reduce_max(scores)
        result[in_era] = scores.numpy()

    return pd.DataFrame(result, columns=columns, index=df.index)

#If CUDA isn't set up properly for Tensorflow, then at least maximize the number of threads available for CPU
if not tf.config.list_physical_devices('GPU'):  # No GPU(s) found
    tf.config.threading.set_inter_op_parallelism_threads(2)
    #os.cpu_count() can return None when the count is undetermined; fall back to 0,
    #which tells TensorFlow to pick the number of intra-op threads itself
    tf.config.threading.set_intra_op_parallelism_threads((os.cpu_count() or 0) // 2)

#load the example predictions (downloaded from EXAMPLE_PREDS_URL), keyed by the first CSV column
exp_df = pd.read_csv(EXAMPLE_PREDS_URL, index_col=0)

#load the tournament data the same way
tournament_df = pd.read_csv(TOURNAMENT_DATA_URL, index_col=0)

#join features and predictions on their shared index
full_df = tournament_df.merge(exp_df, left_index=True, right_index=True)

In [None]:
#this cell runs the per-era exposure reduction defined above on the merged data and
#neutralizes the predictions to a maximum Feature Exposure of 0.1 (the default max_exp);
#eras without cached weights in LM_CACHE_FILE are trained first
neutralized_df = reduce_all_exposures(full_df)

In [None]:
#save your prediction file locally (the relative path resolves against the current working directory)
neutralized_df.to_csv("neutralized_predictions.csv")