In this notebook, we go through the theory of overfitting. When researchers conduct multiple trials, the probability of finding a strategy that looks profitable in backtesting without having any real statistical edge increases. We call this selection bias under multiple testing (SBuMT).

SBuMT is compounded at many asset managers because selection happens at two levels:
1. Each researcher runs millions of simulations and presents only the best result to their boss.
2. The company then selects a few backtests from among the already overfitted results submitted by the researchers.

We estimate the size of this selection bias so that backtest results can be evaluated properly.

In [2]:
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import pandas as pd

In [3]:
from scipy.stats import norm, percentileofscore
from tqdm import tqdm_notebook

def get_expected_max_SR(n_trials, mean_sr, std_sr):
    """Return the expected maximum Sharpe ratio, controlling for SBuMT.

    The standardised expected maximum is a blend of two standard normal
    quantiles, weighted by the Euler-Mascheroni constant, which is then
    shifted by ``mean_sr`` and scaled by ``std_sr``.

    Parameters
    ----------
    n_trials : int
        Number of trials the maximum is taken over.
    mean_sr : float
        Mean of the Sharpe ratios across trials.
    std_sr : float
        Standard deviation of the Sharpe ratios across trials.

    Returns
    -------
    float
        ``mean_sr + std_sr * z``, where ``z`` is the standardised expected
        maximum.
    """
    gamma = np.euler_gamma  # Euler-Mascheroni constant
    quantile_n = norm.ppf(1 - 1. / n_trials)
    quantile_ne = norm.ppf(1 - 1. / (n_trials * np.e))
    z_max = (1 - gamma) * quantile_n + gamma * quantile_ne
    return mean_sr + std_sr * z_max

def get_dist_max_SR(n_sims, n_trials, mean_sr, std_sr, verbose=0, seed=None):
    """Simulate the distribution of max{SR} by Monte Carlo.

    For every trial count in ``n_trials``, draw ``n_sims`` rows of standard
    normal values, standardise each row to zero sample mean and unit sample
    standard deviation, rescale to ``mean_sr`` / ``std_sr`` and record the
    maximum of each row.

    Parameters
    ----------
    n_sims : int or float
        Number of simulated rows per trial count; converted with ``int``.
    n_trials : iterable of int
        Trial counts (number of Sharpe ratios drawn per row) to simulate.
    mean_sr : float
        Mean applied to the standardised Sharpe ratios.
    std_sr : float
        Standard deviation applied to the standardised Sharpe ratios.
    verbose : int, default 0
        When positive, the loop over ``n_trials`` is wrapped in a
        ``tqdm_notebook`` progress bar.
    seed : int or None, default None
        Seed handed to ``np.random.RandomState``. ``None`` keeps the
        previous, non-reproducible behaviour.

    Returns
    -------
    pandas.DataFrame
        Columns ``max{SR}`` and ``n_trials``, with ``n_sims`` rows per trial
        count and a fresh integer index. Empty (but with both columns) when
        ``n_trials`` is empty.
    """
    rng = np.random.RandomState(seed)
    n_sims = int(n_sims)
    if verbose > 0:
        n_trials = tqdm_notebook(n_trials)
    # Collect the per-trial frames and concatenate once: DataFrame.append was
    # removed in pandas 2.0 and copied the accumulated frame on every call.
    frames = []
    for trial_i in n_trials:
        # 1) Simulate Sharpe Ratios, standardised row by row
        sr = pd.DataFrame(rng.randn(n_sims, trial_i))
        sr = sr.sub(sr.mean(axis=1), axis=0)
        sr = sr.div(sr.std(axis=1), axis=0)
        sr = mean_sr + sr * std_sr
        # 2) Store output
        out_ = sr.max(axis=1).to_frame('max{SR}')
        out_['n_trials'] = trial_i
        frames.append(out_)
    if not frames:
        # pd.concat refuses an empty list; keep the columns callers rely on.
        return pd.DataFrame(columns=['max{SR}', 'n_trials'])
    return pd.concat(frames, ignore_index=True)

In [4]:
# Unique integer trial counts, log-spaced between 10 and 1000, in ascending order.
n_trials = sorted(set(np.logspace(1, 3, 100).astype(int)))
# Closed-form E[max{SR}] for each trial count.
sr0 = pd.Series({
    n: get_expected_max_SR(n, mean_sr=0, std_sr=1)
    for n in n_trials
})
# Monte Carlo draws of max{SR} for the same trial counts.
sr1 = get_dist_max_SR(
    n_sims=1000,
    n_trials=n_trials,
    mean_sr=0,
    std_sr=1,
    verbose=1,
)

HBox(children=(IntProgress(value=0, max=95), HTML(value='')))




In [11]:
def get_mean_std_error(n_sims_sr, n_sims_err, n_trials, std_sr=1, mean_sr=0, verbose=0):
    """Estimate the relative error of the analytical E[max{SR}].

    The analytical value from ``get_expected_max_SR`` is compared with the
    mean of the simulated maxima from ``get_dist_max_SR``. The comparison is
    repeated ``n_sims_err`` times, and the mean and standard deviation of
    the relative error are reported per trial count.

    Parameters
    ----------
    n_sims_sr : int or float
        Simulated rows per trial count in each repetition; converted with
        ``int``.
    n_sims_err : int or float
        Number of repetitions of the experiment; converted with ``int``.
    n_trials : iterable of int
        Trial counts to evaluate.
    std_sr : float, default 1
        Standard deviation of the Sharpe ratios across trials.
    mean_sr : float, default 0
        Mean of the Sharpe ratios across trials.
    verbose : int, default 0
        When positive, the repetitions are wrapped in a ``tqdm_notebook``
        progress bar.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``n_trials`` with columns ``meanErr`` and ``stdErr``.

    Raises
    ------
    ValueError
        If ``n_sims_err`` is smaller than 1.
    """
    sr0 = pd.Series({i: get_expected_max_SR(i, mean_sr, std_sr) for i in n_trials})
    sr0 = sr0.to_frame("E[max{SR}]")
    sr0.index.name = "n_trials"
    n_sims_sr = int(n_sims_sr)
    n_sims_err = int(n_sims_err)
    if n_sims_err < 1:
        raise ValueError("n_sims_err must be at least 1")
    repetitions = range(n_sims_err)
    if verbose > 0:
        repetitions = tqdm_notebook(repetitions)
    # Gather per-repetition frames and concatenate once; DataFrame.append was
    # removed in pandas 2.0.
    frames = []
    for _ in repetitions:
        # Simulate with the same mean_sr/std_sr as the analytical value so
        # the two sides of the ratio describe the same distribution.
        sr1 = get_dist_max_SR(n_sims=n_sims_sr, n_trials=n_trials,
                              mean_sr=mean_sr, std_sr=std_sr)
        sr1 = sr1.groupby("n_trials").mean()
        err_ = sr0.join(sr1).reset_index()
        # Relative error of the simulated mean against the analytical value.
        err_["err"] = err_["max{SR}"] / err_["E[max{SR}]"] - 1.
        frames.append(err_)
    err = pd.concat(frames)
    grouped = err.groupby("n_trials")["err"]
    return pd.DataFrame({"meanErr": grouped.mean(), "stdErr": grouped.std()})

In [None]:
# Unique integer trial counts, log-spaced between 10 and 10,000, sorted once.
n_trials = sorted(set(np.logspace(1, 4, 100).astype(int)))
n_sims_sr = 1e3   # simulated rows per trial count in each repetition
n_sims_err = 1e3  # number of repetitions of the experiment
err = get_mean_std_error(n_sims_sr, n_sims_err, n_trials, verbose=1)

HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))