In [24]:
import pickle
import numpy as np
import pandas as pd
from tqdm import tqdm
from pathlib import Path 
from settings import settings
from scipy.stats import multivariate_normal
from statsmodels.stats.multitest import multipletests

## Define Parameter

In [25]:
data_path = Path("result")

In [26]:
with open(data_path / "eb_est.pkl", "rb") as f:
    eb_est = pickle.load(f)

## FDR(False Discover Rate) Testing

In [27]:
def fdr_sim(t_low, a_vec, a_cov, n_sim=10000, seed=1):
    np.random.seed(seed) 
    t_all = a_vec / np.sqrt(np.diag(a_cov)) 

    t_steps = np.sort(t_all[t_all > t_low])
    t_steps = t_steps[:-1]            

    sims = multivariate_normal.rvs(mean=a_vec, cov=a_cov, size=n_sim) 
    results = []
    for t in tqdm(t_steps):
        # Significant alphas under t-cutoff
        sig = (t_all >= t)
        sims_fdr = np.mean(np.sign(sims[:, sig]) != np.sign(np.tile(a_vec[sig].values, (n_sim, 1))), axis=1) 

        fdr = np.mean(sims_fdr)
        fwr = np.mean(sims_fdr > 0)

        results.append({
                "t_cutoff": t,
                "n_sig": np.sum(sig),
                "fdr": fdr,
                "fwr": fwr
            })

    return pd.DataFrame(results)

In [28]:
model_fdr = fdr_sim(
    t_low = 0,
    a_vec=eb_est['us']['factor_mean'].copy(),
    a_cov=eb_est['us']['factor_cov'].copy(),
    n_sim=10000,
    seed=settings['seed'],
)

100%|██████████| 141/141 [00:00<00:00, 167.55it/s]


## Multiple Testing

In [29]:
eb_all = eb_est["all"]

KeyError: 'all'

In [30]:
eb_est.keys()

dict_keys(['us', 'developed', 'emerging', 'world', 'world_ex_us', 'us_mega', 'us_large', 'us_small', 'us_micro', 'us_nano'])