In [1]:
import pickle
import numpy as np 
import pandas as pd 
from pathlib import Path 
from settings import settings 

## Define Parameters

In [2]:
# Directory (relative to the working directory) that the input files below are read from.
data_path = Path("result")

In [3]:
# Cluster assignments; later cells read its `characteristic` and `hcl_label` columns.
cluster_labels = pd.read_parquet(data_path.joinpath("cluster_labels.parquet"))

In [4]:
# Load the pickled estimation results. Later cells read eb_est['us']['input']['long'].
# NOTE(review): pickle.load can execute arbitrary code while unpickling — only
# load this file if it was produced by a trusted run of this project.
with open(data_path / "eb_est.pkl", "rb") as f:
    eb_est = pickle.load(f)

## Simulations EB vs BY

In [21]:
# Pearson correlation between every pair of characteristics.
# The long table is reshaped to wide (rows = eom, columns = characteristic,
# cells = ret_neu; pivot_table averages duplicate cells by default) and the
# columns are then correlated with each other.
pairwise_cor = (
    eb_est['us']['input']['long']
    .pivot_table(values='ret_neu', index='eom', columns='characteristic')
    .corr(method='pearson')
)

In [22]:
# Reshape the square correlation matrix to long format: one row per
# (level_0, level_1) pair with the correlation in column "cor".
# The index levels are renamed first so that reset_index() yields two distinct
# column names.
# NOTE: this rebinds pairwise_cor from the square matrix to the long frame, so
# the cell cannot be re-run without re-running the previous cell first.
pairwise_cor = (
    pairwise_cor
    .stack()
    .rename("cor")
    .to_frame()
    .rename_axis(index=["level_0", "level_1"])
)
cor_value = pairwise_cor.reset_index().copy()

In [23]:
# Attach each factor's cluster label to both sides of every pair:
# hcl1 is the hcl_label of level_0, hcl2 the hcl_label of level_1
# (NaN when a characteristic has no entry in cluster_labels).
#
# The labels are looked up through a characteristic -> hcl_label Series rather
# than merged in, so that
#   * re-running the cell simply overwrites hcl1/hcl2 (idempotent), and
#   * no redundant copies of the `characteristic` key column are added.
# drop_duplicates keeps the first label per characteristic: Series.map needs a
# unique index, and a repeated characteristic must not multiply the pair rows.
hcl_by_characteristic = (
    cluster_labels
    .drop_duplicates(subset='characteristic')
    .set_index('characteristic')['hcl_label']
)
cor_value = cor_value.assign(
    hcl1=cor_value['level_0'].map(hcl_by_characteristic),
    hcl2=cor_value['level_1'].map(hcl_by_characteristic),
)

In [33]:
# Flag pairs whose two factors carry the same cluster label.
# A missing label on either side compares as False.
cor_value['same_cluster'] = cor_value['hcl1'].eq(cor_value['hcl2'])

In [34]:
# Average correlation for same-cluster vs. different-cluster pairs.
# Rows with level_0 == level_1 are the diagonal of the correlation matrix
# (a factor's correlation with itself, 1.0 by construction); they are excluded
# so they do not inflate the same-cluster average.
# NOTE: cor_value_avg is assigned again by the next cell.
cor_value_avg = (
    cor_value
    .query("level_0 != level_1")
    .groupby('same_cluster')['cor']
    .mean()
    .reset_index(name='avg_cor')
)

In [36]:
# Average correlation within the same cluster vs. across different clusters.
# Rows with level_0 == level_1 are the diagonal of the correlation matrix
# (a factor's correlation with itself, 1.0 by construction); they are excluded
# so they do not inflate the same-cluster average.
cor_value_avg = (
    cor_value
    .query("level_0 != level_1")
    .groupby('same_cluster')['cor']
    .mean()
    .reset_index(name='avg_cor')
)

### Time Periods

In [40]:
# Median, across characteristics, of the number of non-null `eom` rows each
# characteristic has in the long table.
med_months = (
    eb_est['us']['input']['long']
    .groupby('characteristic')['eom']
    .count()
    .median()
)

In [43]:
# Data-based reference values: sample length in years (median row count / 12)
# and the average within-/across-cluster correlations, rounded to 2 decimals.
# Raises IndexError if cor_value_avg lacks a True or a False row.
data_sim = dict(
    yrs=round(med_months / 12),
    cor_within=round(
        cor_value_avg.loc[cor_value_avg['same_cluster'].eq(True), 'avg_cor'].iloc[0], 2
    ),
    cor_across=round(
        cor_value_avg.loc[cor_value_avg['same_cluster'].eq(False), 'avg_cor'].iloc[0], 2
    ),
)

In [44]:
# Seed NumPy's global (legacy) random state from the project settings so that
# subsequent np.random.* draws are reproducible.
np.random.seed(settings['seed'])

In [45]:
# Simulation design.
# NOTE(review): t, corr_within and corr_across are hard-coded here rather than
# read from data_sim — confirm these fixed values are intended.
sim = {
    "alpha_0": 0,
    "t": 12 * 70,  # presumably 70 years of monthly observations — TODO confirm
    "clusters": 13,
    "fct_pr_cl": 10,  # factors per cluster (see sim['n'] below)
    "corr_within": 0.58,
    "corr_across": 0.02,
    "n_sims": 10000,
    # 0.01 followed by 0.05, 0.10, ..., 0.50. The grid is built from integer
    # steps and rounded so it holds exact two-decimal Python floats; a
    # float-step np.arange yields entries such as 0.15000000000000002 and its
    # length depends on rounding at the end point.
    "tau_c": [0.01] + [round(0.05 * step, 2) for step in range(1, 11)],
    "tau_w": [0.01, 0.2],
}
# (10 / sqrt(12)) scaled by 1 / sqrt(t).
# NOTE(review): looks like a 10% annual volatility converted to monthly and
# then to the standard error of a mean over t periods — confirm.
sim['se'] = (10 / np.sqrt(12)) / np.sqrt(sim['t'])
# Total number of factors = clusters * factors per cluster.
sim['n'] = sim['clusters'] * sim['fct_pr_cl']