In [None]:
import numpy as np
import pandas as pd
import tqdm
import os
import plotly.express as px
from scipy.optimize import differential_evolution
import sys
sys.path.append('/n/groups/price/jordan/h2xancestry/scripts')
import ldsc_utils

# common domain sims

In [None]:
# Common-domain simulations: for every simulated w, read each
# 'seeds_*common_domain.alpha_mix.skip_jackknife.txt' result file from the
# matching 'thresholded_.005' directory and stack them into one frame.
w_df_list = []
for w in [0, .05, .25, .5, .75, .95, 1]:
    d = f'/n/groups/price/jordan/h2xancestry/data/simulations/alpha_mix_2/w_{w}_alpha_.38_h2_.5_thresholded_.005'
    files = os.listdir(d)
    grouped_trait_files = [f for f in files if f.endswith('common_domain.alpha_mix.skip_jackknife.txt') and f.startswith('seeds_')]
    for f in grouped_trait_files:
        # seed_start / seed_end are the 2nd and 3rd '_'-separated tokens of the
        # file name (the 3rd truncated at its first '.').
        df = pd.read_csv(
            f'{d}/{f}',
            sep = '\t'
        ).assign(
            true_w = w,
            seed_start = f.split('_')[1],
            seed_end = f.split('_')[2].split('.')[0]
        ).assign(inference = 'Baseline-LD (v2.2) + 2-D MAF bins')
        w_df_list.append(df)
w_df = pd.concat(w_df_list)

# Per-true_w mean / standard deviation of the `w` column plus the row count.
# A notebook cell only renders its *last* expression, so this summary has to
# be displayed explicitly; as a bare mid-cell expression it was silently
# discarded. `display` is the built-in provided by the IPython kernel.
w_summary = w_df.groupby('true_w')['w'].agg(
    w_mean = 'mean',
    w_std = 'std',
    n = 'size',
)
display(w_summary)

# NOTE(review): this plot uses the `alpha` column, whereas the summary above
# and the other box plots in this notebook use `w` - confirm this is intended.
px.box(
    w_df,
    x = 'true_w',
    y = 'alpha',
)
#w_df.to_csv('/n/groups/price/jordan/h2xancestry/data/simulations/alpha_mix_2/simulation_summaries/alpha_.38_h2_.5_ukbb_common_domain.csv', index = False)

# European MAF misspecification sims 

In [None]:
# European-MAF misspecification simulations: collect every 'seeds_*' result
# file from each w-specific 'ukbb_british_maf' directory, tag the rows with the
# simulated w and the seed tokens taken from the file name, and write the
# stacked table to the simulation_summaries folder.
w_df_list = []
for w in [0, .05, .25, .5, .75, .95, 1]:
    sim_dir = f'/n/groups/price/jordan/h2xancestry/data/simulations/alpha_mix_2/w_{w}_alpha_.38_h2_.5_ukbb_british_maf'
    for fname in os.listdir(sim_dir):
        is_result = fname.startswith('seeds_') and fname.endswith('.alpha_mix.skip_jackknife.txt')
        if not is_result:
            continue
        # tokens[1] / tokens[2] hold the two seed fields of the file name.
        tokens = fname.split('_')
        w_df_list.append(
            pd.read_csv(f'{sim_dir}/{fname}', sep='\t').assign(
                true_w=w,
                seed_start=tokens[1],
                seed_end=tokens[2].split('.')[0],
                inference='Baseline-LD (v2.2) + 2-D MAF bins',
            )
        )
w_df = pd.concat(w_df_list)
w_df.to_csv('/n/groups/price/jordan/h2xancestry/data/simulations/alpha_mix_2/simulation_summaries/alpha_.38_h2_.5_ukbb_british_maf.csv', index=False)

In [None]:
# Box plot of the `w` column grouped by the simulated `true_w`; uses whichever
# `w_df` was assigned most recently in the kernel.
px.box(data_frame=w_df, x='true_w', y='w')

# single trait estimation

In [None]:
# Single-trait estimation: collect every 'seed_*' result file from each
# w-specific 'thresholded_.005' directory, tag the rows with the simulated w
# and the seed(s) parsed from the file name, and write the stacked table to
# the simulation_summaries folder.
w_df_list = []
for w in [0, .05, .25, .5, .75, .95, 1]:
    d = f'/n/groups/price/jordan/h2xancestry/data/simulations/alpha_mix_2/w_{w}_alpha_.38_h2_.5_thresholded_.005'
    files = os.listdir(d)
    grouped_trait_files = [f for f in files if f.endswith('.alpha_mix.skip_jackknife.txt') and f.startswith('seed_')]
    for f in grouped_trait_files:
        name_parts = f.split('_')
        # The first token after 'seed_' is the seed; strip anything after a
        # '.' so a name like 'seed_<n>.alpha_mix...' yields '<n>' rather than
        # '<n>.alpha'.
        seed_start = name_parts[1].split('.')[0]
        if '.' in name_parts[1]:
            # Only one seed in the name: the next '_' token would be part of
            # the '.alpha_mix...' suffix ('mix'), so reuse the single seed.
            # NOTE(review): assumes single-seed names look like
            # 'seed_<n>.alpha_mix.skip_jackknife.txt' - confirm against the
            # files on disk. Names of the form 'seed_<a>_<b>.*' parse exactly
            # as before.
            seed_end = seed_start
        else:
            seed_end = name_parts[2].split('.')[0]
        df = pd.read_csv(
            f'{d}/{f}',
            sep = '\t'
        ).assign(
            true_w = w,
            seed_start = seed_start,
            seed_end = seed_end
        ).assign(inference = 'Baseline-LD (v2.2) + 2-D MAF bins')
        w_df_list.append(df)
w_df = pd.concat(w_df_list)
w_df.to_csv('/n/groups/price/jordan/h2xancestry/data/simulations/alpha_mix_2/simulation_summaries/alpha_.38_h2_.5_thresholded_.005_indiv.csv', index = False)

In [None]:
# Box plot of the `w` column grouped by the simulated `true_w`; uses whichever
# `w_df` was assigned most recently in the kernel.
px.box(data_frame=w_df, x='true_w', y='w')

# N causal = 20000

In [None]:
# n_causal_20000 simulations: collect every 'seeds*' result file from each
# w-specific directory, tag the rows with the simulated w and the seed tokens
# taken from the file name, and write the stacked table to the
# simulation_summaries folder.
w_df_list = []
for w in [0, .05, .25, .5, .75, .95, 1]:
    sim_dir = f'/n/groups/price/jordan/h2xancestry/data/simulations/alpha_mix_2/w_{w}_alpha_.38_h2_.5_n_causal_20000'
    for fname in os.listdir(sim_dir):
        is_result = fname.startswith('seeds') and fname.endswith('.alpha_mix.skip_jackknife.txt')
        if not is_result:
            continue
        # tokens[1] / tokens[2] hold the two seed fields of the file name.
        tokens = fname.split('_')
        w_df_list.append(
            pd.read_csv(f'{sim_dir}/{fname}', sep='\t').assign(
                true_w=w,
                seed_start=tokens[1],
                seed_end=tokens[2].split('.')[0],
                inference='Baseline-LD (v2.2) + 2-D MAF bins',
            )
        )
w_df = pd.concat(w_df_list)
w_df.to_csv('/n/groups/price/jordan/h2xancestry/data/simulations/alpha_mix_2/simulation_summaries/alpha_.38_h2_.5_n_causal_20000.csv', index=False)

In [None]:
# Box plot of the `w` column grouped by the simulated `true_w`; uses whichever
# `w_df` was assigned most recently in the kernel.
px.box(data_frame=w_df, x='true_w', y='w')

# N causal = 5000

In [None]:
# n_causal_5000 simulations: collect every 'seeds*' result file from each
# w-specific directory, tag the rows with the simulated w and the seed tokens
# taken from the file name, and write the stacked table to the
# simulation_summaries folder.
w_df_list = []
for w in [0, .05, .25, .5, .75, .95, 1]:
    sim_dir = f'/n/groups/price/jordan/h2xancestry/data/simulations/alpha_mix_2/w_{w}_alpha_.38_h2_.5_n_causal_5000'
    for fname in os.listdir(sim_dir):
        is_result = fname.startswith('seeds') and fname.endswith('.alpha_mix.skip_jackknife.txt')
        if not is_result:
            continue
        # tokens[1] / tokens[2] hold the two seed fields of the file name.
        tokens = fname.split('_')
        w_df_list.append(
            pd.read_csv(f'{sim_dir}/{fname}', sep='\t').assign(
                true_w=w,
                seed_start=tokens[1],
                seed_end=tokens[2].split('.')[0],
                inference='Baseline-LD (v2.2) + 2-D MAF bins',
            )
        )
w_df = pd.concat(w_df_list)
w_df.to_csv('/n/groups/price/jordan/h2xancestry/data/simulations/alpha_mix_2/simulation_summaries/alpha_.38_h2_.5_n_causal_5000.csv', index=False)

In [None]:
# Box plot of the `w` column grouped by the simulated `true_w`; uses whichever
# `w_df` was assigned most recently in the kernel.
px.box(data_frame=w_df, x='true_w', y='w')

# T = .05

In [None]:
# thresholded_.05 simulations: collect every 'seeds*' result file from each
# w-specific directory, tag the rows with the simulated w and the seed tokens
# taken from the file name, and write the stacked table to the
# simulation_summaries folder.
w_df_list = []
for w in [0, .05, .25, .5, .75, .95, 1]:
    sim_dir = f'/n/groups/price/jordan/h2xancestry/data/simulations/alpha_mix_2/w_{w}_alpha_.38_h2_.5_thresholded_.05'
    for fname in os.listdir(sim_dir):
        is_result = fname.startswith('seeds') and fname.endswith('.alpha_mix.skip_jackknife.txt')
        if not is_result:
            continue
        # tokens[1] / tokens[2] hold the two seed fields of the file name.
        tokens = fname.split('_')
        w_df_list.append(
            pd.read_csv(f'{sim_dir}/{fname}', sep='\t').assign(
                true_w=w,
                seed_start=tokens[1],
                seed_end=tokens[2].split('.')[0],
                inference='Baseline-LD (v2.2) + 2-D MAF bins',
            )
        )
w_df = pd.concat(w_df_list)
w_df.to_csv('/n/groups/price/jordan/h2xancestry/data/simulations/alpha_mix_2/simulation_summaries/alpha_.38_h2_.5_thresholded_.05.csv', index=False)

In [None]:
# Box plot of the `w` column grouped by the simulated `true_w`; uses whichever
# `w_df` was assigned most recently in the kernel.
px.box(data_frame=w_df, x='true_w', y='w')

# T = MAC 1

In [None]:
# thresholded_MAC1 simulations: collect every 'seeds*' result file from each
# w-specific directory, tag the rows with the simulated w and the seed tokens
# taken from the file name, and write the stacked table to the
# simulation_summaries folder.
w_df_list = []
for w in [0, .05, .25, .5, .75, .95, 1]:
    sim_dir = f'/n/groups/price/jordan/h2xancestry/data/simulations/alpha_mix_2/w_{w}_alpha_.38_h2_.5_thresholded_MAC1'
    for fname in os.listdir(sim_dir):
        is_result = fname.startswith('seeds') and fname.endswith('.alpha_mix.skip_jackknife.txt')
        if not is_result:
            continue
        # tokens[1] / tokens[2] hold the two seed fields of the file name.
        tokens = fname.split('_')
        w_df_list.append(
            pd.read_csv(f'{sim_dir}/{fname}', sep='\t').assign(
                true_w=w,
                seed_start=tokens[1],
                seed_end=tokens[2].split('.')[0],
                inference='Baseline-LD (v2.2) + 2-D MAF bins',
            )
        )
w_df = pd.concat(w_df_list)
w_df.to_csv('/n/groups/price/jordan/h2xancestry/data/simulations/alpha_mix_2/simulation_summaries/alpha_.38_h2_.5_thresholded_MAC1.csv', index=False)

In [None]:
# Box plot of the `w` column grouped by the simulated `true_w`; uses whichever
# `w_df` was assigned most recently in the kernel.
px.box(data_frame=w_df, x='true_w', y='w')

# main sims

In [None]:
# Main simulations: collect the 'seeds*' result files from each w-specific
# 'thresholded_.005' directory, tag the rows with the simulated w and the seed
# tokens taken from the file name, and write the stacked table to the
# simulation_summaries folder.
w_df_list = []
for w in [0, .05, .25, .5, .75, .95, 1]:
    d = f'/n/groups/price/jordan/h2xancestry/data/simulations/alpha_mix_2/w_{w}_alpha_.38_h2_.5_thresholded_.005'
    files = os.listdir(d)
    # These directories also hold the common-domain outputs
    # ('seeds_*common_domain.alpha_mix.skip_jackknife.txt'), which end with the
    # same '.alpha_mix.skip_jackknife.txt' suffix. Exclude them explicitly so
    # they are not mixed into the main-simulation summary.
    grouped_trait_files = [
        f for f in files
        if f.startswith('seeds')
        and f.endswith('.alpha_mix.skip_jackknife.txt')
        and not f.endswith('common_domain.alpha_mix.skip_jackknife.txt')
    ]
    for f in grouped_trait_files:
        # seed_start / seed_end are the 2nd and 3rd '_'-separated tokens of the
        # file name (the 3rd truncated at its first '.').
        df = pd.read_csv(
            f'{d}/{f}',
            sep = '\t'
        ).assign(
            true_w = w,
            seed_start = f.split('_')[1],
            seed_end = f.split('_')[2].split('.')[0]
        ).assign(inference = 'Baseline-LD (v2.2) + 2-D MAF bins')
        w_df_list.append(df)
w_df = pd.concat(w_df_list)
w_df.to_csv('/n/groups/price/jordan/h2xancestry/data/simulations/alpha_mix_2/simulation_summaries/alpha_.38_h2_.5_thresholded_.005.csv', index = False)

In [None]:
# Box plot of the `w` column grouped by the simulated `true_w`; uses whichever
# `w_df` was assigned most recently in the kernel.
px.box(data_frame=w_df, x='true_w', y='w')
