In [1]:
import pandas as pd
import numpy as np; np.random.seed(42)
import os
import matplotlib.pyplot as plt; plt.style.use("dark_background")
import seaborn as sns
import sys; sys.path.append(
    "/data/jerrylee/pjt/BIGFAM.v.2.0"
)
from src import obj1, tools
from tqdm import tqdm

In [2]:
# ad-hoc functions
def simul_frreg(G, S, w, dor):
    """Return the simulated FR-reg coefficient for one degree of relatedness.

    The coefficient is the sum of two terms:
      * a genetic term, ``G * (1/2)**dor``
      * a shared-environment term, ``S * (1/w)**(dor - 1)``

    G : heritability
    S : variance component by shared env (d=1)
    w : decaying parameter for S
    dor : degree of relatedness
    """
    # Genetic contribution halves with every additional degree.
    genetic_share = (1/2)**dor
    # Shared-environment contribution is undecayed at dor=1 (exponent 0)
    # and shrinks by a factor 1/w for each further degree.
    env_share = (1/w)**(dor-1)

    return G * genetic_share + S * env_share

# Step 1. Make FR-reg coefficients for four shared-environment scenarios

1. All assumptions valid
2. 1st-degree specific
3. Maternal effect
4. 2nd-degree dominant (e.g., 0.05, 0.1, 0.01)

In [3]:
def scenario_1_valid(G, S, w, e):
    """Build the FR-reg table for the baseline scenario.

    Every degree of relatedness (1, 2, 3) gets the coefficient returned by
    ``simul_frreg(G, S, w, degree)``, i.e. both the genetic and the
    shared-environment terms follow the standard decay.

    Args:
        G (float): Heritability (0-1)
        S (float): Shared environmental component (0-1)
        w (float): Environmental decay parameter (>1)
        e (float): Standard error of FR-reg coefficient

    Returns:
        DataFrame: one row per degree with columns ``DOR``, ``slope``,
        ``se`` (set to ``e``), ``p`` (constant 0.0) and ``n`` (constant 0).
    """
    degrees = [1, 2, 3]

    slopes = []
    for degree in degrees:
        slopes.append(simul_frreg(G, S, w, degree))

    # `p` and `n` are fixed placeholders here; presumably p-value and sample
    # size expected by the downstream pipeline -- TODO confirm.
    table = {
        "DOR": degrees,
        "slope": slopes,
        "se": e,
        "p": 0.0,
        "n": 0,
    }
    return pd.DataFrame(table)

def scenario_2_1st_deg(G, S, w, e):
    """Calculate FR-reg coefficients with shared environment only in first-degree relatives.

    This scenario models cases where environmental effects are only present
    between first-degree relatives (e.g., parent-child, siblings) and absent
    in more distant relationships: the shared-environment component is ``S``
    at degree 1 and 0 at degrees 2 and 3.

    Args:
        G (float): Heritability (0-1)
        S (float): Shared environmental component (only applies to first degree)
        w (float): Environmental decay parameter (>1)
        e (float): Standard error of FR-reg coefficient

    Returns:
        DataFrame: FR-reg coefficients for degrees 1-3 with columns
        ``DOR``, ``slope``, ``se`` (set to ``e``), ``p`` (constant 0.0) and
        ``n`` (constant 0).
    """
    slopes = [
        simul_frreg(G, S, w, dor=1),
        simul_frreg(G, 0, w, dor=2),  # no shared environment beyond degree 1
        simul_frreg(G, 0, w, dor=3),
    ]

    # Same column layout as scenario_1_valid, because all scenario tables are
    # handed to the same downstream resampling call. This function previously
    # emitted a single `se_slope` column instead of `se`/`p`/`n`.
    # NOTE(review): confirm against obj1.resampleFrregCoefficients.
    df_frreg = pd.DataFrame({
        "DOR": [1, 2, 3],
        "slope": slopes,
        "se": e,
        "p": 0.0,
        "n": 0,
    })
    return df_frreg

def scenario_3_maternal(G, S, w, e):
    """Calculate FR-reg coefficients with enhanced maternal effects.

    This scenario models stronger environmental effects in mother-offspring
    relationships compared to other first-degree relationships.
    Among the 7 types of first-degree relationships
    (mother-son, mother-daughter, father-son, father-daughter,
    brother-brother, sister-sister, brother-sister),
    only the 2 maternal relationships (mother-son, mother-daughter)
    have 1.5x stronger environmental effects.
    The final FR-reg coefficient for DOR=1 is calculated as the average
    of all 7 relationship types. Degrees 2 and 3 use the unmodified ``S``.

    Args:
        G (float): Heritability (0-1)
        S (float): Base shared environmental component
        w (float): Environmental decay parameter
        e (float): Standard error of FR-reg coefficient

    Returns:
        DataFrame: FR-reg coefficients for degrees 1-3 with columns
        ``DOR``, ``slope``, ``se`` (set to ``e``), ``p`` (constant 0.0) and
        ``n`` (constant 0).
    """
    # At dor=1 the decay exponent in simul_frreg is dor-1 = 0, so the value of
    # `w` does not affect these two coefficients; `w` is passed (instead of a
    # hard-coded 1) only to keep the calls uniform with the other degrees.
    frreg_mother_offspring = simul_frreg(G, 1.5*S, w, dor=1)
    frreg_others = simul_frreg(G, S, w, dor=1)

    # Equal-weight mean over the 7 first-degree relationship types:
    # 2 maternal types with boosted S, 5 remaining types with base S.
    frreg_1st = (2 * frreg_mother_offspring + 5 * frreg_others) / 7

    # Same column layout as scenario_1_valid, because all scenario tables are
    # handed to the same downstream resampling call. This function previously
    # emitted a single `se_slope` column instead of `se`/`p`/`n`.
    # NOTE(review): confirm against obj1.resampleFrregCoefficients.
    df_frreg = pd.DataFrame({
        "DOR": [1, 2, 3],
        "slope": [
            frreg_1st,
            simul_frreg(G, S, w, dor=2),
            simul_frreg(G, S, w, dor=3),
        ],
        "se": e,
        "p": 0.0,
        "n": 0,
    })
    return df_frreg

def scenario_4_2nd_deg(G, S, w, e):
    """Calculate FR-reg coefficients with enhanced second-degree effects.

    This scenario models cases where environmental effects are particularly
    strong in second-degree relatives (3x), which might occur in extended
    family settings or specific cultural contexts. Degrees 1 and 3 use the
    unmodified ``S``; degree 2 uses ``3*S``.

    Args:
        G (float): Heritability (0-1)
        S (float): Base shared environmental component
        w (float): Environmental decay parameter
        e (float): Standard error of FR-reg coefficient

    Returns:
        DataFrame: FR-reg coefficients for degrees 1-3 with columns
        ``DOR``, ``slope``, ``se`` (set to ``e``), ``p`` (constant 0.0) and
        ``n`` (constant 0).
    """
    slopes = [
        simul_frreg(G, S, w, dor=1),
        simul_frreg(G, 3*S, w, dor=2),  # tripled shared environment at degree 2
        simul_frreg(G, S, w, dor=3),
    ]

    # Same column layout as scenario_1_valid, because all scenario tables are
    # handed to the same downstream resampling call. This function previously
    # emitted a single `se_slope` column instead of `se`/`p`/`n`.
    # NOTE(review): confirm against obj1.resampleFrregCoefficients.
    df_frreg = pd.DataFrame({
        "DOR": [1, 2, 3],
        "slope": slopes,
        "se": e,
        "p": 0.0,
        "n": 0,
    })
    return df_frreg


In [4]:
# Scenario label -> function that builds the FR-reg table for that scenario.
scenarios = {
    "SC1": scenario_1_valid,
    "SC2": scenario_2_1st_deg,
    "SC3": scenario_3_maternal,
    "SC4": scenario_4_2nd_deg
}

# Column order of the result table written to disk.
RESULT_COLUMNS = [
    # true values
    "scenario", "G", "S", "w",
    # slope-test
    "slope_slope", "slope_slope_lower", "slope_slope_upper",
    "slope_intercept", "slope_intercept_lower", "slope_intercept_upper",
    "slope_sig",
    # prediction
    "pred_G", "pred_G_lower", "pred_G_upper",
    "pred_S", "pred_S_lower", "pred_S_upper",
    "pred_w", "pred_w_lower", "pred_w_upper"
]

N_ITERATIONS = 1000

# The original cell ended every loop level with an unconditional `break`, so
# only the first (iteration, w, scenario) combination was ever evaluated and
# the output file held a single SC1 row. That behaviour is kept as the default
# but made explicit; set to False to run every iteration, w and scenario.
FIRST_COMBINATION_ONLY = True

# Rows are collected here and turned into a DataFrame once at the end,
# instead of growing the frame one `.loc` assignment at a time.
records = []

for i in range(N_ITERATIONS):
    G = np.random.uniform(0.1, 0.5)
    S = np.random.uniform(0.01, 0.2)
    e = 0.005

    # All three decay parameters are drawn up front (even when only the first
    # one is used) so the random stream is consumed exactly as before.
    ws = [
        1 / np.random.uniform(0.55, 0.95), # slow
        1 / np.random.uniform(0.45, 0.55), # similar
        1 / np.random.uniform(0.01, 0.45) # fast
    ]

    for w in ws:
        # compute and store the results for each scenario
        for scenario_name, scenario_func in scenarios.items():
            # compute the FR-reg coefficients
            df_frreg = scenario_func(G, S, w, e)

            # resampling
            df_lmbds = obj1.resampleFrregCoefficients(
                df_frreg,
                n_resample=100
            )

            # slope-test
            df_frlogreg = obj1.familialRelationshipLogRegression(df_lmbds)
            lower, upper = np.quantile(df_frlogreg["slope"], [0.025, 0.975])
            intercept_lower, intercept_upper = np.quantile(
                df_frlogreg["intercept"], [0.025, 0.975]
            )

            # prediction
            df_gsw = obj1.prediction(
                df_lmbds,
                [lower, upper],
                print_prog=True
            )

            # Label from the 95% interval of the slope relative to 1
            # (same decision rule as before; the two cases are exclusive).
            if upper < 1:
                sig = "slow"
            elif lower > 1:
                sig = "fast"
            else:
                sig = "similar"

            g_lower, g_upper = np.quantile(df_gsw["V(g)"], [0.025, 0.975])
            s_lower, s_upper = np.quantile(df_gsw["V(s)"], [0.025, 0.975])
            w_q_low, w_q_high = np.quantile(df_gsw["w"], [0.025, 0.975])

            # save results
            records.append({
                # true values
                "scenario": scenario_name,
                "G": G,
                "S": S,
                "w": w,
                # slope-test
                "slope_slope": df_frlogreg["slope"].median(),
                "slope_slope_lower": lower,
                "slope_slope_upper": upper,
                "slope_intercept": df_frlogreg["intercept"].median(),
                "slope_intercept_lower": intercept_lower,
                "slope_intercept_upper": intercept_upper,
                "slope_sig": sig,
                # prediction
                "pred_G": df_gsw["V(g)"].median(),
                "pred_G_lower": g_lower,
                "pred_G_upper": g_upper,
                "pred_S": df_gsw["V(s)"].median(),
                "pred_S_lower": s_lower,
                "pred_S_upper": s_upper,
                # The predicted w is reported as its reciprocal. 1/x is
                # decreasing for positive x, so the *upper* quantile of w
                # yields the *lower* bound of 1/w and vice versa (the
                # original had the two bounds swapped).
                # NOTE(review): assumes df_gsw["w"] is strictly positive.
                "pred_w": 1/df_gsw["w"].median(),
                "pred_w_lower": 1/w_q_high,
                "pred_w_upper": 1/w_q_low,
            })

            if FIRST_COMBINATION_ONLY:
                break
        if FIRST_COMBINATION_ONLY:
            break
    if FIRST_COMBINATION_ONLY:
        break

df_results = pd.DataFrame(records, columns=RESULT_COLUMNS)

df_results.to_csv(
    "/data/jerrylee/pjt/BIGFAM.v.2.0/data/simulation/scenarios/SC1-SC4.tsv",
    sep='\t',
    index=False
)

1..2..3..4..5..6..7..8..9..10..