In [13]:
import pandas as pd
import numpy as np; np.random.seed(42)
import os
import matplotlib.pyplot as plt; plt.style.use("dark_background")
import seaborn as sns
import sys; sys.path.append(
    "/data/jerrylee/pjt/BIGFAM.v.2.0"
)
from src import obj1, tools
from tqdm import tqdm

In [14]:
# ad-hoc functions
def simul_frreg(G, S, w, dor):
    """Return the simulated FR-reg coefficient for one degree of relatedness.

    The coefficient is the sum of two terms:

    * genetic term:     G * (1/2) ** dor
    * shared-env term:  S * (1/w) ** (dor - 1)

    Args:
        G: heritability.
        S: variance component from shared environment at dor = 1.
        w: decay parameter for S (used as 1/w, so it must be non-zero).
        dor: degree of relatedness.

    Returns:
        The sum of the genetic and shared-environment terms.
    """
    # At dor == 1 the exponent on (1/w) is 0, so the shared-env term is just S.
    return G * (1 / 2) ** dor + S * (1 / w) ** (dor - 1)

# Step 1. DOR level FR-reg

In [2]:
# Simulation parameter setup.
# All draws use NumPy's global RNG, so the values depend on the seed and on
# every np.random call made earlier in the session.
Gs = np.random.uniform(low=0.1, high=0.8, size=10)
Ss = np.random.uniform(low=0.01, high=0.2, size=10)

# w is drawn from three bands (10 values each, in this order):
# below 1, slightly above 1, and well above 1.
ws = np.concatenate([
    np.random.uniform(lo, hi, size=10)
    for lo, hi in ((0.2, 0.9), (1.1, 1.9), (2.1, 5.0))
])

# Standard errors attached to every simulated coefficient.
es = [0.05, 0.01, 0.005]

In [3]:
# Display the sampled G, S and w arrays for inspection
Gs, Ss, ws

(array([0.36217808, 0.76550001, 0.61239576, 0.51906094, 0.20921305,
        0.20919616, 0.14065853, 0.7063233 , 0.52078051, 0.5956508 ]),
 array([0.01391105, 0.19428287, 0.1681641 , 0.05034443, 0.04454674,
        0.04484686, 0.06780603, 0.10970372, 0.09206955, 0.06533354]),
 array([0.62829703, 0.2976457 , 0.40450125, 0.45645329, 0.51924899,
        0.74962317, 0.33977165, 0.55996411, 0.6146902 , 0.23251529,
        1.58603588, 1.2364193 , 1.15204127, 1.85910843, 1.87250563,
        1.74671788, 1.34369102, 1.17813769, 1.64738642, 1.45212199,
        2.45391088, 3.53601304, 2.19972671, 4.73702917, 2.85046195,
        4.02131462, 3.00396212, 3.60819726, 3.68545981, 2.63607792]))

In [5]:
# Output directory for the DOR-level FR-reg files
save_dir = "/data/jerrylee/pjt/BIGFAM.v.2.0/data/simulation/frreg"

# DataFrame.to_csv does not create missing directories; make sure the
# target exists so the cell also works on a fresh checkout.
os.makedirs(save_dir, exist_ok=True)

# Run the simulation for every (G, S, w, e) combination
for G in Gs:
    for S in Ss:
        for w in ws:
            # The slopes depend only on (G, S, w), so compute them once per
            # triple instead of once per standard error.
            slopes = [simul_frreg(G, S, w, d) for d in range(1, 4)]

            for e in es:
                # File-name tags: each parameter x1000, truncated (not
                # rounded) to an integer by int().
                g_str = f"{int(G*1000)}"
                s_str = f"{int(S*1000)}"
                w_str = f"{int(w*1000)}"
                e_str = f"{int(e*1000)}"

                filename = f"G_{g_str}-S_{s_str}-w_{w_str}-e_{e_str}.DOR.frreg"

                # One row per degree of relatedness (1-3)
                df_frreg = pd.DataFrame({
                    "DOR": [1, 2, 3],
                    "slope": slopes,
                    "se": e,
                    "p": 0.0,  # placeholder value
                    "n": 0     # placeholder value
                })

                # Write as a tab-separated file without the index column
                df_frreg.to_csv(
                    f"{save_dir}/{filename}",
                    sep='\t',
                    index=False
                )

# Step 2. Four shared environmental scenarios

1. All assumptions valid
2. 1st degree specific
3. Maternal effect
4. 2nd degree dominant (e.g., 0.05, 0.1, 0.01)

In [15]:
def scenario_1_valid(G, S, w, e):
    """Build the FR-reg table for the baseline scenario.

    Every degree of relatedness uses the unmodified ``simul_frreg`` model,
    i.e. the same G, S and w for DOR 1, 2 and 3.

    Args:
        G (float): Heritability (0-1)
        S (float): Shared environmental component (0-1)
        w (float): Environmental decay parameter (non-zero; used as 1/w)
        e (float): Standard error of FR-reg coefficient

    Returns:
        DataFrame: one row per DOR (1-3) with columns
        ``DOR``, ``slope``, ``se``, ``p`` and ``n``.
    """
    degrees = [1, 2, 3]
    slopes = [simul_frreg(G, S, w, dor) for dor in degrees]

    # "p" and "n" are constant placeholders; "se" is the same for every row.
    return pd.DataFrame({
        "DOR": degrees,
        "slope": slopes,
        "se": e,
        "p": 0.0,
        "n": 0,
    })

def scenario_2_1st_deg(G, S, w, e):
    """Build the FR-reg table when only first-degree relatives share environment.

    The shared environmental component S is applied at DOR 1 only; for
    DOR 2 and 3 it is set to 0, so those slopes are purely genetic.

    Args:
        G (float): Heritability (0-1)
        S (float): Shared environmental component (only applies to first degree)
        w (float): Environmental decay parameter (non-zero; used as 1/w)
        e (float): Standard error of FR-reg coefficient

    Returns:
        DataFrame: one row per DOR (1-3) with columns
        ``DOR``, ``slope`` and ``se_slope``.
    """
    # NOTE(review): scenario_1_valid names this column "se" and also emits
    # "p" and "n" -- confirm which schema the downstream reader expects.
    degrees = [1, 2, 3]
    shared_env = [S, 0, 0]  # S is switched off beyond the first degree
    slopes = [
        simul_frreg(G, s_d, w, dor=dor)
        for dor, s_d in zip(degrees, shared_env)
    ]

    return pd.DataFrame({
        "DOR": degrees,
        "slope": slopes,
        "se_slope": e,
    })

def scenario_3_maternal(G, S, w, e):
    """Build the FR-reg table with a stronger maternal shared environment.

    Seven first-degree relationship types are considered
    (mother-son, mother-daughter, father-son, father-daughter,
    brother-brother, sister-sister, brother-sister). The two
    mother-offspring types use 1.5 x S; the other five use S. The DOR=1
    slope is the average over all seven types. DOR 2 and 3 use the
    unmodified model.

    Args:
        G (float): Heritability (0-1)
        S (float): Base shared environmental component
        w (float): Environmental decay parameter (non-zero; used as 1/w)
        e (float): Standard error of FR-reg coefficient

    Returns:
        DataFrame: one row per DOR (1-3) with columns
        ``DOR``, ``slope`` and ``se_slope``.
    """
    # At dor=1 the decay factor is (1/w)**0 == 1, so w=1 is passed as a
    # neutral value for both first-degree variants.
    maternal_slope = simul_frreg(G, 1.5*S, w=1, dor=1)
    other_slope = simul_frreg(G, S, w=1, dor=1)

    # Weighted mean: 2 maternal types + 5 remaining types out of 7
    first_degree_slope = (2 * maternal_slope + 5 * other_slope) / 7

    slopes = [first_degree_slope]
    slopes.extend(simul_frreg(G, S, w, dor=dor) for dor in (2, 3))

    # NOTE(review): scenario_1_valid names this column "se" and also emits
    # "p" and "n" -- confirm which schema the downstream reader expects.
    return pd.DataFrame({
        "DOR": [1, 2, 3],
        "slope": slopes,
        "se_slope": e,
    })

def scenario_4_2nd_deg(G, S, w, e):
    """Build the FR-reg table with an inflated second-degree shared environment.

    DOR 1 and 3 use the base component S, while DOR 2 uses 3 x S, modelling
    a shared environment that is particularly strong among second-degree
    relatives.

    Args:
        G (float): Heritability (0-1)
        S (float): Base shared environmental component
        w (float): Environmental decay parameter (non-zero; used as 1/w)
        e (float): Standard error of FR-reg coefficient

    Returns:
        DataFrame: one row per DOR (1-3) with columns
        ``DOR``, ``slope`` and ``se_slope``.
    """
    # NOTE(review): scenario_1_valid names this column "se" and also emits
    # "p" and "n" -- confirm which schema the downstream reader expects.
    degrees = [1, 2, 3]
    shared_env = [S, 3*S, S]  # only the second degree is boosted
    slopes = [
        simul_frreg(G, s_d, w, dor=dor)
        for dor, s_d in zip(degrees, shared_env)
    ]

    return pd.DataFrame({
        "DOR": degrees,
        "slope": slopes,
        "se_slope": e,
    })

In [16]:
# Simulation parameter setup for the scenario runs.
# All draws use NumPy's global RNG, so the values depend on the seed and on
# every np.random call made earlier in the session.
Gs = np.random.uniform(low=0.1, high=0.5, size=10)
Ss = np.random.uniform(low=0.01, high=0.2, size=10)

# w is drawn from three bands (10 values each, in this order):
# below 1, slightly above 1, and well above 1.
ws = np.concatenate([
    np.random.uniform(lo, hi, size=10)
    for lo, hi in ((0.2, 0.9), (1.1, 1.9), (2.1, 5.0))
])

# A single standard error is used for the scenario comparison.
es = [0.005]

# Number of (G, S, w, e) combinations that will be simulated
len(Gs) * len(Ss) * len(ws) * len(es)

3000

In [17]:
# Output directory for the per-scenario FR-reg files
save_dir = "/data/jerrylee/pjt/BIGFAM.v.2.0/data/simulation/scenarios/frreg"

# DataFrame.to_csv does not create missing directories; make sure the
# target exists so the cell also works on a fresh checkout.
os.makedirs(save_dir, exist_ok=True)

# Scenario tag (used in the file name) -> function that builds the table
scenarios = {
    "SC1": scenario_1_valid,
    "SC2": scenario_2_1st_deg,
    "SC3": scenario_3_maternal,
    "SC4": scenario_4_2nd_deg
}

# Run every scenario for every (G, S, w, e) combination
for G in Gs:
    for S in Ss:
        for w in ws:
            for e in es:
                # File-name tags: each parameter x1000, truncated (not
                # rounded) to an integer by int().
                g_str = f"{int(G*1000)}"
                s_str = f"{int(S*1000)}"
                w_str = f"{int(w*1000)}"
                e_str = f"{int(e*1000)}"

                # The parameter part of the name is shared by all scenarios
                prefix = f"G_{g_str}-S_{s_str}-w_{w_str}-e_{e_str}"

                # Compute and save one file per scenario
                for scenario_name, scenario_func in scenarios.items():
                    filename = f"{prefix}.{scenario_name}.frreg"

                    # FR-reg coefficients for DOR 1-3 under this scenario
                    df_frreg = scenario_func(G, S, w, e)

                    # Write as a tab-separated file without the index column
                    df_frreg.to_csv(
                        f"{save_dir}/{filename}",
                        sep='\t',
                        index=False
                    )