In [11]:
import numpy as np
import pandas as pd
from google.colab import files
from scipy.stats import lognorm

In [12]:
# Prompt for the cleaned workbook and load it into a DataFrame.
print("Upload '202501_clean2.xlsx'")
up = files.upload()
# Keep only Excel workbooks among the uploaded names
# (presumably the keys are the saved file names -- confirm against the Colab docs).
excel_names = [k for k in up.keys() if k.lower().endswith(('.xlsx', '.xls'))]
if not excel_names:
    # Without this guard the cell died with a bare "IndexError: list index out of range".
    raise ValueError(
        "No Excel file (.xlsx/.xls) was uploaded; "
        f"received: {sorted(up.keys())}"
    )
# If several workbooks were uploaded, the first one is used (same as before).
INPUT = excel_names[0]
df = pd.read_excel(INPUT)

Upload '202501_clean2.xlsx'


Saving 202501_clean2.xlsx to 202501_clean2 (1).xlsx


In [13]:
# Column definitions: one concentration column per metal, reported in ng/m3.
metals_ng = [
    f"{symbol}(ng/m3)"
    for symbol in ("Cr", "Co", "Ni", "As", "Cd", "Sb", "Pb")
]

In [14]:
# Unit conversion: ng/m3 -> ug/m3 (divide by 1000).
metals_ng = [
    "Cr(ng/m3)", "Co(ng/m3)", "Ni(ng/m3)", "As(ng/m3)",
    "Cd(ng/m3)", "Sb(ng/m3)", "Pb(ng/m3)",
]
# Derive the ug/m3 column names once, then fill each new column from its source.
metals_ug = [name.replace("(ng/m3)", "(ug/m3)") for name in metals_ng]
for src_col, dst_col in zip(metals_ng, metals_ug):
    df[dst_col] = df[src_col] / 1000.0

In [15]:
# Inhalation unit risk (IUR) per metal, keyed by the ug/m3 concentration column.
# The trailing note on each entry is the source named in the original table.
IUR = {
    f"{symbol}(ug/m3)": unit_risk
    for symbol, unit_risk in (
        ("As", 4.3e-3),    # IRIS
        ("Cd", 1.8e-3),    # IRIS
        ("Co", 9.0e-3),    # PPRTV
        ("Cr", 1.2e-2),    # IRIS
        ("Ni", 2.4e-4),    # IRIS
        ("Pb", 1.2e-5),    # CalEPA
        ("Sb", 2.29e-6),   # CalEPA
    )
}

In [16]:
# Excess cancer risk (ECR)
# -> ECR = C(ug/m3) * K * IUR
def ecr_per_metal(C_ug_m3, K, iur):
    """Return the excess cancer risk for one metal.

    Parameters
    ----------
    C_ug_m3 : concentration value(s) in ug/m3
    K       : exposure factor multiplied onto the concentration
    iur     : inhalation unit risk for the metal

    Returns
    -------
    The product ``C_ug_m3 * K * iur``.
    """
    exposure_adjusted = C_ug_m3 * K
    return exposure_adjusted * iur

def ecr_total_row(row, K):
    """Sum the per-metal excess cancer risk over every column in ``metals_ug``.

    ``row`` is indexed by the ug/m3 column names; ``K`` is the exposure factor
    passed through to ``ecr_per_metal``. Returns the accumulated total,
    starting from 0.0 and adding metals in ``metals_ug`` order.
    """
    per_metal = (ecr_per_metal(row[name], K, IUR[name]) for name in metals_ug)
    return sum(per_metal, 0.0)

# Fixed-value (deterministic) calculation
# K_fixed = (AcT_out/1440) * (EF/365) * (ED/LT)
# Table values: AcT_out = 76.2 min/day, EF = 350 days/year, ED = 30 years, LT = 70 years
ACT_OUT, EF, ED, LT = 76.2, 350, 30, 70
K_fixed = (ACT_OUT/1440.0) * (EF/365.0) * (ED/float(LT))  # 0.021747

# Per-metal excess cancer risk (ECR) columns, e.g. "As(ug/m3)" -> "ECR_As".
ecr_cols = []
for m in metals_ug:
    col = f"ECR_{m.split('(')[0]}"
    df[col] = df[m] * K_fixed * IUR[m]
    ecr_cols.append(col)

# Total ECR per row: risk from simultaneous exposure to all metals measured that day.
# Accumulating whole columns replaces the former row-by-row df.apply: the per-metal
# terms and the left-to-right summation order (starting at 0.0) are the same, only
# vectorised. A NaN in any metal column still makes that row's total NaN.
df["ECR_total_fixed"] = sum((df[c] for c in ecr_cols), 0.0)

# Summary statistics of the per-metal and total ECR columns.
summary_fixed = df[ecr_cols + ["ECR_total_fixed"]].describe()

In [17]:
# Distribution-based calculation (see Table S2)
# Only AcT_out is treated as a log-normal variable.
# Each preset uses the 5% and 95% values from Table S2.

# Table S2 adult presets: age band -> (5th percentile, 95th percentile) of AcT_out (min/day)
ADULT_ACT_PRESETS = {
    "18-<25":   (14.455, 250.0),
    "25-<35":   (6.516, 220.0),
    "35-<45":   (5.789, 195.0),
    "45-<55":   (6.401, 260.0),
    "55-<65":   (8.083, 350.0),
    "65-<78.6": (6.094, 390.0),
}

def lognorm_params_from_q5q95(q5, q95):
    """Derive log-normal parameters from the 5th and 95th percentiles.

    Uses ln(q_p) = mu + sigma * z_p with z_95 ~= 1.64485 and z_05 ~= -1.64485.

    Returns
    -------
    (sigma, exp(mu)) -- in this notebook they are passed to
    ``scipy.stats.lognorm`` as ``s`` and ``scale``.
    """
    z = 1.6448536269514722
    ln_low = np.log(q5)
    ln_high = np.log(q95)
    # The two percentiles are 2*z standard deviations apart on the log scale.
    sigma = (ln_high - ln_low) / (2*z)
    mu = ln_low + z * sigma
    return sigma, np.exp(mu)

In [18]:
# Scenario settings
preset = "25-<35"   # typical-adult scenario (swap for e.g. "65-<78.6" if desired)
N = 10000           # number of Monte Carlo samples
ED_mode = "fixed"   # "fixed" = keep ED at 30 years / "by_age" = use the per-age-band ED from Table S2

q5, q95 = ADULT_ACT_PRESETS[preset]
s, scale = lognorm_params_from_q5q95(q5, q95)
# Seed once and draw from that generator. Previously `rng` was created but never
# used, and a second literal seed was passed to rvs() instead.
# NOTE: the draw is still reproducible, but the individual sample values differ
# from those produced with `random_state=42`.
rng = np.random.default_rng(42)
ACT_samples = lognorm(s=s, scale=scale).rvs(size=N, random_state=rng)

In [19]:
# Choose the exposure duration ED (years)
ED_BY_AGE = {
    "18-<25": 7.0,
    "25-<35": 10.0,
    "35-<45": 10.0,
    "45-<55": 10.0,
    "55-<65": 10.0,
    "65-<78.6": 13.6,
}
if ED_mode == "by_age":
    ED_used = ED_BY_AGE[preset]
else:
    # Any other mode falls back to the fixed ED defined earlier.
    ED_used = ED

# Exposure factor K for every sampled AcT_out value
daily_fraction = ACT_samples / 1440.0
K_dist = daily_fraction * (EF / 365.0) * (ED_used / float(LT))

In [20]:
# Distribution of the total excess cancer risk (ECR), reflecting the uncertainty in K.
# Concentrations are fixed at each metal's column mean.
mean_conc_ug = df[metals_ug].mean()
C_mean = mean_conc_ug.to_dict()
def ecr_total_with_K(C_dict, K_array):
    """Total ECR across all metals for each exposure-factor sample.

    ``C_dict`` maps each name in ``metals_ug`` to a concentration and
    ``K_array`` holds the exposure factors (shape (N,) per the original note).
    Returns a float array shaped like ``K_array``.
    """
    risk_sum = np.zeros_like(K_array, dtype=float)
    for metal in metals_ug:
        contribution = C_dict[metal] * K_array * IUR[metal]
        risk_sum += contribution
    return risk_sum

# Total ECR for every sampled K, evaluated at the mean concentrations.
E_total_dist = ecr_total_with_K(C_dict=C_mean, K_array=K_dist)

In [21]:
# Summary of the distribution scenario, assembled in three parts:
# scenario settings, statistics of K, statistics of the total ECR.
scenario_part = {
    "preset": preset,
    "ED_mode": ED_mode,
    "N": N,
}
k_part = {
    "K_mean": float(np.mean(K_dist)),
    "K_p05": float(np.percentile(K_dist, 5)),
    "K_p95": float(np.percentile(K_dist, 95)),
}
ecr_part = {
    "ECR_total_mean": float(np.mean(E_total_dist)),
    "ECR_total_median": float(np.median(E_total_dist)),
    "ECR_total_p05": float(np.percentile(E_total_dist, 5)),
    "ECR_total_p95": float(np.percentile(E_total_dist, 95)),
    "ECR_total_max": float(np.max(E_total_dist)),
    # Fraction of samples whose total ECR is at least 1e-6.
    "P(ECR_total>=1e-6)": float(np.mean(E_total_dist >= 1e-6)),
}
summary_dist = {**scenario_part, **k_part, **ecr_part}

In [22]:
# Save the row-wise results and both summaries to Excel.
df.to_excel("ECR_results_fixed_rowwise.xlsx", index=False)
# summary_fixed is already a DataFrame (the result of describe()), so it is written directly.
summary_fixed.to_excel("ECR_summary_fixed_describe.xlsx")
pd.DataFrame([summary_dist]).to_excel("ECR_summary_dist_total.xlsx", index=False)

print(
    "저장 완료:",
    " - ECR_results_fixed_rowwise.xlsx (행별 고정값 결과)",
    " - ECR_summary_fixed_describe.xlsx (고정값 기술통계)",
    " - ECR_summary_dist_total.xlsx (Table S2 분포 시나리오 요약)",
    sep="\n",
)

# Print the results
print("\n[고정값 요약 통계 (상위 10행)]")
print(summary_fixed.head(10))
print("\n[분포 시나리오 요약]")
print(summary_dist)

저장 완료:
 - ECR_results_fixed_rowwise.xlsx (행별 고정값 결과)
 - ECR_summary_fixed_describe.xlsx (고정값 기술통계)
 - ECR_summary_dist_total.xlsx (Table S2 분포 시나리오 요약)

[고정값 요약 통계 (상위 10행)]
             ECR_Cr        ECR_Co        ECR_Ni  ECR_As      ECR_Cd  \
count  3.520000e+02  4.490000e+02  3.260000e+02   449.0  449.000000   
mean   7.748181e-07  1.535607e-06  1.552641e-08     0.0    0.000008   
std    4.219507e-07  7.614202e-07  9.408741e-09     0.0    0.000002   
min    7.828767e-09  3.346798e-07  0.000000e+00     0.0    0.000004   
25%    4.514589e-07  9.551096e-07  8.102774e-09     0.0    0.000007   
50%    7.854863e-07  1.481594e-06  1.377863e-08     0.0    0.000008   
75%    1.064060e-06  2.053094e-06  2.199884e-08     0.0    0.000010   
max    1.894562e-06  3.798909e-06  4.086616e-08     0.0    0.000014   

             ECR_Sb        ECR_Pb  ECR_total_fixed  
count  2.320000e+02  3.950000e+02       186.000000  
mean   8.605166e-10  2.332920e-09         0.000012  
std    6.342097e-10  2.1155