In [7]:
# ============================================================
# Monte Carlo ECR (Age-weighted) – Final Integrated Pipeline (patched)
#  - Preprocess (unit & 5–95% trimming)
#  - Derive Cr(VI)=Cr/7
#  - Distribution fitting (AD parametric bootstrap p with refit, incl. GumbelR/L)
#  - Summary matrix + detailed tables
#  - 10k sampling from best-fit dists (Cr(VI)=Cr_sim/7)
#  - Age-weighted K_total + IUR → LADD/ECR
#  * Patches:
#     (1) pd.Series(..., dtype=float)
#     (2) scipy.stats.rvs(random_state=<numpy.random.Generator>)
# ============================================================

# 0) Imports & Settings
from google.colab import files
import os, re, math, json, warnings
import numpy as np, pandas as pd
from scipy import stats
from scipy.stats import (gumbel_r, gumbel_l, lognorm, weibull_min, logistic, t,
                         norm, gamma, beta, triang, expon, pareto, uniform, chi2)
from openpyxl import Workbook
from openpyxl.styles import Font, Alignment, PatternFill, Border, Side
from openpyxl.worksheet.table import Table, TableStyleInfo

warnings.filterwarnings("ignore")
np.random.seed(20250912)
N_SIM = 10_000
BOOTSTRAP_B = 300  # AD p 재적합 부트스트랩 반복

print("👉 '202501_clean2.xlsx' 업로드 해주세요")
uploaded = files.upload()
INPUT_XLSX = next(k for k in uploaded if str(k).lower().endswith(('.xlsx','.xls')))
raw = pd.read_excel(INPUT_XLSX)
print("원본데이터 shape:", raw.shape)

# ============================================================
# 1) 컬럼 매핑 + 단위 정규화(ng→µg) + 5–95% 트리밍
PATTERNS = {
    "Cr(VI)": r"(?:\bCr\s*\(?VI\)?\b|Cr6|Hexa(?:valent)?\s*Chrom)",
    "Cr"    : r"(?:\bCr\b(?!\s*\(?VI\)?))",
    "Co"    : r"(?:\bCo\b|Cobalt)",
    "Ni"    : r"(?:\bNi\b|Nickel)",
    "As"    : r"(?:\bAs\b|Arsenic)",
    "Cd"    : r"(?:\bCd\b|Cadmium)",
    "Sb"    : r"(?:\bSb\b|Antimony)",
    "Pb"    : r"(?:\bPb\b|Lead)",
}
ORDER = ['Cr','Cr(VI)','Co','Ni','As','Cd','Sb','Pb']

def find_col(df, regex):
    for c in df.columns:
        if re.search(regex, str(c), flags=re.I):
            return c
    return None

def to_ug_m3(series, colname):
    name = str(colname)
    s = pd.to_numeric(series, errors='coerce').replace([np.inf, -np.inf], np.nan)
    if re.search(r'(?i)\bng\b|ng/?m', name):
        return s/1000.0, 'converted_from_ng'
    return s, 'as_is_ug'

series_map, log_rows = {}, []
for metal in ORDER:
    col = find_col(raw, PATTERNS.get(metal, r"$^$"))
    if col is None:
        log_rows.append((metal, None, 'missing', 0, np.nan)); continue
    val, how = to_ug_m3(raw[col], col)
    series_map[metal] = val
    log_rows.append((metal, col, how, int(val.notna().sum()), float(np.nanmean(val))))

log = pd.DataFrame(log_rows, columns=['Metal','Matched_Column','Unit_Status','N_nonNa','Mean(ug/m3)'])
print("\n[매핑/단위 로그]"); display(log)

# 5–95% 트리밍
trimmed = {}
for m, s in series_map.items():
    x = s.dropna()
    x = x[x > 0]
    if x.size == 0: continue
    q5, q95 = np.percentile(x, [5,95])
    trimmed[m] = x[(x>=q5) & (x<=q95)]

# Cr(VI) 파생: 실측이 없거나 공란이면 Cr/7로 생성
if 'Cr' in trimmed and (('Cr(VI)' not in trimmed) or trimmed['Cr(VI)'].empty):
    trimmed['Cr(VI)'] = trimmed['Cr'] / 7.0

os.makedirs("preprocessed", exist_ok=True)
with pd.ExcelWriter(os.path.join("preprocessed","preprocessed_data.xlsx")) as w:
    for m in ORDER:
        if m in trimmed: trimmed[m].to_excel(w, sheet_name=m, index=False)
log.to_excel(os.path.join("preprocessed","preprocess_log.xlsx"), index=False)
print("전처리 완료 ▶ preprocessed/*.xlsx 저장")

# ============================================================
# 2) 분포 후보 정의 + 적합(AD 재적합 부트스트랩 p 포함) + 선택 규칙
POS_ONLY = {'로그 정규','와이블','감마','지수','파레토'}  # 양수 정의

def hist_mode_estimate(x):
    x = np.asarray(x, float); x = x[np.isfinite(x)]
    n = x.size
    if n < 2: return float(np.nanmedian(x)) if n else np.nan
    iqr = np.subtract(*np.percentile(x,[75,25]))
    bins = max(10, int(np.ceil(np.sqrt(n)))) if iqr<=0 else max(10, int(np.ceil((x.max()-x.min())/(2*iqr*n**(-1/3)))))
    cnt, edges = np.histogram(x, bins=bins); i = int(cnt.argmax())
    return float((edges[i] + edges[i+1]) / 2)

# 간단 래퍼
class Dist:
    def __init__(self, name): self.name=name; self.p={}; self.np=None; self.valid=False
    def _ok(self, p, np_): self.p=p; self.np=np_; self.valid=True; return self
    def cdf(self, z): raise NotImplementedError
    def ppf(self, q): raise NotImplementedError
    def rvs(self, n, rng=None): raise NotImplementedError  # patched signature

class LogNormal(Dist):
    def __init__(self): super().__init__('로그 정규')
    def fit(self,x):
        x=x[x>0]
        try: s,loc,sc=lognorm.fit(x, floc=0); assert s>0 and sc>0; return self._ok({'s':s,'scale':sc},2)
        except: return self
    def cdf(self,z): return lognorm.cdf(z, s=self.p['s'], loc=0, scale=self.p['scale'])
    def ppf(self,q): return lognorm.ppf(q, s=self.p['s'], loc=0, scale=self.p['scale'])
    def rvs(self,n, rng=None): return lognorm.rvs(self.p['s'], loc=0, scale=self.p['scale'], size=n, random_state=rng)

class Weibull(Dist):
    def __init__(self): super().__init__('와이블')
    def fit(self,x):
        x=x[x>0]
        try: c,loc,sc=weibull_min.fit(x, floc=0); assert c>0 and sc>0; return self._ok({'c':c,'scale':sc},2)
        except: return self
    def cdf(self,z): return weibull_min.cdf(z, c=self.p['c'], loc=0, scale=self.p['scale'])
    def ppf(self,q): return weibull_min.ppf(q, c=self.p['c'], loc=0, scale=self.p['scale'])
    def rvs(self,n, rng=None): return weibull_min.rvs(self.p['c'], loc=0, scale=self.p['scale'], size=n, random_state=rng)

class Gamma_(Dist):
    def __init__(self): super().__init__('감마')
    def fit(self,x):
        x=x[x>0]
        try: a,loc,sc=gamma.fit(x, floc=0); assert a>0 and sc>0; return self._ok({'a':a,'scale':sc},2)
        except: return self
    def cdf(self,z): return gamma.cdf(z, a=self.p['a'], loc=0, scale=self.p['scale'])
    def ppf(self,q): return gamma.ppf(q, a=self.p['a'], loc=0, scale=self.p['scale'])
    def rvs(self,n, rng=None): return gamma.rvs(self.p['a'], loc=0, scale=self.p['scale'], size=n, random_state=rng)

class Logistic(Dist):
    def __init__(self): super().__init__('로지스틱')
    def fit(self,x):
        try: loc,sc=logistic.fit(x); assert sc>0; return self._ok({'loc':loc,'scale':sc},2)
        except: return self
    def cdf(self,z): return logistic.cdf(z, **self.p)
    def ppf(self,q): return logistic.ppf(q, **self.p)
    def rvs(self,n, rng=None): return logistic.rvs(size=n, **self.p, random_state=rng)

class Normal(Dist):
    def __init__(self): super().__init__('정규')
    def fit(self,x):
        try: mu,sig=norm.fit(x); assert sig>0; return self._ok({'loc':mu,'scale':sig},2)
        except: return self
    def cdf(self,z): return norm.cdf(z, **self.p)
    def ppf(self,q): return norm.ppf(q, **self.p)
    def rvs(self,n, rng=None): return norm.rvs(size=n, **self.p, random_state=rng)

class StudentT(Dist):
    def __init__(self): super().__init__('스튜던트의 t')
    def fit(self,x):
        try: df_,loc,sc=t.fit(x); assert df_>0 and sc>0; return self._ok({'df':df_, 'loc':loc, 'scale':sc},3)
        except: return self
    def cdf(self,z): return t.cdf(z, **self.p)
    def ppf(self,q): return t.ppf(q, **self.p)
    def rvs(self,n, rng=None): return t.rvs(size=n, **self.p, random_state=rng)

class Exponential_(Dist):
    def __init__(self): super().__init__('지수')
    def fit(self,x):
        x=x[x>0]
        try: loc,sc=expon.fit(x, floc=0); assert sc>0; return self._ok({'scale':sc},1)
        except: return self
    def cdf(self,z): return expon.cdf(z, loc=0, scale=self.p['scale'])
    def ppf(self,q): return expon.ppf(q, loc=0, scale=self.p['scale'])
    def rvs(self,n, rng=None): return expon.rvs(size=n, loc=0, scale=self.p['scale'], random_state=rng)

class BetaPERT_(Dist):
    def __init__(self, lam=4.0): super().__init__('BetaPERT'); self.lam=lam
    def fit(self,x):
        a,b = float(np.min(x)), float(np.max(x))
        if not(np.isfinite(a) and np.isfinite(b) and b>a): return self
        m = float(np.clip(hist_mode_estimate(x), a+1e-9, b-1e-9))
        al = 1 + self.lam*(m-a)/(b-a); be = 1 + self.lam*(b-m)/(b-a)
        if al<=0 or be<=0: return self
        return self._ok({'a':a,'b':b,'alpha':al,'beta':be,'m':m},2)
    def cdf(self,z): return beta.cdf((z-self.p['a'])/(self.p['b']-self.p['a']), self.p['alpha'], self.p['beta'])
    def ppf(self,q): return self.p['a']+(self.p['b']-self.p['a'])*beta.ppf(q, self.p['alpha'], self.p['beta'])
    def rvs(self,n, rng=None):
        r=beta.rvs(self.p['alpha'], self.p['beta'], size=n, random_state=rng)
        return self.p['a']+(self.p['b']-self.p['a'])*r

class Triangular_(Dist):
    def __init__(self): super().__init__('삼각형')
    def fit(self,x):
        a,b = float(np.min(x)), float(np.max(x))
        if not(np.isfinite(a) and np.isfinite(b) and b>a): return self
        m = float(np.clip(hist_mode_estimate(x), a+1e-9, b-1e-9))
        c = (m-a)/(b-a)
        if not(0<c<1): return self
        return self._ok({'a':a,'b':b,'m':m,'c':c},3)
    def cdf(self,z): return triang.cdf(z, c=self.p['c'], loc=self.p['a'], scale=(self.p['b']-self.p['a']))
    def ppf(self,q): return triang.ppf(q, c=self.p['c'], loc=self.p['a'], scale=(self.p['b']-self.p['a']))
    def rvs(self,n, rng=None): return triang.rvs(self.p['c'], loc=self.p['a'], scale=(self.p['b']-self.p['a']), size=n, random_state=rng)

class Uniform_(Dist):
    def __init__(self): super().__init__('균일')
    def fit(self,x):
        a,b = float(np.min(x)), float(np.max(x))
        if not(np.isfinite(a) and np.isfinite(b) and b>a): return self
        return self._ok({'loc':a,'scale':(b-a)},2)
    def cdf(self,z): return uniform.cdf(z, **self.p)
    def ppf(self,q): return uniform.ppf(q, **self.p)
    def rvs(self,n, rng=None): return uniform.rvs(size=n, **self.p, random_state=rng)

class GumbelR_(Dist):
    def __init__(self): super().__init__('최대 극값')
    def fit(self,x):
        try: loc,sc=gumbel_r.fit(x); assert np.isfinite(loc) and sc>0; return self._ok({'loc':loc,'scale':sc},2)
        except: return self
    def cdf(self,z): return gumbel_r.cdf(z, **self.p)
    def ppf(self,q): return gumbel_r.ppf(q, **self.p)
    def rvs(self,n, rng=None): return gumbel_r.rvs(size=n, **self.p, random_state=rng)

class GumbelL_(Dist):
    def __init__(self): super().__init__('최소 극값')
    def fit(self,x):
        try: loc,sc=gumbel_l.fit(x); assert np.isfinite(loc) and sc>0; return self._ok({'loc':loc,'scale':sc},2)
        except: return self
    def cdf(self,z): return gumbel_l.cdf(z, **self.p)
    def ppf(self,q): return gumbel_l.ppf(q, **self.p)
    def rvs(self,n, rng=None): return gumbel_l.rvs(size=n, **self.p, random_state=rng)

def AD_stat(x, cdf, eps=1e-12):
    x = np.sort(np.asarray(x, float)); n = x.size
    if n < 5: return np.inf
    u = np.clip(cdf(x), eps, 1-eps); i = np.arange(1,n+1)
    return float(-n - np.sum((2*i-1)*(np.log(u)+np.log(1-u[::-1])))/n)

def AD_p_boot_refit(x, dist_obj, B=BOOTSTRAP_B):
    x = np.asarray(x, float); n = x.size
    if n < 5 or not dist_obj.valid: return np.nan
    A2_obs = AD_stat(x, dist_obj.cdf)
    ge = 0; m = 0
    # 각 부트스트랩에서 refit
    for _ in range(B):
        rng = np.random.default_rng()  # 독립 난수원
        xs = dist_obj.rvs(n, rng=rng)
        d_bs = type(dist_obj)()
        d_bs.fit(xs)
        if not d_bs.valid: continue
        A2_bs = AD_stat(xs, d_bs.cdf)
        ge += (A2_bs >= A2_obs); m += 1
    return float((ge + 1) / (m + 1)) if m>0 else np.nan

def KS_stat_p(x, dist_obj):
    try: D,p = stats.kstest(x, lambda z: dist_obj.cdf(z)); return float(D), float(p)
    except: return np.nan, np.nan

def Chi2_stat_p(x, dist_obj):
    try:
        n=len(x); N=max(5, min(50, n//5)); eps=1e-6
        qs=np.linspace(eps, 1-eps, N+1); edges=np.unique(dist_obj.ppf(qs))
        if len(edges) < 3: return np.nan, np.nan
        obs,_ = np.histogram(x, bins=edges); exp = np.diff(qs)*n
        k = dist_obj.np or 0; df = len(obs)-1-k
        if df <= 0: return np.nan, np.nan
        exp=np.maximum(exp[:len(obs)], 1e-9)
        chi=np.sum((obs-exp)**2/exp); p=1.0-chi2.cdf(chi, df)
        return float(chi), float(p)
    except: return np.nan, np.nan

def fit_one_series(x):
    # PATCH: dtype=float (위치 인자 금지)
    x = pd.Series(x, dtype=float).replace([np.inf,-np.inf], np.nan).dropna().values
    if x.size < 20: return None
    cands = [LogNormal(), Gamma_(), Weibull(), Logistic(), Normal(), StudentT(),
             Exponential_(), BetaPERT_(), Triangular_(), Uniform_(), GumbelR_(), GumbelL_()]
    rows=[]
    for d in cands:
        d.fit(x)
        if not d.valid:
            rows.append({'name':d.name,'AD':np.inf,'ADp':np.nan,'KSp':-np.inf,'Chi2p':-np.inf,'np':1e9,'p':d.p,'dist':d})
            continue
        xe = x[x>0] if d.name in POS_ONLY else x
        A2 = AD_stat(xe, d.cdf)
        pAD = AD_p_boot_refit(xe, d, B=BOOTSTRAP_B)  # 재적합 부트스트랩 p
        D,p = KS_stat_p(xe, d); chi,pc = Chi2_stat_p(xe, d)
        rows.append({'name':d.name,'AD':A2,'ADp':pAD,'KSp':p,'Chi2p':pc,'np':d.np or 9,'p':d.p,'dist':d})
    df = pd.DataFrame(rows)
    df['_key'] = list(zip(df['AD'].fillna(np.inf),
                          (-df['KSp']).fillna(np.inf),
                          (-df['Chi2p']).fillna(np.inf),
                          df['np'].fillna(np.inf)))
    df = df.sort_values('_key', kind='mergesort')
    best = df.iloc[0]
    return best, df

def pretty_params(name, p):
    try:
        if name=='로그 정규': return f"s={p['s']:.4g}, scale={p['scale']:.4g}"
        if name=='와이블':   return f"c={p['c']:.4g}, scale={p['scale']:.4g}"
        if name=='감마':     return f"a={p['a']:.4g}, scale={p['scale']:.4g}"
        if name in ['정규','로지스틱','최대 극값','최소 극값']: return f"loc={p['loc']:.4g}, scale={p['scale']:.4g}"
        if name=='지수':     return f"scale={p['scale']:.4g}"
        if name=='BetaPERT': return f"a={p['a']:.4g}, m={p['m']:.4g}, b={p['b']:.4g}, α={p['alpha']:.3g}, β={p['beta']:.3g}"
        if name=='삼각형':   return f"a={p['a']:.4g}, m={p['m']:.4g}, b={p['b']:.4g}"
        if name=='균일':     return f"min={p['loc']:.4g}, max={(p['loc']+p['scale']):.4g}"
        return json.dumps(p, ensure_ascii=False)
    except: return json.dumps(p, ensure_ascii=False)

# ===== 분포 요약표 & 상세표
os.makedirs("fit_outputs", exist_ok=True)
summary_matrix = pd.DataFrame(index=['분포:','최선 적합:','앤더슨-달링:','P 값:'])
fit_info = {}

for m in [mm for mm in ORDER if mm in trimmed]:
    x = trimmed[m].values
    if m=='Cr(VI)' and 'Cr' in fit_info:
        best_name = fit_info['Cr']['best_name'] + " (scaled 1/7)"
        ad_val = fit_info['Cr']['fit_table'].iloc[0]['AD']
        p_val  = fit_info['Cr']['fit_table'].iloc[0]['ADp']
        summary_matrix[m] = [best_name, f"max={np.max(x):.3g}", f"{ad_val:.4g}", f"{p_val if np.isfinite(p_val) else '---'}"]
        fit_info[m] = {'derived_from':'Cr'}
        continue

    res = fit_one_series(x)
    if res is None:
        summary_matrix[m] = ['---','표본부족','---','---']; continue
    best, df_all = res
    best_name = str(best['name']); best_params = best['p']
    summary_matrix[m] = [best_name,
                         f"max={np.max(x):.3g}",
                         f"{best['AD']:.4g}",
                         f"{best['ADp'] if np.isfinite(best['ADp']) else '---'}"]
    fit_info[m] = {'best_name':best_name,'best_params':best_params,'dist_obj':best['dist'],'fit_table':df_all}

with pd.ExcelWriter(os.path.join("fit_outputs","fit_summary.xlsx")) as w:
    summary_matrix.to_excel(w, sheet_name="데이터 계열표")
    for m, info in fit_info.items():
        if 'fit_table' in info:
            info['fit_table'].head(10)[['name','AD','ADp','KSp','Chi2p','np']].to_excel(w, sheet_name=f"{m}_상세", index=False)
print("Saved: fit_outputs/fit_summary.xlsx")
display(summary_matrix)

# ============================================================
# 3) 분포표 기반 난수 10,000개 생성 (Cr(VI)=Cr_sim/7)
os.makedirs("mc_from_fit", exist_ok=True)
C_sims = {}

# PATCH: Generator를 만들어 random_state로 전달
seed_master = np.random.SeedSequence(20250912)
def child_rng(ss):
    return np.random.default_rng(ss)

for m in [mm for mm in ORDER if mm in trimmed]:
    if m=='Cr(VI)' and fit_info.get(m,{}).get('derived_from')=='Cr':
        continue
    info = fit_info.get(m)
    if not info or ('dist_obj' not in info):
        x = trimmed[m].values
        idx = np.random.randint(0, x.size, size=N_SIM)
        C_sims[m] = x[idx]
    else:
        dist = info['dist_obj']
        rng = child_rng(seed_master.spawn(1)[0])
        C_sims[m] = dist.rvs(N_SIM, rng=rng)

# Cr(VI) = Cr_sim/7
if 'Cr' in C_sims and ('Cr(VI)' in trimmed):
    C_sims['Cr(VI)'] = C_sims['Cr'] / 7.0
    pd.DataFrame({'Cr_sim(ug/m3)':C_sims['Cr'], 'Cr(VI)_sim(ug/m3)':C_sims['Cr(VI)']}).to_csv("mc_from_fit/Cr_and_CrVI_samples.csv", index=False)

for m, arr in C_sims.items():
    pd.DataFrame({f"{m}_C(ug/m3)": arr}).to_csv(f"mc_from_fit/{m}_C_samples.csv", index=False)

# ============================================================
# 4) Age-weighted K_total + IUR → LADD/ECR
# IUR (per µg/m3)
IUR = {"Cr(VI)":1.20e-02,"Co":9.00e-03,"Ni":2.40e-04,"As":4.30e-03,"Cd":1.80e-03,"Sb":2.29e-06,"Pb":1.20e-05}

EF_days_per_year = 350
LT_years = 78.6
ACT_POINT = {"0-<1":24,"1-<2":84,"2-<3":120,"3-<6":108,"6-<11":132,"11-<16":102,"16-<18":102}
ACT_LN_P5_P95 = {"18-<25":(14.455,250.0),"25-<35":(6.516,220.0),"35-<45":(5.789,195.0),
                 "45-<55":(6.401,260.0),"55-<65":(8.083,350.0),"65-<78.6":(6.094,390.0)}
AGE_ORDER=["0-<1","1-<2","2-<3","3-<6","6-<11","11-<16","16-<18","18-<25","25-<35","35-<45","45-<55","55-<65","65-<78.6"]

_Z95 = 1.6448536269514722
def lognorm_mu_sigma_from_p5_p95(p5,p95):
    ln5, ln95 = math.log(p5), math.log(p95)
    sigma = (ln95 - ln5) / (2 * _Z95)
    mu = (ln5 + ln95) / 2.0
    return mu, sigma

def adaf_for_label(lbl):
    a=float(lbl.split('-')[0]);
    return 10.0 if a<2 else (3.0 if a<16 else 1.0)

def sample_Act_out_for_age(age_label, size):
    if age_label in ACT_POINT:
        return np.full(size, float(ACT_POINT[age_label]), dtype=float)
    p5,p95 = ACT_LN_P5_P95[age_label]
    mu,sigma = lognorm_mu_sigma_from_p5_p95(p5,p95)
    v = np.random.lognormal(mean=mu, sigma=sigma, size=size)
    return np.clip(v, 0, 1440)

def build_K_total_samples(size):
    K_tot=np.zeros(size,float)
    ED_years = {"0-<1":1,"1-<2":1,"2-<3":1,"3-<6":3,"6-<11":5,"11-<16":5,"16-<18":2,
                "18-<25":7,"25-<35":10,"35-<45":10,"45-<55":10,"55-<65":10,"65-<78.6":13.6}
    for age in AGE_ORDER:
        act = sample_Act_out_for_age(age, size)
        ED  = float(ED_years[age])
        ADAF = float(adaf_for_label(age))
        K_age = (act/1440.0) * (EF_days_per_year/365.0) * (ED/LT_years) * ADAF
        K_tot += K_age
    return K_tot

K_tot = build_K_total_samples(N_SIM)
print(f"K_total 요약: mean={np.mean(K_tot):.6g}, P95={np.percentile(K_tot,95):.6g}")

rows=[]
for m, C_sim in C_sims.items():
    if not np.isfinite(IUR.get(m, np.nan)): continue
    ECR = C_sim * IUR[m] * K_tot
    rows.append({'Metal':m,'N_sim':N_SIM,
                 'Mean_ECR':float(np.mean(ECR)),'Median_ECR':float(np.median(ECR)),
                 'P95_ECR':float(np.percentile(ECR,95)),'P99_ECR':float(np.percentile(ECR,99)),
                 'Pr(ECR>1e-6)':float(np.mean(ECR>1e-6))})
ecr_df = pd.DataFrame(rows).set_index('Metal').sort_values('Mean_ECR', ascending=False)
display(ecr_df)
ecr_df.to_excel("mc_from_fit/ECR_summary_from_fit.xlsx")
print("Saved: fit_outputs/fit_summary.xlsx, mc_from_fit/*.csv, mc_from_fit/ECR_summary_from_fit.xlsx")

👉 '202501_clean2.xlsx' 업로드 해주세요


Saving 202501_clean2.xlsx to 202501_clean2 (4).xlsx
원본데이터 shape: (461, 11)

[매핑/단위 로그]


Unnamed: 0,Metal,Matched_Column,Unit_Status,N_nonNa,Mean(ug/m3)
0,Cr,Cr(ng/m3),converted_from_ng,352,0.002969
1,Cr(VI),,missing,0,
2,Co,Co(ng/m3),converted_from_ng,449,0.007846
3,Ni,Ni(ng/m3),converted_from_ng,326,0.002975
4,As,As(ng/m3),converted_from_ng,449,0.0
5,Cd,Cd(ng/m3),converted_from_ng,449,0.213893
6,Sb,Sb(ng/m3),converted_from_ng,232,0.01728
7,Pb,Pb(ng/m3),converted_from_ng,395,0.00894


전처리 완료 ▶ preprocessed/*.xlsx 저장
Saved: fit_outputs/fit_summary.xlsx


Unnamed: 0,Cr,Cr(VI),Co,Ni,Cd,Sb,Pb
분포:,정규,정규 (scaled 1/7),와이블,와이블,로그 정규,와이블,감마
최선 적합:,max=0.00558,max=0.000797,max=0.0149,max=0.00622,max=0.302,max=0.0419,max=0.027
앤더슨-달링:,1.478,1.478,2.317,1.271,3.188,2.124,0.9473
P 값:,0.006644518272425249,0.006644518272425249,0.0033222591362126247,0.009966777408637873,0.0033222591362126247,0.0033222591362126247,0.023255813953488372


K_total 요약: mean=0.0950611, P95=0.148026


Unnamed: 0_level_0,N_sim,Mean_ECR,Median_ECR,P95_ECR,P99_ECR,Pr(ECR>1e-6)
Metal,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Cd,10000,3.638652e-05,3.359132e-05,6.060331e-05,8.419004e-05,1.0
Co,10000,6.592637e-06,6.055267e-06,1.29518e-05,1.771315e-05,0.9936
Cr(VI),10000,4.802846e-07,4.513899e-07,9.564439e-07,1.324772e-06,0.0405
Ni,10000,6.686959e-08,5.974972e-08,1.423071e-07,1.982408e-07,0.0
Pb,10000,9.454575e-09,7.095388e-09,2.564826e-08,4.090232e-08,0.0
Sb,10000,3.600582e-09,3.029494e-09,8.612402e-09,1.272716e-08,0.0


Saved: fit_outputs/fit_summary.xlsx, mc_from_fit/*.csv, mc_from_fit/ECR_summary_from_fit.xlsx
