In [None]:
# Colab-only cell: upload the result CSVs into the runtime's working directory.
from google.colab import files
uploaded = files.upload()  # select all 14 CSV files in a single upload
# `uploaded` is keyed by file name; echo the names as a quick sanity check.
print("已上传：", list(uploaded.keys()))


Saving 异构GA_2.csv to 异构GA_2.csv
Saving 异构DE_2.csv to 异构DE_2.csv
Saving 异构DE_1.csv to 异构DE_1.csv
Saving 异构GA_1.csv to 异构GA_1.csv
Saving 4岛DE_4.csv to 4岛DE_4.csv
Saving 4岛DE_3.csv to 4岛DE_3.csv
Saving 4岛DE_2.csv to 4岛DE_2.csv
Saving 4岛DE_1.csv to 4岛DE_1.csv
Saving 4岛GA_4.csv to 4岛GA_4.csv
Saving 4岛GA_3.csv to 4岛GA_3.csv
Saving 4岛GA_2.csv to 4岛GA_2.csv
Saving 4岛GA_1.csv to 4岛GA_1.csv
Saving 非岛屿模型单DE.csv to 非岛屿模型单DE.csv
Saving 非岛屿模型单GA.csv to 非岛屿模型单GA.csv
已上传： ['异构GA_2.csv', '异构DE_2.csv', '异构DE_1.csv', '异构GA_1.csv', '4岛DE_4.csv', '4岛DE_3.csv', '4岛DE_2.csv', '4岛DE_1.csv', '4岛GA_4.csv', '4岛GA_3.csv', '4岛GA_2.csv', '4岛GA_1.csv', '非岛屿模型单DE.csv', '非岛屿模型单GA.csv']


In [None]:
# =============================================
# Recompute Appendix IV & V stats (Colab-ready)
# =============================================
import os, math
from pathlib import Path
import numpy as np
import pandas as pd

# Input files for the four-island configurations; load_island_group merges each
# list on (GA_run, Generation) and takes the row-wise minimum of the "Best" columns.
FILE_DE4 = ["4岛DE_1.csv","4岛DE_2.csv","4岛DE_3.csv","4岛DE_4.csv"]
FILE_GA4 = ["4岛GA_1.csv","4岛GA_2.csv","4岛GA_3.csv","4岛GA_4.csv"]
FILE_HET4 = ["异构DE_1.csv","异构DE_2.csv","异构GA_1.csv","异构GA_2.csv"]
# Input files for the single-population configurations (read by load_single_island).
FILE_DE1 = "非岛屿模型单DE.csv"
FILE_GA1 = "非岛屿模型单GA.csv"

# Multipliers used to convert a generation index into function evaluations:
# FE = Generation * scale, with the scale chosen per configuration via SCALE.
LAMBDA_EFF_SINGLE = 10
LAMBDA_EFF_MULTI  = 40
# Target thresholds compared against the best-so-far value (success if bsf <= theta).
THETAS = [1e-3, 1e-5, 1e-7]
# Default FE budget used by sr_counts when computing success counts.
SR_BUDGET_FE = 50_000
# Default FE cut-off used by fehit_vector; runs with no hit by then yield NaN.
CENSOR_FE    = 200_000

# Directory that receives the result CSVs; created (with parents) if missing.
OUTDIR = Path("outputs"); OUTDIR.mkdir(exist_ok=True, parents=True)

def _assert_exists(paths):
    miss = [str(p) for p in paths if not Path(p).exists()]
    if miss: raise FileNotFoundError(f"Missing files: {miss}")

def _read(p): return pd.read_csv(p, encoding="utf-8-sig")

def load_island_group(paths, label):
    """Combine several per-file "Best" traces into one global-best trace.

    Each file in ``paths`` contributes its ``Best`` column (renamed to the
    file's stem). The files are inner-joined on ``(GA_run, Generation)``, so
    only rows present in every file survive.

    Args:
        paths: CSV paths, each with ``GA_run``, ``Generation`` and ``Best`` columns.
        label: value stored in the ``Config`` column of the result.

    Returns:
        DataFrame sorted by run and generation with columns ``GA_run``,
        ``Generation``, ``BestGlobal_inst`` (row-wise minimum over the files),
        ``BestGlobal_bsf`` (running minimum of that value within each run)
        and ``Config``.

    Raises:
        FileNotFoundError: if any path is missing (via ``_assert_exists``).
    """
    _assert_exists(paths)
    keys = ["GA_run", "Generation"]
    per_file = [
        _read(p)[keys + ["Best"]].copy().rename(columns={"Best": Path(p).stem})
        for p in paths
    ]
    merged = per_file[0]
    for nxt in per_file[1:]:
        merged = merged.merge(nxt, on=keys, how="inner")
    # Everything that is not a join key is one file's "Best" column.
    value_cols = [name for name in merged.columns if name not in keys]
    merged = merged.sort_values(keys)
    merged["BestGlobal_inst"] = merged[value_cols].min(axis=1)
    # Best-so-far: cumulative minimum along each run (rows are already sorted).
    merged["BestGlobal_bsf"] = merged.groupby("GA_run")["BestGlobal_inst"].cummin()
    merged["Config"] = label
    return merged[keys + ["BestGlobal_inst", "BestGlobal_bsf", "Config"]]

def load_single_island(path,label):
    """Load one CSV trace and add best-so-far and configuration columns.

    Args:
        path: CSV path with ``GA_run``, ``Generation`` and ``Best`` columns.
        label: value stored in the ``Config`` column of the result.

    Returns:
        DataFrame sorted by run and generation with columns ``GA_run``,
        ``Generation``, ``BestGlobal_inst`` (the file's ``Best``),
        ``BestGlobal_bsf`` (running minimum within each run) and ``Config``.

    Raises:
        FileNotFoundError: if ``path`` is missing (via ``_assert_exists``).
    """
    _assert_exists([path])
    keys = ["GA_run", "Generation"]
    trace = (
        _read(path)[keys + ["Best"]]
        .copy()
        .rename(columns={"Best": "BestGlobal_inst"})
        .sort_values(keys)
    )
    # Rows are sorted by generation within a run, so cummin is the best-so-far.
    trace["BestGlobal_bsf"] = trace.groupby("GA_run")["BestGlobal_inst"].cummin()
    trace["Config"] = label
    return trace[keys + ["BestGlobal_inst", "BestGlobal_bsf", "Config"]]

# Load every configuration into the common (GA_run, Generation, BestGlobal_inst,
# BestGlobal_bsf, Config) layout.
DE4=load_island_group(FILE_DE4,"DE4")
GA4=load_island_group(FILE_GA4,"GA4")
HET4=load_island_group(FILE_HET4,"HET4")
DE1=load_single_island(FILE_DE1,"DE1")
GA1=load_single_island(FILE_GA1,"GA1")

# One long table holding all configurations, distinguished by the Config column.
all_df=pd.concat([DE4,GA4,HET4,DE1,GA1],ignore_index=True)
# Per-configuration multiplier that turns a generation index into function evaluations.
SCALE={"DE4":LAMBDA_EFF_MULTI,"GA4":LAMBDA_EFF_MULTI,"HET4":LAMBDA_EFF_MULTI,
       "DE1":LAMBDA_EFF_SINGLE,"GA1":LAMBDA_EFF_SINGLE}
# FE = Generation * scale, computed column-wise (Series.map looks up each row's
# Config in SCALE) instead of a row-by-row Python lambda.
all_df["FE"]=all_df["Generation"]*all_df["Config"].map(SCALE)

# -- SR@50k two-proportion z-tests --
def sr_counts(df_cfg, theta, budget=SR_BUDGET_FE):
    """Count runs whose best-so-far value reached ``theta`` within ``budget`` FE.

    Args:
        df_cfg: rows of a single configuration with ``GA_run``, ``FE`` and
            ``BestGlobal_bsf`` columns.
        theta: success threshold; a run succeeds if its best-so-far is <= theta.
        budget: only rows with ``FE <= budget`` are considered.

    Returns:
        ``(successes, runs)`` where ``runs`` is the number of distinct runs
        that have at least one row within the budget.
    """
    within = df_cfg.loc[df_cfg["FE"] <= budget]
    # The last row per run (after sorting by FE) carries the best-so-far at the budget.
    final_rows = within.sort_values(["GA_run", "FE"]).groupby("GA_run").tail(1)
    n_runs = final_rows["GA_run"].nunique()
    n_success = int(final_rows["BestGlobal_bsf"].le(theta).sum())
    return n_success, n_runs

def two_prop_z(x1,n1,x2,n2):
    """Pooled two-proportion z-test.

    Args:
        x1, n1: successes and trials in the first sample.
        x2, n2: successes and trials in the second sample.

    Returns:
        ``(p1, p2, z, p_two)``: the two sample proportions, the z statistic
        (``p1 - p2`` over the pooled standard error; 0.0 when that standard
        error is zero) and the two-sided normal p-value. All four are NaN
        when either sample is empty.
    """
    if n1==0 or n2==0:
        return np.nan,np.nan,np.nan,np.nan
    p1=x1/n1
    p2=x2/n2
    pooled=(x1+x2)/(n1+n2)
    se=math.sqrt(pooled*(1-pooled)*(1/n1+1/n2))
    z=0.0 if se==0 else (p1-p2)/se
    # 2*(1-Phi(|z|)) == erfc(|z|/sqrt(2)). Using erfc directly avoids the
    # catastrophic cancellation of 1-Phi for large |z|, where the subtraction
    # form loses digits and eventually collapses to exactly 0.
    p_two=math.erfc(abs(z)/math.sqrt(2))
    return p1,p2,z,p_two

# Configuration pairs compared with the two-proportion z-test.
PAIRS_PROP=[("GA1","DE1"),("DE4","GA4"),("HET4","GA4"),("DE4","HET4")]
# One result record per (threshold, pair); turned into a DataFrame in one go below.
rows=[]
for th in THETAS:
    for a,b in PAIRS_PROP:
        da=all_df[all_df["Config"]==a]
        db=all_df[all_df["Config"]==b]
        # Success counts use sr_counts' default FE budget.
        x1,n1=sr_counts(da,th); x2,n2=sr_counts(db,th)
        p1,p2,z,pv=two_prop_z(x1,n1,x2,n2)
        rows.append({"Theta":th,"Pair":f"{a} vs {b}",
                     "x1":x1,"n1":n1,"x2":x2,"n2":n2,"z":z,"p_two":pv,
                     "p1":p1,"p2":p2})
twoprop_df=pd.DataFrame(rows)
# Persist the unrounded values; the formatting below is for display only.
twoprop_df.to_csv(OUTDIR/"verification_twoprop.csv",index=False)
print("\n=== Appendix IV: Two-proportion z-tests (SR@50k) ===")
disp=twoprop_df.copy()
# Show thresholds as short labels; only the three values listed here are mapped.
disp["Theta"]=disp["Theta"].map({1e-3:"1e-3",1e-5:"1e-5",1e-7:"1e-7"})
# Success rates as percentages rounded to one decimal.
disp["p1(%)"]=(disp["p1"]*100).round(1); disp["p2(%)"]=(disp["p2"]*100).round(1)
print(disp[["Theta","Pair","x1","n1","x2","n2","z","p_two","p1(%)","p2(%)"]]
      .to_string(index=False,justify="center",float_format=lambda v:f"{v:.3g}"))

# -- FEhit log-rank (four-island pairs) --
def fehit_vector(df_cfg, theta, censor_fe=CENSOR_FE):
    """Return, per run, the first FE at which the best-so-far reached ``theta``.

    Args:
        df_cfg: rows of a single configuration with ``GA_run``, ``FE`` and
            ``BestGlobal_bsf`` columns.
        theta: hit threshold (hit when best-so-far <= theta).
        censor_fe: rows with ``FE`` above this value are ignored.

    Returns:
        1-D float array with one entry per run (in ``groupby("GA_run")``
        order): the smallest FE with a hit, or NaN if the run has no hit
        within ``censor_fe``.
    """
    within = df_cfg.loc[df_cfg["FE"] <= censor_fe].sort_values(["GA_run", "FE"])

    def _first_hit(run_rows):
        # Rows are FE-sorted, so the first qualifying row is the earliest hit.
        reached = run_rows.loc[run_rows["BestGlobal_bsf"] <= theta, "FE"]
        return np.nan if reached.empty else float(reached.iloc[0])

    return np.array([_first_hit(grp) for _, grp in within.groupby("GA_run")], float)

def logrank_two_sample(times_a,times_b):
    """Two-sample log-rank test on hit times, with NaN meaning "right-censored".

    Non-NaN entries are event (hit) times. NaN entries are runs that never
    hit; they stay in the risk set at every event time instead of being
    discarded. Discarding them (the previous behaviour) shrinks the risk sets
    and reports "no difference" whenever one group never hits at all.

    NOTE(review): this assumes every censored run was censored at a common
    time no earlier than the last observed event (e.g. "no hit within the
    budget"). Confirm for callers that censor earlier.

    Args:
        times_a: hit times of the first group (array-like, NaN = censored).
        times_b: hit times of the second group (array-like, NaN = censored).

    Returns:
        ``(chi2, p_two)``: the log-rank chi-square statistic (1 degree of
        freedom) and its p-value. ``(0.0, 1.0)`` is returned when either
        group is empty or when no event contributes any variance (for
        example, nobody in either group ever hits).
    """
    from collections import Counter
    a=np.asarray(times_a,float).ravel()
    b=np.asarray(times_b,float).ravel()
    n_a,n_b=a.size,b.size
    if n_a==0 or n_b==0:
        return 0.0,1.0
    # Event counts per distinct hit time; NaN (censored) entries carry no event.
    ca=Counter(a[~np.isnan(a)].tolist())
    cb=Counter(b[~np.isnan(b)].tolist())
    # Risk sets start with ALL runs, censored ones included.
    at_a,at_b=n_a,n_b
    Oa=Ea=Va=0.0
    for t in sorted(set(ca)|set(cb)):
        da,db=ca.get(t,0),cb.get(t,0)
        d=da+db
        n_risk=at_a+at_b
        if n_risk>1:
            # Hypergeometric mean and variance of group A's events at time t.
            Ea+=d*(at_a/n_risk)
            Va+=(at_a*at_b*d*(n_risk-d))/((n_risk**2)*(n_risk-1))
            Oa+=da
        # Only runs that experienced the event leave the risk set.
        at_a-=da; at_b-=db
    if Va<=0:
        return 0.0,1.0
    chi2=(Oa-Ea)**2/Va
    # P(chi2_1 > x) = erfc(sqrt(x/2)); erfc keeps precision in the far tail.
    p_two=math.erfc(math.sqrt(chi2)/math.sqrt(2))
    return chi2,p_two

# Configuration pairs compared with the log-rank test.
PAIRS_LR=[("DE4","HET4"),("DE4","GA4"),("HET4","GA4")]
# One result record per (threshold, pair).
rows=[]
for th in THETAS:
    for a,b in PAIRS_LR:
        # Per-run first-hit FE vectors (NaN where a run never hit), default censoring.
        va=fehit_vector(all_df[all_df["Config"]==a],th)
        vb=fehit_vector(all_df[all_df["Config"]==b],th)
        chi2,p=logrank_two_sample(va,vb)
        # n_a / n_b count the finite entries, i.e. runs that hit, not all runs.
        rows.append({"Theta":th,"Pair":f"{a} vs {b}","Chi2":chi2,"p_logrank":p,
                     "n_a":int(np.isfinite(va).sum()),"n_b":int(np.isfinite(vb).sum())})
logrank_df=pd.DataFrame(rows)
# Persist the unrounded values; the rounding below is for display only.
logrank_df.to_csv(OUTDIR/"verification_logrank.csv",index=False)

print("\n=== Appendix V: Log-rank tests on FEhit (four-island pairs) ===")
disp=logrank_df.copy()
# Show thresholds as short labels; only the three values listed here are mapped.
disp["Theta"]=disp["Theta"].map({1e-3:"1e-3",1e-5:"1e-5",1e-7:"1e-7"})
# Round for display: 3 decimals for chi-square, 4 significant digits for p.
disp["Chi2"]=disp["Chi2"].map(lambda x: float(f"{x:.3f}"))
disp["p_logrank"]=disp["p_logrank"].map(lambda x: float(f"{x:.4g}"))
print(disp[["Theta","Pair","n_a","n_b","Chi2","p_logrank"]]
      .to_string(index=False,justify="center"))

# List every CSV now present in the output directory.
print("\nSaved CSVs -> ./outputs/:")
for p in sorted(OUTDIR.glob("*.csv")):
    print(" -", p.name)



=== Appendix IV: Two-proportion z-tests (SR@50k) ===
Theta     Pair     x1  n1  x2  n2   z    p_two   p1(%)  p2(%)
 1e-3  GA1 vs DE1  30  30   0  30 7.75 9.33e-15   100      0 
 1e-3  DE4 vs GA4  30  30  30  30    0        1   100    100 
 1e-3 HET4 vs GA4  30  30  30  30    0        1   100    100 
 1e-3 DE4 vs HET4  30  30  30  30    0        1   100    100 
 1e-5  GA1 vs DE1  28  30   0  30 7.25  4.3e-13  93.3      0 
 1e-5  DE4 vs GA4  30  30   5  30 6.55 5.89e-11   100   16.7 
 1e-5 HET4 vs GA4  30  30   5  30 6.55 5.89e-11   100   16.7 
 1e-5 DE4 vs HET4  30  30  30  30    0        1   100    100 
 1e-7  GA1 vs DE1  17  30   0  30 4.87 1.11e-06  56.7      0 
 1e-7  DE4 vs GA4  30  30   0  30 7.75 9.33e-15   100      0 
 1e-7 HET4 vs GA4  30  30   0  30 7.75 9.33e-15   100      0 
 1e-7 DE4 vs HET4  30  30  30  30    0        1   100    100 

=== Appendix V: Log-rank tests on FEhit (four-island pairs) ===
Theta     Pair     n_a  n_b  Chi2    p_logrank 
 1e-3 DE4 vs HET4  30   30 