In [None]:
import numpy as np
import pandas as pd

# ================= 0) 光谱网格：380–750 nm，步长 5 nm =================
def make_wavelengths(start_nm=380.0, stop_nm=750.0, step_nm=5.0):
    """Return the uniformly spaced wavelength grid used for the R/T spectra.

    With the defaults this is 380, 385, ..., 750 (75 samples), the same
    values the previously hard-coded grid produced.

    Args:
        start_nm: First wavelength of the grid.
        stop_nm: Upper bound; included when it lies on the grid.
        step_nm: Spacing between neighbouring samples; must be positive.

    Returns:
        1-D float ndarray ``start_nm + k * step_nm`` for every ``k >= 0``
        whose value does not exceed ``stop_nm``.

    Raises:
        ValueError: if ``step_nm`` is not positive or ``stop_nm < start_nm``.
    """
    if step_nm <= 0:
        raise ValueError("step_nm must be positive")
    if stop_nm < start_nm:
        raise ValueError("stop_nm must not be smaller than start_nm")
    # The small slack keeps the end point when (stop - start) / step lands a
    # hair below an integer because of floating-point rounding.
    n_steps = int(np.floor((stop_nm - start_nm) / step_nm + 1e-9))
    return start_nm + step_nm * np.arange(n_steps + 1, dtype=float)

# ================= 1) 折射率：TiO2 / SiO2 / MgF2（k=0） =================
# TiO2
# TiO2 dispersion samples as (wavelength, refractive index) pairs, listed in
# ascending wavelength order as np.interp requires.
_TIO2_SAMPLES = (
    (380.0, 2.55), (425.0, 2.49), (450.0, 2.469), (475.0, 2.444),
    (500.0, 2.422), (525.0, 2.402), (550.0, 2.385), (575.0, 2.37),
    (600.0, 2.351), (625.0, 2.343), (650.0, 2.337), (675.0, 2.331),
    (750.0, 2.322), (775.0, 2.317), (800.0, 2.313), (825.0, 2.311),
    (850.0, 2.309), (900.0, 2.305), (1000.0, 2.300), (1060.0, 2.299),
)
lam_tab_tio2 = np.array([wavelength for wavelength, _ in _TIO2_SAMPLES])
n_tab_tio2 = np.array([index for _, index in _TIO2_SAMPLES])


def n_tio2(lam_nm):
    """Linearly interpolate the tabulated TiO2 refractive index at lam_nm.

    Wavelengths outside the table take the nearest end value (the default
    behaviour of np.interp).
    """
    return np.interp(lam_nm, lam_tab_tio2, n_tab_tio2)

# SiO2
# SiO2 dispersion samples as (wavelength, refractive index) pairs, listed in
# ascending wavelength order as np.interp requires.
_SIO2_SAMPLES = (
    (300.0, 1.478), (350.0, 1.472), (400.0, 1.467), (450.0, 1.463),
    (500.0, 1.459), (550.0, 1.455), (600.0, 1.452), (650.0, 1.450),
    (700.0, 1.446), (900.0, 1.437), (1000.0, 1.434),
)
lam_tab_sio2 = np.array([wavelength for wavelength, _ in _SIO2_SAMPLES])
n_tab_sio2 = np.array([index for _, index in _SIO2_SAMPLES])


def n_sio2(lam_nm):
    """Linearly interpolate the tabulated SiO2 refractive index at lam_nm.

    Wavelengths outside the table take the nearest end value (the default
    behaviour of np.interp).
    """
    return np.interp(lam_nm, lam_tab_sio2, n_tab_sio2)

# MgF2
# MgF2 dispersion samples as (wavelength, refractive index) pairs, listed in
# ascending wavelength order as np.interp requires.
_MGF2_SAMPLES = (
    (248.0, 1.40),
    (550.0, 1.38),
    (1550.0, 1.36),
)
lam_tab_mgf2 = np.array([wavelength for wavelength, _ in _MGF2_SAMPLES])
n_tab_mgf2 = np.array([index for _, index in _MGF2_SAMPLES])


def n_mgf2(lam_nm):
    """Linearly interpolate the tabulated MgF2 refractive index at lam_nm.

    Wavelengths outside the table take the nearest end value (the default
    behaviour of np.interp).
    """
    return np.interp(lam_nm, lam_tab_mgf2, n_tab_mgf2)

# 玻璃（常数）
glass_n_const = 1.5163


def n_glass(lam_nm):
    """Return the constant glass index as a float array shaped like lam_nm."""
    index = np.empty_like(lam_nm, dtype=float)
    # The glass is modelled without dispersion: one value for every sample.
    index[...] = glass_n_const
    return index

# ================= 2) TMM（正入射，未偏振） =================
def tmm_normal_rt(layers, lam_nm, n_env, n_sub):
    """Reflectance and transmittance of a layer stack at normal incidence.

    The stack is described by the product of per-layer characteristic
    (transfer) matrices; the amplitude coefficients r and t follow from the
    accumulated matrix together with the indices of the bounding media.

    Args:
        layers: iterable of ``(n, d_nm)`` pairs ordered from the incident
            medium towards the exit medium. ``n`` is the layer's refractive
            index, either a scalar or an array broadcastable against
            ``lam_nm``; ``d_nm`` is the thickness in the same unit as
            ``lam_nm``. An empty iterable describes a bare interface.
        lam_nm: wavelength(s), scalar or array-like.
        n_env: refractive index of the incident medium (scalar or array).
        n_sub: refractive index of the exit medium (scalar or array).

    Returns:
        Tuple ``(R, T)`` of real-valued results, one entry per wavelength.
    """
    # np.asarray (instead of .astype) lets callers pass lists and plain
    # scalars as well as ndarrays.
    wl_c = np.asarray(lam_nm, dtype=np.complex128)
    n0 = np.asarray(n_env, dtype=np.complex128)
    ns = np.asarray(n_sub, dtype=np.complex128)

    # Accumulated 2x2 matrix, stored element-wise per wavelength; it starts
    # as the identity.
    M11 = np.ones_like(wl_c)
    M12 = np.zeros_like(wl_c)
    M21 = np.zeros_like(wl_c)
    M22 = np.ones_like(wl_c)

    for n_layer, d_nm in layers:
        nj = np.asarray(n_layer, dtype=np.complex128)
        delta = 2.0*np.pi * nj * (d_nm / wl_c)  # phase thickness of the layer
        c, s = np.cos(delta), np.sin(delta)
        # Layer matrix [[A, B], [C, D]]; right-multiply onto the running one.
        A = c
        B = 1j * s / nj
        C = 1j * nj * s
        D = c
        M11, M12, M21, M22 = (
            M11*A + M12*C,
            M11*B + M12*D,
            M21*A + M22*C,
            M21*B + M22*D,
        )

    den = (n0*M11 + n0*ns*M12 + M21 + ns*M22)
    r = (n0*M11 + n0*ns*M12 - M21 - ns*M22) / den
    t = (2.0*n0) / den

    R = np.abs(r)**2
    # The index ratio converts |t|^2 from an amplitude ratio into a power ratio.
    T = (np.real(ns)/np.real(n0)) * np.abs(t)**2
    return R, T

# ================= 3) 随机结构（厚度离散为 10 nm） =================
MATERIALS = ["SiO2", "TiO2", "MgF2"]

def mat_to_narr(name, lam_nm):
    """Evaluate the refractive-index model of material `name` at lam_nm.

    Raises ValueError (carrying the offending name) for an unknown material.
    """
    # Pick the dispersion function first, then evaluate it once below.
    if name == "SiO2":
        dispersion = n_sio2
    elif name == "TiO2":
        dispersion = n_tio2
    elif name == "MgF2":
        dispersion = n_mgf2
    else:
        raise ValueError(name)
    return dispersion(lam_nm)

def build_layers(materials, thicknesses_nm, lam_nm, input_order="topdown"):
    """Turn material names and thicknesses into TMM layer tuples.

    Args:
        materials: material names, one per layer.
        thicknesses_nm: layer thicknesses, parallel to ``materials``.
        lam_nm: wavelengths at which each material's index is evaluated.
        input_order:
            - "topdown": materials[0] is the top layer, next to the air
              (default).
            - "bottomup": materials[0] is the bottom layer, next to the
              glass; the inputs are reversed internally.

    Returns:
        ``(layers, mats_topdown, thks_topdown)`` where ``layers`` is a list
        of ``(n_array, thickness)`` tuples. All three are new lists in
        top-down order; the caller's sequences are never modified.

    Raises:
        ValueError: if ``input_order`` is not recognised, or if
            ``materials`` and ``thicknesses_nm`` differ in length.
    """
    if input_order not in ("topdown", "bottomup"):
        raise ValueError("input_order must be 'topdown' or 'bottomup'")

    # Copy up front so both orders return the same container type and the
    # caller's sequences stay untouched.
    mats_topdown = list(materials)
    thks_topdown = list(thicknesses_nm)
    # zip() would otherwise silently drop the unmatched trailing layers.
    if len(mats_topdown) != len(thks_topdown):
        raise ValueError(
            "materials and thicknesses_nm must have the same length "
            f"(got {len(mats_topdown)} and {len(thks_topdown)})"
        )
    if input_order == "bottomup":
        mats_topdown.reverse()
        thks_topdown.reverse()

    layers = [(mat_to_narr(m, lam_nm), float(d))
              for m, d in zip(mats_topdown, thks_topdown)]
    return layers, mats_topdown, thks_topdown

def rand_stack(rng, min_layers=1, max_layers=20,
               tmin=20.0, tmax=300.0, thickness_step=10.0):
    """Draw a random stack: layer count, materials and thicknesses.

    The layer count is drawn from [min_layers, max_layers]; every thickness
    is an integer multiple of thickness_step chosen between
    ceil(tmin / thickness_step) and floor(tmax / thickness_step) steps.
    Returns (materials, thicknesses) as plain lists in generation order;
    callers decide whether that order means top-down or bottom-up.
    """
    n_layers = rng.integers(min_layers, max_layers + 1)
    mats = rng.choice(MATERIALS, size=n_layers, replace=True).tolist()

    # Work in whole steps so every thickness lands on the discrete grid.
    first_multiple = int(np.ceil(tmin / thickness_step))
    last_multiple = int(np.floor(tmax / thickness_step))
    multiples = rng.integers(first_multiple, last_multiple + 1, size=n_layers)

    thks = [float(k * thickness_step) for k in multiples]
    return mats, thks

def serialize_tokens(materials_topdown, thicknesses_topdown, fmt=".0f"):
    """Serialize a stack as 'SiO2:120|TiO2:230|MgF2:40|<EoS>'.

    Layers are written in the order given (the top-down order used by the
    solver). `fmt` is the format spec applied to each thickness; a leading
    ':' is tolerated and stripped.
    """
    format_spec = fmt.lstrip(":")
    tokens = []
    for material, thickness in zip(materials_topdown, thicknesses_topdown):
        tokens.append("{}:{}".format(material, format(float(thickness), format_spec)))
    return "|".join(tokens) + "|<EoS>"

# ================= 4) 光谱：玻璃背板 + 非相干合成（不做任何强制 0） =================
def simulate_stack_glass_noncoh(front_layers_topdown, lam_nm, n_air=1.0, n_exit=1.0):
    """
    Spectrum of a coated glass slab whose thickness is treated incoherently.

    The coating is solved coherently with the TMM in both directions, the
    uncoated back face of the glass contributes a normal-incidence Fresnel
    reflectance, and the two faces are combined by summing intensities over
    the multiple passes inside the glass. No value is clamped or forced to
    zero. front_layers_topdown must be in top-down order (air side first).
    Returns (R, T) evaluated at lam_nm.
    """
    glass_index = n_glass(lam_nm)

    # Coating seen from the air side: Air -> stack -> Glass.
    R01, T01 = tmm_normal_rt(front_layers_topdown, lam_nm, n_air, glass_index)
    # Coating seen from inside the glass: Glass -> reversed stack -> Air.
    stack_from_glass = list(front_layers_topdown)[::-1]
    R10, T10 = tmm_normal_rt(stack_from_glass, lam_nm, glass_index, n_air)

    # Back face (glass -> exit medium): wavelength-independent Fresnel value,
    # broadcast to one entry per wavelength.
    back_face = ((glass_n_const - n_exit) / (glass_n_const + n_exit))**2
    R_b = np.full_like(lam_nm, back_face, dtype=float)

    # Shared denominator of the geometric series over round trips in the glass.
    round_trips = 1.0 - R10 * R_b
    R = R01 + (T01 * T10 * R_b) / round_trips
    T = (T01 * (1.0 - R_b)) / round_trips
    return R, T

# ================= 5) 数据集生成（按样例宽表 + 首列 Unnamed: 0） =================
def _structure_str(materials_topdown, thicknesses_topdown):
    """
    Render a stack as text such as ['SiO2_120', 'TiO2_230'] (brackets included).

    Thicknesses are rounded to integers for display only; the inputs are not
    modified.
    """
    quoted = [
        "'{}_{}'".format(material, int(round(float(thickness))))
        for material, thickness in zip(materials_topdown, thicknesses_topdown)
    ]
    return "[{}]".format(", ".join(quoted))

def generate_dataset(num_samples=1000,
                     thickness_step=10.0,
                     input_order="topdown",
                     seed=42,
                     output_csv="opto_RT_380_750_like_lastfile.csv"):
    """Generate random stacks, simulate their R/T spectra and export a wide table.

    Each row holds one stack: ``structure`` (text form of the top-down
    layers), ``num_layers``, then one ``R_<wavelength>nm`` column per
    wavelength followed by one ``T_<wavelength>nm`` column per wavelength,
    both in ascending wavelength order.

    Args:
        num_samples: number of random stacks to generate.
        thickness_step: thickness grid passed to ``rand_stack``.
        input_order: how the randomly generated layer lists are interpreted
            ("topdown" or "bottomup"); forwarded to ``build_layers``.
        seed: seed for ``np.random.default_rng``.
        output_csv: destination CSV path. The frame index is written as the
            first column under the header 'Unnamed: 0'. Pass ``None`` to
            skip writing and only return the DataFrame.

    Returns:
        The generated ``pandas.DataFrame``.
    """
    wl = make_wavelengths()
    rng = np.random.default_rng(seed)

    # Column names depend only on the wavelength grid, so they are built once
    # here and reused for every row.
    R_cols = [f"R_{int(round(lam))}nm" for lam in wl]
    T_cols = [f"T_{int(round(lam))}nm" for lam in wl]
    final_cols = ["structure", "num_layers"] + R_cols + T_cols

    rows = []
    for _ in range(num_samples):
        mats_in, thks_in = rand_stack(
            rng,
            min_layers=1, max_layers=20,
            tmin=20.0, tmax=300.0,
            thickness_step=thickness_step
        )

        # Normalise to top-down order before solving.
        layers, mats_topdown, thks_topdown = build_layers(
            mats_in, thks_in, wl, input_order=input_order
        )

        # Coated front face + glass slab, combined incoherently.
        R, T = simulate_stack_glass_noncoh(layers, wl)

        row = {
            "structure": _structure_str(mats_topdown, thks_topdown),
            "num_layers": len(mats_topdown),
        }
        # tolist() yields plain Python floats, one per wavelength.
        row.update(zip(R_cols, np.asarray(R, dtype=float).tolist()))
        row.update(zip(T_cols, np.asarray(T, dtype=float).tolist()))
        rows.append(row)

    # Passing columns= pins the column order.
    df = pd.DataFrame(rows, columns=final_cols)

    if output_csv is not None:
        # Write the index as the first column with the header 'Unnamed: 0'.
        df.to_csv(output_csv, index=True, index_label="Unnamed: 0", encoding="utf-8")
    return df

In [None]:
# Build the 20,000-sample test set. The random layer lists are read bottom-up
# (first entry next to the glass) and reversed internally before solving;
# switch input_order to "topdown" if the first entry should face the air.
run_config = {
    "num_samples": 20000,
    "thickness_step": 10.0,
    "input_order": "bottomup",
    "seed": 42,
    "output_csv": "opto_RT_380_750_like_test.csv",
}
df = generate_dataset(**run_config)