In [None]:
import pandas as pd
import numpy as np
from typing import Optional, Literal
import re

# ==============================================================
# 1. 读取 CSV
# ==============================================================
CSV_MASS      = "mass_activity_summary_converted (1).csv"
CSV_Half_WAVE = "E1_2_summary (1).csv"

# Load both tables with every column kept as text (dtype=str); the
# "utf-8-sig" codec transparently drops a leading BOM if one is present.
df_mass, df_half = (
    pd.read_csv(csv_path, encoding="utf-8-sig", dtype=str)
    for csv_path in (CSV_MASS, CSV_Half_WAVE)
)

# Strip stray whitespace around the header names of both tables.
for df in (df_mass, df_half):
    df.columns = [header.strip() for header in df.columns]

# ==============================================================
# 2. 辅助函数
# ==============================================================
def contains_element(cell: str, target: str) -> bool:
    """Return True when *target* occurs in *cell*, ignoring case.

    Missing values never match: real NA values (``None``/``NaN``) as well
    as the textual placeholders ``"nan"``, ``"none"`` and the empty string,
    compared case-insensitively and after trimming surrounding whitespace.

    Args:
        cell: Cell content to search; converted with ``str()`` first.
        target: Substring (e.g. an element symbol) to look for.

    Returns:
        True if the upper-cased target is a substring of the upper-cased
        cell text, otherwise False.
    """
    if pd.isna(cell):
        return False
    # Normalise before the placeholder check so that "nan", " NaN " or
    # "None" are treated as missing instead of being substring-searched
    # (otherwise a target such as "N" would match the placeholder itself).
    text = str(cell).strip().upper()
    if text in ("NAN", "NONE", ""):
        return False
    return target.upper() in text

def parse_float(value: str) -> float:
    """Extract the first number found in *value* as a float.

    Commas are removed before matching (so ``"1,234.5"`` reads as 1234.5),
    and the Unicode minus sign U+2212 is treated as an ASCII hyphen-minus
    so that a typeset negative number or exponent keeps its sign.

    Args:
        value: Raw cell content; converted with ``str()`` first.

    Returns:
        The first decimal / scientific-notation number in the text, or
        ``np.nan`` when the value is missing or contains no number.
    """
    if pd.isna(value):
        return np.nan
    # U+2212 is not matched by the "[-+]" classes below; without this
    # replacement "−0.5" would silently parse as +0.5.
    text = str(value).replace(",", "").replace("\u2212", "-")
    match = re.search(r"[-+]?\d*\.?\d+(?:[Ee][-+]?\d+)?", text)
    return float(match.group(0)) if match else np.nan

# ==============================================================
# 3. 预处理 mass 表
# ==============================================================
# Upper-case the two text columns used for filtering; astype(str) turns
# missing cells into the literal text "nan" before upper-casing.
df_mass["Metal elements"] = df_mass["Metal elements"].astype(str).str.upper()
df_mass["PT/NOTPT"] = df_mass["PT/NOTPT"].astype(str).str.upper()

# Convert the numeric columns: free-text activities go through parse_float,
# while unparsable "Pt" entries become NaN.
df_mass["Pt"] = pd.to_numeric(df_mass["Pt"], errors="coerce")
df_mass["mass_activity_A_per_mg"] = df_mass["mass_activity_A_per_mg"].map(parse_float)

# ==============================================================
# 4. 算法 1：mass_activity → source_pdf（参数顺序已修复）
# ==============================================================
def find_closest_mass_pdf(
    is_pt: bool,
    mass_activity: float,
    metal_elements: Optional[Literal["Co", "Fe"]] = None
) -> Optional[str]:
    """Return the ``source_pdf`` whose mass activity is closest to the query.

    Rows of ``df_mass`` are filtered first: with ``is_pt`` true, rows whose
    ``Pt`` column equals 1; otherwise rows whose ``PT/NOTPT`` column is
    ``"NOTPT"`` and whose ``Metal elements`` column contains the requested
    element. Among those rows, the one with the smallest absolute difference
    between ``mass_activity_A_per_mg`` and ``mass_activity`` is selected.

    Args:
        is_pt: Selects the Pt filter when true, the non-Pt filter otherwise.
        mass_activity: Query value compared against ``mass_activity_A_per_mg``.
        metal_elements: Required when ``is_pt`` is false; ``"Co"`` or ``"Fe"``.

    Returns:
        The ``source_pdf`` of the closest row, or ``None`` when the query is
        missing (None/NaN), no row passes the filter, no candidate has a
        usable mass activity, or the selected row has no ``source_pdf``.

    Raises:
        ValueError: If ``is_pt`` is false and ``metal_elements`` is not
            ``"Co"`` or ``"Fe"``.
    """
    if is_pt:
        mask = df_mass["Pt"] == 1
    else:
        if metal_elements not in ("Co", "Fe"):
            raise ValueError("metal_elements 必须是 'Co' 或 'Fe'")
        target = metal_elements.upper()
        mask = (df_mass["PT/NOTPT"] == "NOTPT") & df_mass["Metal elements"].apply(
            lambda cell: contains_element(cell, target)
        )

    # A missing query makes every distance NaN; idxmin has no valid answer
    # in that case, so bail out before attempting the lookup.
    if pd.isna(mass_activity):
        return None

    # Keep only rows with a usable distance so idxmin always sees data.
    distances = (
        (df_mass.loc[mask, "mass_activity_A_per_mg"] - mass_activity).abs().dropna()
    )
    if distances.empty:
        return None

    pdf = df_mass.loc[distances.idxmin(), "source_pdf"]
    # Honour the Optional[str] contract when the winning row has no PDF.
    return None if pd.isna(pdf) else pdf

# ==============================================================
# 5. 预处理 half_wave 表
# ==============================================================
# Turn the free-text potentials into floats (NaN when no number is found)
# and force the PDF key column to plain strings for exact-match lookups.
df_half["half_wave_potential_v"] = df_half["half_wave_potential_v"].map(parse_float)
df_half["source_pdf"] = df_half["source_pdf"].astype(str)

# ==============================================================
# 6. 算法 3：pdf → half_wave
# ==============================================================
def get_half_wave_from_pdf(pdf_path: str) -> Optional[float]:
    """Look up the half-wave potential recorded for *pdf_path* in ``df_half``.

    Args:
        pdf_path: Value compared for exact equality against ``source_pdf``.

    Returns:
        The first non-missing ``half_wave_potential_v`` among the matching
        rows, or ``None`` when no row matches or every match is NaN.
    """
    matched = df_half.loc[df_half["source_pdf"] == pdf_path, "half_wave_potential_v"]
    # Drop NaN entries first: taking position 0 of the raw matches could
    # return NaN even though a later matching row holds a valid value.
    valid = matched.dropna()
    if valid.empty:
        return None
    return valid.iloc[0]

# ==============================================================
# 7. 主函数（参数顺序已修复）
# ==============================================================
def mass_to_half_wave(
    is_pt: bool,
    mass_activity: float,
    metal_elements: Optional[Literal["Co", "Fe"]] = None
) -> Optional[float]:
    """Map a mass activity to a half-wave potential via the closest PDF.

    Finds the source PDF whose mass activity is nearest to the query using
    ``find_closest_mass_pdf`` and then fetches that PDF's half-wave
    potential with ``get_half_wave_from_pdf``.

    Args:
        is_pt: Forwarded to ``find_closest_mass_pdf``.
        mass_activity: Forwarded to ``find_closest_mass_pdf``.
        metal_elements: Forwarded to ``find_closest_mass_pdf``.

    Returns:
        The half-wave potential, or ``None`` when no PDF was found or the
        PDF has no potential on record.
    """
    closest_pdf = find_closest_mass_pdf(is_pt, mass_activity, metal_elements)
    return get_half_wave_from_pdf(closest_pdf) if closest_pdf else None

# ==============================================================
# 8. 示例
# ==============================================================
if __name__ == "__main__":
    print("运行成功！")
    # Two sample lookups: one Pt query and one non-Pt query restricted to Co.
    demo_queries = (
        {"is_pt": True, "mass_activity": 0.85},
        {"is_pt": False, "mass_activity": 0.001, "metal_elements": "Co"},
    )
    for query in demo_queries:
        print(mass_to_half_wave(**query))

运行成功！
0.95
0.931


In [6]:
# Convert a file to standard UTF-8
def convert_to_utf8(input_path, output_path, source_encoding="gbk"):
    """Re-encode the file at *input_path* as UTF-8 and write it to *output_path*.

    The whole input is read before the output is opened, so both paths may
    refer to the same file. Content that is already valid UTF-8 is written
    through unchanged, which makes repeated in-place conversion safe instead
    of re-decoding UTF-8 bytes with the legacy codec. Bytes are read and
    written in binary mode, so line endings are preserved exactly.

    Args:
        input_path: Path of the file to read.
        output_path: Path of the file to write (may equal *input_path*).
        source_encoding: Codec used when the input is not valid UTF-8.

    Raises:
        UnicodeDecodeError: If the input is neither valid UTF-8 nor valid
            in *source_encoding*.
    """
    with open(input_path, "rb") as src:
        raw = src.read()
    try:
        # Already UTF-8 (including pure ASCII): nothing to transcode.
        text = raw.decode("utf-8")
    except UnicodeDecodeError:
        text = raw.decode(source_encoding)
    with open(output_path, "wb") as dst:
        dst.write(text.encode("utf-8"))

# Rewrite the mass-activity CSV in place: the same path is passed as both
# the input and the output.
_mass_csv_path = "mass_activity_summary_converted (1).csv"
convert_to_utf8(_mass_csv_path, _mass_csv_path)