In [13]:
from rdkit import Chem
from rdkit.Chem import rdMolAlign
import os

def get_all_task_dirs(results_dir="results"):
    task_dirs = []
    for prefix in os.listdir(results_dir):
        prefix_path = os.path.join(results_dir, prefix)
        if not os.path.isdir(prefix_path):
            continue
        for task_id in os.listdir(prefix_path):
            task_path = os.path.join(prefix_path, task_id)
            if os.path.isdir(task_path):
                task_dirs.append(task_path)
    return task_dirs

def calc_rmsd_for_task(task_dir):
    init_path = os.path.join(task_dir, "homopoly_init.sdf")
    relaxed_path = os.path.join(task_dir, "homopoly_relaxed.sdf")
    if not (os.path.exists(init_path) and os.path.exists(relaxed_path)):
        return None
    mol_init = Chem.SDMolSupplier(init_path, removeHs=False)[0]
    mol_relaxed = Chem.SDMolSupplier(relaxed_path, removeHs=False)[0]
    if mol_init is None or mol_relaxed is None:
        return None
    # 保证分子有构象
    if mol_init.GetNumConformers() == 0 or mol_relaxed.GetNumConformers() == 0:
        return None
    # 计算RMSD
    try:
        rmsd = rdMolAlign.GetBestRMS(mol_init, mol_relaxed)
    except Exception as e:
        rmsd = None
    return rmsd

results = []
task_dirs = get_all_task_dirs("results")
for task_dir in task_dirs:
    rmsd = calc_rmsd_for_task(task_dir)
    if rmsd is not None:
        print(f"{task_dir}: RMSD = {rmsd:.4f}")
        results.append((task_dir, rmsd))
    else:
        print(f"{task_dir}: 计算失败或文件缺失")


results/test/task_78_repeat-5-monomers: RMSD = 0.7454
results/test/task_20_repeat-5-monomers: RMSD = 0.0081
results/test/task_99_repeat-5-monomers: RMSD = 1.1993
results/test/task_105_repeat-5-monomers: RMSD = 0.0992
results/test/task_75_repeat-5-monomers: RMSD = 0.2254
results/test/task_58_repeat-5-monomers: RMSD = 0.0000
results/test/task_13_repeat-5-monomers: RMSD = 0.1164
results/test/task_6_repeat-5-monomers: RMSD = 0.7257
results/test/task_52_repeat-5-monomers: RMSD = 1.0339
results/test/task_12_repeat-5-monomers: RMSD = 0.3298
results/test/task_98_repeat-5-monomers: RMSD = 0.0474
results/test/task_27_repeat-5-monomers: RMSD = 0.1601
results/test/task_53_repeat-5-monomers: RMSD = 0.0098
results/test/task_40_repeat-5-monomers: RMSD = 1.0578
results/test/task_38_repeat-5-monomers: RMSD = 0.4574
results/test/task_59_repeat-5-monomers: RMSD = 1.9150
results/test/task_29_repeat-5-monomers: RMSD = 0.6817
results/test/task_5_repeat-5-monomers: RMSD = 0.4146
results/test/task_25_repeat-5

In [14]:
import numpy as np

def calc_local_monomer_rmsd(task_dir):
    """
    计算task_dir下所有local_monomer_x.sdf之间的两两RMSD，返回一个RMSD矩阵和单体文件名列表
    """
    # 找到所有local_monomer_*.sdf文件
    monomer_files = []
    for fname in os.listdir(task_dir):
        if fname.startswith("local_monomer_") and fname.endswith(".sdf"):
            monomer_files.append(fname)
    monomer_files.sort()  # 按序号排序

    if len(monomer_files) < 2:
        print(f"{task_dir}: local_monomer数量不足2，无法计算RMSD")
        return None, None

    # 读取所有分子
    monomers = []
    for f in monomer_files:
        mol = Chem.SDMolSupplier(os.path.join(task_dir, f), removeHs=False)[0]
        if mol is None or mol.GetNumConformers() == 0:
            print(f"{task_dir}: {f} 读取失败或无构象")
            return None, None
        monomers.append(mol)

    n = len(monomers)
    rmsd_matrix = np.zeros((n, n))
    for i in range(n):
        for j in range(i+1, n):
            try:
                rmsd = rdMolAlign.GetBestRMS(monomers[i], monomers[j])
            except Exception as e:
                print(e)
                rmsd = np.nan
            rmsd_matrix[i, j] = rmsd
            rmsd_matrix[j, i] = rmsd
    return rmsd_matrix, monomer_files

# 对每个任务文件夹计算local_monomer的RMSD矩阵
for task_dir in task_dirs:
    rmsd_matrix, monomer_files = calc_local_monomer_rmsd(task_dir)
    if rmsd_matrix is not None:
        print(f"\n{task_dir} 的 local_monomer 两两RMSD矩阵：")
        print("单体文件顺序：", monomer_files)
        print(np.array_str(rmsd_matrix, precision=4, suppress_small=True))
    else:
        print(f"{task_dir}: local_monomer RMSD计算失败")


No sub-structure match found between the reference and probe mol
No sub-structure match found between the reference and probe mol
No sub-structure match found between the reference and probe mol
No sub-structure match found between the reference and probe mol

results/test/task_78_repeat-5-monomers 的 local_monomer 两两RMSD矩阵：
单体文件顺序： ['local_monomer_0.sdf', 'local_monomer_1.sdf', 'local_monomer_2.sdf', 'local_monomer_3.sdf', 'local_monomer_4.sdf']
[[0.     0.6355 0.6651 0.5921    nan]
 [0.6355 0.     0.1211 0.1918    nan]
 [0.6651 0.1211 0.     0.2333    nan]
 [0.5921 0.1918 0.2333 0.        nan]
 [   nan    nan    nan    nan 0.    ]]
No sub-structure match found between the reference and probe mol
No sub-structure match found between the reference and probe mol
No sub-structure match found between the reference and probe mol
No sub-structure match found between the reference and probe mol

results/test/task_20_repeat-5-monomers 的 local_monomer 两两RMSD矩阵：
单体文件顺序： ['local_monomer_0.sdf', '


results/test/task_45_repeat-5-monomers 的 local_monomer 两两RMSD矩阵：
单体文件顺序： ['local_monomer_0.sdf', 'local_monomer_1.sdf', 'local_monomer_2.sdf', 'local_monomer_3.sdf', 'local_monomer_4.sdf']
[[0.     1.0035 0.9194 0.8918 0.0683]
 [1.0035 0.     0.2975 0.357  1.0014]
 [0.9194 0.2975 0.     0.0927 0.907 ]
 [0.8918 0.357  0.0927 0.     0.8775]
 [0.0683 1.0014 0.907  0.8775 0.    ]]
No sub-structure match found between the reference and probe mol
No sub-structure match found between the reference and probe mol
No sub-structure match found between the reference and probe mol
No sub-structure match found between the reference and probe mol

results/test/task_42_repeat-5-monomers 的 local_monomer 两两RMSD矩阵：
单体文件顺序： ['local_monomer_0.sdf', 'local_monomer_1.sdf', 'local_monomer_2.sdf', 'local_monomer_3.sdf', 'local_monomer_4.sdf']
[[0.        nan    nan    nan    nan]
 [   nan 0.     0.4368 0.1292 0.1838]
 [   nan 0.4368 0.     0.403  0.4827]
 [   nan 0.1292 0.403  0.     0.166 ]
 [   nan 0.1838 0