In [None]:
import os
import re
import pandas as pd
import json
import string
from multiprocessing import Pool
import yaml
from pathlib import Path
from functools import partial
import numpy as np

In [None]:
def process_af3_file(json_file, input_data, af3_output_dir, af3_analysis_dir):
    """Build per-ligand lDDT-PLI and RMSD records for one AF3 analysis file.

    ``json_file`` must be named ``<target>_seed-<seed>_sample-<sample>.json``.
    Scores are read from that file inside ``af3_analysis_dir``. The ranking
    score comes from ``<af3_output_dir>/<target lower-cased>/ranking_scores.csv``
    and the chain-pair ipTM matrix from
    ``seed-<seed>_sample-<sample>/summary_confidences.json`` in that same
    target directory.

    Args:
        json_file: File name (not a path) of the analysis JSON.
        input_data: Mapping ``target_id -> dict`` with the keys ``"sequences"``,
            ``"ccd_codes"`` and ``"smiles"``.
        af3_output_dir: Directory holding one sub-directory per target.
        af3_analysis_dir: Directory containing ``json_file``.

    Returns:
        ``(lddt_pli_records, rmsd_records)``, two lists of dicts with one dict
        per assigned ligand. Both lists are empty when the file name does not
        match the pattern, when the seed or the (seed, sample) row is missing
        from ``ranking_scores.csv``, when the analysis JSON lacks the
        ``assigned_scores`` entries, or when either score list is empty.
    """
    match = re.match(r"^(.*?)_seed-(\d+)_sample-(\d+)\.json$", json_file)
    if not match:
        # Directory listings can contain unrelated entries; skip them the same
        # way the other per-method processors do instead of crashing.
        return [], []

    target_id = match.group(1)
    seed = int(match.group(2))
    sample = int(match.group(3))

    trg_out_dir = os.path.join(af3_output_dir, target_id.lower())
    df_ranking = pd.read_csv(os.path.join(trg_out_dir, "ranking_scores.csv"))
    if seed not in df_ranking["seed"].values:
        return [], []
    ranking_score_df = df_ranking[
        (df_ranking["seed"] == seed) & (df_ranking["sample"] == sample)
    ]
    if ranking_score_df.empty:
        # Seed is known but this sample has no ranking row: ``.item()`` below
        # would raise, so treat it like a missing seed.
        return [], []

    conf_json = os.path.join(
        trg_out_dir, f"seed-{seed}_sample-{sample}", "summary_confidences.json"
    )
    json_path = os.path.join(af3_analysis_dir, json_file)

    with open(json_path) as f:
        result = json.load(f)

    with open(conf_json) as f:
        confidences = json.load(f)

    try:
        lddt_pli_list = result["lddt_pli"]["assigned_scores"]
        rmsd_list = result["rmsd"]["assigned_scores"]
    except KeyError:
        return [], []

    if not lddt_pli_list or not rmsd_list:
        return [], []

    # Values that are identical for every ligand of this prediction.
    ranking_score = ranking_score_df["ranking_score"].item()
    target_input = input_data[target_id]
    num_prot_chains = len(target_input["sequences"])
    chain_pair_iptm = confidences["chain_pair_iptm"]

    def _record(item, metric_keys):
        """Return one output row; ``metric_keys`` is ``(column, item key)`` pairs."""
        trg_lig_chain = item["reference_ligand"].split("/")[-1].split(".sdf")[0]
        mdl_lig_chain = item["model_ligand"].split(".")[0]
        # Chain letter -> position in the chain-pair matrix ("A" -> 0, ...).
        global_index = string.ascii_uppercase.index(mdl_lig_chain)
        # NOTE(review): assumes ligand chains are numbered right after the
        # protein chains listed in "sequences" - confirm against the inputs.
        ligand_index = global_index - num_prot_chains
        prot_lig_chain_iptm = [
            chain_pair_iptm[idx][global_index] for idx in range(num_prot_chains)
        ]
        lig_prot_chain_iptm = [
            chain_pair_iptm[global_index][idx] for idx in range(num_prot_chains)
        ]
        record = {
            "target": target_id,
            "method": "af3",
            "seed": seed,
            "sample": sample,
            "ranking_score": ranking_score,
            "prot_lig_chain_iptm_average": sum(prot_lig_chain_iptm)
            / len(prot_lig_chain_iptm),
            "prot_lig_chain_iptm_min": min(prot_lig_chain_iptm),
            "prot_lig_chain_iptm_max": max(prot_lig_chain_iptm),
            "lig_prot_chain_iptm_average": sum(lig_prot_chain_iptm)
            / len(lig_prot_chain_iptm),
            "lig_prot_chain_iptm_min": min(lig_prot_chain_iptm),
            "lig_prot_chain_iptm_max": max(lig_prot_chain_iptm),
        }
        # Metric columns sit between the ipTM block and the ligand identity
        # columns, so insertion order matters for the resulting DataFrame.
        for column, key in metric_keys:
            record[column] = item[key]
        record["model_ligand_chain"] = mdl_lig_chain
        record["model_ligand_ccd_code"] = target_input["ccd_codes"][ligand_index]
        record["model_ligand_smiles"] = target_input["smiles"][ligand_index]
        record["target_ligand_chain"] = trg_lig_chain
        return record

    af3_data_lddt_pli = [
        _record(item, (("lddt_pli", "score"),)) for item in lddt_pli_list
    ]
    af3_data_rmsd = [
        _record(
            item, (("rmsd", "score"), ("lddt_lp", "lddt_lp"), ("bb_rmsd", "bb_rmsd"))
        )
        for item in rmsd_list
    ]

    return af3_data_lddt_pli, af3_data_rmsd

In [None]:
def process_boltz_file(json_file, input_data, boltz_output_dir, boltz_analysis_dir):
    """Build per-ligand lDDT-PLI and RMSD records for one Boltz analysis file.

    ``json_file`` must be named ``<target>_<seed>_<sample>.json``. Scores are
    read from that file inside ``boltz_analysis_dir``; confidences come from
    ``<boltz_output_dir>/<target>/<seed>/boltz_results_input/predictions/input/
    confidence_input_model_<sample>.json``.

    Args:
        json_file: File name (not a path) of the analysis JSON.
        input_data: Mapping ``target_id -> dict`` with the keys ``"sequences"``,
            ``"ccd_codes"`` and ``"smiles"``.
        boltz_output_dir: Root directory of the Boltz predictions.
        boltz_analysis_dir: Directory containing ``json_file``.

    Returns:
        ``(lddt_pli_records, rmsd_records)``, two lists of dicts with one dict
        per assigned ligand. Both are empty when the file name does not match,
        the target is not in ``input_data``, the ``assigned_scores`` entries
        are missing, or either score list is empty. ``seed`` and ``sample``
        are kept as strings.
    """
    parsed = re.match(r"^(.*?)_(\d+)_(\d+)\.json$", json_file)
    if parsed is None:
        return [], []

    target_id, seed, sample = parsed.groups()
    if target_id not in input_data:
        return [], []

    confidence_path = os.path.join(
        boltz_output_dir,
        target_id,
        seed,
        "boltz_results_input",
        "predictions",
        "input",
        f"confidence_input_model_{sample}.json",
    )

    with open(os.path.join(boltz_analysis_dir, json_file)) as handle:
        analysis = json.load(handle)
    with open(confidence_path) as handle:
        confidence = json.load(handle)

    try:
        lddt_pli_scores = analysis["lddt_pli"]["assigned_scores"]
        rmsd_scores = analysis["rmsd"]["assigned_scores"]
    except KeyError:
        return [], []
    if not (lddt_pli_scores and rmsd_scores):
        return [], []

    target_input = input_data[target_id]
    n_protein = len(target_input["sequences"])

    def _shared_columns(entry):
        """Return the (leading, trailing) column groups common to both tables."""
        model_chain = entry["model_ligand"].split(".")[0]
        # Chain letter -> numeric chain id ("A" -> 0, "B" -> 1, ...).
        chain_id = string.ascii_uppercase.index(model_chain)
        # NOTE(review): assumes ligand chains follow the protein chains - confirm.
        ligand_pos = chain_id - n_protein
        reference_chain = entry["reference_ligand"].split("/")[-1].split(".sdf")[0]
        # The confidence JSON keys the pair matrix by stringified chain ids.
        lig_to_prot = [
            confidence["pair_chains_iptm"][str(chain_id)][str(p)]
            for p in range(n_protein)
        ]
        prot_to_lig = [
            confidence["pair_chains_iptm"][str(p)][str(chain_id)]
            for p in range(n_protein)
        ]
        leading = {
            "target": target_id,
            "method": "boltz",
            "seed": seed,
            "sample": sample,
            "ranking_score": confidence["confidence_score"],
            "prot_lig_chain_iptm_average": sum(prot_to_lig) / len(prot_to_lig),
            "prot_lig_chain_iptm_min": min(prot_to_lig),
            "prot_lig_chain_iptm_max": max(prot_to_lig),
            "lig_prot_chain_iptm_average": sum(lig_to_prot) / len(lig_to_prot),
            "lig_prot_chain_iptm_min": min(lig_to_prot),
            "lig_prot_chain_iptm_max": max(lig_to_prot),
        }
        trailing = {
            "model_ligand_chain": model_chain,
            "model_ligand_ccd_code": target_input["ccd_codes"][ligand_pos],
            "model_ligand_smiles": target_input["smiles"][ligand_pos],
            "target_ligand_chain": reference_chain,
        }
        return leading, trailing

    lddt_pli_rows = []
    for entry in lddt_pli_scores:
        leading, trailing = _shared_columns(entry)
        lddt_pli_rows.append({**leading, "lddt_pli": entry["score"], **trailing})

    rmsd_rows = []
    for entry in rmsd_scores:
        leading, trailing = _shared_columns(entry)
        rmsd_rows.append(
            {
                **leading,
                "rmsd": entry["score"],
                "lddt_lp": entry["lddt_lp"],
                "bb_rmsd": entry["bb_rmsd"],
                **trailing,
            }
        )

    return lddt_pli_rows, rmsd_rows

In [None]:
def process_protenix_file(json_file, input_data, protenix_output_dir, protenix_analysis_dir):
    """Build per-ligand lDDT-PLI and RMSD records for one Protenix analysis file.

    ``json_file`` must be named ``<target>_seed_<seed>_<sample>.json``. Scores
    are read from that file inside ``protenix_analysis_dir``; confidences come
    from ``<protenix_output_dir>/<target>/seed_<seed>/predictions/
    <target>_seed_<seed>_summary_confidence_sample_<sample>.json``.

    Args:
        json_file: File name (not a path) of the analysis JSON.
        input_data: Mapping ``target_id -> dict`` with the keys ``"sequences"``,
            ``"ccd_codes"`` and ``"smiles"``.
        protenix_output_dir: Root directory of the Protenix predictions.
        protenix_analysis_dir: Directory containing ``json_file``.

    Returns:
        ``(lddt_pli_records, rmsd_records)``, two lists of dicts with one dict
        per assigned ligand. Both are empty when the file name does not match,
        the ``assigned_scores`` entries are missing, or either score list is
        empty. ``seed`` and ``sample`` are kept as strings.
    """
    name_match = re.match(r"^(.*?)_seed_(\d+)_(\d+)\.json$", json_file)
    if name_match is None:
        return [], []
    target_id, seed, sample = name_match.groups()

    summary_path = os.path.join(
        protenix_output_dir,
        target_id,
        f"seed_{seed}",
        "predictions",
        f"{target_id}_seed_{seed}_summary_confidence_sample_{sample}.json",
    )

    with open(os.path.join(protenix_analysis_dir, json_file)) as handle:
        scores = json.load(handle)
    with open(summary_path) as handle:
        summary = json.load(handle)

    try:
        lddt_pli_items = scores["lddt_pli"]["assigned_scores"]
        rmsd_items = scores["rmsd"]["assigned_scores"]
    except KeyError:
        return [], []
    if not (lddt_pli_items and rmsd_items):
        return [], []

    target_input = input_data[target_id]
    n_prot = len(target_input["sequences"])

    def describe(item, metrics):
        """Assemble one output row with ``metrics`` placed after the ipTM block."""
        # Only the first character of the model ligand file stem is the chain letter.
        model_chain = item["model_ligand"].split("/")[-1].split(".")[0][0]
        chain_pos = string.ascii_uppercase.index(model_chain)
        ref_chain = item["reference_ligand"].split("/")[-1].split(".sdf")[0]
        # NOTE(review): assumes ligand chains follow the protein chains - confirm.
        lig_pos = chain_pos - n_prot
        prot_to_lig = [summary["chain_pair_iptm"][p][chain_pos] for p in range(n_prot)]
        lig_to_prot = [summary["chain_pair_iptm"][chain_pos][p] for p in range(n_prot)]
        return {
            "target": target_id,
            "method": "protenix",
            "seed": seed,
            "sample": sample,
            "ranking_score": summary["ranking_score"],
            "prot_lig_chain_iptm_average": sum(prot_to_lig) / len(prot_to_lig),
            "prot_lig_chain_iptm_min": min(prot_to_lig),
            "prot_lig_chain_iptm_max": max(prot_to_lig),
            "lig_prot_chain_iptm_average": sum(lig_to_prot) / len(lig_to_prot),
            "lig_prot_chain_iptm_min": min(lig_to_prot),
            "lig_prot_chain_iptm_max": max(lig_to_prot),
            **metrics,
            "model_ligand_chain": model_chain,
            "model_ligand_ccd_code": target_input["ccd_codes"][lig_pos],
            "model_ligand_smiles": target_input["smiles"][lig_pos],
            "target_ligand_chain": ref_chain,
        }

    lddt_pli_records = [
        describe(item, {"lddt_pli": item["score"]}) for item in lddt_pli_items
    ]
    rmsd_records = [
        describe(
            item,
            {
                "rmsd": item["score"],
                "lddt_lp": item["lddt_lp"],
                "bb_rmsd": item["bb_rmsd"],
            },
        )
        for item in rmsd_items
    ]
    return lddt_pli_records, rmsd_records

In [None]:
def process_chai_file(json_file, input_data, chai_output_dir, chai_analysis_dir):
    """Build per-ligand lDDT-PLI and RMSD records for one Chai analysis file.

    ``json_file`` must be named ``<target>_seed_<seed>_<sample>.json``. Scores
    are read from that file inside ``chai_analysis_dir``. Confidences come from
    the first ``seed_<seed>/scores.model_idx_<sample>.npz`` found recursively
    under ``<chai_output_dir>/<target lower-cased>``.

    Args:
        json_file: File name (not a path) of the analysis JSON.
        input_data: Mapping ``target_id -> dict`` with the keys ``"sequences"``,
            ``"ccd_codes"`` and ``"smiles"``.
        chai_output_dir: Root directory of the Chai predictions.
        chai_analysis_dir: Directory containing ``json_file``.

    Returns:
        ``(lddt_pli_records, rmsd_records)``, two lists of dicts with one dict
        per assigned ligand. Both are empty when the file name does not match,
        no score archive is found, the ``assigned_scores`` entries are missing,
        or either score list is empty. ``seed`` and ``sample`` are kept as
        strings; ipTM values and the ranking score are NumPy scalars.
    """
    match = re.match(r"^(.*?)_seed_(\d+)_(\d+)\.json$", json_file)
    if not match:
        return [], []

    target_id = match.group(1)
    seed = match.group(2)
    sample = match.group(3)

    chai_trg_dir = Path(chai_output_dir) / target_id.lower()
    score_files = list(
        chai_trg_dir.rglob(f"seed_{seed}/scores.model_idx_{sample}.npz")
    )
    if not score_files:
        return [], []

    # Read both arrays once and close the archive right away. Indexing an open
    # NpzFile re-reads the member from disk on every access, and leaving it
    # open leaks one file handle per processed file.
    with np.load(score_files[0]) as scores:
        # Both arrays are indexed at [0] on their first axis, as in the
        # original per-item lookups.
        pair_iptm = scores["per_chain_pair_iptm"][0]
        ranking_score = scores["aggregate_score"][0]

    json_path = os.path.join(chai_analysis_dir, json_file)
    with open(json_path) as f:
        result = json.load(f)

    try:
        lddt_pli_list = result["lddt_pli"]["assigned_scores"]
        rmsd_list = result["rmsd"]["assigned_scores"]
    except KeyError:
        return [], []

    if not lddt_pli_list or not rmsd_list:
        return [], []

    target_input = input_data[target_id]
    num_prot_chains = len(target_input["sequences"])

    def _record(item, metric_keys):
        """Return one output row; ``metric_keys`` is ``(column, item key)`` pairs."""
        mdl_lig_chain = item["model_ligand"].split(".")[0]
        # Chain letter -> position in the chain-pair matrix ("A" -> 0, ...).
        global_index = string.ascii_uppercase.index(mdl_lig_chain)
        # NOTE(review): assumes ligand chains are numbered right after the
        # protein chains listed in "sequences" - confirm against the inputs.
        ligand_index = global_index - num_prot_chains
        trg_lig_chain = item["reference_ligand"].split("/")[-1].split(".sdf")[0]
        lig_prot_pair_iptm = [
            pair_iptm[global_index][idx] for idx in range(num_prot_chains)
        ]
        prot_lig_pair_iptm = [
            pair_iptm[idx][global_index] for idx in range(num_prot_chains)
        ]
        record = {
            "target": target_id,
            "method": "chai",
            "seed": seed,
            "sample": sample,
            "ranking_score": ranking_score,
            "prot_lig_chain_iptm_average": sum(prot_lig_pair_iptm)
            / len(prot_lig_pair_iptm),
            "prot_lig_chain_iptm_min": min(prot_lig_pair_iptm),
            "prot_lig_chain_iptm_max": max(prot_lig_pair_iptm),
            "lig_prot_chain_iptm_average": sum(lig_prot_pair_iptm)
            / len(lig_prot_pair_iptm),
            "lig_prot_chain_iptm_min": min(lig_prot_pair_iptm),
            "lig_prot_chain_iptm_max": max(lig_prot_pair_iptm),
        }
        # Metric columns sit between the ipTM block and the ligand identity
        # columns, so insertion order matters for the resulting DataFrame.
        for column, key in metric_keys:
            record[column] = item[key]
        record["model_ligand_chain"] = mdl_lig_chain
        record["model_ligand_ccd_code"] = target_input["ccd_codes"][ligand_index]
        record["model_ligand_smiles"] = target_input["smiles"][ligand_index]
        record["target_ligand_chain"] = trg_lig_chain
        return record

    chai_data_lddt_pli = [
        _record(item, (("lddt_pli", "score"),)) for item in lddt_pli_list
    ]
    chai_data_rmsd = [
        _record(
            item, (("rmsd", "score"), ("lddt_lp", "lddt_lp"), ("bb_rmsd", "bb_rmsd"))
        )
        for item in rmsd_list
    ]

    return chai_data_lddt_pli, chai_data_rmsd

In [None]:
def merge_to_final_df(lddt_pli, rmsd, ref_df):
    """Combine lDDT-PLI and RMSD records into one annotated DataFrame.

    Steps:
      1. Attach the reference ``ligand_ccd_code`` to each score table by
         matching ``(target, target_ligand_chain)`` against
         ``(system_id, ligand_instance_chain)`` in ``ref_df``.
      2. Outer-merge the two score tables on the prediction identity columns;
         the remaining shared columns get ``_lddt_pli`` / ``_rmsd`` suffixes.
      3. Left-merge ``ligand_is_proper`` (plus the reference key columns) from
         ``ref_df`` using the model CCD code and the lDDT-PLI target chain.
      4. Drop helper columns and duplicate rows.

    Args:
        lddt_pli: List of lDDT-PLI record dicts (see the ``process_*_file``
            functions for the keys).
        rmsd: List of RMSD record dicts.
        ref_df: Annotation table with at least the columns ``system_id``,
            ``ligand_instance_chain``, ``ligand_ccd_code`` and
            ``ligand_is_proper``.

    Returns:
        The merged DataFrame, or an empty DataFrame (no columns) when both
        record lists are empty.
    """
    df_lddt_pli = pd.DataFrame(lddt_pli)
    df_rmsd = pd.DataFrame(rmsd)

    if df_lddt_pli.empty and df_rmsd.empty:
        # Nothing was collected for this method. The merges below would raise
        # KeyError on the missing key columns, so return an empty frame that
        # the caller can still concatenate.
        return pd.DataFrame()

    def _attach_reference_ccd(scores):
        """Add the reference ``ligand_ccd_code`` for each row's target chain."""
        return pd.merge(
            scores,
            ref_df[["system_id", "ligand_instance_chain", "ligand_ccd_code"]],
            how="left",
            left_on=["target", "target_ligand_chain"],
            right_on=["system_id", "ligand_instance_chain"],
        ).drop(columns=["system_id", "ligand_instance_chain"])

    df_lddt_pli_name = _attach_reference_ccd(df_lddt_pli)
    df_rmsd_name = _attach_reference_ccd(df_rmsd)

    df_lddt_rmsd = pd.merge(
        df_lddt_pli_name,
        df_rmsd_name,
        how="outer",
        on=[
            "target",
            "method",
            "seed",
            "sample",
            "ranking_score",
            "model_ligand_ccd_code",
            "model_ligand_smiles",
            "ligand_ccd_code",
        ],
        indicator=True,
        suffixes=("_lddt_pli", "_rmsd"),
    )

    df_final = pd.merge(
        df_lddt_rmsd,
        ref_df[["system_id", "ligand_ccd_code", "ligand_instance_chain", "ligand_is_proper"]],
        how="left",
        left_on=["target", "model_ligand_ccd_code", "target_ligand_chain_lddt_pli"],
        right_on=["system_id", "ligand_ccd_code", "ligand_instance_chain"],
    )

    # Drop the helper columns without mutating in place, then de-duplicate.
    df_final = df_final.drop(
        columns=[
            "system_id",
            "_merge",
            "target_ligand_chain_rmsd",
            "target_ligand_chain_lddt_pli",
        ]
    ).drop_duplicates()

    return df_final

In [None]:
# Reference annotations and the per-target model inputs shared by all methods.
ref_df = pd.read_csv("data/annotations.csv")
input_json = "data/inputs.json"
with open(input_json, 'r') as f:
    input_data = json.load(f)

METHODS = ["af3", "boltz", "chai", "protenix"]

# Number of worker processes used to parse the analysis files of one method.
N_PROCESSES = 32

# Per-method file processor. Each one takes ``input_data`` plus the keyword
# arguments ``<method>_output_dir`` and ``<method>_analysis_dir``.
PROCESSORS = {
    "af3": process_af3_file,
    "boltz": process_boltz_file,
    "chai": process_chai_file,
    "protenix": process_protenix_file,
}

# One merged frame per method; concatenated once after the loop instead of
# growing a DataFrame inside it.
method_frames = []

for method in METHODS:
    analysis_dir = f"examples/analysis/{method}"
    out_dir = f"examples/outputs/{method}"

    lddt_pli = list()
    rmsd = list()
    filenames = list(os.listdir(analysis_dir))

    partial_func = partial(
        PROCESSORS[method],
        input_data=input_data,
        **{
            f"{method}_output_dir": out_dir,
            f"{method}_analysis_dir": analysis_dir,
        },
    )
    with Pool(processes=N_PROCESSES) as pool:
        # imap yields results in the order of ``filenames``.
        for lddt_pli_single, rmsd_single in pool.imap(partial_func, filenames):
            lddt_pli.extend(lddt_pli_single)
            rmsd.extend(rmsd_single)

    df = merge_to_final_df(lddt_pli, rmsd, ref_df)
    method_frames.append(df)

dfs = pd.concat(method_frames, ignore_index=True)

dfs