# Calculation of template-dependent metrics for AF3
created by Andreas 2025-02-19

This notebook calculates the template-dependent metrics on the AF3 output. It is based on the script *calculate_template_dependent_metrics.py* by Chop Yan Lee.

It will add the following columns:
* RMSD_domain: Align the domains of the DMI structures and calculate the RMSD
* num_align_atoms_domain: From the domain alignment the number of aligned atoms
* num_align_resi_domain: From the domain alignment the number of aligned residues
* align_score_domain: Pymol score for the alignment
* RMSD_backbone_peptide: After aligning the domains, calculate the RMSD of the backbone in the motif
* RMSD_all_atom_peptide: After aligning the domains, calculate the RMSD of all atoms in the motif
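* RMSD_DDI_max: In contrast to DMI, DDI structures have no domain. However, it is still possible to perform an alignment for each chain individually. This column contains the maximum RMSD (= worst) of the two chains
* RMSD_all_atom: Align the complete predicted complex to the complete solved complex and calculate the RMSD
* DockQ, iRMSD, LRMSD, Fnonnat: DockQ metrics, calculated only for the benchmark sets *known_minimal* and *known_ddi*
* ipSAE: The ipSAE value calculated by the *ipsae.py* script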


In contrast to the AF2 metrics, the following columns were not added
* known_motif_plddt: Included in the header, but no values were calculated


### 0 Imports + Settings

In [1]:
# Imports
from pathlib import Path
import pandas as pd
import numpy as np
import tempfile
import shutil
import subprocess
import os

import pymol
from Bio.PDB import PDBParser
from Bio.PDB.Structure import Structure as BioPy_PDBStructure
from Bio.PDB.Model import Model as BioPy_PDBModel
from Bio.PDB.PDBExceptions import PDBConstructionException
parser = PDBParser(QUIET=True)

In [6]:
# Settings
# All paths a reader may need to adapt are collected in this cell.

# Root folder of the AlphaFold output on the network drive; the AF3 runs are looked up in its "AlphaFold3" subfolder
luck_drive_folder = Path("L:/imb-luckgr2/projects/AlphaFold")

# Location of the ipsae.py script (relative to the notebook's working directory)
path_ipsae_script = Path("../code ressources/ipsae.py")

# Parsed AF3 output (structure files per benchmark set)
path_AF3_structures = Path("../ressources/AF3").resolve()

# Solved (template) structures, split into one folder for DMI and one for DDI
path_solved_structures = Path("../ressources/solved").resolve()
path_DMI_solved = path_solved_structures.joinpath("DMI_solved_structures")
path_DDI_solved = path_solved_structures.joinpath("DDI_solved_structures")

# Metric table that is read and extended by this notebook
#path_metric_file = Path("../ressources/AF3/AF3_metrics.tsv").resolve()
path_metric_file = Path("../ressources/AF3/AF3_metrics_full.tsv").resolve() # Append data

In [3]:
# Loading the files
# Collect every regular file with a ".pdb" extension (case-insensitive) from the DMI and the DDI template folder
DMI_solved, DDI_solved = (
    [entry for entry in folder.iterdir() if entry.is_file() and entry.suffix.lower() == ".pdb"]
    for folder in (path_DMI_solved, path_DDI_solved)
)

In [None]:
# Pymol debugging
# For debugging it is useful to visualize the structures in pymol. Uncomment this line before any structure is loaded
#pymol.finish_launching()

### 1 Read structures and metadata

In [4]:
# Parsing the file names and structures for the solved structures
# The rows are collected in a plain list and converted into a DataFrame once at the end;
# appending to a DataFrame row by row copies the frame on every insert.
solved_records = []

# DMI: the file name starts with the 4-character PDB id, the chains are always A and B
for structure_file in DMI_solved:
    pdb_id = structure_file.name.split("_")[0]
    if len(pdb_id) != 4:
        raise RuntimeError(f"Unexpected file name {structure_file.name}")

    solved_records.append({
        "set": "DMI",
        "PDB_id": pdb_id,
        "path": structure_file.relative_to(path_solved_structures),
        "chainA_id": "A",
        "chainB_id": "B",
    })

# DDI: the file name is <pfam1>_<pfam2>_<PDB id>_<chain A id><chain B id>.pdb
for structure_file in DDI_solved:
    name_parts = structure_file.stem.split("_")
    # Fail with a readable message instead of an IndexError (or a "." as chain id) on unexpected names
    if len(name_parts) < 4 or len(name_parts[3]) < 2:
        raise RuntimeError(f"Unexpected file name {structure_file.name}")

    solved_records.append({
        "set": "DDI",
        "PDB_id": name_parts[2],
        "ddi_pfam_id": "_".join(name_parts[0:2]),
        "path": structure_file.relative_to(path_solved_structures),
        "chainA_id": name_parts[3][0],
        "chainB_id": name_parts[3][1],
    })

# Keys missing in a record (ddi_pfam_id for DMI) become NaN
dataSolved = pd.DataFrame(solved_records, columns=["set", "PDB_id", "ddi_pfam_id", "path", "chainA_id", "chainB_id"])

dataSolved

Unnamed: 0,set,PDB_id,ddi_pfam_id,path,chainA_id,chainB_id
0,DMI,1ATP,,DMI_solved_structures\1ATP_min_DMI.pdb,A,B
1,DMI,1AXC,,DMI_solved_structures\1AXC_min_DMI.pdb,A,B
2,DMI,1B72,,DMI_solved_structures\1B72_min_DMI.pdb,A,B
3,DMI,1B8Q,,DMI_solved_structures\1B8Q_min_DMI.pdb,A,B
4,DMI,1BXX,,DMI_solved_structures\1BXX_min_DMI.pdb,A,B
...,...,...,...,...,...,...
181,DDI,3ZNI,PF14447_PF00179,DDI_solved_structures\PF14447_PF00179_3ZNI_AC.pdb,A,C
182,DDI,3J7Y,PF14978_PF00327,DDI_solved_structures\PF14978_PF00327_3J7Y_oZ.pdb,o,Z
183,DDI,6D6Q,PF15985_PF10175,DDI_solved_structures\PF15985_PF10175_6D6Q_GL.pdb,G,L
184,DDI,3KZ1,PF17838_PF00071,DDI_solved_structures\PF17838_PF00071_3KZ1_BE.pdb,B,E


In [None]:
# Read parsed AF3 metric file

# The default integer dtype cannot represent missing values, so pandas would read these columns as float.
# Request the nullable Int16 dtype for them instead.
intcolumns = [
    "chainA_start", "chainA_end",
    "chainB_start", "chainB_end",
    "num_align_atoms_domain", "num_align_resi_domain",
    "num_mutations",
]
nullable_int_dtypes = dict.fromkeys(intcolumns, pd.Int16Dtype())
dataAF = pd.read_csv(path_metric_file, sep="\t", dtype=nullable_int_dtypes)
dataAF

Unnamed: 0,model_preset,benchmark_set,prediction_name,model_id,num_mutations,ranking_score,chainA_length,chainB_length,fraction_disordered,has_clash,...,sequence_initial,sequence_mutated,chainA_id,chainB_id,chainA_start,chainA_end,chainB_start,chainB_end,chains_flipped,model_path
0,alphafold3,known_ddi,PF00009_PF01873_2D74_A_resi12_resi200.B_resi21...,ranked_0,,0.28,189,113,0.04,0.0,...,,,A,B,12,200,21,133,True,AlphaFold_benchmark_DDI\known_ddi\suspicious_c...
1,alphafold3,known_ddi,PF00009_PF01873_2D74_A_resi12_resi200.B_resi21...,ranked_1,,0.25,189,113,0.04,0.0,...,,,A,B,12,200,21,133,True,AlphaFold_benchmark_DDI\known_ddi\suspicious_c...
2,alphafold3,known_ddi,PF00009_PF01873_2D74_A_resi12_resi200.B_resi21...,ranked_2,,0.22,189,113,0.04,0.0,...,,,A,B,12,200,21,133,True,AlphaFold_benchmark_DDI\known_ddi\suspicious_c...
3,alphafold3,known_ddi,PF00009_PF01873_2D74_A_resi12_resi200.B_resi21...,ranked_3,,0.19,189,113,0.04,0.0,...,,,A,B,12,200,21,133,True,AlphaFold_benchmark_DDI\known_ddi\suspicious_c...
4,alphafold3,known_ddi,PF00009_PF01873_2D74_A_resi12_resi200.B_resi21...,ranked_4,,0.17,189,113,0.04,0.0,...,,,A,B,12,200,21,133,True,AlphaFold_benchmark_DDI\known_ddi\suspicious_c...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3175,alphafold3,random_minimal,MTRG_PTS1_2C0L.DLIG_WD40_WDR5_WIN_2_4CY3,ranked_0,,0.93,312,4,0.02,0.0,...,,,A,B,59,361,140,143,True,AlphaFold_benchmark_DMI\random_minimal\gloomy_...
3176,alphafold3,random_minimal,MTRG_PTS1_2C0L.DLIG_WD40_WDR5_WIN_2_4CY3,ranked_1,,0.92,312,4,0.01,0.0,...,,,A,B,59,361,140,143,True,AlphaFold_benchmark_DMI\random_minimal\gloomy_...
3177,alphafold3,random_minimal,MTRG_PTS1_2C0L.DLIG_WD40_WDR5_WIN_2_4CY3,ranked_2,,0.92,312,4,0.02,0.0,...,,,A,B,59,361,140,143,True,AlphaFold_benchmark_DMI\random_minimal\gloomy_...
3178,alphafold3,random_minimal,MTRG_PTS1_2C0L.DLIG_WD40_WDR5_WIN_2_4CY3,ranked_3,,0.91,312,4,0.01,0.0,...,,,A,B,59,361,140,143,True,AlphaFold_benchmark_DMI\random_minimal\gloomy_...


### 2 Pymol calculations

In [11]:
# Calculating the RMSD related values using pymol
#
# For every AF3 model (one row of dataAF) the matching solved structure is loaded into pymol,
# its two chains are renamed to A/B and cut to the residue ranges given in the row. Then:
#   * DMI: chain A is aligned                -> RMSD_domain, align_score_domain, num_align_atoms_domain, num_align_resi_domain
#   * known_minimal / mutations only         -> RMSD_backbone_peptide, RMSD_all_atom_peptide (chain B, measured in the domain-aligned frame)
#   * DDI: both chains are aligned one by one -> RMSD_DDI_max (the larger of the two RMSDs)
#   * all sets: the whole complex is aligned -> RMSD_all_atom


def _optional(row, column, cast=str):
    """Return cast(row[column]), or None if the cell is missing (NaN / NA / None)."""
    value = row[column]
    return cast(value) if pd.notna(value) else None


def _find_template_path(set_name, pdb_id, ddi_pfam_id, prediction_name, benchmark_set, missing_file_message):
    """Return the path of the unique solved structure for the given ids, or None if it is unusable.

    A row of dataSolved matches if set and PDB id are equal and its ddi_pfam_id is either
    equal to ddi_pfam_id or missing. The reason for returning None is printed;
    missing_file_message is printed if the file of the matched row does not exist
    (pass None to stay silent in that case).
    """
    pfam_matches = (dataSolved["ddi_pfam_id"] == ddi_pfam_id) | dataSolved["ddi_pfam_id"].isna()
    candidates = dataSolved.loc[(dataSolved["set"] == set_name) & (dataSolved["PDB_id"] == pdb_id) & pfam_matches]
    if len(candidates) == 0:
        print(f"\tCan't find template structure for {prediction_name} ({benchmark_set}) and PDB ID {pdb_id}. Skip RMSD calculation")
        return None
    if len(candidates) >= 2:
        print(f"\tMultiple template structures found for {prediction_name} ({benchmark_set}) and PDB ID {pdb_id}. Skip RMSD calculation")
        return None

    template_file = path_solved_structures / str(candidates["path"].item())
    if not template_file.exists():
        if missing_file_message is not None:
            print(missing_file_message)
        return None
    return template_file


# (Re)create the result columns empty, so re-running the cell never keeps stale values
for rmsd_column in ["RMSD_domain", "align_score_domain", "num_align_atoms_domain", "num_align_resi_domain",
                    "RMSD_backbone_peptide", "RMSD_all_atom_peptide", "RMSD_DDI_max", "RMSD_all_atom"]:
    dataAF[rmsd_column] = None

for i,row in dataAF.iterrows():
    benchmark_set = str(row["benchmark_set"])
    _set = "DDI" if benchmark_set in ["known_ddi", "random_ddi"] else "DMI"
    pdb_id = _optional(row, "PDB_id")
    pdb_id_2 = _optional(row, "PDB_id_random_paired")
    ddi_pfam_id = _optional(row, "ddi_pfam_id")
    ddi_pfam_id_2 = _optional(row, "ddi_pfam_id_random_paired")
    prediction_name = _optional(row, "prediction_name")
    model_id = _optional(row, "model_id")
    chainA_id = _optional(row, "chainA_id")
    chainB_id = _optional(row, "chainB_id")
    chainA_start = _optional(row, "chainA_start", int)
    chainB_start = _optional(row, "chainB_start", int)
    chainA_end = _optional(row, "chainA_end", int)
    chainB_end = _optional(row, "chainB_end", int)

    # Per-prediction messages are only printed for the first model to keep the log short
    is_first_model = model_id == "ranked_0"
    if is_first_model:
        print(prediction_name, benchmark_set)

    structure_path = path_AF3_structures / _set / benchmark_set / prediction_name / (model_id + ".pdb")
    if not structure_path.exists():
        # NOTE(review): the message mentions skipping, but the run is aborted here - confirm which one is intended
        raise RuntimeError(f"For {prediction_name}/{model_id} the structure file is missing. Skip this structure")

    template_path = _find_template_path(
        _set, pdb_id, ddi_pfam_id, prediction_name, benchmark_set,
        f"\tSkipping {prediction_name} as there is no template file" if is_first_model else None,
    )
    if template_path is None:
        continue

    # Randomly paired predictions take chain B from a second solved structure
    template2_path = None
    if pdb_id_2 is not None:
        template2_path = _find_template_path(
            _set, pdb_id_2, ddi_pfam_id_2, prediction_name, benchmark_set,
            f"\tSkipping {prediction_name} as there is no template" if is_first_model else None,
        )
        if template2_path is None:
            continue

    # All four boundaries are needed for the residue selections below, so check them before loading anything
    if None in (chainA_start, chainA_end, chainB_start, chainB_end):
        print(f"\tCan't find information about the chain start/end in the template. This may lead to wrong RMSD peptide values, so skip")
        continue

    #pymol.cmd.reinitialize() # Not needed usually, but slows performance significantly down
    # Start every model with an empty pymol session (one call removes all objects)
    pymol.cmd.delete("all")
    pymol.cmd.sort()

    # First loading the structures. Use two temporary objects to allow renaming the chains even if the chains have the same name or have switched IDs
    pymol.cmd.load(structure_path, "af")
    if template2_path is not None:
        # Updating the object is possible, but turned out to be unstable
        pymol.cmd.load(template_path, "solvedA")
        pymol.cmd.load(template2_path, "solvedB")
        pymol.cmd.create("solved1", f"solvedA and chain {chainA_id}")
        pymol.cmd.create("solved2", f"solvedB and chain {chainB_id}")
        pymol.cmd.delete("solvedA")
        pymol.cmd.delete("solvedB")
    else:
        pymol.cmd.load(template_path, "solvedraw")
        pymol.cmd.create("solved1", f"solvedraw and chain {chainA_id}")
        pymol.cmd.sort()
        pymol.cmd.create("solved2", f"solvedraw and chain {chainB_id}")
        pymol.cmd.delete("solvedraw")
    pymol.cmd.sort()
    # Now rename the chains and create merged object
    pymol.cmd.alter(f"solved1 and chain {chainA_id}", "chain = 'A'")
    pymol.cmd.sort()
    pymol.cmd.alter(f"solved2 and chain {chainB_id}", "chain = 'B'")
    pymol.cmd.sort()
    pymol.cmd.create("solved", f"solved1 or solved2")
    pymol.cmd.delete("solved1")
    pymol.cmd.delete("solved2")
    pymol.cmd.sort()

    # Remove hydrogens and hetatm
    pymol.cmd.remove(selection="elem 'H' or hetatm")
    pymol.cmd.sort()

    # Remove alternate location identifiers
    pymol.cmd.remove("not alt ''+A") # Using +A syntax to only effect the atoms with an alternate location identifier set
    pymol.cmd.sort()
    pymol.cmd.alter("all", "alt=''")
    pymol.cmd.sort()

    # Slice the chains to the known start/end residues. For chain B and AF a reindexing is performed as the rms_cur cmd of pymol requires same residue numbers for alignment
    pymol.cmd.create("solved", f"solved and ((chain A and resi {chainA_start}-{chainA_end}) or (chain B and resi {chainB_start}-{chainB_end}))", source_state=0, target_state=0)
    pymol.cmd.sort()
    offset = chainB_start - 1
    pymol.cmd.alter("af and chain B", f"resi = (int(resi) + {offset})")
    pymol.cmd.sort()

    pymol.cmd.sort()

    #For debugging
    #space = {'solved_resi': [], "af_resi": []}
    #pymol.cmd.iterate("solved and chain B", "solved_resi.append(int(resi))", space=space)
    #pymol.cmd.iterate("af and chain B", "af_resi.append(int(resi))", space=space)

    # Elements of the tuple returned by pymol.cmd.align:
    #    0: RMSD after refinement
    #    1: Number of aligned atoms after refinement
    #    2: Number of refinement cycles
    #    3: RMSD before refinement
    #    4: Number of aligned atoms before refinement
    #    5: Raw alignment score
    #    6: Number of residues aligned
    # Cycles = 0 to prevent rejection of outliers
    align_output_1 = pymol.cmd.align(mobile="af and chain A", target="solved and chain A", object="algn_domain", cycles=0)
    pymol.cmd.sort()
    RMSD_domain = align_output_1[0]
    num_align_atoms_domain = align_output_1[1]
    align_score_domain = align_output_1[5]
    num_align_resi_domain = align_output_1[6]

    if _set == "DDI":
        align_output_2 = pymol.cmd.align(mobile="af and chain B", target="solved and chain B", object="algn_domain2", cycles=0, )
        RMSD_domain_2 = align_output_2[0]
        dataAF.at[i, "RMSD_DDI_max"] = np.max([RMSD_domain, RMSD_domain_2])
    elif _set == "DMI":
        dataAF.at[i, "RMSD_domain"] =  RMSD_domain
        dataAF.at[i, "align_score_domain"] =  align_score_domain
        dataAF.at[i, "num_align_atoms_domain"] =  num_align_atoms_domain
        dataAF.at[i, "num_align_resi_domain"] =  num_align_resi_domain

    # The peptide RMSDs are defined relative to the domain alignment (rms_cur measures without fitting),
    # so they have to be taken before the whole-complex alignment below superposes "af" again.
    if benchmark_set in ["known_minimal", "mutations"]:
        RMSD_backbone_peptide = pymol.cmd.rms_cur(mobile="af and chain B and bb.", target="solved and chain B and bb.", object="peptide_super_bb")
        RMSD_all_atom_peptide = pymol.cmd.rms_cur(mobile="af and chain B", target="solved and chain B", object="peptide_super_all_atoms")
        dataAF.at[i, "RMSD_backbone_peptide"] =  RMSD_backbone_peptide
        dataAF.at[i, "RMSD_all_atom_peptide"] =  RMSD_all_atom_peptide

    # Finally align the complete complexes onto each other
    align_output = pymol.cmd.align(mobile="af", target="solved", object="algn_domain3", cycles=0, )
    RMSD_all_atoms = align_output[0]
    dataAF.at[i, "RMSD_all_atom"] =  RMSD_all_atoms

PF00009_PF01873_2D74_A_resi12_resi200.B_resi21_resi133 known_ddi
PF00026_PF06394_1F34_A_resi13_resi326.B_resi62_resi120 known_ddi
PF00059_PF00041_1TDQ_B_resi10_resi125.A_resi85_resi186 known_ddi
PF00089_PF00095_1FLE_E_resi16_resi243.I_resi12_resi56 known_ddi
PF00137_PF07850_6VQG_i_resi7_resi86.p_resi292_resi343 known_ddi
PF00244_PF01161_3AXY_J_resi4_resi233.H_resi19_resi169 known_ddi
PF00289_PF02436_5VYW_A_resi1_resi120.D_resi810_resi1030 known_ddi
PF00454_PF00017_2Y3A_A_resi794_resi1010.B_resi616_resi690 known_ddi
PF00514_PF00104_3TX7_A_resi148_resi661.B_resi316_resi533 known_ddi
PF00675_PF02271_1PP9_B_resi35_resi180.S_resi12_resi105 known_ddi
PF00787_PF03643_5F0L_B_resi8_resi283.C_resi58_resi147 known_ddi
PF00858_PF00087_7CFT_A_resi48_resi461.D_resi1_resi56 known_ddi
PF00890_PF13085_1L0V_M_resi1_resi406.N_resi2_resi121 known_ddi
PF01298_PF00405_3VE1_A_resi174_resi345.B_resi342_resi664 known_ddi
PF02351_PF17812_6Q2N_D_resi243_resi337.F_resi265_resi379 known_ddi
PF02372_PF18707_4GS7_A_

In [12]:
# Show the metric table (pandas truncates the rendered rows and columns)
display(dataAF)

Unnamed: 0,model_preset,benchmark_set,prediction_name,model_id,num_mutations,ranking_score,chainA_length,chainB_length,fraction_disordered,has_clash,...,chains_flipped,model_path,RMSD_domain,align_score_domain,num_align_atoms_domain,num_align_resi_domain,RMSD_backbone_peptide,RMSD_all_atom_peptide,RMSD_DDI_max,RMSD_all_atom
0,alphafold3,known_ddi,PF00009_PF01873_2D74_A_resi12_resi200.B_resi21...,ranked_0,,0.28,189,113,0.04,0.0,...,True,AlphaFold_benchmark_DDI\known_ddi\suspicious_c...,,,,,,,5.313323,15.168005
1,alphafold3,known_ddi,PF00009_PF01873_2D74_A_resi12_resi200.B_resi21...,ranked_1,,0.25,189,113,0.04,0.0,...,True,AlphaFold_benchmark_DDI\known_ddi\suspicious_c...,,,,,,,4.992111,15.852812
2,alphafold3,known_ddi,PF00009_PF01873_2D74_A_resi12_resi200.B_resi21...,ranked_2,,0.22,189,113,0.04,0.0,...,True,AlphaFold_benchmark_DDI\known_ddi\suspicious_c...,,,,,,,4.29104,15.517168
3,alphafold3,known_ddi,PF00009_PF01873_2D74_A_resi12_resi200.B_resi21...,ranked_3,,0.19,189,113,0.04,0.0,...,True,AlphaFold_benchmark_DDI\known_ddi\suspicious_c...,,,,,,,4.732747,16.243053
4,alphafold3,known_ddi,PF00009_PF01873_2D74_A_resi12_resi200.B_resi21...,ranked_4,,0.17,189,113,0.04,0.0,...,True,AlphaFold_benchmark_DDI\known_ddi\suspicious_c...,,,,,,,4.57719,16.653427
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3175,alphafold3,random_minimal,MTRG_PTS1_2C0L.DLIG_WD40_WDR5_WIN_2_4CY3,ranked_0,,0.93,312,4,0.02,0.0,...,True,AlphaFold_benchmark_DMI\random_minimal\gloomy_...,0.684738,1624.0,2356,303,,,,7.710459
3176,alphafold3,random_minimal,MTRG_PTS1_2C0L.DLIG_WD40_WDR5_WIN_2_4CY3,ranked_1,,0.92,312,4,0.01,0.0,...,True,AlphaFold_benchmark_DMI\random_minimal\gloomy_...,0.684467,1624.0,2356,303,,,,7.707559
3177,alphafold3,random_minimal,MTRG_PTS1_2C0L.DLIG_WD40_WDR5_WIN_2_4CY3,ranked_2,,0.92,312,4,0.02,0.0,...,True,AlphaFold_benchmark_DMI\random_minimal\gloomy_...,1.018179,1624.0,2356,303,,,,7.750147
3178,alphafold3,random_minimal,MTRG_PTS1_2C0L.DLIG_WD40_WDR5_WIN_2_4CY3,ranked_3,,0.91,312,4,0.01,0.0,...,True,AlphaFold_benchmark_DMI\random_minimal\gloomy_...,0.658395,1624.0,2356,303,,,,7.704717


### 3 DockQ


In [13]:
from DockQ.DockQ import load_PDB, run_on_all_native_interfaces

# DockQ based metrics. They compare the prediction against its own solved structure and are
# therefore only calculated for the benchmark sets "known_minimal" and "known_ddi";
# all other rows keep NaN.
for dockq_column in ["DockQ", "iRMSD", "LRMSD", "Fnonnat"]:
    dataAF[dockq_column] = np.nan

known_rows = dataAF[dataAF["benchmark_set"].isin(["known_minimal", "known_ddi"])]
for i, row in known_rows.iterrows():
    # Only the fields that are needed for the template lookup and the chain mapping are read
    benchmark_set = str(row["benchmark_set"])
    _set = "DDI" if benchmark_set in ["known_ddi", "random_ddi"] else "DMI"
    pdb_id = str(row["PDB_id"]) if pd.notna(row["PDB_id"]) else None
    ddi_pfam_id = str(row["ddi_pfam_id"]) if pd.notna(row["ddi_pfam_id"]) else None
    prediction_name = str(row["prediction_name"]) if pd.notna(row["prediction_name"]) else None
    model_id = str(row["model_id"]) if pd.notna(row["model_id"]) else None
    chainA_id = str(row["chainA_id"]) if pd.notna(row["chainA_id"]) else None
    chainB_id = str(row["chainB_id"]) if pd.notna(row["chainB_id"]) else None

    if model_id == "ranked_0":
        print(f"{prediction_name} ({benchmark_set})")

    # A solved structure matches if set and PDB id are equal and its Pfam id is equal or missing
    pfam_matches = (dataSolved["ddi_pfam_id"] == ddi_pfam_id) | dataSolved["ddi_pfam_id"].isna()
    template_row = dataSolved.loc[(dataSolved["set"] == _set) & (dataSolved["PDB_id"] == pdb_id) & pfam_matches]
    if len(template_row) == 0:
        print(f"\tCan't find template structure for {prediction_name} ({benchmark_set}) and PDB ID {pdb_id}. Skip")
        continue
    elif len(template_row) >= 2:
        print(f"\tMultiple template structures found for {prediction_name} ({benchmark_set}) and PDB ID {pdb_id}. Skip")
        continue

    structure_path = path_AF3_structures / _set / benchmark_set / prediction_name / (model_id + ".pdb")
    template_path = path_solved_structures / str(template_row["path"].item())
    dockq_structure_af = load_PDB(str(structure_path))
    dockq_structure_solved = load_PDB(str(template_path))

    # Map the chains of the solved structure to the chains A/B of the AF3 model.
    # The per-interface results are looked up by the concatenated chain ids of the solved structure.
    chain_map = {chainA_id: "A", chainB_id:"B"}
    chain_key = chainA_id + chainB_id

    # Element 0 of the return value holds the results per interface
    result = run_on_all_native_interfaces(dockq_structure_af, dockq_structure_solved, chain_map=chain_map)[0]
    interface_result = result[chain_key]
    dataAF.at[i, "DockQ"] = interface_result["DockQ"]
    dataAF.at[i, "iRMSD"] = interface_result["iRMSD"]
    dataAF.at[i, "LRMSD"] = interface_result["LRMSD"]
    dataAF.at[i, "Fnonnat"] = np.float64(interface_result["fnonnat"])

display(dataAF)


PF00009_PF01873_2D74_A_resi12_resi200.B_resi21_resi133 (known_ddi)
PF00026_PF06394_1F34_A_resi13_resi326.B_resi62_resi120 (known_ddi)
PF00059_PF00041_1TDQ_B_resi10_resi125.A_resi85_resi186 (known_ddi)
PF00089_PF00095_1FLE_E_resi16_resi243.I_resi12_resi56 (known_ddi)
PF00137_PF07850_6VQG_i_resi7_resi86.p_resi292_resi343 (known_ddi)
PF00244_PF01161_3AXY_J_resi4_resi233.H_resi19_resi169 (known_ddi)
PF00289_PF02436_5VYW_A_resi1_resi120.D_resi810_resi1030 (known_ddi)
PF00454_PF00017_2Y3A_A_resi794_resi1010.B_resi616_resi690 (known_ddi)
PF00514_PF00104_3TX7_A_resi148_resi661.B_resi316_resi533 (known_ddi)
PF00675_PF02271_1PP9_B_resi35_resi180.S_resi12_resi105 (known_ddi)
PF00787_PF03643_5F0L_B_resi8_resi283.C_resi58_resi147 (known_ddi)
PF00858_PF00087_7CFT_A_resi48_resi461.D_resi1_resi56 (known_ddi)
PF00890_PF13085_1L0V_M_resi1_resi406.N_resi2_resi121 (known_ddi)
PF01298_PF00405_3VE1_A_resi174_resi345.B_resi342_resi664 (known_ddi)
PF02351_PF17812_6Q2N_D_resi243_resi337.F_resi265_resi379 (know

Unnamed: 0,model_preset,benchmark_set,prediction_name,model_id,num_mutations,ranking_score,chainA_length,chainB_length,fraction_disordered,has_clash,...,num_align_atoms_domain,num_align_resi_domain,RMSD_backbone_peptide,RMSD_all_atom_peptide,RMSD_DDI_max,RMSD_all_atom,DockQ,iRMSD,LRMSD,Fnonnat
0,alphafold3,known_ddi,PF00009_PF01873_2D74_A_resi12_resi200.B_resi21...,ranked_0,,0.28,189,113,0.04,0.0,...,,,,,5.313323,15.168005,0.050360,13.928462,24.152736,0.884615
1,alphafold3,known_ddi,PF00009_PF01873_2D74_A_resi12_resi200.B_resi21...,ranked_1,,0.25,189,113,0.04,0.0,...,,,,,4.992111,15.852812,0.047522,15.915324,24.902460,0.884615
2,alphafold3,known_ddi,PF00009_PF01873_2D74_A_resi12_resi200.B_resi21...,ranked_2,,0.22,189,113,0.04,0.0,...,,,,,4.29104,15.517168,0.048263,15.056429,24.743910,0.892857
3,alphafold3,known_ddi,PF00009_PF01873_2D74_A_resi12_resi200.B_resi21...,ranked_3,,0.19,189,113,0.04,0.0,...,,,,,4.732747,16.243053,0.049288,14.866893,25.704516,0.894737
4,alphafold3,known_ddi,PF00009_PF01873_2D74_A_resi12_resi200.B_resi21...,ranked_4,,0.17,189,113,0.04,0.0,...,,,,,4.57719,16.653427,0.023410,17.734430,32.745436,1.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3175,alphafold3,random_minimal,MTRG_PTS1_2C0L.DLIG_WD40_WDR5_WIN_2_4CY3,ranked_0,,0.93,312,4,0.02,0.0,...,2356,303,,,,7.710459,,,,
3176,alphafold3,random_minimal,MTRG_PTS1_2C0L.DLIG_WD40_WDR5_WIN_2_4CY3,ranked_1,,0.92,312,4,0.01,0.0,...,2356,303,,,,7.707559,,,,
3177,alphafold3,random_minimal,MTRG_PTS1_2C0L.DLIG_WD40_WDR5_WIN_2_4CY3,ranked_2,,0.92,312,4,0.02,0.0,...,2356,303,,,,7.750147,,,,
3178,alphafold3,random_minimal,MTRG_PTS1_2C0L.DLIG_WD40_WDR5_WIN_2_4CY3,ranked_3,,0.91,312,4,0.01,0.0,...,2356,303,,,,7.704717,,,,


### 4 IPSAE metric

In [18]:
def calc_ipsae_metric(row: pd.Series) -> pd.DataFrame:
    """Run the ipsae.py script for one AF3 model and return its result table.

    The model file (row["model_path"], relative to the "AlphaFold3" folder inside
    luck_drive_folder) and the "confidences.json" next to it are copied into a
    temporary directory, ipsae.py is executed there with the two cutoff arguments
    "10" and "10", and the produced "model_10_10.txt" is parsed.

    Parameters:
        row: Row of the metric table; only "model_path" is read.

    Returns:
        The content of the ipsae output file as DataFrame.

    Raises:
        subprocess.CalledProcessError: If ipsae.py exits with a non-zero status.
        FileNotFoundError: If the model file or confidences.json does not exist.
    """
    import sys  # local import, only needed to locate the interpreter of the running kernel

    path_cif = luck_drive_folder / "AlphaFold3" / Path(row["model_path"])
    path_confidences = path_cif.parent / "confidences.json"

    with tempfile.TemporaryDirectory() as tmpdir:
        tmp_path_cif = Path(tmpdir) / "model.cif"
        tmp_path_confidences = Path(tmpdir) / "confidences.json"
        shutil.copy(path_cif, tmp_path_cif)
        shutil.copy(path_confidences, tmp_path_confidences)

        # sys.executable: run the script with the same interpreter/environment as the notebook.
        # check=True: surface a failing script directly instead of as a missing output file below.
        subprocess.run(
            [sys.executable, str(path_ipsae_script), str(tmp_path_confidences), str(tmp_path_cif), "10", "10"],
            env=os.environ.copy(),
            check=True,
        )

        # The script writes its result next to the input model
        path_output = Path(tmpdir) / "model_10_10.txt"

        # The table has to be read before the temporary directory is removed
        df_ipsae = pd.read_csv(path_output, header=0, skiprows=[0], sep=" ", skipinitialspace=True)

    return df_ipsae

# Calculate the ipSAE value for every model; a progress line is printed once per prediction
dataAF["ipSAE"] = np.nan
total_rows = len(dataAF)
for i, row in dataAF.iterrows():
    if row["model_id"] == "ranked_0":
        percent_done = round(100*i/total_rows)
        print(row["prediction_name"], f"({percent_done} %)")
    ipsae_table = calc_ipsae_metric(row)
    # The value is taken from the row with index 2 of the ipsae output table
    # NOTE(review): presumably this is the row holding the maximum over both chain directions - confirm against the ipsae.py output format
    ipsae_value = ipsae_table["ipSAE"][2]
    dataAF.at[i, "ipSAE"] = np.float64(ipsae_value)
display(dataAF)

PF00009_PF01873_2D74_A_resi12_resi200.B_resi21_resi133 (0 %)
PF00026_PF06394_1F34_A_resi13_resi326.B_resi62_resi120 (0 %)
PF00059_PF00041_1TDQ_B_resi10_resi125.A_resi85_resi186 (0 %)
PF00089_PF00095_1FLE_E_resi16_resi243.I_resi12_resi56 (0 %)
PF00137_PF07850_6VQG_i_resi7_resi86.p_resi292_resi343 (1 %)
PF00244_PF01161_3AXY_J_resi4_resi233.H_resi19_resi169 (1 %)
PF00289_PF02436_5VYW_A_resi1_resi120.D_resi810_resi1030 (1 %)
PF00454_PF00017_2Y3A_A_resi794_resi1010.B_resi616_resi690 (1 %)
PF00514_PF00104_3TX7_A_resi148_resi661.B_resi316_resi533 (1 %)
PF00675_PF02271_1PP9_B_resi35_resi180.S_resi12_resi105 (1 %)
PF00787_PF03643_5F0L_B_resi8_resi283.C_resi58_resi147 (2 %)
PF00858_PF00087_7CFT_A_resi48_resi461.D_resi1_resi56 (2 %)
PF00890_PF13085_1L0V_M_resi1_resi406.N_resi2_resi121 (2 %)
PF01298_PF00405_3VE1_A_resi174_resi345.B_resi342_resi664 (2 %)
PF02351_PF17812_6Q2N_D_resi243_resi337.F_resi265_resi379 (2 %)
PF02372_PF18707_4GS7_A_resi2_resi112.B_resi6_resi97 (2 %)
PF02747_PF00752_1UL1_X_re

Unnamed: 0,model_preset,benchmark_set,prediction_name,model_id,num_mutations,ranking_score,chainA_length,chainB_length,fraction_disordered,has_clash,...,sequence_initial,sequence_mutated,chainA_id,chainB_id,chainA_start,chainA_end,chainB_start,chainB_end,chains_flipped,model_path
0,alphafold3,known_ddi,PF00009_PF01873_2D74_A_resi12_resi200.B_resi21...,ranked_0,,0.28,189,113,0.04,0.0,...,,,A,B,12,200,21,133,True,AlphaFold_benchmark_DDI\known_ddi\suspicious_c...
1,alphafold3,known_ddi,PF00009_PF01873_2D74_A_resi12_resi200.B_resi21...,ranked_1,,0.25,189,113,0.04,0.0,...,,,A,B,12,200,21,133,True,AlphaFold_benchmark_DDI\known_ddi\suspicious_c...
2,alphafold3,known_ddi,PF00009_PF01873_2D74_A_resi12_resi200.B_resi21...,ranked_2,,0.22,189,113,0.04,0.0,...,,,A,B,12,200,21,133,True,AlphaFold_benchmark_DDI\known_ddi\suspicious_c...
3,alphafold3,known_ddi,PF00009_PF01873_2D74_A_resi12_resi200.B_resi21...,ranked_3,,0.19,189,113,0.04,0.0,...,,,A,B,12,200,21,133,True,AlphaFold_benchmark_DDI\known_ddi\suspicious_c...
4,alphafold3,known_ddi,PF00009_PF01873_2D74_A_resi12_resi200.B_resi21...,ranked_4,,0.17,189,113,0.04,0.0,...,,,A,B,12,200,21,133,True,AlphaFold_benchmark_DDI\known_ddi\suspicious_c...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3175,alphafold3,random_minimal,MTRG_PTS1_2C0L.DLIG_WD40_WDR5_WIN_2_4CY3,ranked_0,,0.93,312,4,0.02,0.0,...,,,A,B,59,361,140,143,True,AlphaFold_benchmark_DMI\random_minimal\gloomy_...
3176,alphafold3,random_minimal,MTRG_PTS1_2C0L.DLIG_WD40_WDR5_WIN_2_4CY3,ranked_1,,0.92,312,4,0.01,0.0,...,,,A,B,59,361,140,143,True,AlphaFold_benchmark_DMI\random_minimal\gloomy_...
3177,alphafold3,random_minimal,MTRG_PTS1_2C0L.DLIG_WD40_WDR5_WIN_2_4CY3,ranked_2,,0.92,312,4,0.02,0.0,...,,,A,B,59,361,140,143,True,AlphaFold_benchmark_DMI\random_minimal\gloomy_...
3178,alphafold3,random_minimal,MTRG_PTS1_2C0L.DLIG_WD40_WDR5_WIN_2_4CY3,ranked_3,,0.91,312,4,0.01,0.0,...,,,A,B,59,361,140,143,True,AlphaFold_benchmark_DMI\random_minimal\gloomy_...


### 5 Column rounding and enhancement

In [42]:
# List all column names of the metric table
dataAF.columns

Index(['model_preset', 'benchmark_set', 'prediction_name', 'model_id',
       'num_mutations', 'ranking_score', 'chainA_length', 'chainB_length',
       'fraction_disordered', 'has_clash', 'iptm', 'ptm',
       'chainA_intf_avg_plddt', 'chainB_intf_avg_plddt', 'intf_avg_plddt',
       'num_chainA_intf_res', 'num_chainB_intf_res', 'num_res_res_contact',
       'num_atom_atom_contact', 'iPAE', 'pDockQ', 'PDB_id', 'ELM_instance',
       'ddi_pfam_id', 'PDB_id_random_paired', 'ELM_instance_random_paired',
       'ddi_pfam_id_random_paired', 'sequence_initial', 'sequence_mutated',
       'chainA_id', 'chainB_id', 'chainA_start', 'chainA_end', 'chainB_start',
       'chainB_end', 'chains_flipped', 'model_path', 'RMSD_domain',
       'align_score_domain', 'num_align_atoms_domain', 'num_align_resi_domain',
       'RMSD_backbone_peptide', 'RMSD_all_atom_peptide', 'RMSD_DDI_max',
       'RMSD_all_atom', 'DockQ', 'iRMSD', 'LRMSD', 'Fnonnat', 'ipSAE'],
      dtype='object')

In [43]:
# Move the identifier / bookkeeping columns to the end of the table (in the order given here),
# so that all metric columns come first
trailing_columns = [
    'PDB_id', 'ELM_instance', 'ddi_pfam_id',
    'PDB_id_random_paired', 'ELM_instance_random_paired', 'ddi_pfam_id_random_paired',
    'sequence_initial', 'sequence_mutated',
    'chainA_id', 'chainB_id',
    'chainA_start', 'chainA_end', 'chainB_start', 'chainB_end',
    'chains_flipped', 'model_path',
]
leading_columns = [column for column in dataAF.columns if column not in trailing_columns]
dataAF = dataAF[leading_columns + trailing_columns]
dataAF

Unnamed: 0,model_preset,benchmark_set,prediction_name,model_id,num_mutations,ranking_score,chainA_length,chainB_length,fraction_disordered,has_clash,...,sequence_initial,sequence_mutated,chainA_id,chainB_id,chainA_start,chainA_end,chainB_start,chainB_end,chains_flipped,model_path
0,alphafold3,known_ddi,PF00009_PF01873_2D74_A_resi12_resi200.B_resi21...,ranked_0,,0.28,189,113,0.04,0.0,...,,,A,B,12.0,200.0,21.0,133.0,True,AlphaFold_benchmark_DDI\known_ddi\suspicious_c...
1,alphafold3,known_ddi,PF00009_PF01873_2D74_A_resi12_resi200.B_resi21...,ranked_1,,0.25,189,113,0.04,0.0,...,,,A,B,12.0,200.0,21.0,133.0,True,AlphaFold_benchmark_DDI\known_ddi\suspicious_c...
2,alphafold3,known_ddi,PF00009_PF01873_2D74_A_resi12_resi200.B_resi21...,ranked_2,,0.22,189,113,0.04,0.0,...,,,A,B,12.0,200.0,21.0,133.0,True,AlphaFold_benchmark_DDI\known_ddi\suspicious_c...
3,alphafold3,known_ddi,PF00009_PF01873_2D74_A_resi12_resi200.B_resi21...,ranked_3,,0.19,189,113,0.04,0.0,...,,,A,B,12.0,200.0,21.0,133.0,True,AlphaFold_benchmark_DDI\known_ddi\suspicious_c...
4,alphafold3,known_ddi,PF00009_PF01873_2D74_A_resi12_resi200.B_resi21...,ranked_4,,0.17,189,113,0.04,0.0,...,,,A,B,12.0,200.0,21.0,133.0,True,AlphaFold_benchmark_DDI\known_ddi\suspicious_c...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3175,alphafold3,random_minimal,MTRG_PTS1_2C0L.DLIG_WD40_WDR5_WIN_2_4CY3,ranked_0,,0.93,312,4,0.02,0.0,...,,,A,B,59.0,361.0,140.0,143.0,True,AlphaFold_benchmark_DMI\random_minimal\gloomy_...
3176,alphafold3,random_minimal,MTRG_PTS1_2C0L.DLIG_WD40_WDR5_WIN_2_4CY3,ranked_1,,0.92,312,4,0.01,0.0,...,,,A,B,59.0,361.0,140.0,143.0,True,AlphaFold_benchmark_DMI\random_minimal\gloomy_...
3177,alphafold3,random_minimal,MTRG_PTS1_2C0L.DLIG_WD40_WDR5_WIN_2_4CY3,ranked_2,,0.92,312,4,0.02,0.0,...,,,A,B,59.0,361.0,140.0,143.0,True,AlphaFold_benchmark_DMI\random_minimal\gloomy_...
3178,alphafold3,random_minimal,MTRG_PTS1_2C0L.DLIG_WD40_WDR5_WIN_2_4CY3,ranked_3,,0.91,312,4,0.01,0.0,...,,,A,B,59.0,361.0,140.0,143.0,True,AlphaFold_benchmark_DMI\random_minimal\gloomy_...


In [19]:
# Store the integer-valued columns with the nullable Int64 dtype, so missing values do not turn them into floats
for int_column in ["chainA_start", "chainA_end", "chainB_start", "chainB_end",
                   "num_mutations", "num_align_atoms_domain", "num_align_resi_domain"]:
    dataAF[int_column] = dataAF[int_column].astype(pd.Int64Dtype())
dataAF

Unnamed: 0,model_preset,benchmark_set,prediction_name,model_id,num_mutations,ranking_score,chainA_length,chainB_length,fraction_disordered,has_clash,...,sequence_initial,sequence_mutated,chainA_id,chainB_id,chainA_start,chainA_end,chainB_start,chainB_end,chains_flipped,model_path
0,alphafold3,known_ddi,PF00009_PF01873_2D74_A_resi12_resi200.B_resi21...,ranked_0,,0.28,189,113,0.04,0.0,...,,,A,B,12,200,21,133,True,AlphaFold_benchmark_DDI\known_ddi\suspicious_c...
1,alphafold3,known_ddi,PF00009_PF01873_2D74_A_resi12_resi200.B_resi21...,ranked_1,,0.25,189,113,0.04,0.0,...,,,A,B,12,200,21,133,True,AlphaFold_benchmark_DDI\known_ddi\suspicious_c...
2,alphafold3,known_ddi,PF00009_PF01873_2D74_A_resi12_resi200.B_resi21...,ranked_2,,0.22,189,113,0.04,0.0,...,,,A,B,12,200,21,133,True,AlphaFold_benchmark_DDI\known_ddi\suspicious_c...
3,alphafold3,known_ddi,PF00009_PF01873_2D74_A_resi12_resi200.B_resi21...,ranked_3,,0.19,189,113,0.04,0.0,...,,,A,B,12,200,21,133,True,AlphaFold_benchmark_DDI\known_ddi\suspicious_c...
4,alphafold3,known_ddi,PF00009_PF01873_2D74_A_resi12_resi200.B_resi21...,ranked_4,,0.17,189,113,0.04,0.0,...,,,A,B,12,200,21,133,True,AlphaFold_benchmark_DDI\known_ddi\suspicious_c...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3175,alphafold3,random_minimal,MTRG_PTS1_2C0L.DLIG_WD40_WDR5_WIN_2_4CY3,ranked_0,,0.93,312,4,0.02,0.0,...,,,A,B,59,361,140,143,True,AlphaFold_benchmark_DMI\random_minimal\gloomy_...
3176,alphafold3,random_minimal,MTRG_PTS1_2C0L.DLIG_WD40_WDR5_WIN_2_4CY3,ranked_1,,0.92,312,4,0.01,0.0,...,,,A,B,59,361,140,143,True,AlphaFold_benchmark_DMI\random_minimal\gloomy_...
3177,alphafold3,random_minimal,MTRG_PTS1_2C0L.DLIG_WD40_WDR5_WIN_2_4CY3,ranked_2,,0.92,312,4,0.02,0.0,...,,,A,B,59,361,140,143,True,AlphaFold_benchmark_DMI\random_minimal\gloomy_...
3178,alphafold3,random_minimal,MTRG_PTS1_2C0L.DLIG_WD40_WDR5_WIN_2_4CY3,ranked_3,,0.91,312,4,0.01,0.0,...,,,A,B,59,361,140,143,True,AlphaFold_benchmark_DMI\random_minimal\gloomy_...


In [53]:
# Rebind `dataAF` to a fresh copy of `dataAF_copy` and display it.
# NOTE(review): `dataAF_copy` is not created anywhere in this part of the notebook,
# and this cell's execution count (53) is out of sequence with the neighbouring
# cells (19, 20) — confirm the snapshot exists on a fresh "Restart & Run All".
dataAF = dataAF_copy.copy()
dataAF

Unnamed: 0,model_preset,benchmark_set,prediction_name,model_id,num_mutations,ranking_score,chainA_length,chainB_length,fraction_disordered,has_clash,...,sequence_initial,sequence_mutated,chainA_id,chainB_id,chainA_start,chainA_end,chainB_start,chainB_end,chains_flipped,model_path
0,alphafold3,known_ddi,PF00009_PF01873_2D74_A_resi12_resi200.B_resi21...,ranked_0,,0.28,189,113,0.04,0.0,...,,,A,B,12,200,21,133,True,AlphaFold_benchmark_DDI\known_ddi\suspicious_c...
1,alphafold3,known_ddi,PF00009_PF01873_2D74_A_resi12_resi200.B_resi21...,ranked_1,,0.25,189,113,0.04,0.0,...,,,A,B,12,200,21,133,True,AlphaFold_benchmark_DDI\known_ddi\suspicious_c...
2,alphafold3,known_ddi,PF00009_PF01873_2D74_A_resi12_resi200.B_resi21...,ranked_2,,0.22,189,113,0.04,0.0,...,,,A,B,12,200,21,133,True,AlphaFold_benchmark_DDI\known_ddi\suspicious_c...
3,alphafold3,known_ddi,PF00009_PF01873_2D74_A_resi12_resi200.B_resi21...,ranked_3,,0.19,189,113,0.04,0.0,...,,,A,B,12,200,21,133,True,AlphaFold_benchmark_DDI\known_ddi\suspicious_c...
4,alphafold3,known_ddi,PF00009_PF01873_2D74_A_resi12_resi200.B_resi21...,ranked_4,,0.17,189,113,0.04,0.0,...,,,A,B,12,200,21,133,True,AlphaFold_benchmark_DDI\known_ddi\suspicious_c...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3175,alphafold3,random_minimal,MTRG_PTS1_2C0L.DLIG_WD40_WDR5_WIN_2_4CY3,ranked_0,,0.93,312,4,0.02,0.0,...,,,A,B,59,361,140,143,True,AlphaFold_benchmark_DMI\random_minimal\gloomy_...
3176,alphafold3,random_minimal,MTRG_PTS1_2C0L.DLIG_WD40_WDR5_WIN_2_4CY3,ranked_1,,0.92,312,4,0.01,0.0,...,,,A,B,59,361,140,143,True,AlphaFold_benchmark_DMI\random_minimal\gloomy_...
3177,alphafold3,random_minimal,MTRG_PTS1_2C0L.DLIG_WD40_WDR5_WIN_2_4CY3,ranked_2,,0.92,312,4,0.02,0.0,...,,,A,B,59,361,140,143,True,AlphaFold_benchmark_DMI\random_minimal\gloomy_...
3178,alphafold3,random_minimal,MTRG_PTS1_2C0L.DLIG_WD40_WDR5_WIN_2_4CY3,ranked_3,,0.91,312,4,0.01,0.0,...,,,A,B,59,361,140,143,True,AlphaFold_benchmark_DMI\random_minimal\gloomy_...


In [54]:
def _format_fixed(value, decimals):
    """Render one numeric cell as a fixed-point string with ``decimals`` places.

    Parameters
    ----------
    value : scalar
        The cell value taken from a metric column.
    decimals : int
        Number of digits after the decimal point.

    Returns
    -------
    str or None
        The formatted text (with ``,`` as thousands separator), ``None`` for
        missing values (None, NaN, pd.NA), or ``value`` itself when it already
        is a string.
    """
    # Already-formatted text is passed through unchanged so that re-running
    # this cell does not crash on the strings it produced the first time.
    if isinstance(value, str):
        return value
    # pd.isna covers None, float NaN and pd.NA in one check.
    if pd.isna(value):
        return None
    return f"{value:,.{decimals}f}"


# Columns to convert to text, keyed by the number of decimal places to keep.
# Note: after this step the listed columns hold strings, not numbers.
columns_by_decimals = {
    2: ["fraction_disordered", "has_clash", "iptm", "ptm", "ranking_score", "chainA_intf_avg_plddt", "chainB_intf_avg_plddt", "intf_avg_plddt", "iPAE", "pDockQ"],
    4: ["RMSD_backbone_peptide", "RMSD_all_atom_peptide", "RMSD_DDI_max", "RMSD_all_atom", "DockQ", "Fnonnat", "iRMSD", "LRMSD"],
}

for decimals, columns in columns_by_decimals.items():
    for c in columns:
        dataAF[c] = dataAF[c].apply(_format_fixed, args=(decimals,))
dataAF

Unnamed: 0,model_preset,benchmark_set,prediction_name,model_id,num_mutations,ranking_score,chainA_length,chainB_length,fraction_disordered,has_clash,...,sequence_initial,sequence_mutated,chainA_id,chainB_id,chainA_start,chainA_end,chainB_start,chainB_end,chains_flipped,model_path
0,alphafold3,known_ddi,PF00009_PF01873_2D74_A_resi12_resi200.B_resi21...,ranked_0,,0.28,189,113,0.04,0.00,...,,,A,B,12,200,21,133,True,AlphaFold_benchmark_DDI\known_ddi\suspicious_c...
1,alphafold3,known_ddi,PF00009_PF01873_2D74_A_resi12_resi200.B_resi21...,ranked_1,,0.25,189,113,0.04,0.00,...,,,A,B,12,200,21,133,True,AlphaFold_benchmark_DDI\known_ddi\suspicious_c...
2,alphafold3,known_ddi,PF00009_PF01873_2D74_A_resi12_resi200.B_resi21...,ranked_2,,0.22,189,113,0.04,0.00,...,,,A,B,12,200,21,133,True,AlphaFold_benchmark_DDI\known_ddi\suspicious_c...
3,alphafold3,known_ddi,PF00009_PF01873_2D74_A_resi12_resi200.B_resi21...,ranked_3,,0.19,189,113,0.04,0.00,...,,,A,B,12,200,21,133,True,AlphaFold_benchmark_DDI\known_ddi\suspicious_c...
4,alphafold3,known_ddi,PF00009_PF01873_2D74_A_resi12_resi200.B_resi21...,ranked_4,,0.17,189,113,0.04,0.00,...,,,A,B,12,200,21,133,True,AlphaFold_benchmark_DDI\known_ddi\suspicious_c...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3175,alphafold3,random_minimal,MTRG_PTS1_2C0L.DLIG_WD40_WDR5_WIN_2_4CY3,ranked_0,,0.93,312,4,0.02,0.00,...,,,A,B,59,361,140,143,True,AlphaFold_benchmark_DMI\random_minimal\gloomy_...
3176,alphafold3,random_minimal,MTRG_PTS1_2C0L.DLIG_WD40_WDR5_WIN_2_4CY3,ranked_1,,0.92,312,4,0.01,0.00,...,,,A,B,59,361,140,143,True,AlphaFold_benchmark_DMI\random_minimal\gloomy_...
3177,alphafold3,random_minimal,MTRG_PTS1_2C0L.DLIG_WD40_WDR5_WIN_2_4CY3,ranked_2,,0.92,312,4,0.02,0.00,...,,,A,B,59,361,140,143,True,AlphaFold_benchmark_DMI\random_minimal\gloomy_...
3178,alphafold3,random_minimal,MTRG_PTS1_2C0L.DLIG_WD40_WDR5_WIN_2_4CY3,ranked_3,,0.91,312,4,0.01,0.00,...,,,A,B,59,361,140,143,True,AlphaFold_benchmark_DMI\random_minimal\gloomy_...


In [20]:
# Save file
# Write the table into the folder that contains `path_metric_file`, once as a
# tab-separated file and once as an Excel workbook (sheet "AF3"). The DataFrame
# index is omitted from both outputs.
metrics_output_dir = path_metric_file.parent
dataAF.to_csv(metrics_output_dir / "AF3_metrics_full.tsv", sep="\t", index=False)
dataAF.to_excel(metrics_output_dir / "AF3_metrics_full.xlsx", sheet_name="AF3", index=False)

### 6 Generate colum