# Calculation of template-dependent metrics for AF3
created by Andreas 2025-02-19

This notebook calculates the template-dependent metrics for the AF3 output. It is based on the script *calculate_template_dependent_metrics.py* by Chop Yan Lee.

It will add the following columns: RMSD_domain, num_align_atoms_domain, align_score_domain, num_align_resi_domain, RMSD_backbone_peptide, RMSD_all_atom_peptide, known_motif_plddt, DockQ, iRMS, LRMS and num_mutation_in_motif. The column Fnonnat is not calculated by this script.

### 0 Imports + Settings

In [1]:
# Imports
from pathlib import Path
import pandas as pd
import numpy as np
import difflib

import pymol
from Bio.PDB import PDBParser
from Bio.PDB.Structure import Structure as BioPy_PDBStructure
from Bio.PDB.Model import Model as BioPy_PDBModel
from Bio.PDB.PDBExceptions import PDBConstructionException
parser = PDBParser(QUIET=True)

In [2]:
# Root of the resource tree. The directory is spelled "ressources" throughout
# this notebook, so the name is kept as is. Later cells build paths from
# `ressources_path`, which therefore has to be defined in this settings cell.
ressources_path = Path("../ressources").resolve()

# Paths to the solved structures
path_DMI_solved = Path("../ressources/solved/DMI_solved_structures").resolve()
path_DDI_solved = Path("../ressources/solved/DDI_solved_structures").resolve()

# Path to the parsed AF3 output
path_AF3_structures = Path("../ressources/AF3").resolve()

In [3]:
def _list_pdb_files(directory: Path) -> list:
    """Return the ``.pdb`` files located directly inside *directory*.

    The suffix is compared case-insensitively. The result is sorted because
    ``Path.iterdir`` yields entries in arbitrary order, which would otherwise
    make the row order of every table built from these lists depend on the
    file system.
    """
    return sorted(p for p in directory.iterdir() if p.is_file() and p.suffix.lower() == ".pdb")


# Loading the files
DMI_solved = _list_pdb_files(path_DMI_solved)
DDI_solved = _list_pdb_files(path_DDI_solved)

In [6]:
# Start the PyMOL session for this kernel.
# NOTE(review): none of the cells visible here call PyMOL afterwards — confirm that a
# later cell needs the session, and whether a GUI-less launch is wanted on servers.
pymol.finish_launching()

### 1 Read solved structures

In [None]:
# Parsing the file names and structures for the solved structures.
# One record (dict) per structure is collected and the DataFrame is built once at
# the end instead of growing it row by row.
solved_records = []

# First DMI: the file name starts with the 4-character PDB ID ("<PDB>_...")
for structure_file in DMI_solved:
    pdb_id = structure_file.name.split("_")[0]
    if len(pdb_id) != 4:
        raise RuntimeError(f"Unexpected file name {structure_file.name}")

    structure_biopy = parser.get_structure("structure", file=structure_file)
    # Only model 0 is used below, so only its chains are counted. Counting the
    # chains of every model would reject multi-model files although their first
    # model holds exactly two chains.
    chains = list(structure_biopy[0].get_chains())
    if len(chains) != 2:
        print(f"Unexpected chains in {structure_file.name}")
        continue
    chainA = chains[0]
    chainB = chains[1]
    solved_records.append({"set": "DMI", "PDB_id": pdb_id, "chainA": chainA, "chainB": chainB})


# Now DDI: "<pfamA>_<pfamB>_<PDB>_<chainA id><chainB id>..." with one-character chain IDs
for structure_file in DDI_solved:
    name_parts = structure_file.name.split("_")
    if len(name_parts) < 4 or len(name_parts[3]) < 2:
        # Same error type as for DMI instead of a bare IndexError further down
        raise RuntimeError(f"Unexpected file name {structure_file.name}")
    ddi_pfam_id = "_".join(name_parts[0:2])
    pdb_id = name_parts[2]
    chainA_id = name_parts[3][0]
    chainB_id = name_parts[3][1]

    structure_biopy = parser.get_structure("structure", file=structure_file)
    chains = list(structure_biopy[0].get_chains())
    chain_ids = [c.id for c in chains]
    if len(chains) != 2 or chainA_id not in chain_ids or chainB_id not in chain_ids:
        print(f"Unexpected chains in {structure_file.name}: Expected {chainA_id} and {chainB_id}, got {chains}")
        continue
    chainA = structure_biopy[0][chainA_id]
    chainB = structure_biopy[0][chainB_id]

    solved_records.append({"set": "DDI", "PDB_id": pdb_id, "ddi_pfam_id": ddi_pfam_id, "chainA": chainA, "chainB": chainB})

# DMI records carry no "ddi_pfam_id"; pandas fills the missing key with NaN.
dataSolved = pd.DataFrame(solved_records, columns=["set", "PDB_id", "ddi_pfam_id", "chainA", "chainB"])
dataSolved

Unnamed: 0,set,PDB_id,ddi_pfam_id,chainA,chainB
0,DMI,1ATP,,"((<Atom N>, <Atom CA>, <Atom C>, <Atom O>, <At...","((<Atom N>, <Atom CA>, <Atom C>, <Atom O>, <At..."
1,DMI,1AXC,,"((<Atom N>, <Atom CA>, <Atom C>, <Atom O>, <At...","((<Atom N>, <Atom CA>, <Atom C>, <Atom O>, <At..."
2,DMI,1B72,,"((<Atom N>, <Atom CA>, <Atom C>, <Atom O>, <At...","((<Atom N>, <Atom CA>, <Atom C>, <Atom O>, <At..."
3,DMI,1B8Q,,"((<Atom N>, <Atom CA>, <Atom C>, <Atom O>, <At...","((<Atom N>, <Atom CA>, <Atom C>, <Atom O>, <At..."
4,DMI,1BXX,,"((<Atom N>, <Atom CA>, <Atom C>, <Atom O>, <At...","((<Atom N>, <Atom CA>, <Atom C>, <Atom O>, <At..."
...,...,...,...,...,...
181,DDI,3ZNI,PF14447_PF00179,"((<Atom N>, <Atom CA>, <Atom C>, <Atom O>, <At...","((<Atom N>, <Atom CA>, <Atom C>, <Atom O>, <At..."
182,DDI,3J7Y,PF14978_PF00327,"((<Atom N>, <Atom CA>, <Atom C>, <Atom O>, <At...","((<Atom N>, <Atom CA>, <Atom C>, <Atom O>, <At..."
183,DDI,6D6Q,PF15985_PF10175,"((<Atom N>, <Atom CA>, <Atom C>, <Atom O>, <At...","((<Atom N>, <Atom CA>, <Atom C>, <Atom O>, <At..."
184,DDI,3KZ1,PF17838_PF00071,"((<Atom N>, <Atom CA>, <Atom C>, <Atom O>, <At...","((<Atom N>, <Atom CA>, <Atom C>, <Atom O>, <At..."


In [29]:
# Load the tab-separated AF3 metrics table and display it
metrics_file = Path("../ressources/AF3/AF3_metrics.tsv")
dataAF = pd.read_table(metrics_file)
dataAF

Unnamed: 0,model_preset,benchmark_set,prediction_name,model_id,chainA_length,chainB_length,fraction_disordered,has_clash,iptm,ptm,...,PDB_id,ELM_instance,ddi_pfam_id,PDB_id_random_paired,ELM_instance_random_paired,ddi_pfam_id_random_paired,sequence_initial,sequence_mutated,chainA_id,chainB_id
0,alphafold3,known_ddi,PF00009_PF01873_2D74_A_resi12_resi200.B_resi21...,ranked_0,189,113,0.04,0.0,0.20,0.53,...,2D74,,PF00009_PF01873,,,,,,A,B
1,alphafold3,known_ddi,PF00009_PF01873_2D74_A_resi12_resi200.B_resi21...,ranked_1,189,113,0.04,0.0,0.16,0.51,...,2D74,,PF00009_PF01873,,,,,,A,B
2,alphafold3,known_ddi,PF00009_PF01873_2D74_A_resi12_resi200.B_resi21...,ranked_2,189,113,0.04,0.0,0.13,0.46,...,2D74,,PF00009_PF01873,,,,,,A,B
3,alphafold3,known_ddi,PF00009_PF01873_2D74_A_resi12_resi200.B_resi21...,ranked_3,189,113,0.04,0.0,0.10,0.47,...,2D74,,PF00009_PF01873,,,,,,A,B
4,alphafold3,known_ddi,PF00009_PF01873_2D74_A_resi12_resi200.B_resi21...,ranked_4,189,113,0.04,0.0,0.07,0.49,...,2D74,,PF00009_PF01873,,,,,,A,B
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3175,alphafold3,random_minimal,MTRG_PTS1_2C0L.DLIG_WD40_WDR5_WIN_2_4CY3,ranked_0,312,4,0.02,0.0,0.91,0.95,...,2C0L,TRG_PTS1,,4CY3,LIG_WD40_WDR5_WIN_2,,,,,
3176,alphafold3,random_minimal,MTRG_PTS1_2C0L.DLIG_WD40_WDR5_WIN_2_4CY3,ranked_1,312,4,0.01,0.0,0.90,0.95,...,2C0L,TRG_PTS1,,4CY3,LIG_WD40_WDR5_WIN_2,,,,,
3177,alphafold3,random_minimal,MTRG_PTS1_2C0L.DLIG_WD40_WDR5_WIN_2_4CY3,ranked_2,312,4,0.02,0.0,0.90,0.95,...,2C0L,TRG_PTS1,,4CY3,LIG_WD40_WDR5_WIN_2,,,,,
3178,alphafold3,random_minimal,MTRG_PTS1_2C0L.DLIG_WD40_WDR5_WIN_2_4CY3,ranked_3,312,4,0.01,0.0,0.90,0.95,...,2C0L,TRG_PTS1,,4CY3,LIG_WD40_WDR5_WIN_2,,,,,


In [39]:
# Re-read the parsed AF3 metric file so that the template columns are always
# added to a fresh table.
dataAF = pd.read_csv(Path("../ressources/AF3/AF3_metrics.tsv"), sep="\t")


def _find_solved(set_name, pdb):
    """Return the rows of ``dataSolved`` for *set_name* ("DDI"/"DMI") whose PDB ID is *pdb*."""
    return dataSolved[(dataSolved["set"] == set_name) & (dataSolved["PDB_id"] == pdb)]


def _lookup_failure(num_hits):
    """Word the reason why a lookup did not yield exactly one solved structure."""
    if num_hits == 0:
        return "not found in solved structures"
    return "found multiple times in solved structures"


dataAF["chainA_template"] = None
dataAF["chainB_template"] = None

for i, row in dataAF.iterrows():
    benchmark_set = row["benchmark_set"]
    prediction_name = row["prediction_name"]
    model_id = row["model_id"]
    pdb_id = row["PDB_id"]
    # A missing partner ID is NaN in the table and becomes the string "nan" here
    pdb_id_random_paired = str(row["PDB_id_random_paired"])
    set_ = "DDI" if "ddi" in benchmark_set else "DMI"
    # Every prediction has several models; a problem is reported once, for ranked_0 only
    report_problem = model_id == "ranked_0"

    solved_for_A = _find_solved(set_, pdb_id)
    if len(solved_for_A) != 1:
        if report_problem:
            print("\t", f"Chain A of {prediction_name} ({benchmark_set}) with PDB ID {pdb_id} was", _lookup_failure(len(solved_for_A)))
        continue
    chainA_template = solved_for_A["chainA"].item()

    if pdb_id_random_paired == "nan":
        # No random partner: both templates come from the same solved structure
        chainB_template = solved_for_A["chainB"].item()
    else:
        solved_for_B = _find_solved(set_, pdb_id_random_paired)
        if len(solved_for_B) != 1:
            if report_problem:
                print("\t", f"Chain B of {prediction_name} ({benchmark_set}) with PDB ID {pdb_id_random_paired} was", _lookup_failure(len(solved_for_B)))
            continue
        chainB_template = solved_for_B["chainB"].item()

    dataAF.at[i, "chainA_template"] = chainA_template
    dataAF.at[i, "chainB_template"] = chainB_template

display(dataAF)

	 Chain A of LIG_Actin_RPEL_3_2V51 (known_minimal) with PDB ID 2V51 was not found in solved structures
	 Chain A of LIG_ActinCP_CPI_1_3AA0 (known_minimal) with PDB ID 3AA0 was not found in solved structures
	 Chain B of D1PF07525_PF03931_3ZKJ.D2PF07724_PF00227_1G4B (random_ddi) with PDB ID 1G4B was not found in solved structures
	 Chain A of D1PF07724_PF00227_1G4B.D2PF08644_PF03531_4KHB (random_ddi) with PDB ID 1G4B was not found in solved structures
	 Chain A of MLIG_PAM2_1_1JGN.DMOD_CDK_SPxK_1_2CCI (random_minimal) with PDB ID 2CCI was not found in solved structures


Unnamed: 0,model_preset,benchmark_set,prediction_name,model_id,chainA_length,chainB_length,fraction_disordered,has_clash,iptm,ptm,...,ddi_pfam_id,PDB_id_random_paired,ELM_instance_random_paired,ddi_pfam_id_random_paired,sequence_initial,sequence_mutated,chainA_id,chainB_id,chainA_template,chainB_template
0,alphafold3,known_ddi,PF00009_PF01873_2D74_A_resi12_resi200.B_resi21...,ranked_0,189,113,0.04,0.0,0.20,0.53,...,PF00009_PF01873,,,,,,A,B,"((<Atom N>, <Atom CA>, <Atom C>, <Atom O>, <At...","((<Atom N>, <Atom CA>, <Atom C>, <Atom O>, <At..."
1,alphafold3,known_ddi,PF00009_PF01873_2D74_A_resi12_resi200.B_resi21...,ranked_1,189,113,0.04,0.0,0.16,0.51,...,PF00009_PF01873,,,,,,A,B,"((<Atom N>, <Atom CA>, <Atom C>, <Atom O>, <At...","((<Atom N>, <Atom CA>, <Atom C>, <Atom O>, <At..."
2,alphafold3,known_ddi,PF00009_PF01873_2D74_A_resi12_resi200.B_resi21...,ranked_2,189,113,0.04,0.0,0.13,0.46,...,PF00009_PF01873,,,,,,A,B,"((<Atom N>, <Atom CA>, <Atom C>, <Atom O>, <At...","((<Atom N>, <Atom CA>, <Atom C>, <Atom O>, <At..."
3,alphafold3,known_ddi,PF00009_PF01873_2D74_A_resi12_resi200.B_resi21...,ranked_3,189,113,0.04,0.0,0.10,0.47,...,PF00009_PF01873,,,,,,A,B,"((<Atom N>, <Atom CA>, <Atom C>, <Atom O>, <At...","((<Atom N>, <Atom CA>, <Atom C>, <Atom O>, <At..."
4,alphafold3,known_ddi,PF00009_PF01873_2D74_A_resi12_resi200.B_resi21...,ranked_4,189,113,0.04,0.0,0.07,0.49,...,PF00009_PF01873,,,,,,A,B,"((<Atom N>, <Atom CA>, <Atom C>, <Atom O>, <At...","((<Atom N>, <Atom CA>, <Atom C>, <Atom O>, <At..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3175,alphafold3,random_minimal,MTRG_PTS1_2C0L.DLIG_WD40_WDR5_WIN_2_4CY3,ranked_0,312,4,0.02,0.0,0.91,0.95,...,,2C0L,TRG_PTS1,,,,A,B,"((<Atom N>, <Atom CA>, <Atom C>, <Atom O>, <At...","((<Atom N>, <Atom CA>, <Atom C>, <Atom O>, <At..."
3176,alphafold3,random_minimal,MTRG_PTS1_2C0L.DLIG_WD40_WDR5_WIN_2_4CY3,ranked_1,312,4,0.01,0.0,0.90,0.95,...,,2C0L,TRG_PTS1,,,,A,B,"((<Atom N>, <Atom CA>, <Atom C>, <Atom O>, <At...","((<Atom N>, <Atom CA>, <Atom C>, <Atom O>, <At..."
3177,alphafold3,random_minimal,MTRG_PTS1_2C0L.DLIG_WD40_WDR5_WIN_2_4CY3,ranked_2,312,4,0.02,0.0,0.90,0.95,...,,2C0L,TRG_PTS1,,,,A,B,"((<Atom N>, <Atom CA>, <Atom C>, <Atom O>, <At...","((<Atom N>, <Atom CA>, <Atom C>, <Atom O>, <At..."
3178,alphafold3,random_minimal,MTRG_PTS1_2C0L.DLIG_WD40_WDR5_WIN_2_4CY3,ranked_3,312,4,0.01,0.0,0.90,0.95,...,,2C0L,TRG_PTS1,,,,A,B,"((<Atom N>, <Atom CA>, <Atom C>, <Atom O>, <At...","((<Atom N>, <Atom CA>, <Atom C>, <Atom O>, <At..."


### 2 Fixing switched chains
The input json for the AF3 runs was generated by sorting the chains based on their length

In [None]:
#dataAF_copy = dataAF.copy()

In [None]:
# Display the current state of the AF3 metrics table
dataAF

In [14]:
# Inspect every row whose PDB_id is 4Y66
dataAF[dataAF["PDB_id"] == "4Y66"]

Unnamed: 0,model_preset,benchmark_set,prediction_name,model_id,chainA_length,chainB_length,fraction_disordered,has_clash,iptm,ptm,...,ddi_pfam_id,PDB_id_random_paired,ELM_instance_random_paired,ddi_pfam_id_random_paired,sequence_initial,sequence_mutated,chainA_id,chainB_id,chainA_template,chainB_template
90,alphafold3,known_ddi,PF03962_PF07106_4Y66_E_resi10_resi83.F_resi7_r...,ranked_0,73,69,0.16,0.0,0.46,0.62,...,PF03962_PF07106,,,,,,E,F,"((<Atom N>, <Atom CA>, <Atom C>, <Atom O>, <At...","((<Atom N>, <Atom CA>, <Atom C>, <Atom O>, <At..."
91,alphafold3,known_ddi,PF03962_PF07106_4Y66_E_resi10_resi83.F_resi7_r...,ranked_1,73,69,0.08,0.0,0.44,0.63,...,PF03962_PF07106,,,,,,E,F,"((<Atom N>, <Atom CA>, <Atom C>, <Atom O>, <At...","((<Atom N>, <Atom CA>, <Atom C>, <Atom O>, <At..."
92,alphafold3,known_ddi,PF03962_PF07106_4Y66_E_resi10_resi83.F_resi7_r...,ranked_2,73,69,0.12,0.0,0.41,0.61,...,PF03962_PF07106,,,,,,E,F,"((<Atom N>, <Atom CA>, <Atom C>, <Atom O>, <At...","((<Atom N>, <Atom CA>, <Atom C>, <Atom O>, <At..."
93,alphafold3,known_ddi,PF03962_PF07106_4Y66_E_resi10_resi83.F_resi7_r...,ranked_3,73,69,0.13,0.0,0.38,0.58,...,PF03962_PF07106,,,,,,E,F,"((<Atom N>, <Atom CA>, <Atom C>, <Atom O>, <At...","((<Atom N>, <Atom CA>, <Atom C>, <Atom O>, <At..."
94,alphafold3,known_ddi,PF03962_PF07106_4Y66_E_resi10_resi83.F_resi7_r...,ranked_4,73,69,0.03,0.0,0.36,0.59,...,PF03962_PF07106,,,,,,E,F,"((<Atom N>, <Atom CA>, <Atom C>, <Atom O>, <At...","((<Atom N>, <Atom CA>, <Atom C>, <Atom O>, <At..."
2350,alphafold3,random_ddi,D1PF03962_PF07106_4Y66.D2PF04670_PF15454_6JWP,ranked_0,69,60,0.53,0.0,0.15,0.39,...,D1PF03962_PF07106,6JWP,,D2PF04670_PF15454,,,,,"((<Atom N>, <Atom CA>, <Atom C>, <Atom O>, <At...","((<Atom N>, <Atom CA>, <Atom C>, <Atom O>, <At..."
2351,alphafold3,random_ddi,D1PF03962_PF07106_4Y66.D2PF04670_PF15454_6JWP,ranked_1,69,60,0.47,0.0,0.14,0.39,...,D1PF03962_PF07106,6JWP,,D2PF04670_PF15454,,,,,"((<Atom N>, <Atom CA>, <Atom C>, <Atom O>, <At...","((<Atom N>, <Atom CA>, <Atom C>, <Atom O>, <At..."
2352,alphafold3,random_ddi,D1PF03962_PF07106_4Y66.D2PF04670_PF15454_6JWP,ranked_2,69,60,0.41,0.0,0.06,0.35,...,D1PF03962_PF07106,6JWP,,D2PF04670_PF15454,,,,,"((<Atom N>, <Atom CA>, <Atom C>, <Atom O>, <At...","((<Atom N>, <Atom CA>, <Atom C>, <Atom O>, <At..."
2353,alphafold3,random_ddi,D1PF03962_PF07106_4Y66.D2PF04670_PF15454_6JWP,ranked_3,69,60,0.27,0.0,0.14,0.39,...,D1PF03962_PF07106,6JWP,,D2PF04670_PF15454,,,,,"((<Atom N>, <Atom CA>, <Atom C>, <Atom O>, <At...","((<Atom N>, <Atom CA>, <Atom C>, <Atom O>, <At..."
2354,alphafold3,random_ddi,D1PF03962_PF07106_4Y66.D2PF04670_PF15454_6JWP,ranked_4,69,60,0.31,0.0,0.1,0.38,...,D1PF03962_PF07106,6JWP,,D2PF04670_PF15454,,,,,"((<Atom N>, <Atom CA>, <Atom C>, <Atom O>, <At...","((<Atom N>, <Atom CA>, <Atom C>, <Atom O>, <At..."


In [122]:
def _matched_size(reference: str, query: str) -> int:
    """Return the summed size of all difflib matching blocks between two strings."""
    matcher = difflib.SequenceMatcher(None, reference, query, autojunk=False)
    return sum(block.size for block in matcher.get_matching_blocks())


def _flip_ratio(matched_expected: int, matched_switch: int) -> float:
    """Return matched_expected / matched_switch without dividing by zero.

    If nothing matches the switched chain the ratio is ``inf`` (clearly not
    flipped) or ``nan`` when nothing matches the expected chain either.
    """
    if matched_switch == 0:
        return float("inf") if matched_expected > 0 else float("nan")
    return matched_expected / matched_switch


# For every model, compare the residue-name string of template chain A with the
# predicted chains A and B. A ratio below 1 means the template resembles the
# predicted chain B more than chain A, i.e. the chains are probably swapped.
# NOTE(review): the comparison is character-based on the joined three-letter
# codes. Comparing lists of residue names would be faster, but would change the
# scores and therefore the thresholds below — confirm before switching.
align_score_array = []
for i, row in dataAF.iterrows():
    benchmark_set = row["benchmark_set"]
    prediction_name = row["prediction_name"]
    model_id = row["model_id"]
    pdb_id = row["PDB_id"]
    pdb_id_random_paired = str(row["PDB_id_random_paired"])
    set_ = "DDI" if "ddi" in benchmark_set else "DMI"
    chainA_template = row["chainA_template"]
    chainB_template = row["chainB_template"]

    if chainA_template is None or chainB_template is None:
        print(f"{prediction_name} ({benchmark_set}) has no template")
        continue

    structure_path: Path = path_AF3_structures / set_ / benchmark_set / prediction_name / (model_id + ".pdb")

    if not structure_path.exists():
        print(f"{structure_path.relative_to(path_AF3_structures)} could not been found")
        continue
    structure_biopy = parser.get_structure("structure", file=structure_path)
    chains = [c for c in structure_biopy.get_chains()]
    if len(chains) != 2:
        print(f"Unexpected chains in {prediction_name}/{model_id}.pdb")
        continue

    chainA = structure_biopy[0][chains[0].id]
    chainB = structure_biopy[0][chains[1].id]

    sequenceA = '-'.join([r.get_resname() for r in chainA.get_residues()])
    sequenceB = '-'.join([r.get_resname() for r in chainB.get_residues()])
    sequenceA_template = '-'.join([r.get_resname() for r in chainA_template.get_residues()])

    align_score_expected = _matched_size(sequenceA_template, sequenceA)
    align_score_switch = _matched_size(sequenceA_template, sequenceB)
    align_score = _flip_ratio(align_score_expected, align_score_switch)

    align_score_array.append(align_score)

    # Report once per prediction (for the top-ranked model only)
    if align_score < 0.9 and model_id == "ranked_0":
        print(f"Most likely for {prediction_name} ({benchmark_set}) chain A and B are flipped with align score {round(align_score,3)}")
    elif align_score < 1 and model_id == "ranked_0":
        print(f"\tMismatch possible for {prediction_name} ({benchmark_set}) chain A and B are flipped with align score {round(align_score,3)}")
    elif align_score < 1.2 and model_id == "ranked_0":
        print(f"\tUnsure match for {prediction_name} ({benchmark_set}) chain A and B are flipped with align score {round(align_score,3)}")

Most likely for PF01298_PF00405_3VE1_A_resi174_resi345.B_resi342_resi664 (known_ddi) chain A and B are flipped with align score 0.79
Most likely for PF02351_PF17812_6Q2N_D_resi243_resi337.F_resi265_resi379 (known_ddi) chain A and B are flipped with align score 0.269
Most likely for PF02747_PF00752_1UL1_X_resi2_resi99.A_resi127_resi254 (known_ddi) chain A and B are flipped with align score 0.569
Most likely for PF03962_PF07106_4Y66_E_resi10_resi83.F_resi7_resi79 (known_ddi) chain A and B are flipped with align score 0.567
Most likely for PF04718_PF05405_6TT7_S_resi19_resi102.K_resi31_resi207 (known_ddi) chain A and B are flipped with align score 0.391
Most likely for PF04739_PF16579_2V92_B_resi191_resi271.A_resi395_resi545 (known_ddi) chain A and B are flipped with align score 0.332
Most likely for PF04824_PF02463_1W1W_F_resi487_resi559.B_resi3_resi1223 (known_ddi) chain A and B are flipped with align score 0.591
Most likely for PF05064_PF13874_5C3L_C_resi358_resi470.A_resi318_resi450 (

KeyboardInterrupt: 

In [None]:
# Prediction the preceding loop was working on last (loop variable left in the namespace)
prediction_name

'MTRG_PTS1_2C0L.DLIG_WD40_WDR5_WIN_2_4CY3'

In [None]:
# Matched sizes for the last processed row: template A vs. predicted chain A, then vs. predicted chain B
print(align_score_expected, align_score_switch)

1211 15


In [112]:
# Residue-name strings of the last processed row, one per line:
# predicted chain A, template chain A, predicted chain B
print(sequenceA, sequenceA_template, sequenceB, sep="\n")

VAL-LYS-TRP-CYS-ALA-LEU-SER-HIS-HIS-GLU-ARG-LEU-LYS-CYS-ASP-GLU-TRP-SER-VAL-ASN-SER-VAL-GLY-LYS-ILE-GLU-CYS-VAL-SER-ALA-GLU-THR-THR-GLU-ASP-CYS-ILE-ALA-LYS-ILE-MET-ASN-GLY-GLU-ALA-ASP-ALA-MET-SER-LEU-ASP-GLY-GLY-PHE-VAL-TYR-ILE-ALA-GLY-LYS-CYS-GLY-LEU-VAL-PRO-VAL-LEU-ALA-GLU-ASN-TYR-ASP-LYS-SER-ASP-ASN-CYS-GLU-ASP-THR-PRO-GLU-ALA-GLY-TYR-PHE-ALA-VAL-ALA-VAL-VAL-LYS-LYS-SER-ALA-SER-ASP-LEU-THR-TRP-ASP-ASN-LEU-LYS-GLY-LYS-LYS-SER-CYS-HIS-THR-ALA-VAL-GLY-ARG-THR-ALA-GLY-TRP-ASN-ILE-PRO-MET-GLY-LEU-LEU-TYR-ASN-LYS-ILE-ASN-HIS-CYS-ARG-PHE-ASP-GLU-PHE-PHE-SER-GLU-GLY-CYS-ALA-PRO-GLY-SER-LYS-LYS-ASP-SER-SER-LEU-CYS-LYS-LEU-CYS-MET-GLY-SER-GLY-LEU-ASN-LEU-CYS-GLU-PRO-ASN-ASN-LYS-GLU-GLY-TYR-TYR-GLY-TYR-THR-GLY-ALA-PHE-ARG-CYS-LEU-VAL-GLU-LYS-GLY-ASP-VAL-ALA-PHE-VAL-LYS-HIS-GLN-THR-VAL-PRO-GLN-ASN-THR-GLY-GLY-LYS-ASN-PRO-ASP-PRO-TRP-ALA-LYS-ASN-LEU-ASN-GLU-LYS-ASP-TYR-GLU-LEU-LEU-CYS-LEU-ASP-GLY-THR-ARG-LYS-PRO-VAL-GLU-GLU-TYR-ALA-ASN-CYS-HIS-LEU-ALA-ARG-ALA-PRO-ASN-HIS-ALA-VAL-VAL-THR-ARG-LYS-

In [8]:
# Start from empty template columns; below, only the top-ranked model (ranked_0)
# of each prediction receives its templates.
# NOTE(review): when this runs after the earlier template-assignment cell it resets
# the templates that cell stored for the other models — confirm this is intended.
for _template_column in ("chainA_template", "chainB_template"):
    dataAF[_template_column] = None

_tmp_benchmark_set = None
for i, row in dataAF.iterrows():
    benchmark_set = str(row["benchmark_set"])
    set_ = "DDI" if "ddi" in benchmark_set else "DMI"
    # Print each benchmark set once as a heading for the messages that follow
    if _tmp_benchmark_set != benchmark_set:
        _tmp_benchmark_set = benchmark_set
        print(benchmark_set)
    prediction_name = row["prediction_name"]
    model_id = row["model_id"]
    if model_id != "ranked_0":
        continue
    pdb_id = row["PDB_id"]
    # A missing partner ID is NaN in the table and becomes the string "nan" here
    pdb_id_random_paired = str(row["PDB_id_random_paired"])

    in_same_set = dataSolved["set"] == set_
    solved_primary = dataSolved[in_same_set & (dataSolved["PDB_id"] == pdb_id)]
    _num_solved_files = len(solved_primary)
    if _num_solved_files != 1:
        print("\t", f"{pdb_id} for set {benchmark_set} was", ("not found in solved structures" if _num_solved_files == 0 else "found multiple times in solved structures"))
        continue

    chainA_template = solved_primary["chainA"].item()

    if pdb_id_random_paired == "nan":
        # No random partner: chain B comes from the same solved structure
        chainB_template = solved_primary["chainB"].item()
    else:
        solved_paired = dataSolved[in_same_set & (dataSolved["PDB_id"] == pdb_id_random_paired)]
        _num_solved_files = len(solved_paired)
        if _num_solved_files != 1:
            print("\t", f"{pdb_id_random_paired} (random pair) for set {benchmark_set} was", ("not found in solved structures" if _num_solved_files == 0 else "found multiple times in solved structures"))
            continue
        chainB_template = solved_paired["chainB"].item()

    dataAF.at[i, "chainA_template"] = chainA_template
    dataAF.at[i, "chainB_template"] = chainB_template
dataAF

known_ddi
known_minimal
	 2V51 for set known_minimal was not found in solved structures
	 3AA0 for set known_minimal was not found in solved structures
mutations
random_ddi
	 1G4B (random pair) for set random_ddi was not found in solved structures
	 1G4B for set random_ddi was not found in solved structures
random_minimal
	 2CCI (random pair) for set random_minimal was not found in solved structures


Unnamed: 0,model_preset,benchmark_set,prediction_name,model_id,chainA_length,chainB_length,fraction_disordered,has_clash,iptm,ptm,...,ELM_instance_random_paired,ddi_pfam_id_random_paired,sequence_initial,sequence_mutated,chainA_id,chainB_id,chainA,chainB,chainA_template,chainB_template
0,alphafold3,known_ddi,PF00009_PF01873_2D74_A_resi12_resi200.B_resi21...,ranked_0,189,113,0.04,0.0,0.20,0.53,...,,,,,A,B,"((<Atom N>, <Atom CA>, <Atom C>, <Atom O>), (<...","((<Atom N>, <Atom CA>, <Atom C>, <Atom O>, <At...","((<Atom N>, <Atom CA>, <Atom C>, <Atom O>, <At...","((<Atom N>, <Atom CA>, <Atom C>, <Atom O>, <At..."
1,alphafold3,known_ddi,PF00009_PF01873_2D74_A_resi12_resi200.B_resi21...,ranked_1,189,113,0.04,0.0,0.16,0.51,...,,,,,A,B,,,,
2,alphafold3,known_ddi,PF00009_PF01873_2D74_A_resi12_resi200.B_resi21...,ranked_2,189,113,0.04,0.0,0.13,0.46,...,,,,,A,B,,,,
3,alphafold3,known_ddi,PF00009_PF01873_2D74_A_resi12_resi200.B_resi21...,ranked_3,189,113,0.04,0.0,0.10,0.47,...,,,,,A,B,,,,
4,alphafold3,known_ddi,PF00009_PF01873_2D74_A_resi12_resi200.B_resi21...,ranked_4,189,113,0.04,0.0,0.07,0.49,...,,,,,A,B,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3175,alphafold3,random_minimal,MTRG_PTS1_2C0L.DLIG_WD40_WDR5_WIN_2_4CY3,ranked_0,312,4,0.02,0.0,0.91,0.95,...,LIG_WD40_WDR5_WIN_2,,,,,,"((<Atom N>, <Atom CA>, <Atom C>, <Atom O>), (<...","((<Atom N>, <Atom CA>, <Atom C>, <Atom O>, <At...","((<Atom N>, <Atom CA>, <Atom C>, <Atom O>, <At...","((<Atom N>, <Atom CA>, <Atom C>, <Atom O>, <At..."
3176,alphafold3,random_minimal,MTRG_PTS1_2C0L.DLIG_WD40_WDR5_WIN_2_4CY3,ranked_1,312,4,0.01,0.0,0.90,0.95,...,LIG_WD40_WDR5_WIN_2,,,,,,,,,
3177,alphafold3,random_minimal,MTRG_PTS1_2C0L.DLIG_WD40_WDR5_WIN_2_4CY3,ranked_2,312,4,0.02,0.0,0.90,0.95,...,LIG_WD40_WDR5_WIN_2,,,,,,,,,
3178,alphafold3,random_minimal,MTRG_PTS1_2C0L.DLIG_WD40_WDR5_WIN_2_4CY3,ranked_3,312,4,0.01,0.0,0.90,0.95,...,LIG_WD40_WDR5_WIN_2,,,,,,,,,


### 3

In [19]:
# Show only the top-ranked model (ranked_0) of every prediction
dataAF[dataAF["model_id"] == "ranked_0"]

Unnamed: 0,model_preset,benchmark_set,prediction_name,model_id,chainA_length,chainB_length,fraction_disordered,has_clash,iptm,ptm,...,ELM_instance_random_paired,ddi_pfam_id_random_paired,sequence_initial,sequence_mutated,chainA_id,chainB_id,chainA,chainB,chainA_template,chainB_template
0,alphafold3,known_ddi,PF00009_PF01873_2D74_A_resi12_resi200.B_resi21...,ranked_0,189,113,0.04,0.0,0.20,0.53,...,,,,,A,B,"((<Atom N>, <Atom CA>, <Atom C>, <Atom O>), (<...","((<Atom N>, <Atom CA>, <Atom C>, <Atom O>, <At...","((<Atom N>, <Atom CA>, <Atom C>, <Atom O>, <At...","((<Atom N>, <Atom CA>, <Atom C>, <Atom O>, <At..."
5,alphafold3,known_ddi,PF00026_PF06394_1F34_A_resi13_resi326.B_resi62...,ranked_0,314,59,0.02,0.0,0.36,0.82,...,,,,,A,B,"((<Atom N>, <Atom CA>, <Atom C>, <Atom O>), (<...","((<Atom N>, <Atom CA>, <Atom C>, <Atom O>, <At...","((<Atom N>, <Atom CA>, <Atom C>, <Atom O>, <At...","((<Atom N>, <Atom CA>, <Atom C>, <Atom O>, <At..."
10,alphafold3,known_ddi,PF00059_PF00041_1TDQ_B_resi10_resi125.A_resi85...,ranked_0,116,102,0.03,0.0,0.79,0.82,...,,,,,B,A,"((<Atom N>, <Atom CA>, <Atom C>, <Atom O>), (<...","((<Atom N>, <Atom CA>, <Atom C>, <Atom O>, <At...","((<Atom N>, <Atom CA>, <Atom C>, <Atom O>, <At...","((<Atom N>, <Atom CA>, <Atom C>, <Atom O>, <At..."
15,alphafold3,known_ddi,PF00089_PF00095_1FLE_E_resi16_resi243.I_resi12...,ranked_0,238,45,0.06,0.0,0.90,0.94,...,,,,,E,I,"((<Atom N>, <Atom CA>, <Atom C>, <Atom O>), (<...","((<Atom N>, <Atom CA>, <Atom C>, <Atom O>, <At...","((<Atom N>, <Atom CA>, <Atom C>, <Atom O>, <At...","((<Atom N>, <Atom CA>, <Atom C>, <Atom O>, <At..."
20,alphafold3,known_ddi,PF00137_PF07850_6VQG_i_resi7_resi86.p_resi292_...,ranked_0,80,52,0.48,0.0,0.55,0.64,...,,,,,i,p,"((<Atom N>, <Atom CA>, <Atom C>, <Atom O>), (<...","((<Atom N>, <Atom CA>, <Atom C>, <Atom O>, <At...","((<Atom N>, <Atom CA>, <Atom C>, <Atom O>, <At...","((<Atom N>, <Atom CA>, <Atom C>, <Atom O>, <At..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3155,alphafold3,random_minimal,MTRG_ER_FFAT_1_2RR3.DDOC_MAPK_HePTP_8_2GPH,ranked_0,334,9,0.03,0.0,0.36,0.89,...,DOC_MAPK_HePTP_8,,,,,,"((<Atom N>, <Atom CA>, <Atom C>, <Atom O>), (<...","((<Atom N>, <Atom CA>, <Atom C>, <Atom O>, <At...","((<Atom N>, <Atom CA>, <Atom C>, <Atom O>, <At...","((<Atom N>, <Atom CA>, <Atom C>, <Atom O>, <At..."
3160,alphafold3,random_minimal,MTRG_LysEnd_GGAAcLL_1_1JWG.DLIG_SUMO_SIM_anti_...,ranked_0,78,7,0.08,0.0,0.76,0.85,...,LIG_SUMO_SIM_anti_2,,,,,,"((<Atom N>, <Atom CA>, <Atom C>, <Atom O>), (<...","((<Atom N>, <Atom CA>, <Atom C>, <Atom O>, <At...","((<Atom N>, <Atom CA>, <Atom C>, <Atom O>, <At...","((<Atom N>, <Atom CA>, <Atom C>, <Atom O>, <At..."
3165,alphafold3,random_minimal,MTRG_NES_CRM1_1_3GB8.DDOC_PP1_MyPhoNE_1_1S70,ranked_0,299,16,0.05,0.0,0.64,0.94,...,DOC_PP1_MyPhoNE_1,,,,,,"((<Atom N>, <Atom CA>, <Atom C>, <Atom O>), (<...","((<Atom N>, <Atom CA>, <Atom C>, <Atom O>, <At...","((<Atom N>, <Atom CA>, <Atom C>, <Atom O>, <At...","((<Atom N>, <Atom CA>, <Atom C>, <Atom O>, <At..."
3170,alphafold3,random_minimal,MTRG_NLS_Bipartite_1_1PJM.DLIG_PDZ_Class_1_1D5G,ranked_0,96,17,0.15,0.0,0.42,0.71,...,LIG_PDZ_Class_1,,,,,,"((<Atom N>, <Atom CA>, <Atom C>, <Atom O>), (<...","((<Atom N>, <Atom CA>, <Atom C>, <Atom O>, <At...","((<Atom N>, <Atom CA>, <Atom C>, <Atom O>, <At...","((<Atom N>, <Atom CA>, <Atom C>, <Atom O>, <At..."


In [13]:
# Walk through the table and stop at the first ranked_0 row that has both
# templates and both predicted chains. The loop variables (chainA, chainB,
# chainA_template, chainB_template, prediction_name, ...) are then used by the
# following cells as a worked example.
_tmp_benchmark_set = None
for i, row in dataAF.iterrows():
    benchmark_set = str(row["benchmark_set"])
    set_ = "DDI" if "ddi" in benchmark_set else "DMI"
    if _tmp_benchmark_set != benchmark_set:
        _tmp_benchmark_set = benchmark_set
        print(benchmark_set)
    prediction_name = row["prediction_name"]
    model_id = row["model_id"]
    if model_id != "ranked_0":
        continue
    pdb_id = row["PDB_id"]
    pdb_id_random_paired = str(row["PDB_id_random_paired"])
    chainA_template = row["chainA_template"]
    chainB_template = row["chainB_template"]
    if chainA_template is None or chainB_template is None:
        print("\t", f"Skipping {prediction_name} as there is no template")
        continue

    # Use the predicted chains stored in the table when they exist. No cell shown
    # in this notebook creates the "chainA"/"chainB" columns, so otherwise parse
    # them from the model file, the same way the flip-detection cell does.
    chainA = row.get("chainA")
    chainB = row.get("chainB")
    if not (hasattr(chainA, "get_residues") and hasattr(chainB, "get_residues")):
        structure_path = path_AF3_structures / set_ / benchmark_set / prediction_name / (model_id + ".pdb")
        if not structure_path.exists():
            print("\t", f"Skipping {prediction_name} as {structure_path.relative_to(path_AF3_structures)} could not be found")
            continue
        predicted_chains = list(parser.get_structure("structure", file=structure_path)[0].get_chains())
        if len(predicted_chains) != 2:
            print("\t", f"Skipping {prediction_name} as {model_id}.pdb does not contain exactly two chains")
            continue
        chainA, chainB = predicted_chains

    break

known_ddi


In [18]:
# Name of the prediction the preceding loop stopped on (loop variable left in the namespace)
prediction_name

'PF00009_PF01873_2D74_A_resi12_resi200.B_resi21_resi133'

In [17]:
def _resname_string(chain):
    """Join the residue names of *chain* with '-' in residue order."""
    return '-'.join(residue.get_resname() for residue in chain.get_residues())


# Residue-name strings of the predicted chain A and of its template, one per line
sequenceA = _resname_string(chainA)
sequenceA_template = _resname_string(chainA_template)
print(sequenceA, sequenceA_template, sep="\n")

GLY-GLU-VAL-PRO-THR-PHE-LYS-LEU-VAL-LEU-VAL-GLY-ASP-GLY-GLY-THR-GLY-LYS-THR-THR-PHE-VAL-LYS-ARG-HIS-LEU-THR-GLY-GLU-PHE-GLU-LYS-LYS-TYR-ILE-ALA-THR-ILE-GLY-VAL-GLU-VAL-HIS-PRO-LEU-SER-PHE-TYR-THR-ASN-PHE-GLY-GLU-ILE-LYS-PHE-ASP-VAL-TRP-ASP-THR-ALA-GLY-LEU-GLU-LYS-PHE-GLY-GLY-LEU-ARG-ASP-GLY-TYR-TYR-ILE-ASN-ALA-GLN-CYS-ALA-ILE-ILE-MET-PHE-ASP-VAL-THR-SER-ARG-ILE-THR-TYR-LYS-ASN-VAL-PRO-ASN-TRP-HIS-ARG-ASP-LEU-VAL-ARG-VAL-CYS-GLU-ASN-ILE-PRO-ILE-VAL-LEU-CYS-GLY-ASN-LYS-VAL-ASP-VAL-LYS-GLU-ARG-LYS-VAL-LYS-ALA-LYS-THR-ILE-THR-PHE-HIS-ARG-LYS-LYS-ASN-LEU-GLN-TYR-TYR-ASP-ILE-SER-ALA-LYS-SER-ASN-TYR-ASN-PHE-GLU-LYS-PRO-PHE-LEU-TRP-LEU-ALA-ARG-LYS-LEU-ALA-GLY-ASN-PRO-GLN-LEU-GLU-PHE-VAL-GTP-MG-HOH-HOH-HOH-HOH-HOH-HOH-HOH-HOH-HOH-HOH-HOH-HOH-HOH-HOH-HOH-HOH
LYS-THR-ARG-GLN-ALA-GLU-VAL-ASN-ILE-GLY-MET-VAL-GLY-HIS-VAL-ASP-HIS-GLY-LYS-THR-THR-LEU-THR-LYS-ALA-LEU-THR-GLY-VAL-TRP-THR-ASP-THR-LEU-ARG-ARG-GLY-ILE-THR-ILE-LYS-ILE-GLY-PHE-ALA-ASP-ALA-GLU-ILE-ARG-ARG-CYS-SER-ASN-CYS-GLY-ARG-TYR-SER-THR-S

In [14]:
from Bio.PDB.StructureAlignment import StructureAlignment

In [16]:
# get_atoms() returns a generator (see the output below): it has no len() and can
# only be iterated once, so wrap it in list(...) where a list of atoms is needed.
chainA.get_atoms()

<generator object Chain.get_atoms at 0x00000239213435E0>

In [None]:
from Bio.PDB import Superimposer

# Superimposer.set_atoms() takes len() of both arguments, so the generators returned by
# Chain.get_atoms() have to be materialised as lists before they are passed in.
atoms_fixed = list(chainA.get_atoms())
atoms_moving = list(chainA_template.get_atoms())

super_imposer = Superimposer()
# NOTE(review): set_atoms() expects two equally long lists of paired atoms. The two chains
# printed above differ in sequence, so the lists probably have to be reduced to matched
# atoms first — confirm before relying on this cell.
super_imposer.set_atoms(atoms_fixed, atoms_moving)
# NOTE(review): sample_model is not defined in the visible part of the notebook — confirm
# which atoms the fitted rotation/translation is meant to be applied to.
super_imposer.apply(sample_model.get_atoms())

In [None]:
# Result columns of this step. They stay None for every row that is skipped below.
dataAF["RMSD_domain"] = None
dataAF["align_score_domain"] = None
dataAF["num_align_atoms_domain"] = None
dataAF["num_align_resi_domain"] = None
dataAF["RMSD_backbone_peptide"] = None
dataAF["RMSD_all_atom_peptide"] = None

for i, row in dataAF.iterrows():
    # RMSD can only be calculated for DMI. Mutations also allows limited RMSD calculations, as only chain B is mutated
    # (The former walrus expression bound benchmark_set to a bool, so "mutations" never matched.)
    benchmark_set = row["benchmark_set"]
    if benchmark_set not in ("known_minimal", "mutations"):
        continue

    pdb_id = str(row["PDB_id"])
    prediction_name = str(row["prediction_name"])
    model_id = str(row["model_id"])

    # NOTE(review): assumes the AF3 output folder is named like the benchmark set. This is
    # unchanged for "known_minimal"; confirm the folder name for "mutations".
    structure_path = ressources_path / "AF3" / "DMI" / str(benchmark_set) / prediction_name / str(model_id + ".pdb")
    if not structure_path.exists():
        print(f"For structure {prediction_name}_{model_id} the pdb file does not exist. Skip")
        continue
    structure_template_path = ressources_path / "solved" / "DMI_solved_structures" / str(pdb_id + "_min_DMI.pdb")
    if not structure_template_path.exists():
        print(f"For structure {prediction_name}_{model_id} the template pdb file does not exist. Skip")
        continue

    # --- Predicted structure: exactly two chains expected, taken in file order ---
    structure_biopy = parser.get_structure("structure", file=structure_path)
    chains = [c for c in structure_biopy.get_chains()]
    if len(chains) != 2:
        print(f"Unexpected chains in {prediction_name}_{model_id}")
        continue
    chainA = structure_biopy[0][chains[0].id]
    chainB = structure_biopy[0][chains[1].id]

    sequenceA = '-'.join([r.get_resname() for r in chainA.get_residues()])
    sequenceB = '-'.join([r.get_resname() for r in chainB.get_residues()])

    # --- Solved template: must be read from the template structure, not from the prediction ---
    structure_biopy_template = parser.get_structure("structure", file=structure_template_path)
    chains_template = [c for c in structure_biopy_template.get_chains()]
    if len(chains_template) != 2:
        print(f"Unexpected chains in {structure_template_path.name}")
        continue
    chainA_template = structure_biopy_template[0][chains_template[0].id]
    chainB_template = structure_biopy_template[0][chains_template[1].id]

    sequenceA_template = '-'.join([r.get_resname() for r in chainA_template.get_residues()])
    sequenceB_template = '-'.join([r.get_resname() for r in chainB_template.get_residues()])

    # Chain A only has to be similar (ratio >= 0.9 on the joined residue names) ...
    chainA_ratio = difflib.SequenceMatcher(None, sequenceA, sequenceA_template).ratio()

    if chainA_ratio < 0.9:
        print(f"For structure {prediction_name}_{model_id} the sequences for chain A differ more than expected")
        print("\t", sequenceA, "\n\t", sequenceA_template)
        continue
    # ... whereas chain B has to be identical.
    # NOTE(review): rows with a mutated chain B will be skipped by this check — confirm
    # whether "mutations" rows should still get the domain RMSD.
    if sequenceB != sequenceB_template:
        print(f"For structure {prediction_name}_{model_id} the sequences for chain B differ")
        print("\t", sequenceB, "\n\t", sequenceB_template)
        continue

    # Start from an empty PyMOL session so nothing from the previous row is left over
    pymol.cmd.delete("all")

    pymol.cmd.load(str(structure_path), "AF")
    pymol.cmd.load(str(structure_template_path), "solved")
    pymol.cmd.remove(selection="elem 'H'")

    # Collect the residue numbers of chain B in both objects
    space = {'solved_resi': [], "af_resi": []}
    pymol.cmd.iterate("solved and chain B", "solved_resi.append(int(resi))", space=space)
    pymol.cmd.iterate("AF and chain B", "af_resi.append(int(resi))", space=space)
    if not space["solved_resi"] or not space["af_resi"]:
        print(f"No chain B atoms found for {prediction_name} (model {model_id})")
        continue

    # Calculate two offsets from beginning and end of chain B to check for potential missing IDs
    offset_low = min(space["solved_resi"]) - min(space["af_resi"])
    offset_high = max(space["solved_resi"]) - max(space["af_resi"])
    if not offset_low == offset_high:
        print(f"Offset error for {prediction_name} (model {model_id})")
        continue

    # Shift the predicted chain B onto the residue numbering of the solved structure and
    # copy the chain id into segi for chain B of both objects
    pymol.cmd.alter("AF and chain B", f"resi = (int(resi) + {offset_low})")
    pymol.cmd.sort()
    pymol.cmd.alter("chain B", "segi = chain")
    pymol.cmd.sort()

    # Return value of pymol.cmd.align:
    #    0: RMSD after refinement
    #    1: Number of aligned atoms after refinement
    #    2: Number of refinement cycles
    #    3: RMSD before refinement
    #    4: Number of aligned atoms before refinement
    #    5: Raw alignment score
    #    6: Number of residues aligned

    # cycles=0 switches off the outlier-rejection cycles, so no aligned atom is discarded
    # from the fit and the reported RMSD
    align_output = pymol.cmd.align(mobile="AF and chain A", target="solved and chain A", object="algn_domain", cycles=0, )
    RMSD_domain = align_output[0]
    num_align_atoms_domain = align_output[1]
    align_score_domain = align_output[5]
    num_align_resi_domain = align_output[6]

    # Peptide RMSD (backbone only and all atoms) with the objects left as superimposed by the align call above
    RMSD_backbone_peptide = pymol.cmd.rms_cur(mobile="AF and chain B and bb.", target="solved and chain B and bb.", object="peptide_super_bb")
    RMSD_all_atom_peptide = pymol.cmd.rms_cur(mobile="AF and chain B", target="solved and chain B", object="peptide_super_all_atoms")

    dataAF.at[i, "RMSD_domain"] = RMSD_domain
    dataAF.at[i, "align_score_domain"] = align_score_domain
    dataAF.at[i, "num_align_atoms_domain"] = num_align_atoms_domain
    dataAF.at[i, "num_align_resi_domain"] = num_align_resi_domain
    dataAF.at[i, "RMSD_backbone_peptide"] = RMSD_backbone_peptide
    dataAF.at[i, "RMSD_all_atom_peptide"] = RMSD_all_atom_peptide

In [None]:
# Split an example prediction name at its underscores to inspect the individual tokens
prediction_name = "DEG_SCF_COI1_1_3OGL"
pdb_id = prediction_name.split(sep="_")
print(pdb_id)

In [None]:
# Single-structure run of the RMSD workflow (handy for debugging one prediction)
prediction_name = "DEG_SCF_COI1_1_3OGL"
pdb_id = prediction_name.split("_")[-1]
model_id = "ranked_0"
structure_path = ressources_path / "AF3" / "DMI" / "known_minimal" / prediction_name / str(model_id + ".pdb")
structure_template_path = ressources_path / "solved" / "DMI_solved_structures" / str(pdb_id + "_min_DMI.pdb")

# Start from an empty PyMOL session so nothing from earlier cells is left over
pymol.cmd.delete("all")

pymol.cmd.load(str(structure_path), "AF")
pymol.cmd.load(str(structure_template_path), "solved")
pymol.cmd.remove(selection="elem 'H'")

# Collect the residue numbers of chain B in both objects
space = {'solved_resi': [], "af_resi": []}
pymol.cmd.iterate("solved and chain B", "solved_resi.append(int(resi))", space=space)
pymol.cmd.iterate("AF and chain B", "af_resi.append(int(resi))", space=space)
assert space["solved_resi"] and space["af_resi"], "chain B is missing in one of the loaded objects"

# Calculate two offsets from beginning and end of chain B to check for potential missing IDs
offset_low = min(space["solved_resi"]) - min(space["af_resi"])
offset_high = max(space["solved_resi"]) - max(space["af_resi"])
assert offset_low == offset_high

# Shift the predicted chain B onto the residue numbering of the solved structure and
# copy the chain id into segi for chain B of both objects
pymol.cmd.alter("AF and chain B", f"resi = (int(resi) + {offset_low})")
pymol.cmd.sort()
pymol.cmd.alter("chain B", "segi = chain")
pymol.cmd.sort()

# Return value of pymol.cmd.align:
#    0: RMSD after refinement
#    1: Number of aligned atoms after refinement
#    2: Number of refinement cycles
#    3: RMSD before refinement
#    4: Number of aligned atoms before refinement
#    5: Raw alignment score
#    6: Number of residues aligned

# cycles=0 switches off the outlier-rejection cycles, so no aligned atom is discarded
# from the fit and the reported RMSD
align_output = pymol.cmd.align(mobile="AF and chain A", target="solved and chain A", object="algn_domain", cycles=0, )
RMSD_domain = align_output[0]
num_align_atoms_domain = align_output[1]
align_score_domain = align_output[5]
num_align_resi_domain = align_output[6]

# Peptide RMSD (backbone only and all atoms) with the objects left as superimposed by the align call above
RMSD_backbone_peptide = pymol.cmd.rms_cur(mobile="AF and chain B and bb.", target="solved and chain B and bb.", object="peptide_super_bb")
RMSD_all_atom_peptide = pymol.cmd.rms_cur(mobile="AF and chain B", target="solved and chain B", object="peptide_super_all_atoms")

# Store the results in the row(s) of exactly this prediction. Indexing with the loop
# variable `i` of an earlier cell would overwrite whichever row that loop ended on.
row_mask = (
    (dataAF["benchmark_set"] == "known_minimal")
    & (dataAF["prediction_name"] == prediction_name)
    & (dataAF["model_id"] == model_id)
)
if not row_mask.any():
    print(f"No row in dataAF matches {prediction_name} (model {model_id}); nothing stored")
dataAF.loc[row_mask, "RMSD_domain"] = RMSD_domain
dataAF.loc[row_mask, "align_score_domain"] = align_score_domain
dataAF.loc[row_mask, "num_align_atoms_domain"] = num_align_atoms_domain
dataAF.loc[row_mask, "num_align_resi_domain"] = num_align_resi_domain
dataAF.loc[row_mask, "RMSD_backbone_peptide"] = RMSD_backbone_peptide
dataAF.loc[row_mask, "RMSD_all_atom_peptide"] = RMSD_all_atom_peptide

In [None]:
# Debug helper: print the residue identifier (resi) for the selection "chain B and solved".
# iterate evaluates the expression once per atom, so each residue shows up several times.
pymol.cmd.iterate("chain B and solved", "print(resi)")