# Requisites

- Environment with dependencies in `conda_env.yaml`
- External dependencies to be downloaded and placed in `training_data/utils/external`:
    - predict_ddG.py script from PyRosetta (https://github.com/RosettaCommons/PyRosetta.notebooks/blob/master/notebooks/additional_scripts/predict_ddG.py)
    - DSSP software executable (https://github.com/PDB-REDO/dssp/releases/download/v4.4.0/mkdssp-4.4.0-linux-x64)

In [1]:
from predict import \
get_cif, view_pdb, get_clean_pdb, Site, get_pockets, \
get_colabfold_msa, get_features, get_pockets_features, \
prepare_data, model, view_pockets, get_pathways, view_pockets_pathways

from predict import path as predict_path



  import pkg_resources


┌──────────────────────────────────────────────────────────────────────────────┐
│                                 PyRosetta-4                                  │
│              Created in JHU by Sergey Lyskov and PyRosetta Team              │
│              (C) Copyright Rosetta Commons Member Institutions               │
│                                                                              │
│ NOTE: USE OF PyRosetta FOR COMMERCIAL PURPOSES REQUIRE PURCHASE OF A LICENSE │
│         See LICENSE.PyRosetta.md or email license@uw.edu for details         │
└──────────────────────────────────────────────────────────────────────────────┘
PyRosetta-4 2025 [Rosetta PyRosetta4.conda.ubuntu.cxx11thread.serialization.Ubuntu.python311.Release 2025.24+release.8e1e5e54f047b0833dcf760a5cd5d3ce94d63938 2025-06-06T09:20:57] retrieved from: http://www.pyrosetta.org
core.init: Checking for fconfig files in pwd and ./rosetta/flags
core.init: Rosetta version: PyRosetta4.conda.ubuntu.cxx11thread.ser

<br>

**Files are written to the `predict` folder by default; to write them elsewhere, pass a different `path=` argument to the functions below.**

<br>

# Predict

## Get protein structure

In [2]:
# Retrieve the structure of PDB entry 6t4k (the viewer output below shows
# mmCIF data for this entry).
pdb = get_cif(
    pdb_id="6t4k"
)

# Display the retrieved structure in the interactive PDBeMolstar viewer.
view_pdb(pdb)

PDBeMolstar(bg_color='#F7F7F7', custom_data={'data': "data_6T4K\n#\n_entry.id 6T4K\n#\n_citation.abstract ?\n_…

In [3]:
pdb.atoms

Unnamed: 0,group_PDB,id,type_symbol,label_atom_id,label_alt_id,label_comp_id,label_asym_id,label_entity_id,label_seq_id,pdbx_PDB_ins_code,...,auth_seq_id,auth_comp_id,auth_asym_id,auth_atom_id,pdbx_PDB_model_num,pdbx_label_index,pdbx_sifts_xref_db_name,pdbx_sifts_xref_db_acc,pdbx_sifts_xref_db_num,pdbx_sifts_xref_db_res
0,ATOM,1,N,N,.,LEU,A,1,23,?,...,267,LEU,A,N,1,23,UNP,P51449,267,L
1,ATOM,2,C,CA,.,LEU,A,1,23,?,...,267,LEU,A,CA,1,23,UNP,P51449,267,L
2,ATOM,3,C,C,.,LEU,A,1,23,?,...,267,LEU,A,C,1,23,UNP,P51449,267,L
3,ATOM,4,O,O,.,LEU,A,1,23,?,...,267,LEU,A,O,1,23,UNP,P51449,267,L
4,ATOM,5,C,CB,.,LEU,A,1,23,?,...,267,LEU,A,CB,1,23,UNP,P51449,267,L
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2170,HETATM,2171,O,O,.,HOH,F,5,.,?,...,767,HOH,A,O,1,767,?,?,?,?
2171,HETATM,2172,O,O,.,HOH,F,5,.,?,...,768,HOH,A,O,1,768,?,?,?,?
2172,HETATM,2173,O,O,.,HOH,F,5,.,?,...,769,HOH,A,O,1,769,?,?,?,?
2173,HETATM,2174,O,O,.,HOH,F,5,.,?,...,770,HOH,A,O,1,770,?,?,?,?


In [4]:
pdb.residues

Unnamed: 0,label_comp_id,label_asym_id,label_entity_id,label_seq_id,pdbx_PDB_ins_code,auth_seq_id,auth_comp_id,auth_asym_id,pdbx_PDB_model_num,pdbx_label_index,pdbx_sifts_xref_db_name,pdbx_sifts_xref_db_acc,pdbx_sifts_xref_db_num,pdbx_sifts_xref_db_res
0,LEU,A,1,23,?,267,LEU,A,1,23,UNP,P51449,267,L
8,THR,A,1,24,?,268,THR,A,1,24,UNP,P51449,268,T
15,GLU,A,1,25,?,269,GLU,A,1,25,UNP,P51449,269,E
24,ILE,A,1,26,?,270,ILE,A,1,26,UNP,P51449,270,I
32,GLU,A,1,27,?,271,GLU,A,1,27,UNP,P51449,271,E
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2170,HOH,F,5,.,?,767,HOH,A,1,767,?,?,?,?
2171,HOH,F,5,.,?,768,HOH,A,1,768,?,?,?,?
2172,HOH,F,5,.,?,769,HOH,A,1,769,?,?,?,?
2173,HOH,F,5,.,?,770,HOH,A,1,770,?,?,?,?


## Optional: set a target site

### With a modulator molecule

In [5]:
# Desired modulator is label_asym_id 'C'
pdb.residues.query("label_asym_id == 'C'")

Unnamed: 0,label_comp_id,label_asym_id,label_entity_id,label_seq_id,pdbx_PDB_ins_code,auth_seq_id,auth_comp_id,auth_asym_id,pdbx_PDB_model_num,pdbx_label_index,pdbx_sifts_xref_db_name,pdbx_sifts_xref_db_acc,pdbx_sifts_xref_db_num,pdbx_sifts_xref_db_res
2061,4F1,C,3,.,?,602,4F1,A,1,602,?,?,?,?


In [6]:
# Define the target site from the modulator selected above (label_asym_id 'C').
# NOTE(review): only_protein=True presumably restricts the site to protein
# residues (the site.residues output below lists only chain A amino acids) —
# confirm against predict.Site.
site = Site(
    pdb, 
    modulator_residues=pdb.residues.query("label_asym_id == 'C'"), 
    only_protein=True
)
site

<predict.Site at 0x7325d2c1a310>

In [7]:
site.modulator_residues

Unnamed: 0,label_comp_id,label_asym_id,label_entity_id,label_seq_id,pdbx_PDB_ins_code,auth_seq_id,auth_comp_id,auth_asym_id,pdbx_PDB_model_num,pdbx_label_index,pdbx_sifts_xref_db_name,pdbx_sifts_xref_db_acc,pdbx_sifts_xref_db_num,pdbx_sifts_xref_db_res
2061,4F1,C,3,.,?,602,4F1,A,1,602,?,?,?,?


In [8]:
site.residues

Unnamed: 0,label_comp_id,label_asym_id,label_entity_id,label_seq_id,pdbx_PDB_ins_code,auth_seq_id,auth_comp_id,auth_asym_id,pdbx_PDB_model_num,pdbx_label_index,pdbx_sifts_xref_db_name,pdbx_sifts_xref_db_acc,pdbx_sifts_xref_db_num,pdbx_sifts_xref_db_res
0,TRP,A,1,73,?,317,TRP,A,1,73,UNP,P51449,317,W
1,ALA,A,1,77,?,321,ALA,A,1,77,UNP,P51449,321,A
2,HIS,A,1,78,?,322,HIS,A,1,78,UNP,P51449,322,H
3,LEU,A,1,80,?,324,LEU,A,1,80,UNP,P51449,324,L
4,THR,A,1,81,?,325,THR,A,1,81,UNP,P51449,325,T
5,ILE,A,1,84,?,328,ILE,A,1,84,UNP,P51449,328,I
6,GLN,A,1,85,?,329,GLN,A,1,85,UNP,P51449,329,Q
7,VAL,A,1,88,?,332,VAL,A,1,88,UNP,P51449,332,V
8,LEU,A,1,109,?,353,LEU,A,1,109,UNP,P51449,353,L
9,LYS,A,1,110,?,354,LYS,A,1,110,UNP,P51449,354,K


In [9]:
# List of residue numbers of site
resnums = site.residues.label_seq_id.to_list()
resnums

['73',
 '77',
 '78',
 '80',
 '81',
 '84',
 '85',
 '88',
 '109',
 '110',
 '111',
 '112',
 '113',
 '114',
 '232',
 '235',
 '236',
 '238',
 '239',
 '240',
 '241',
 '243',
 '248',
 '250',
 '251',
 '252',
 '253',
 '254',
 '255',
 '257',
 '258',
 '261',
 '262']

### With a list of residues

In [10]:
# Site can be defined with a list of residues instead of a modulator
res_site = Site(
    pdb=pdb,
    residues=[{"label_asym_id": "A", "label_seq_id": seqnum} for seqnum in resnums],
    only_protein=True
)
res_site.residues

Unnamed: 0,label_comp_id,label_asym_id,label_entity_id,label_seq_id,pdbx_PDB_ins_code,auth_seq_id,auth_comp_id,auth_asym_id,pdbx_PDB_model_num,pdbx_label_index,pdbx_sifts_xref_db_name,pdbx_sifts_xref_db_acc,pdbx_sifts_xref_db_num,pdbx_sifts_xref_db_res
0,TRP,A,1,73,?,317,TRP,A,1,73,UNP,P51449,317,W
1,ALA,A,1,77,?,321,ALA,A,1,77,UNP,P51449,321,A
2,HIS,A,1,78,?,322,HIS,A,1,78,UNP,P51449,322,H
3,LEU,A,1,80,?,324,LEU,A,1,80,UNP,P51449,324,L
4,THR,A,1,81,?,325,THR,A,1,81,UNP,P51449,325,T
5,ILE,A,1,84,?,328,ILE,A,1,84,UNP,P51449,328,I
6,GLN,A,1,85,?,329,GLN,A,1,85,UNP,P51449,329,Q
7,VAL,A,1,88,?,332,VAL,A,1,88,UNP,P51449,332,V
8,LEU,A,1,109,?,353,LEU,A,1,109,UNP,P51449,353,L
9,LYS,A,1,110,?,354,LYS,A,1,110,UNP,P51449,354,K


## Process protein structure

In [11]:
# Build the processed structure that every later step (pockets, features,
# pathways) takes as input, keeping protein chain "A".
# NOTE(review): the residue table below ends at the last chain-A amino acid,
# so waters and ligands appear to be dropped — confirm in get_clean_pdb.
clean_pdb = get_clean_pdb(
    pdb,
    protein_chains=["A"]
)
clean_pdb.residues

Unnamed: 0,label_comp_id,label_asym_id,label_entity_id,label_seq_id,pdbx_PDB_ins_code,auth_seq_id,auth_comp_id,auth_asym_id,pdbx_PDB_model_num,pdbx_label_index,pdbx_sifts_xref_db_name,pdbx_sifts_xref_db_acc,pdbx_sifts_xref_db_num,pdbx_sifts_xref_db_res
0,LEU,A,1,23,?,267,LEU,A,1,23,UNP,P51449,267,L
8,THR,A,1,24,?,268,THR,A,1,24,UNP,P51449,268,T
15,GLU,A,1,25,?,269,GLU,A,1,25,UNP,P51449,269,E
24,ILE,A,1,26,?,270,ILE,A,1,26,UNP,P51449,270,I
32,GLU,A,1,27,?,271,GLU,A,1,27,UNP,P51449,271,E
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1930,LYS,A,1,259,?,503,LYS,A,1,259,UNP,P51449,503,K
1939,GLU,A,1,260,?,504,GLU,A,1,260,UNP,P51449,504,E
1948,LEU,A,1,261,?,505,LEU,A,1,261,UNP,P51449,505,L
1956,PHE,A,1,262,?,506,PHE,A,1,262,UNP,P51449,506,F


## Pockets

In [12]:
pockets = get_pockets(clean_pdb)

pockets

Unnamed: 0,pocket
0,pocket7
1,pocket1
2,pocket2
3,pocket4
4,pocket9
5,pocket10
6,pocket5
7,pocket8
8,pocket3
9,pocket6


## Extra: pocket information

In [13]:
from utils.pocket_utils import Pocket, get_pockets_info
import pandas as pd

In [14]:
path = predict_path
path

'predict'

In [15]:
# Number of residues forming each pocket, and pocket-site overlap fractions (values between 0 and 1)
## site_in_pocket: fraction of the residues of the site that are part of the pocket (when the PDB has several sites, the maximum fraction over all of them)
## pocket_in_site: fraction of the residues of the pocket that are part of the allosteric site (likewise, the maximum fraction over all sites of the PDB)

# Gather the per-pocket records produced by get_pockets_info into a DataFrame.
# This rebinds `pockets`, replacing the table returned by get_pockets() above
# with one that also carries nres / site_in_pocket / pocket_in_site.
pockets = pd.DataFrame((
    pocket
    for pocket in get_pockets_info(
        clean_pdb, 
        sites = ({
            # "mod": site.modulator_residues,
            "site": site.residues
        },),
        pockets_path = path
    )
))

# Display the pockets ordered by their best overlap value: for each row take
# the larger of the two overlap columns, then sort from highest to lowest.
pockets.loc[
    pockets[['site_in_pocket', 'pocket_in_site']].max(axis=1).sort_values(ascending=False).index
]

Unnamed: 0,pdb,pocket,nres,site_in_pocket,pocket_in_site
2,6t4k,pocket2,24,0.727273,1.0
4,6t4k,pocket9,12,0.121212,0.333333
1,6t4k,pocket1,32,0.212121,0.21875
0,6t4k,pocket7,18,0.0,0.0
3,6t4k,pocket4,11,0.0,0.0
5,6t4k,pocket10,8,0.0,0.0
6,6t4k,pocket5,18,0.0,0.0
7,6t4k,pocket8,9,0.0,0.0
8,6t4k,pocket3,12,0.0,0.0
9,6t4k,pocket6,12,0.0,0.0


In [16]:
# Pocket to inspect in detail; change this to any name listed in the table above.
pocket_id = "pocket1"

# Load the pocket from its "<pocket_id>_atm.cif" file inside the
# "<entry_id>_out/pockets" folder written under `path`.
pocket = Pocket(f"{path}/{clean_pdb.entry_id}/{clean_pdb.entry_id}_out/pockets/{pocket_id}_atm.cif")
pocket

<utils.pocket_utils.Pocket at 0x732370a63f10>

In [17]:
pocket.residues.sort_values("label_seq_id")

Unnamed: 0,label_comp_id,label_asym_id,label_seq_id,pdbx_PDB_ins_code,auth_seq_id,auth_asym_id
54,MET,A,114,?,358,A
1,VAL,A,117,?,361,A
6,LEU,A,118,?,362,A
3,ARG,A,120,?,364,A
20,MET,A,121,?,365,A
30,ARG,A,123,?,367,A
31,ALA,A,124,?,368,A
68,VAL,A,132,?,376,A
82,PHE,A,133,?,377,A
15,PHE,A,134,?,378,A


In [18]:
site.residues

Unnamed: 0,label_comp_id,label_asym_id,label_entity_id,label_seq_id,pdbx_PDB_ins_code,auth_seq_id,auth_comp_id,auth_asym_id,pdbx_PDB_model_num,pdbx_label_index,pdbx_sifts_xref_db_name,pdbx_sifts_xref_db_acc,pdbx_sifts_xref_db_num,pdbx_sifts_xref_db_res
0,TRP,A,1,73,?,317,TRP,A,1,73,UNP,P51449,317,W
1,ALA,A,1,77,?,321,ALA,A,1,77,UNP,P51449,321,A
2,HIS,A,1,78,?,322,HIS,A,1,78,UNP,P51449,322,H
3,LEU,A,1,80,?,324,LEU,A,1,80,UNP,P51449,324,L
4,THR,A,1,81,?,325,THR,A,1,81,UNP,P51449,325,T
5,ILE,A,1,84,?,328,ILE,A,1,84,UNP,P51449,328,I
6,GLN,A,1,85,?,329,GLN,A,1,85,UNP,P51449,329,Q
7,VAL,A,1,88,?,332,VAL,A,1,88,UNP,P51449,332,V
8,LEU,A,1,109,?,353,LEU,A,1,109,UNP,P51449,353,L
9,LYS,A,1,110,?,354,LYS,A,1,110,UNP,P51449,354,K


In [19]:
pocket.feats

{'Pocket Score': 0.6382,
 'Drug Score': 0.9502,
 'Number of alpha spheres': 149.0,
 'Mean alpha-sphere radius': 3.6986,
 'Mean alpha-sphere Solvent Acc.': 0.521,
 'Mean B-factor of pocket residues': 0.0775,
 'Hydrophobicity Score': 64.7188,
 'Polarity Score': 8.0,
 'Amino Acid based volume Score': 4.875,
 'Pocket volume (Monte Carlo)': 955.4678,
 'Pocket volume (convex hull)': 617.746,
 'Charge Score': 5.0,
 'Local hydrophobic density Score': 68.7478,
 'Number of apolar alpha sphere': 115.0,
 'Proportion of apolar alpha sphere': 0.7718,
 'Total SASA': 187.486,
 'Polar SASA': 32.277,
 'Apolar SASA': 155.21,
 'Proportion of polar atoms': 20.879,
 'Alpha sphere density': 8.34,
 'Cent. of mass - Alpha Sphere max dist': 20.956,
 'Flexibility': 0.077}

## Features

**ColabFold MSA**

For single predictions, HHBlits' resource-intensive calculations are replaced by an HHM built from a Multiple Sequence Alignment (MSA) computed through ColabFold. To use ColabFold's MSA server, please provide your own email address in the call below, respect [ColabFold's MSA server usage limits](https://github.com/sokrypton/ColabFold#:~:text=Is%20it%20okay%20to%20use%20the%20MMseqs2%20MSA%20server%20(cf.run_mmseqs2)%20on%20a%20local%20computer%3F), and [acknowledge the tool](https://github.com/sokrypton/ColabFold#how-do-i-reference-this-work) appropriately. If you plan to run multiple AlloPockets computations, consider switching to the [local setup of HHBlits](https://github.com/frannerin/AlloPockets#hhblits-for-multiple-predictions).

In [20]:
# Request the MSA for the cleaned structure from ColabFold's MSA server.
# The second argument is the contact email sent with the request: replace the
# placeholder with YOUR OWN address before running this cell, so that server
# usage is attributed to you and not to someone else.
get_colabfold_msa(
    clean_pdb,
    "youremail@yourinstitution.com"
)

### Features

In [21]:
features = get_features(
    clean_pdb
)
features

  0%|          | 0/9 [00:00<?, ?it/s]

Unnamed: 0_level_0,Residues,Residues,Residues,Residues,Residues,Residues,Label,Amino acids,Amino acids,Amino acids,...,HHBlits,HHBlits,HHBlits,HHBlits,HHBlits,HHBlits,HHBlits,HHBlits,HHBlits,HHBlits
Unnamed: 0_level_1,label_entity_id,label_asym_id,label_seq_id,auth_asym_id,auth_seq_id,pdbx_PDB_ins_code,label,label_comp_id_A,label_comp_id_C,label_comp_id_D,...,M->M,M->I,M->D,I->M,I->I,D->M,D->D,Neff,Neff_I,Neff_D
0,1,A,23,A,267,?,0,0,0,0,...,0.997923,0.000000,0.002403,0.000000,0.000000,0.897510,0.102380,10.978,0.000,1.120
1,1,A,24,A,268,?,0,0,0,0,...,1.000000,0.000000,0.000000,0.000000,0.000000,0.448755,0.551335,10.939,0.000,1.018
2,1,A,25,A,269,?,0,0,0,0,...,0.984866,0.009506,0.005513,0.384219,0.615572,0.000000,1.000000,10.947,1.096,1.004
3,1,A,26,A,270,?,0,0,0,0,...,0.946714,0.030543,0.022546,0.083853,0.916369,0.576743,0.423373,10.897,1.249,1.047
4,1,A,27,A,271,?,0,0,0,0,...,0.966606,0.031142,0.002543,1.000000,0.000000,0.609205,0.390664,10.925,1.232,1.204
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
235,1,A,259,A,503,?,0,0,0,0,...,1.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,11.097,0.000,0.000
236,1,A,260,A,504,?,0,0,0,0,...,0.993092,0.004938,0.001640,0.500000,0.500000,0.000000,0.000000,11.080,1.028,0.000
237,1,A,261,A,505,?,0,0,0,0,...,0.998615,0.001052,0.000000,0.166662,0.833353,1.000000,0.000000,11.067,1.000,1.001
238,1,A,262,A,506,?,0,0,0,0,...,0.997231,0.002629,0.000000,0.500000,0.500000,0.000000,0.000000,11.048,1.000,0.000


In [22]:
pockets_features = get_pockets_features(
    clean_pdb,
    pockets,
    features
)
pockets_features

Unnamed: 0_level_0,Pockets,Pockets,Pockets,Pockets,Pockets,FPocket,FPocket,FPocket,FPocket,FPocket,...,HHBlits,HHBlits,HHBlits,HHBlits,HHBlits,HHBlits,HHBlits,HHBlits,HHBlits,HHBlits
Unnamed: 0_level_1,pdb,pocket,nres,site_in_pocket,pocket_in_site,Pocket Score,Drug Score,Number of alpha spheres,Mean alpha-sphere radius,Mean alpha-sphere Solvent Acc.,...,M->M,M->I,M->D,I->M,I->I,D->M,D->D,Neff,Neff_I,Neff_D
0,6t4k,pocket7,18,0.0,0.0,0.1011,0.0012,75.0,3.6407,0.5436,...,0.961026,0.028474,0.010513,0.480596,0.519415,0.155223,0.844801,11.4545,1.520778,2.286333
1,6t4k,pocket1,32,0.212121,0.21875,0.6382,0.9502,149.0,3.6986,0.521,...,0.968324,0.020841,0.010762,0.274419,0.694345,0.184908,0.815107,11.444906,1.343313,1.958094
2,6t4k,pocket2,24,0.727273,1.0,0.4838,0.942,171.0,3.6925,0.5051,...,0.968406,0.007469,0.024081,0.282205,0.634529,0.29919,0.617445,11.287542,0.987292,1.77825
3,6t4k,pocket4,11,0.0,0.0,0.2847,0.0029,49.0,3.4688,0.5294,...,0.935668,0.059577,0.004758,0.309087,0.690864,0.197387,0.802642,11.421727,1.784909,1.737455
4,6t4k,pocket9,12,0.121212,0.333333,0.0635,0.0188,49.0,3.6567,0.5735,...,0.984276,0.011062,0.004626,0.269269,0.730794,0.223954,0.692751,11.442833,1.198417,1.38675
5,6t4k,pocket10,8,0.0,0.0,-0.0422,0.0012,38.0,3.6787,0.6109,...,0.968751,0.02811,0.003067,0.29381,0.706142,0.23099,0.768978,11.45225,1.517625,1.699
6,6t4k,pocket5,18,0.0,0.0,0.1913,0.0119,63.0,3.5531,0.5207,...,0.927862,0.053235,0.018876,0.450215,0.549761,0.174956,0.825026,11.451389,1.569611,1.834833
7,6t4k,pocket8,9,0.0,0.0,0.0946,0.0053,52.0,3.486,0.496,...,0.93808,0.042656,0.019295,0.212082,0.787962,0.186315,0.813731,11.404222,1.780111,2.979556
8,6t4k,pocket3,12,0.0,0.0,0.3352,0.0126,48.0,3.2805,0.3951,...,0.991962,0.003926,0.004061,0.344954,0.571738,0.164925,0.835094,11.434917,0.986667,1.372333
9,6t4k,pocket6,12,0.0,0.0,0.1324,0.002,61.0,3.5795,0.5048,...,0.989134,0.006043,0.004753,0.223516,0.359738,0.1542,0.845776,11.357833,0.644,1.3205


## Predict

In [23]:
data = prepare_data(pockets_features)
data

Unnamed: 0,FPocket_Pocket Score,FPocket_Drug Score,FPocket_Number of alpha spheres,FPocket_Mean alpha-sphere radius,FPocket_Mean alpha-sphere Solvent Acc.,FPocket_Mean B-factor of pocket residues,FPocket_Hydrophobicity Score,FPocket_Polarity Score,FPocket_Amino Acid based volume Score,FPocket_Pocket volume (Monte Carlo),...,HHBlits_M->M,HHBlits_M->I,HHBlits_M->D,HHBlits_I->M,HHBlits_I->I,HHBlits_D->M,HHBlits_D->D,HHBlits_Neff,HHBlits_Neff_I,HHBlits_Neff_D
6t4k_pocket7,0.1011,0.0012,75.0,3.6407,0.5436,0.2026,30.4444,10.0,4.2222,582.7361,...,0.961026,0.028474,0.010513,0.480596,0.519415,0.155223,0.844801,11.4545,1.520778,2.286333
6t4k_pocket1,0.6382,0.9502,149.0,3.6986,0.521,0.0775,64.7188,8.0,4.875,955.4678,...,0.968324,0.020841,0.010762,0.274419,0.694345,0.184908,0.815107,11.444906,1.343313,1.958094
6t4k_pocket2,0.4838,0.942,171.0,3.6925,0.5051,0.2603,61.2083,7.0,4.5,704.4867,...,0.968406,0.007469,0.024081,0.282205,0.634529,0.29919,0.617445,11.287542,0.987292,1.77825
6t4k_pocket4,0.2847,0.0029,49.0,3.4688,0.5294,0.0798,17.0,8.0,4.2727,396.8231,...,0.935668,0.059577,0.004758,0.309087,0.690864,0.197387,0.802642,11.421727,1.784909,1.737455
6t4k_pocket9,0.0635,0.0188,49.0,3.6567,0.5735,0.324,12.8333,6.0,4.3333,515.6422,...,0.984276,0.011062,0.004626,0.269269,0.730794,0.223954,0.692751,11.442833,1.198417,1.38675
6t4k_pocket10,-0.0422,0.0012,38.0,3.6787,0.6109,0.2755,11.75,6.0,3.75,287.1128,...,0.968751,0.02811,0.003067,0.29381,0.706142,0.23099,0.768978,11.45225,1.517625,1.699
6t4k_pocket5,0.1913,0.0119,63.0,3.5531,0.5207,0.1777,33.6111,8.0,4.7778,483.1293,...,0.927862,0.053235,0.018876,0.450215,0.549761,0.174956,0.825026,11.451389,1.569611,1.834833
6t4k_pocket8,0.0946,0.0053,52.0,3.486,0.496,0.1498,29.1111,4.0,4.1111,346.5158,...,0.93808,0.042656,0.019295,0.212082,0.787962,0.186315,0.813731,11.404222,1.780111,2.979556
6t4k_pocket3,0.3352,0.0126,48.0,3.2805,0.3951,0.2059,50.5,5.0,4.25,192.4231,...,0.991962,0.003926,0.004061,0.344954,0.571738,0.164925,0.835094,11.434917,0.986667,1.372333
6t4k_pocket6,0.1324,0.002,61.0,3.5795,0.5048,0.2302,6.9167,8.0,4.25,606.6791,...,0.989134,0.006043,0.004753,0.223516,0.359738,0.1542,0.845776,11.357833,0.644,1.3205


In [24]:
# Predict class probabilities for every pocket, keep only column 1 and rank
# the pockets from highest to lowest value.
# NOTE(review): column 1 is presumably the probability of the positive
# (allosteric) class — confirm against the model's class ordering.
preds = model.predict_proba(data)[[1]].sort_values(1, ascending=False)
preds

Unnamed: 0,1
6t4k_pocket2,0.901122
6t4k_pocket1,0.648326
6t4k_pocket5,0.02459
6t4k_pocket7,0.001177
6t4k_pocket6,0.000759
6t4k_pocket4,0.000559
6t4k_pocket3,0.000344
6t4k_pocket9,0.00019
6t4k_pocket8,0.000141
6t4k_pocket10,7.4e-05


### View

In [25]:
v = view_pockets(
    clean_pdb,
    pockets={"pocket2": {"color": "green"}, "pocket1": {"color": "blue"}}, # {"pocketn": {"color": ""}}
    site_residues=site.residues,
    modulator_residues=site.modulator_residues,
)
v

PDBeMolstar(bg_color='#F7F7F7', color_data={'data': [{'struct_asym_id': 'A', 'representation': 'cartoon', 'rep…

### Optional: view allosteric network pathways

**View the allosteric pathways originating from the predicted pocket towards spatially distant residues in the structure.**

The allosteric network can be computed with either [correlationplus](https://github.com/tekpinar/correlationplus) (Dynamical Cross-Correlation from Elastic Network Models) or ProDy's [Perturbation Response Scanning](http://www.bahargroup.org/prody/prs/). The shortest paths between the residues of the source pocket and other, spatially distant residues (at least a given distance threshold away) are calculated with Dijkstra’s algorithm using [NetworkX](https://networkx.org/documentation/stable/reference/algorithms/generated/networkx.algorithms.shortest_paths.weighted.multi_source_dijkstra.html).

In [27]:
paths, G = get_pathways(
    clean_pdb.entry_id,
    pathways="correlationplus", # or 'prs'
    source_pocket="pocket2",
    pathway_dist_threshold=20,
    top_pathways=10
)

2025-07-09 14:37:08,011 - .prody - DEBUG - 1974 atoms and 1 coordinate set(s) were parsed in 0.03s.
2025-07-09 14:37:08,099 - .prody - DEBUG - Hessian was built in 0.08s.
2025-07-09 14:37:11,470 - .prody - DEBUG - 100 modes were calculated in 3.37s.


In [35]:
# The pathway number and rank are stored in 'label_asym_id',
# the order of the residues in the pathway in 'label_seq_id',
# and the path length (lower is "better") in 'occupancy'
paths.set_index(
    ["label_asym_id", "occupancy", "label_seq_id"]
)[
    ["auth_asym_id", "auth_comp_id", "auth_seq_id", 'pdbx_PDB_ins_code']
]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,auth_asym_id,auth_comp_id,auth_seq_id,pdbx_PDB_ins_code
label_asym_id,occupancy,label_seq_id,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
P104_top1,0.889891,1,A,MET,358,?
P104_top1,0.889891,2,A,VAL,361,?
P104_top1,0.889891,3,A,ARG,364,?
P104_top1,0.889891,4,A,MET,365,?
P104_top1,0.889891,5,A,TYR,369,?
P104_top1,0.889891,6,A,ASN,370,?
P104_top1,0.889891,7,A,ALA,371,?
P143_top2,0.898231,1,A,MET,358,?
P143_top2,0.898231,2,A,VAL,361,?
P143_top2,0.898231,3,A,VAL,363,?


In [36]:
# The function also returns the NetworkX Graph, storing in each edge the crosscorrelation 'value' calculated by correlationplus (higher is "more important"), and its transformation to a distance (shorter/lower is "more important")
G.edges(data=True)

EdgeDataView([(0, 1, {'value': 0.2503965031864735, 'distance': 1.3847096017702873}), (0, 2, {'value': 0.15535782691534972, 'distance': 1.8620242565142662}), (0, 3, {'value': 0.15544896654486615, 'distance': 1.8614377852178858}), (0, 4, {'value': 0.1273717805890785, 'distance': 2.0606450559688443}), (0, 5, {'value': 0.13196866543035582, 'distance': 2.0251907611014035}), (0, 6, {'value': 0.12733149165663377, 'distance': 2.060961415724329}), (0, 7, {'value': 0.11387590522732688, 'distance': 2.1726459663451894}), (0, 8, {'value': 0.1489624966617311, 'distance': 1.904060699262243}), (0, 9, {'value': 0.09776956382145939, 'distance': 2.325141949518143}), (0, 10, {'value': -0.011292062693835295, 'distance': 4.483655128990853}), (0, 11, {'value': -0.022503339841244533, 'distance': 3.7940914999610085}), (0, 12, {'value': -0.01559452086515476, 'distance': 4.160835589888711}), (0, 13, {'value': -0.05212622847877872, 'distance': 2.95408701301849}), (0, 14, {'value': -0.10283810842985885, 'distance'

In [38]:
view_pockets_pathways(
    clean_pdb, 
    pathways="correlationplus", # or 'prs'
    source_pocket="pocket2", # Source pocket to calculate paths to/from
    pathway_dist_threshold=20, # Minimum distance between residues to calculate paths from
    n_top_pathways=10, # Number of pathways to view in the structure
    pockets={"pocket2": {"color": "green"}}, # {"pocketn": {"color": ""}} # Same dictionary of pockets as above to visualize pockets alongside paths
    site_residues=site.residues,
    modulator_residues=site.modulator_residues,
)

2025-07-09 14:43:20,060 - .prody - DEBUG - 1974 atoms and 1 coordinate set(s) were parsed in 0.03s.


Pathway #1 (blue): A:MET:358, A:VAL:361, A:ARG:364, A:MET:365, A:TYR:369, A:ASN:370, A:ALA:371
Pathway #2 (cyan): A:MET:358, A:VAL:361, A:VAL:363, A:LEU:407, A:LEU:410
Pathway #3 (green): A:MET:358, A:VAL:361, A:ARG:364, A:CYS:366, A:SER:408, A:HIS:411
Pathway #4 (yellow): A:MET:358, A:VAL:360, A:SER:413, A:ILE:417, A:THR:421
Pathway #5 (red): A:LYS:354, A:ALA:449
Pathway #6 (purple): A:MET:358, A:VAL:361, A:ARG:364, A:MET:365, A:TYR:369, A:ASN:370, A:ASN:373
Pathway #7 (grey): A:MET:358, A:VAL:360, A:LEU:419, A:THR:421, A:PHE:450, A:HIS:451
Pathway #8 (light blue): A:LEU:293, A:MET:358, A:VAL:361, A:ARG:364, A:MET:365, A:ALA:368
Pathway #9 (light cyan): A:LYS:354, A:LEU:448, A:ALA:449
Pathway #10 (mint green): A:ARG:296, A:MET:358, A:VAL:361, A:ARG:364, A:MET:365, A:TYR:369, A:ASN:370


PDBeMolstar(bg_color='#F7F7F7', color_data={'data': [{'struct_asym_id': 'A', 'representation': 'cartoon', 'rep…