# Plinder Tutorial
## Data Processing Pipeline
Loads Plinder systems into a dictionary of NumPy arrays.


In [None]:
# Environment setup. Feel free to skip if your kernel already runs from the OMTRA
# repository root with PLINDER_MOUNT pointing at a local plinder copy.
# NOTE(review): both paths below are absolute cluster paths under specific user
# home directories; adjust them for your own checkout / data location.
%cd /net/galaxy/home/koes/nodonkor/OMTRA
%env PLINDER_MOUNT=/net/galaxy/home/koes/tjkatz/.local/share/plinder/2024-06/v2

# The commands below were run in a terminal (not from this notebook) to confirm
# zarr storage of the per-residue embeddings; they are kept for reference only.
#export PLINDER_MOUNT="/net/galaxy/home/koes/tjkatz/.local/share/plinder/2024-06/v2"
#PYTHONPATH=/net/galaxy/home/koes/nodonkor/OMTRA python /net/galaxy/home/koes/nodonkor/OMTRA/omtra_pipelines/plinder_dataset/store_unlinked_structures.py --data /net/galaxy/home/koes/nodonkor/OMTRA/omtra_pipelines/plinder_dataset/test.parquet --split train --output plinder/apo/train.zarr --num_systems 3 --embeddings
#PYTHONPATH=/net/galaxy/home/koes/nodonkor/OMTRA python /net/galaxy/home/koes/nodonkor/OMTRA/omtra_pipelines/plinder_dataset/store_linked_structures.py --data /net/galaxy/home/koes/nodonkor/OMTRA/omtra_pipelines/plinder_dataset/test.parquet --split train --type apo --output plinder/apo/train.zarr  --num_systems 3 --embeddings

/net/galaxy/home/koes/nodonkor/OMTRA
env: PLINDER_MOUNT=/net/galaxy/home/koes/tjkatz/.local/share/plinder/2024-06/v2


In [9]:
from omtra_pipelines.plinder_dataset.plinder_pipeline import *
import plinder.core.utils.config
from plinder.core import PlinderSystem
from omtra_pipelines.plinder_dataset.utils import NPNDE_MAP, LIGAND_MAP
from omtra.constants import aa_substitutions, residue_to_single

%set_env TOKENIZERS_PARALLELISM=false
import numpy as np
import torch

from esm.models.esm3 import ESM3
from esm.sdk.api import (
    ESM3InferenceClient,
    ESMProtein,
    LogitsConfig,
    LogitsOutput,
    )
from esm.utils.structure.protein_chain import ProteinChain
from esm.utils.structure.protein_complex import ProteinComplex
# TODO: update this tutorial

env: TOKENIZERS_PARALLELISM=false


In [7]:
import pandas as pd

# Input: the filtered plinder index. Output: a small subset used by the pipeline scripts.
filtered_index_path = '/net/galaxy/home/koes/nodonkor/OMTRA/omtra_pipelines/plinder_dataset/plinder_filtered.parquet'
test_subset_path = '/net/galaxy/home/koes/nodonkor/OMTRA/omtra_pipelines/plinder_dataset/test.parquet'

# Load the full index and list the columns it provides.
df = pd.read_parquet(filtered_index_path)
print(df.columns)

# Keep the first 200 rows whose `apo_ids` entry is not null.
has_apo_ids = df['apo_ids'].notna()
testdf = df.loc[has_apo_ids].head(200)
print(testdf.shape)

# Persist the subset so the storage scripts can be pointed at it via --data.
testdf.to_parquet(test_subset_path)

Index(['system_id', 'ligand_id', 'ligand_type', 'ccd_code', 'split', 'apo_ids',
       'pred_ids', 'plip_type', 'num_heavy_atoms',
       'num_unresolved_heavy_atoms', 'frac_within_4A_receptor', 'is_covalent',
       'is_ion', 'is_artifact', 'is_cofactor', 'is_fragment',
       'crystal_contacts', 'num_interacting_res', 'volume_overlap_protein',
       'volume_overlap_organic_cofactors',
       'volume_overlap_inorganic_cofactors', 'determination_method',
       'resolution', 'r', 'rfree', 'r_minus_rfree', 'num_pocket_atoms',
       'num_pharmacophores'],
      dtype='object')
(200, 28)


The script will download the data from the remote data directory if it is not found in the local cache directory. 

Plinder data can be found on the cluster in `/net/galaxy/home/koes/tjkatz/.local/share/plinder/2024-06/v2`.

In [3]:
# Fetch the active plinder configuration and print the two data locations it holds:
# the local cache directory and the remote data directory.
cfg = plinder.core.get_config()
print(f"local cache directory: {cfg.data.plinder_dir}")
print(f"remote data directory: {cfg.data.plinder_remote}")

local cache directory: /net/galaxy/home/koes/tjkatz/.local/share/plinder/2024-06/v2/plinder/2024-06/v2
remote data directory: gs://plinder/2024-06/v2


The `SystemProcessor` class takes in an `atom_map` and optionally a `pocket_cutoff`, which by default is set to 5A. 

The `process_system` call will convert the receptor and any linked apo structures into `StructureData` objects, which include numpy arrays of the atom coordinates, atom names, residue ids, residue names, and chain ids. 

For each ligand, a pocket will be extracted and returned as a `StructureData` object (and optionally saved as a PDB file). 

The ligands will be returned as `LigandData` objects with coords, atom_types, atom_charges, bond_types, and bond_indices as numpy arrays. 

The entire system is returned as a dictionary with the following keys: 'receptor', 'ligands', 'pockets', 'apo_structures', 'entry_annotation', 'system_annotation'. Ligands and pockets are dictionaries indexed by the plinder ligand ID and apo structures are indexed by the plinder linked structure id. Entry annotation corresponds to the PDB entry-level annotation stored in plinder and the system annotation is that at the system level. 

In [None]:
# Build a processor for one plinder system and run it.
# - ligand_atom_map / npnde_atom_map: atom-type maps imported from
#   omtra_pipelines.plinder_dataset.utils.
# - pocket_cutoff is set explicitly to 8.0 here (the prose above quotes a 5A default).
# - link_type="apo": presumably restricts linked structures to apo ones —
#   TODO confirm against SystemProcessor.
system_processor = SystemProcessor(
    ligand_atom_map=LIGAND_MAP,
    npnde_atom_map=NPNDE_MAP,
    pocket_cutoff=8.0,
    link_type="apo",
    system_id="7gcm__1__1.A__1.D"
)
# `result` is inspected by the cells further down.
result = system_processor.process_system()

In [9]:
import zarr

# Open the processed apo training store read-only (it was written with --num_systems).
zarr_root = zarr.open_group("/net/galaxy/home/koes/nodonkor/OMTRA/plinder/apo/train.zarr", mode="r")

# Per-system index ranges live in the group attributes.
system_lookup = zarr_root.attrs["system_lookup"]

# Arrays holding the pocket backbone coordinates and the pocket embeddings.
pocket_bb = zarr_root['pocket/backbone_coords']
pocket_embeddings = zarr_root["pocket/embeddings"]

# Pick the lookup entry at index 2 (an entry could also be located by system_id).
system_entry = system_lookup[2]
print(system_entry)

# Turn the stored start/end offsets into slices and pull out this system's rows.
bb_rows = slice(system_entry["pocket_bb_start"], system_entry["pocket_bb_end"])
emb_rows = slice(system_entry["embeddings_start"], system_entry["embeddings_end"])
pocket_backbone = pocket_bb[bb_rows]
embedding = pocket_embeddings[emb_rows]

# The embedding should have two more rows than the pocket backbone
# (+2 for the embedding's special start/end positions).
print("Embedding shape:", embedding.shape, "\n Pocket length:", len(pocket_backbone))
print(embedding.shape[0] == len(pocket_backbone)+2)

{'system_id': '4c22__1__3.B__3.N', 'ligand_id': '3.N', 'system_idx': 2, 'linkages': None, 'ccd': 'FUC', 'link_type': None, 'lig_sdf': 'plinder/2024-06/v2/systems/4c22__1__3.B__3.N/ligand_files/3.N.sdf', 'rec_cif': 'plinder/2024-06/v2/systems/4c22__1__3.B__3.N/receptor.cif', 'npnde_idxs': None, 'rec_start': 4846, 'rec_end': 9578, 'backbone_start': 613, 'backbone_end': 1217, 'lig_atom_start': 53, 'lig_atom_end': 64, 'lig_bond_start': 55, 'lig_bond_end': 66, 'pocket_start': 730, 'pocket_end': 924, 'pocket_bb_start': 93, 'pocket_bb_end': 115, 'embeddings_start': 97, 'embeddings_end': 121, 'pharm_start': 35, 'pharm_end': 45}
Embedding shape: (24, 1536) 
 Pocket length: 22
True


In [None]:
%run /net/galaxy/home/koes/nodonkor/OMTRA/omtra_pipelines/plinder_dataset/store_linked_structures.py --data /net/galaxy/home/koes/nodonkor/OMTRA/omtra_pipelines/plinder_dataset/test.parquet --split train --type apo --output plinder/apo/train.zarr  --embeddings




2025-04-25 03:07:17,236 | __main__:77 | INFO : Starting processing with 1 CPUs, max_pending=2
  return cls(**configuration_parsed)
  return cls(**configuration_parsed)
  return cls(**configuration_parsed)
  return cls(**configuration_parsed)
  return cls(**configuration_parsed)
  return cls(**configuration_parsed)
  return cls(**configuration_parsed)
  return cls(**configuration_parsed)
  return cls(**configuration_parsed)
  return cls(**configuration_parsed)
  return cls(**configuration_parsed)
  return cls(**configuration_parsed)
2025-04-25 03:07:18,560 | omtra_pipelines.plinder_dataset.plinder_links_zarr:643 | INFO : Processing 35013 systems with 1 workers
Processing system batches:   0%|          | 0/176 [00:00<?, ?batches/s]2025-04-25 03:07:18,576 | omtra_pipelines.plinder_dataset.plinder_links_zarr:659 | INFO : Processing batch 1/176 with 200 systems


AttributeError: module '__main__' has no attribute '__spec__'

In [5]:
# Show the top-level keys of `result` (the value returned by process_system()).
print(result.keys())

dict_keys(['apo', 'links', 'annotation'])


In [10]:
# Shared LogitsConfig passed to `model.logits` by the embedding helpers below:
# embeddings are requested (return_embeddings=True) while the `sequence` and
# `return_hidden_states` options are switched off.
EMBEDDING_CONFIG = LogitsConfig(
    sequence=False, return_embeddings=True, return_hidden_states=False
)

def embed_protein_complex(model: ESM3InferenceClient, protein_complex: ProteinComplex) -> np.ndarray:
    """Embed a multi-chain protein complex with ESM3.

    Args:
        model: ESM3 model. It is moved to the GPU for the forward pass when CUDA is
            available and moved back to the CPU before returning.
        protein_complex: Complex to embed.

    Returns:
        ``output.embeddings`` from ``model.logits`` (called with ``EMBEDDING_CONFIG``),
        copied to the CPU and converted to a NumPy array.
    """
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda") if use_cuda else torch.device("cpu")
    model = model.to(device)

    try:
        protein = ESMProtein.from_protein_complex(protein_complex)
        protein_tensor = model.encode(protein)
        output = model.logits(protein_tensor, EMBEDDING_CONFIG)
        return output.embeddings.cpu().numpy()
    finally:
        # Always hand the model back on the CPU, even when encoding/inference raised,
        # so a failed call does not leave it occupying GPU memory.
        if use_cuda:
            model.to(torch.device("cpu"))

def embed_chain(model: ESM3InferenceClient, protein_chain: ProteinChain) -> np.ndarray:
    """Embed a single protein chain with ESM3.

    Args:
        model: ESM3 model. It is moved to the GPU for the forward pass when CUDA is
            available and moved back to the CPU before returning.
        protein_chain: Chain to embed.

    Returns:
        ``output.embeddings`` from ``model.logits`` (called with ``EMBEDDING_CONFIG``),
        copied to the CPU and converted to a NumPy array.
    """
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda") if use_cuda else torch.device("cpu")
    model = model.to(device)

    try:
        protein = ESMProtein.from_protein_chain(protein_chain)
        protein_tensor = model.encode(protein)
        output = model.logits(protein_tensor, EMBEDDING_CONFIG)
        return output.embeddings.cpu().numpy()
    finally:
        # Always hand the model back on the CPU, even when encoding/inference raised,
        # so a failed call does not leave it occupying GPU memory.
        if use_cuda:
            model.to(torch.device("cpu"))



In [11]:
def _residue_to_letter(name):
    """Map one residue name to the symbol placed in the ESM sequence string.

    Names listed in ``aa_substitutions`` use their substitution; every other name is
    looked up in ``residue_to_single`` and falls back to the 'UNK' entry when missing.
    """
    if name in aa_substitutions:
        # assumes aa_substitutions values can be placed in the sequence as-is
        # (the previous code appended them unchanged) — TODO confirm
        return aa_substitutions[name]
    try:
        return residue_to_single[name]
    except KeyError:
        return residue_to_single['UNK']


def ESM3_embed(res_name, chain_id, backbone_data, bb_mask):
    """Compute ESM3 embeddings for a pocket from its backbone atoms.

    Args:
        res_name: Per-atom residue names (NumPy array).
        chain_id: Per-atom chain ids (NumPy array), aligned with ``res_name``.
        backbone_data: Backbone coordinates indexed per residue along the first axis.
            Assumed to be an array of shape (n_residues, 3, 3), which is what
            ``ProteinChain.from_backbone_atom_coordinates`` is given — TODO confirm.
        bb_mask: Mask/index selecting the backbone atoms in ``res_name`` and
            ``chain_id``. Assumed to select exactly three atoms per residue, so every
            third selected entry starts a new residue.

    Returns:
        The NumPy embeddings returned by ``embed_chain`` when the pocket has at most
        one chain, otherwise by ``embed_protein_complex`` with one ``ProteinChain``
        per chain id (in order of first appearance).
    """
    # SECURITY: a credential-like string (it looked like a Hugging Face access token)
    # used to be pasted here as a comment. It has been removed; revoke that token and
    # authenticate through the environment / `huggingface-cli login` instead.
    model = ESM3.from_pretrained("esm3-open", device=torch.device("cpu"))

    # Reduce the per-atom backbone annotations to one entry per residue.
    residue_names = res_name[bb_mask][::3]
    chain_ids = chain_id[bb_mask][::3]
    coords = backbone_data

    # Distinct chain ids, keeping the order in which they first occur.
    unique_chains = list(dict.fromkeys(chain_ids.tolist()))

    if len(unique_chains) <= 1:
        chain_seq = ''.join(_residue_to_letter(name) for name in residue_names)
        chain = ProteinChain.from_backbone_atom_coordinates(coords, sequence=chain_seq)
        return embed_chain(model, chain)

    # Several chains: build one ProteinChain per chain id from that chain's own
    # residues and coordinates, then embed them together as a complex.
    esm_chains = []
    for chain in unique_chains:
        residue_idx = np.where(chain_ids == chain)[0]
        chain_seq = ''.join(_residue_to_letter(name) for name in residue_names[residue_idx])
        esm_chains.append(
            ProteinChain.from_backbone_atom_coordinates(coords[residue_idx], sequence=chain_seq)
        )

    protein_complex = ProteinComplex.from_chains(esm_chains)
    return embed_protein_complex(model, protein_complex)

In [12]:
#this should match the embeddings generated with the local implement of the ESM3 embed function
print(result["apo"]['7k3t_A'][0].pocket.pocket_embedding)
print(result["apo"]['7k3t_A'][0].pocket.pocket_embedding.shape)

#print(result["apo"]['7k3t_A'][0].pocket.backbone.coords[0])
#print(result["apo"]['7k3t_A'][0].pocket.chain_ids)
#print(result["apo"]['7k3t_A'][0].pocket.residue_names)

embedding = ESM3_embed(result["apo"]['7k3t_A'][0].pocket.res_names, result["apo"]['7k3t_A'][0].pocket.chain_ids, result["apo"]['7k3t_A'][0].pocket.backbone.coords, result["apo"]['7k3t_A'][0].pocket.backbone_mask)
print(embedding)
print(embedding.shape)

[[[  57.46649     12.286621   -43.25647   ...  -64.02086     46.0403
     44.52671  ]
  [ 241.72176     20.567871    -6.1555786 ... -260.55695    201.69476
     93.41516  ]
  [ 149.90399   -185.30414     53.811676  ... -180.73975    104.03235
    132.5448   ]
  ...
  [  66.27484     65.163574    70.37598   ... -167.59225     51.625427
     72.36229  ]
  [  53.55011   -108.667175   107.80731   ... -116.176636    16.85482
    -58.165955 ]
  [ -37.105103     4.1192627   42.01111   ... -105.40344     36.250763
     68.942505 ]]]
(1, 50, 1536)


  with torch.no_grad(), torch.cuda.amp.autocast(enabled=False):  # type: ignore


[[[  57.46649     12.286621   -43.25647   ...  -64.02086     46.0403
     44.52671  ]
  [ 241.72176     20.567871    -6.1555786 ... -260.55695    201.69476
     93.41516  ]
  [ 149.90399   -185.30414     53.811676  ... -180.73975    104.03235
    132.5448   ]
  ...
  [  66.27484     65.163574    70.37598   ... -167.59225     51.625427
     72.36229  ]
  [  53.55011   -108.667175   107.80731   ... -116.176636    16.85482
    -58.165955 ]
  [ -37.105103     4.1192627   42.01111   ... -105.40344     36.250763
     68.942505 ]]]
(1, 50, 1536)


In [5]:
# Print the processed entry stored under one plinder system id.
# NOTE(review): the saved output (a list of SystemData) appears to come from an
# earlier run; `print(result.keys())` elsewhere in this notebook lists only
# 'apo', 'links', 'annotation' — confirm this key still exists before relying on it.
print(result["7ueu__1__1.A_2.A__2.C_2.D_2.E"])

[SystemData(system_id='7ueu__1__1.A_2.A__2.C_2.D_2.E', ligand_id='2.C', receptor=StructureData(coords=array([[-31.115, -33.887,  19.519],
       [-30.972, -33.173,  18.258],
       [-31.051, -31.662,  18.47 ],
       ...,
       [-39.928,  31.565, -28.984],
       [-39.109,  29.682, -30.212],
       [-39.605,  30.221, -29.039]], dtype=float32), atom_names=array(['N', 'CA', 'C', ..., 'CE1', 'CE2', 'CZ'], dtype='<U6'), res_ids=array([ 19,  19,  19, ..., 377, 377, 377]), res_names=array(['SER', 'SER', 'SER', ..., 'PHE', 'PHE', 'PHE'], dtype='<U5'), chain_ids=array(['A', 'A', 'A', ..., 'B', 'B', 'B'], dtype='<U4'), cif='systems/7ueu__1__1.A_2.A__2.C_2.D_2.E/receptor.cif'), ligand=LigandData(coords=array([[-27.306,  13.629, -21.888],
       [-27.091,  13.638, -23.552],
       [-28.361,  14.366, -24.192],
       [-25.82 ,  14.244, -23.951],
       [-27.06 ,  12.1  , -23.978],
       [-27.123,  11.432, -25.438],
       [-28.545,  11.036, -25.787],
       [-26.538,  12.313, -26.525],
       [-

In [24]:
# Print the receptor entry of the processed system (a StructureData in the saved output).
# NOTE(review): `print(result.keys())` elsewhere in this notebook lists only
# 'apo', 'links', 'annotation' — this cell may target an earlier return format; confirm.
print(result["receptor"])

StructureData(coords=array([[-31.115, -33.887,  19.519],
       [-30.972, -33.173,  18.258],
       [-31.051, -31.662,  18.47 ],
       ...,
       [-39.928,  31.565, -28.984],
       [-39.109,  29.682, -30.212],
       [-39.605,  30.221, -29.039]], dtype=float32), atom_names=array(['N', 'CA', 'C', ..., 'CE1', 'CE2', 'CZ'], dtype='<U6'), res_ids=array([ 19,  19,  19, ..., 377, 377, 377]), res_names=array(['SER', 'SER', 'SER', ..., 'PHE', 'PHE', 'PHE'], dtype='<U5'), chain_ids=array(['A', 'A', 'A', ..., 'B', 'B', 'B'], dtype='<U4'), cif='systems/7ueu__1__1.A_2.A__2.C_2.D_2.E/receptor.cif')


In [25]:
# Display the ligands of the processed system: a dict of LigandData keyed by
# plinder ligand ID (e.g. '2.C' in the saved output).
# NOTE(review): `print(result.keys())` elsewhere in this notebook lists only
# 'apo', 'links', 'annotation' — this cell may target an earlier return format; confirm.
result["ligands"]

{'2.C': LigandData(coords=array([[-27.306,  13.629, -21.888],
        [-27.091,  13.638, -23.552],
        [-28.361,  14.366, -24.192],
        [-25.82 ,  14.244, -23.951],
        [-27.06 ,  12.1  , -23.978],
        [-27.123,  11.432, -25.438],
        [-28.545,  11.036, -25.787],
        [-26.538,  12.313, -26.525],
        [-26.257,  10.089, -25.294],
        [-25.006,  10.171, -24.597],
        [-24.43 ,   8.756, -24.475],
        [-24.156,   8.227, -25.789],
        [-23.078,   8.716, -23.729],
        [-23.312,   8.529, -22.336],
        [-22.423,   7.443, -24.317],
        [-22.882,   6.282, -23.624],
        [-23.032,   7.337, -25.729],
        [-22.164,   7.763, -26.811],
        [-22.302,   7.345, -28.104],
        [-21.403,   7.913, -28.852],
        [-20.663,   8.724, -28.078],
        [-21.13 ,   8.656, -26.761],
        [-20.541,   9.382, -25.819],
        [-19.524,  10.168, -26.127],
        [-19.053,  10.244, -27.368],
        [-19.59 ,   9.55 , -28.37 ],
        [-18.

In [26]:
# Display the "npndes" entry: in the saved output, a dict of LigandData keyed by
# ligand ID ('2.D', '2.E').
# NOTE(review): `print(result.keys())` elsewhere in this notebook lists only
# 'apo', 'links', 'annotation' — this cell may target an earlier return format; confirm.
result["npndes"]

{'2.D': LigandData(coords=array([[-30.3033,  14.3827, -24.0104]], dtype=float32), atom_types=array([43]), atom_charges=array([2], dtype=int32), bond_types=array([], dtype=int32), bond_indices=array([], shape=(0, 2), dtype=int64), sdf='systems/7ueu__1__1.A_2.A__2.C_2.D_2.E/ligand_files/2.D.sdf'),
 '2.E': LigandData(coords=array([[-18.115,  12.792, -31.79 ],
        [-17.439,  11.838, -30.959],
        [-19.394,  13.258, -31.103],
        [-20.277,  12.143, -30.931]], dtype=float32), atom_types=array([0, 2, 0, 2]), atom_charges=array([0, 0, 0, 0], dtype=int32), bond_types=array([1, 1, 1], dtype=int32), bond_indices=array([[0, 1],
        [0, 2],
        [2, 3]]), sdf='systems/7ueu__1__1.A_2.A__2.C_2.D_2.E/ligand_files/2.E.sdf')}

In [27]:
# Display the extracted pockets: a dict of StructureData keyed by plinder ligand ID.
# NOTE(review): `print(result.keys())` elsewhere in this notebook lists only
# 'apo', 'links', 'annotation' — this cell may target an earlier return format; confirm.
result["pockets"]

{'2.C': StructureData(coords=array([[-23.757,   7.527, -19.474],
        [-22.557,   8.032, -18.823],
        [-21.985,   7.054, -17.811],
        ...,
        [-29.782,   2.17 , -27.18 ],
        [-31.293,   2.101, -27.27 ],
        [-29.204,   0.819, -26.751]], dtype=float32), atom_names=array(['N', 'CA', 'C', 'O', 'N', 'CA', 'C', 'O', 'CB', 'CG1', 'CG2',
        'CD1', 'N', 'CA', 'C', 'O', 'CB', 'CG1', 'CG2', 'N', 'CA', 'C',
        'O', 'N', 'CA', 'C', 'O', 'CB', 'CG', 'CD1', 'CD2', 'CE1', 'CE2',
        'CZ', 'N', 'CA', 'C', 'O', 'CB', 'CG', 'CD', 'NE', 'CZ', 'NH1',
        'NH2', 'N', 'CA', 'C', 'O', 'CB', 'CG', 'CD1', 'CD2', 'N', 'CA',
        'C', 'O', 'CB', 'N', 'CA', 'C', 'O', 'CB', 'CG', 'CD1', 'CD2',
        'CE1', 'CE2', 'CZ', 'N', 'CA', 'C', 'O', 'N', 'CA', 'C', 'O', 'CB',
        'CG', 'OD1', 'ND2', 'N', 'CA', 'C', 'O', 'CB', 'CG', 'CD', 'N',
        'CA', 'C', 'O', 'CB', 'CG', 'CD', 'CE', 'NZ', 'N', 'CA', 'C', 'O',
        'N', 'CA', 'C', 'O', 'N', 'CA', 'C', 'O', 'CB',

In [31]:
# Display the system-level annotation stored in plinder for this system.
# NOTE(review): `print(result.keys())` elsewhere in this notebook lists only
# 'apo', 'links', 'annotation' — this cell may target an earlier return format; confirm.
result["system_annotation"]

{'pdb_id': '7ueu',
 'biounit_id': '1',
 'ligands': [{'pdb_id': '7ueu',
   'biounit_id': '1',
   'asym_id': 'C',
   'instance': 2,
   'ccd_code': 'AN2',
   'plip_type': 'SMALLMOLECULE',
   'bird_id': '',
   'centroid': [-24.080585479736328, 10.235146522521973, -25.439708709716797],
   'smiles': 'Nc1ncnc2c1ncn2[C@@H]1O[C@H](CO[P@](=O)(O)O[P@@](N)(=O)O)[C@@H](O)[C@H]1O',
   'resolved_smiles': 'O[C@@H]1[C@@H](CO[P@@](=O)(O[P@@](=O)([O])N)O)O[C@H]([C@@H]1O)n1cnc2c1ncnc2N',
   'residue_numbers': [1],
   'rdkit_canonical_smiles': 'Nc1ncnc2c1ncn2[C@@H]1O[C@H](CO[P@](=O)(O)O[P@@](N)(=O)O)[C@@H](O)[C@H]1O',
   'molecular_weight': 426.0453993519999,
   'crippen_clogp': -1.7796000000000007,
   'num_rot_bonds': 6,
   'num_hbd': 6,
   'num_hba': 12,
   'num_rings': 3,
   'num_heavy_atoms': 27,
   'is_covalent': False,
   'covalent_linkages': [],
   'neighboring_residues': {'1.A': [261, 284],
    '2.A': [25,
     26,
     27,
     28,
     29,
     30,
     31,
     32,
     94,
     96,
     97,
   

In [30]:
# Print the `is_artifact` flag of the first ligand in the system-level annotation.
# NOTE(review): `print(result.keys())` elsewhere in this notebook lists only
# 'apo', 'links', 'annotation' — this cell may target an earlier return format; confirm.
print(result["system_annotation"]["ligands"][0]["is_artifact"])

False


In [32]:
# Process a different system and overwrite `result` with its output.
# NOTE(review): here `system_id` (and `save_pockets`) are passed to process_system(),
# whereas the earlier cell passes `system_id` to the SystemProcessor constructor and
# calls process_system() without arguments — confirm which call signature is current.
result = system_processor.process_system(
    system_id="6rgu__1__2.A__2.H", save_pockets=False
)

2025-02-12 17:21:30,260 | plinder.core.utils.cpl.download_paths:24 | INFO : runtime succeeded: 0.00s
2025-02-12 17:21:30,262 | plinder.core.utils.cpl.download_paths:24 | INFO : runtime succeeded: 0.00s
2025-02-12 17:21:30,263 | plinder.core.index.utils:148 | INFO : loading entries from 1 zips
2025-02-12 17:21:30,309 | plinder.core.index.utils:163 | INFO : loaded 1 entries
2025-02-12 17:21:30,311 | plinder.core.index.utils.load_entries:24 | INFO : runtime succeeded: 0.16s
2025-02-12 17:21:30,448 | plinder.core.utils.cpl.download_paths:24 | INFO : runtime succeeded: 0.00s
2025-02-12 17:21:30,449 | plinder.core.utils.cpl.download_paths:24 | INFO : runtime succeeded: 0.00s
2025-02-12 17:21:30,854 | plinder.core.utils.cpl.download_paths:24 | INFO : runtime succeeded: 0.09s
2025-02-12 17:21:31,500 | plinder.core.scores.links.query_links:24 | INFO : runtime succeeded: 0.87s
2025-02-12 17:21:35,810 | plinder.core.utils.cpl.download_paths:24 | INFO : runtime succeeded: 0.00s
2025-02-12 17:21:35

In [33]:
# Display the linked apo structures, keyed by plinder linked-structure id; in the
# saved output each entry holds a 'holo' and a 'link' StructureData.
# NOTE(review): `print(result.keys())` elsewhere in this notebook lists only
# 'apo', 'links', 'annotation' — this cell may target an earlier return format; confirm.
result["apo_structures"]

{'5mxe_A': {'holo': StructureData(coords=array([[-41.435,  18.111,  42.813],
         [-41.035,  19.107,  43.846],
         [-39.591,  18.868,  44.318],
         ...,
         [-29.7  ,  22.965,  47.95 ],
         [-29.282,  23.292,  49.331],
         [-34.194,  18.75 ,  45.047]], dtype=float32), atom_names=array(['N', 'CA', 'C', ..., 'CE', 'NZ', 'OXT'], dtype='<U6'), res_ids=array([ 25,  25,  25, ..., 369, 369, 369]), res_names=array(['SER', 'SER', 'SER', ..., 'LYS', 'LYS', 'LYS'], dtype='<U5'), chain_ids=array(['A', 'A', 'A', ..., 'A', 'A', 'A'], dtype='<U4'), cif='systems/6rgu__1__2.A__2.H/receptor.cif'),
  'link': StructureData(coords=array([[-41.60343 ,  18.07293 ,  42.77941 ],
         [-41.132004,  18.910458,  43.918167],
         [-39.660923,  18.673107,  44.359802],
         ...,
         [-29.808643,  23.145782,  47.97537 ],
         [-29.235401,  23.28697 ,  49.33817 ],
         [-34.255478,  18.8527  ,  45.05255 ]], dtype=float32), atom_names=array(['N', 'CA', 'C', ..., 'CE

In [34]:
# Display the linked predicted structures; in the saved output each entry (e.g.
# 'C7BLE4_A') holds a 'holo' and a 'link' StructureData.
# NOTE(review): `print(result.keys())` elsewhere in this notebook lists only
# 'apo', 'links', 'annotation' — this cell may target an earlier return format; confirm.
result["pred_structures"]

{'C7BLE4_A': {'holo': StructureData(coords=array([[-41.435,  18.111,  42.813],
         [-41.035,  19.107,  43.846],
         [-39.591,  18.868,  44.318],
         ...,
         [-29.7  ,  22.965,  47.95 ],
         [-29.282,  23.292,  49.331],
         [-34.194,  18.75 ,  45.047]], dtype=float32), atom_names=array(['N', 'CA', 'C', ..., 'CE', 'NZ', 'OXT'], dtype='<U6'), res_ids=array([ 25,  25,  25, ..., 369, 369, 369]), res_names=array(['SER', 'SER', 'SER', ..., 'LYS', 'LYS', 'LYS'], dtype='<U5'), chain_ids=array(['A', 'A', 'A', ..., 'A', 'A', 'A'], dtype='<U4'), cif='systems/6rgu__1__2.A__2.H/receptor.cif'),
  'link': StructureData(coords=array([[-41.706627,  14.399178,  42.530357],
         [-41.287693,  15.80853 ,  42.596523],
         [-39.787506,  16.022655,  42.83176 ],
         ...,
         [-29.947872,  23.047075,  47.827538],
         [-29.023716,  22.799688,  48.941223],
         [-34.029892,  18.561142,  45.028866]], dtype=float32), atom_names=array(['N', 'CA', 'C', ..., '