# import

In [24]:
import numpy as np
import numpy as nps  # NOTE(review): alias looks like a typo for `np`; kept so nothing that may rely on `nps` breaks

from asapdiscovery.data.openeye import load_openeye_pdb, load_openeye_sdf, save_openeye_sdf, save_openeye_sdfs, openeye_perceive_residues
from asapdiscovery.modeling.modeling import split_openeye_mol
from asapdiscovery.modeling.schema import MoleculeFilter, MoleculeComponent

# PUT YOUR PATH HERE

In [3]:
import os
from pathlib import Path

# Working directory: holds the input PDB and receives every file this notebook writes.
# Set the ZIKV_LIGAND_DIR environment variable to use another folder without editing
# the notebook; when it is unset the original author's folder is used.
local_path = Path(os.environ.get("ZIKV_LIGAND_DIR", "/Users/choderalab/asapdiscovery/temp_storage/"))

# Attempt 1: Using OpenEye to Interpret

In [29]:
# Locate the multi-ligand ZIKV NS2B3 PDB in the working directory and read it
# with the project's OpenEye loader.
pdb_file = local_path.joinpath("ZIKV_NS2B3_ligands.pdb")
pdb = load_openeye_pdb(pdb_file)

In [None]:
# Split the loaded structure into its components with the project helper.
# NOTE(review): keep_one_lig=False presumably retains every ligand instead of just one — confirm against split_openeye_mol.
component_dict = split_openeye_mol(pdb, keep_one_lig=False)

In [None]:
# Entry stored under the "lig" key; used below as a single molecule object
# (NumAtoms / GetBonds / CreateCopy are called on it).
ligs = component_dict["lig"]

In [None]:
# Square boolean adjacency matrix with one row/column per atom of `ligs`;
# every entry starts as False (no bond recorded yet).
matrix = np.zeros((ligs.NumAtoms(),) * 2, dtype=bool)

In [None]:
# Record each bond in both directions so the adjacency matrix stays symmetric.
for bond in ligs.GetBonds():
    bgn, end = bond.GetBgnIdx(), bond.GetEndIdx()
    matrix[bgn, end] = matrix[end, bgn] = True

## check to see if the adjacency matrix looks insane

In [None]:
import plotly.express as px

In [None]:
# Render the adjacency matrix as a 1200x1200 image for a visual sanity check.
fig = px.imshow(matrix, width=1200, height=1200)
fig.show()

In [None]:
import networkx

In [None]:
# Build an undirected graph from the boolean adjacency matrix, so node
# numbers correspond to the matrix row/column (atom) indices used above.
g = networkx.Graph(matrix)

In [None]:
# Number of disconnected fragments in the bond graph.
networkx.number_connected_components(g)

## Wait a sec, this can't be right: there are way more than 4 ligands

### let's see what happens

In [None]:
# Materialize the connected components of `g`: one collection of node
# (atom) indices per fragment.
atom_list = list(networkx.connected_components(g))

In [None]:
# One pruned copy of `ligs` per connected component, keyed by component number:
# every atom whose index is not part of the component is deleted from the copy.
new_ligs = {}
for comp_idx, keep_idxs in enumerate(atom_list):
    fragment = ligs.CreateCopy()
    for atom in fragment.GetAtomIter():
        if atom.GetIdx() not in keep_idxs:
            fragment.DeleteAtom(atom)
    new_ligs[comp_idx] = fragment

In [None]:
# Title each fragment after its component number (LIG_0, LIG_1, ...).
for comp_idx in new_ligs:
    new_ligs[comp_idx].SetTitle(f"LIG_{comp_idx}")

In [None]:
# NOTE: rebinds `ligs` — from here on it is a list of the per-component
# molecules, no longer the single combined molecule taken from component_dict.
ligs = list(new_ligs.values())

In [None]:
# Write every fragment to its own SDF file, named after the fragment title.
for lig in new_ligs.values():
    sdf_path = local_path / f"{lig.GetTitle()}.sdf"
    save_openeye_sdf(lig, sdf_path)

### they look like garbage

# Attempt 2: Use CONECT Records and PyMOL to select the right atoms

## First re-save the structure with PyMOL, since it rewrites the CONECT records it needs without making too many inferences

In [9]:
from pymol import cmd

# Load the full multi-ligand structure into PyMOL as the object "zikv"
cmd.load(local_path / "ZIKV_NS2B3_ligands.pdb", "zikv")

# Helper that writes only the residues named "LIG" to a PDB file
def save_ligand_residues(structure_name, output_file):
    """Save the atoms of `structure_name` whose residue name is "LIG" to `output_file` in PDB format.

    Args:
        structure_name: name of the loaded PyMOL object to select from.
        output_file: destination path handed to ``cmd.save``.
    """
    cmd.save(output_file, f"{structure_name} and resn LIG", format='pdb')

# Write the "LIG" residues of the "zikv" object to ligand_only.pdb in the working directory
save_ligand_residues("zikv", local_path / "ligand_only.pdb")

## Make a Graph out of the CONECT Records

In [20]:
# NOTE: rebinds `pdb_file` — it now points at the PyMOL-written ligand-only
# file rather than the original multi-ligand PDB.
pdb_file = local_path / "ligand_only.pdb"

In [21]:
import networkx as nx

# Bond graph of the ligand-only file: nodes are PDB atom serial numbers and
# every CONECT pair becomes an undirected edge.
G = nx.Graph()
with open(pdb_file, 'r') as f:
    for line in f:
        if not line.startswith('CONECT'):
            continue

        # CONECT is a fixed-width record: the source serial sits in columns
        # 7-11 and each bonded serial in the following 5-character fields.
        # Slicing by column (rather than splitting on whitespace) stays
        # correct when 5-digit serials leave no space between fields.
        record = line.rstrip()
        atom_numbers = [
            int(record[start:start + 5])
            for start in range(6, len(record), 5)
            if record[start:start + 5].strip()
        ]
        if not atom_numbers:
            continue

        # First serial is the source atom; the rest are its bonded partners.
        source, *bonded = atom_numbers
        for atom1 in bonded:
            G.add_edge(source, atom1)

In [22]:
# Number of disconnected molecules according to the CONECT records.
nx.number_connected_components(G)

102

## 102 ligands seems more reasonable

## Revisiting the OpenEye attempt using the CONECT graph (does not work)

In [None]:
# Attempt: prune copies of the OEMol down to each CONECT-graph component.
# This does NOT work: the graph nodes are atom serials from the PDB file,
# whereas the OEMol re-indexes its atoms, so the two numberings disagree.
atom_list = list(nx.connected_components(G))
new_ligs = {}
for comp_idx, keep_ids in enumerate(atom_list):
    candidate = pdb.CreateCopy()
    candidate.SetTitle(f"LIG_{comp_idx}")
    for atom in candidate.GetAtomIter():
        if atom.GetIdx() not in keep_ids:
            candidate.DeleteAtom(atom)
    new_ligs[comp_idx] = candidate

# Write out the (incorrect) structures so the failure can be inspected,
# e.g. by opening the files in PyMOL.
for lig in new_ligs.values():
    sdf_path = local_path / f"{lig.GetTitle()}.sdf"
    save_openeye_sdf(lig, sdf_path)

## Now load it with PyMOL so that the atom indices stay the same
This approach actually works.

In [33]:
from pymol import cmd

In [34]:
# Load the ligand-only structure into PyMOL.
# An object named "zikv" may already exist from loading the full structure
# earlier in the session; cmd.load into an existing object appends a new state
# instead of replacing it, which would let the later `id` selections pick up
# atoms from the full structure too. Drop any stale object first so "zikv"
# contains exactly the atoms (and atom IDs) of the ligand-only file.
cmd.delete("zikv")
cmd.load(pdb_file, "zikv")

In [35]:
# Define a function to save a subset of atoms with CONECT records to a PDB file
def save_subset_with_conect(structure_name, atom_ids, output_file):
    """Save the atoms of `structure_name` with the given IDs to `output_file` as PDB.

    The atoms are copied into a temporary PyMOL object named "selected_atoms",
    that object is saved, and it is removed again even if creating or saving
    fails.

    Args:
        structure_name: name of the loaded PyMOL object to select from.
        atom_ids: iterable of atom IDs (PyMOL ``id`` values) to keep.
        output_file: destination path handed to ``cmd.save``.

    Raises:
        ValueError: if `atom_ids` is empty, since the resulting selection
            expression would be incomplete.
    """
    id_tokens = [str(atom_id) for atom_id in atom_ids]
    if not id_tokens:
        raise ValueError("atom_ids must contain at least one atom id")

    # Select atoms by IDs
    selection = f"{structure_name} and id {'+'.join(id_tokens)}"

    temp_object = "selected_atoms"
    # A leftover object from an interrupted earlier call would be merged into
    # by cmd.create, so make sure we start from a clean slate.
    cmd.delete(temp_object)
    try:
        # Create a new object containing only the selected atoms
        cmd.create(temp_object, selection)

        # Save the selection with CONECT records to a PDB file
        cmd.save(output_file, temp_object, format='pdb')
    finally:
        # Delete the temporary object, whether or not the save succeeded
        cmd.delete(temp_object)

In [53]:
# Each connected component of the CONECT graph is the set of atom IDs that
# make up one ligand.
atom_list = list(nx.connected_components(G))

# Write every ligand to its own numbered PDB file (lig_0.pdb, lig_1.pdb, ...).
for lig_idx, lig_atom_ids in enumerate(atom_list):
    lig_path = local_path / f"lig_{lig_idx}.pdb"
    save_subset_with_conect("zikv", lig_atom_ids, lig_path)

# Load the pdb files and save to a single multi ligand sdf

In [26]:
from pathlib import Path
import os

In [27]:
# Collect the per-ligand PDB files in ligand-number order. glob() yields files
# in arbitrary order, so sort by (stem length, stem), which puts lig_2 before
# lig_10 and keeps the order of the combined SDF reproducible.
lig_pdb_files = sorted(
    local_path.glob("lig_*.pdb"),
    key=lambda path: (len(path.stem), path.stem),
)
if not lig_pdb_files:
    # The files are deleted below, so a second run of this cell would otherwise
    # silently produce an empty ligand list.
    raise FileNotFoundError(
        f"No lig_*.pdb files found in {local_path}; re-run the cell that writes them."
    )

ligs = []
for lig_pdb_file in lig_pdb_files:
    lig = load_openeye_pdb(lig_pdb_file)
    # Title each molecule after its source file (e.g. "lig_12").
    lig.SetTitle(lig_pdb_file.stem)
    ligs.append(lig)

# Remove the intermediate files only after every one of them loaded, so a
# failure part-way through does not lose ligands that were not read yet.
for lig_pdb_file in lig_pdb_files:
    lig_pdb_file.unlink()

In [42]:
# Hand the whole list of ligands to the project's multi-molecule SDF writer,
# targeting a single combined file in the working directory.
save_openeye_sdfs(ligs, local_path / "ZIKV_combined_ligs.sdf")

PosixPath('/Users/choderalab/asapdiscovery/temp_storage/ZIKV_combined_ligs.sdf')