# Use Case:
## Use this when multiple ligands are crystallized onto one protein and saved in the same PDB file, all with the residue name LIG. The goal is to identify the individual ligands, write each one to its own PDB file, and then collect them as separate molecules in a single multi-ligand SDF file.

In [1]:
from pathlib import Path
from pymol import cmd
import networkx as nx
from asapdiscovery.data.openeye import load_openeye_pdb, save_openeye_sdfs

# PUT YOUR PATH HERE

In [None]:
# Working directory: the input PDB is read from here and every intermediate
# and output file in this notebook is written here.
local_path = Path("/Users/choderalab/asapdiscovery/temp_storage/")

In [None]:
# Load the full structure (protein plus ligands) into PyMOL as object "zikv".
# NOTE(review): a pathlib.Path is passed as the filename -- confirm that the
# installed PyMOL version accepts path-like objects here.
cmd.load(local_path / "ZIKV_NS2B3_ligands.pdb", "zikv")


## Define a function to save residues named "LIG" to a PDB file

In [None]:
def save_ligand_residues(structure_name, output_file, resname="LIG"):
    """Save every residue with a given residue name to a PDB file.

    Parameters
    ----------
    structure_name : str
        Name of the PyMOL object to take the residues from.
    output_file : str or pathlib.Path
        Destination PDB file.
    resname : str, optional
        Residue name to select. Defaults to "LIG", so existing two-argument
        calls behave exactly as before.
    """
    # PyMOL selection: atoms of the object whose residue name matches
    selection = f"{structure_name} and resn {resname}"

    # cmd.save is documented as taking a string filename; str() lets callers
    # pass a pathlib.Path as well without relying on PyMOL to convert it.
    cmd.save(str(output_file), selection, format='pdb')

# Save all of the ligands into one file

In [None]:
# Writes local_path / "ligand_only.pdb"; the cells below read this file back
# to build the bond graph and to reload the ligands into PyMOL.
save_ligand_residues("zikv", local_path / "ligand_only.pdb")

# Make a Graph out of the CONECT Records

In [None]:
pdb_file = local_path / "ligand_only.pdb"


def _parse_conect_serials(line):
    """Return the atom serial numbers listed on one PDB CONECT record.

    CONECT is a fixed-width record: after the 6-character record name, each
    serial number occupies a 5-character field. Once serials reach five
    digits the fields touch with no whitespace in between, so splitting on
    whitespace would merge them. The fields are therefore read by column;
    if the line is not column-aligned (non-standard, whitespace-separated
    output) the parser falls back to a plain whitespace split.
    """
    payload = line.rstrip("\r\n")[6:]
    fields = [payload[start:start + 5] for start in range(0, len(payload), 5)]
    try:
        # Blank fields are padding (PDB lines are often padded to 80 columns)
        return [int(field) for field in fields if field.strip()]
    except ValueError:
        return [int(token) for token in line.split()[1:]]


# Undirected graph: nodes are atom serial numbers, edges are CONECT bonds
G = nx.Graph()
with open(pdb_file, 'r') as f:
    for line in f:
        if not line.startswith('CONECT'):
            continue

        serials = _parse_conect_serials(line)
        if not serials:
            continue

        # First serial is the atom the record describes; the rest are the
        # atoms bonded to it. A record with no bonded atoms adds nothing.
        source, *bonded = serials
        G.add_edges_from((source, partner) for partner in bonded)

All atoms that belong to the same ligand now form one connected component of the graph G.

In [None]:
# Delete the previously loaded full structure before proceeding, so the name
# "zikv" can be reused without weird PyMOL issues.
cmd.delete("zikv")
# Load the ligand-only file into PyMOL under the same object name.
# NOTE(review): presumably this is done so that PyMOL's atom ids match the
# serial numbers in this file's CONECT records (the graph's node labels) --
# verify against your PyMOL settings.
cmd.load(pdb_file, "zikv")

# Define a function to save a subset of atoms with CONECT records to a PDB file

In [None]:
def save_subset_with_conect(structure_name, atom_ids, output_file):
    """Save a subset of atoms, chosen by atom id, to its own PDB file.

    The atoms are first copied into a temporary PyMOL object named
    "selected_atoms", that object is saved, and it is then removed again.

    Parameters
    ----------
    structure_name : str
        Name of the PyMOL object that holds the atoms.
    atom_ids : iterable
        Atom ids to keep; each is converted with str() and the results are
        joined with "+" into a PyMOL ``id`` selection.
    output_file : str or pathlib.Path
        Destination PDB file.
    """
    # Select atoms by IDs
    selection = f"{structure_name} and id {'+'.join(map(str, atom_ids))}"

    # Create a new object containing only the selected atoms
    cmd.create("selected_atoms", selection)

    try:
        # cmd.save is documented as taking a string filename; str() lets
        # callers pass a pathlib.Path as well.
        cmd.save(str(output_file), "selected_atoms", format='pdb')
    finally:
        # Always remove the temporary object, even if the save fails, so a
        # leftover "selected_atoms" cannot leak into the next call.
        cmd.delete("selected_atoms")

In [None]:
# Each connected component of G is a set of atom serial numbers that are
# linked by CONECT bonds, i.e. the atoms of one ligand.
# Atoms that never appear in a CONECT bond are not in G and are not exported.
atom_list = list(nx.connected_components(G))

for i, atom_ids in enumerate(atom_list):
    # Write ligand number i to its own PDB file, lig_<i>.pdb, in local_path
    save_subset_with_conect("zikv", atom_ids, local_path / f"lig_{i}.pdb")

# Load the pdb files and save to a single multi ligand sdf

In [None]:
ligs = []
# glob() yields files in arbitrary, filesystem-dependent order. Sort by
# (length, name) so the order is reproducible and numeric, i.e. lig_2 comes
# before lig_10.
# Note: every lig_*.pdb in local_path is picked up, including files left
# over from an earlier run.
lig_pdb_files = sorted(
    local_path.glob("lig_*.pdb"),
    key=lambda path: (len(path.stem), path.stem),
)
for lig_pdb_file in lig_pdb_files:
    lig = load_openeye_pdb(lig_pdb_file)
    # Title each molecule after its source file (e.g. "lig_0") so the
    # ligands stay identifiable once they are combined
    lig.SetTitle(lig_pdb_file.stem)
    ligs.append(lig)

# Save all of the ligands to one combined multi-ligand SDF file

In [None]:
# Write every loaded ligand into one SDF file in local_path.
# NOTE(review): "ligss" in the filename looks like a typo for "ligs" --
# confirm before renaming, since downstream steps may expect this exact name.
save_openeye_sdfs(ligs, local_path / "ZIKV_combined_ligss.sdf")