## Sequence alignment: get the canonical residue numbering and the PDB ids

In [None]:
from Bio import AlignIO
from pathlib import Path
from Bio import SeqIO
from asapdiscovery.data.backend.plip import get_interactions_plip
from asapdiscovery.data.backend.openeye import (
    load_openeye_sdf,
    load_openeye_pdb
)
from openeye import oechem

In [None]:
# Directory holding the alignment output (machine-specific absolute path).
local_path = Path('/home/pengs/fold_zika')
# Read the single alignment stored in output.fasta, parsed as FASTA.
align = AlignIO.read(local_path/"output.fasta", "fasta")

In [None]:
# Pass in the AlignIO alignment produced by MAFFT and get the new residue number for each amino acid in every sequence
def get_new_renumber(align):
    """Number every aligned sequence by the reference's alignment columns.

    The first record of ``align`` is the reference. Its positions are numbered
    1..len(ref), one number per alignment column, so gap columns in the
    reference receive a number as well. Every other record gets, for each of
    its non-gap positions, the number of the reference column at that index.

    Side effect: a ``"resnum"`` entry is written into ``letter_annotations``
    of every record. For non-reference records it holds ``None`` at gap
    positions.

    Parameters
    ----------
    align
        Indexable and sliceable collection of aligned records. Each record
        must expose ``id``, ``letter_annotations``, ``len()`` and integer
        indexing that returns a single character.

    Returns
    -------
    dict
        Sequence id -> list of residue numbers (gap positions removed for the
        non-reference records).
    """
    # Reference is the first sequence; assume its first position is #1.
    ref = align[0]
    ref_numbers = range(1, len(ref) + 1)
    ref.letter_annotations["resnum"] = ref_numbers
    res_num = {ref.id: list(ref_numbers)}

    # Map all other sequences back onto the reference numbering.
    for rec in align[1:]:
        annotation = [None] * len(rec)
        for column, number in enumerate(ref_numbers):
            # Gap characters keep None so they can be dropped below.
            if rec[column] != '-':
                annotation[column] = number
        rec.letter_annotations["resnum"] = annotation
        res_num[rec.id] = [number for number in annotation if number is not None]

    return res_num

# Residue numbers for every sequence in the alignment, keyed by sequence id.
align_num = get_new_renumber(align)

In [None]:
align_num

## PLIP returns the PDB id of the active residue

In [None]:
# https://stackoverflow.com/questions/42851010/getting-residue-number-and-residue-name-in-biopython-pdb-module
from Bio import *
from Bio.PDB.PDBParser import PDBParser
from Bio.PDB.Polypeptide import PPBuilder

# PDB file read by the cells below (machine-specific absolute path).
pdb_file = '/home/pengs/fold_zika/test_prep/zikv_ns2b3.pdb' 

In [None]:
# Get the ids of the residues in the PDB file and store them in a dictionary keyed by sequence position
def get_pdb_id(pdb_file):
    """Index every residue of a PDB file by its running position.

    The file is parsed with Biopython's ``PDBParser`` and walked model by
    model, chain by chain, residue by residue. A single counter starting at 1
    runs across all chains and models.

    Parameters
    ----------
    pdb_file
        Path of the PDB file, passed straight to ``PDBParser.get_structure``.

    Returns
    -------
    dict
        Running position -> ``(resname, resnum, chain_id)``. ``resnum`` is
        element ``[3][1]`` of the residue's full id and ``chain_id`` is
        element ``[2]`` of the chain's full id.
    """
    structure = PDBParser().get_structure('prot', pdb_file)
    pdb_id = {}
    position = 1
    for model in structure:
        for chain in model:
            chain_id = chain.get_full_id()[2]
            for residue in chain.get_residues():
                resnum = residue.get_full_id()[3][1]
                # Named `entry` rather than `id` so the builtin is not shadowed.
                entry = (residue.resname, resnum, chain_id)
                pdb_id[position] = entry
                position += 1
    return pdb_id

In [None]:
# Running position -> (resname, resnum, chain_id) for every residue in pdb_file.
pdb_id = get_pdb_id(pdb_file)
# Reverse lookup: (resname, resnum, chain_id) -> running position.
pdb_id_flip = {v:k for k,v in pdb_id.items()}

## Get the OpenEye residue ids required for running protein prep

In [None]:
#### Iterate over the residues of oeGraphMol and get the openeye residue ids
def find_oe_res_id(protein):
    """Index the residues OpenEye reports for ``protein`` by running position.

    Residues are taken in the order ``oechem.OEGetResidues`` yields them and
    numbered from 1.

    Returns a dict mapping position -> ``(name, residue number, 'A')``.
    """
    # The chain ID was not collected earlier, so every entry is labelled 'A'
    # instead of using the residue's own chain identifier.
    return {
        position: (res.GetName(), res.GetResidueNumber(), 'A')
        for position, res in enumerate(oechem.OEGetResidues(protein), start=1)
    }

In [None]:
# Load the same PDB file through the asapdiscovery OpenEye backend.
zikv_oe = load_openeye_pdb(pdb_file)

In [None]:
# Show the position -> (name, residue number, 'A') mapping for the loaded structure.
find_oe_res_id(zikv_oe)

In [None]:
pdb_id

## The OpenEye-perceived molecule residues have the same numbering as the PDB ids

## Trying to make a design unit using the different ways online sources defined residues in OEGraphMol molecules

In [None]:
from asapdiscovery.modeling.modeling import make_design_unit,get_oe_prep_opts,get_oe_structure_metadata_from_sequence
from asapdiscovery.data.backend.openeye import (
    oechem,
    oedocking,
    oegrid,
    oespruce,
    openeye_perceive_residues,
)
from asapdiscovery.data.backend.openeye import (
    load_openeye_sdf,
    load_openeye_pdb
)

In [None]:
# Prep options returned by get_oe_prep_opts(); the bare name displays them as the cell output.
opts = get_oe_prep_opts()
opts
# Disabled attempt at calling asapdiscovery's make_design_unit directly:
#make_design_unit(zikv_oe, 'ASP:129::B')

In [None]:
# Default-constructed structure metadata, passed to OEMakeDesignUnits in the cells below.
metadata = oespruce.OEStructureMetadata()

In [None]:
# Inputs for the design-unit experiments (machine-specific absolute paths).
prot_file = '/Users/choderalab/temp_storage/separate/output/lig_9_protein.pdb'
prot_lig9 = load_openeye_pdb(prot_file)
# `prot` is the structure used by the OEMakeDesignUnits and OEHierView cells below.
prot = load_openeye_pdb('/Users/choderalab/temp_storage/separate/pdb_intermediates/protein.pdb')
lig = load_openeye_sdf('/Users/choderalab/temp_storage/separate/lig_sdfs/lig_9.sdf')

https://openkinome.org/kinoml/notebooks/OpenEye_structural_featurizer.html

In [None]:
# Attempt with the site residue written as 'TYR-161'.
# NOTE(review): OpenEye's site-residue string is believed to be colon-separated
# 'NAME:NUMBER:ICODE:CHAIN' with a blank insertion code, e.g. 'ASP:25: :A' — confirm against the Spruce docs.
du = oespruce.OEMakeDesignUnits(prot, metadata, opts, 'TYR-161')

In [None]:
# Attempt with the site residue written in a space-separated form.
# NOTE(review): OpenEye's site-residue string is believed to be colon-separated
# 'NAME:NUMBER:ICODE:CHAIN' with a blank insertion code, e.g. 'ASP:25: :A' — confirm against the Spruce docs.
du = oespruce.OEMakeDesignUnits(prot, metadata, opts, 'TYR 161  A 1')

## This means something is wrong with the way I am defining residues (most likely wrong syntax)

In [None]:
from simtk.openmm import app

In [None]:
# Build a hierarchy view of the protein so it can be walked chain -> fragment -> residue.
hv = oechem.OEHierView(
            prot, oechem.OEAssumption_BondedResidue +
            oechem.OEAssumption_ResPerceived + oechem.OEAssumption_PDBOrder)
# NOTE(review): topology is created here but not used anywhere in this cell.
topology = app.Topology()

# Print every residue as OpenEye reports it, to see how its identifiers are written.
for chain in hv.GetChains():
    for frag in chain.GetFragments():
        for hres in frag.GetResidues():
            # Get OE residue
            oe_res = hres.GetOEResidue()
            print(str(oe_res))

## OEChem may have defined the residues differently from OEGraphMol?

https://pdb101.rcsb.org/learn/guide-to-understanding-pdb-data/small-molecule-ligands

In [None]:
# Attempt with the site residue written as 'TYR161:B'.
# NOTE(review): OpenEye's site-residue string is believed to be colon-separated
# 'NAME:NUMBER:ICODE:CHAIN' with a blank insertion code, e.g. 'ASP:25: :A' — confirm against the Spruce docs.
du = oespruce.OEMakeDesignUnits(prot, metadata, opts, 'TYR161:B')

## Trying to find out how molecules are being perceived after being loaded in by load_openeye_pdb

In [None]:
from asapdiscovery.data.backend.openeye import (
    oechem,
    oedocking,
    oegrid,
    oespruce,
    openeye_perceive_residues,
)

In [None]:
# Run asapdiscovery's residue perception on the loaded structure with preserve_all=False,
# then display the returned object as the cell output.
initial_prot = openeye_perceive_residues(zikv_oe,preserve_all=False)
initial_prot

In [None]:
# Site residue string handed to OEMakeDesignUnits.
# NOTE(review): the insertion-code field between the two colons is empty here; OpenEye's
# documented examples are believed to use a blank there ('ASP:129: :B') — confirm.
site_residue = 'ASP:129::B'
if site_residue:
    # Debug marker showing that the branch executed.
    print('ran')
    # Collect whatever OEMakeDesignUnits yields into a list.
    dus = list(oespruce.OEMakeDesignUnits(initial_prot, metadata, opts, site_residue))

In [None]:
# Same call without list(), so the cell output shows the raw return value.
oespruce.OEMakeDesignUnits(initial_prot, metadata, opts, site_residue)

## I think this is how residue ids are perceived on the OpenEye object
https://github.com/choderalab/asapdiscovery/blob/main/asapdiscovery-data/asapdiscovery/data/backend/openeye.py#L15

## I don't think I can get oe_active_residue to work (I could not find which residue is the active residue in the way OpenEye defines residues within its own sequence)

In [None]:
from openeye import oechem

In [None]:
# Protein PDB for ligand 0 (machine-specific absolute path).
file = '/home/pengs/fold_zika/test_prep/orig_bind/lig_0_protein.pdb'
comb = load_openeye_pdb(file)
# Run OpenEye's interaction-hint perception on the loaded molecule; the return value
# is not stored, only shown as the cell output.
oechem.OEPerceiveInteractionHints(comb)

In [None]:
from rdkit import Chem
# Convert every molecule of the combined SDF into SMILES, one per line.
sdf = Chem.SDMolSupplier( '/home/pengs/fold_zika/test_dock/combined_ligs.sdf' )
with open('/home/pengs/fold_zika/test_dock/combined_ligs.smi', 'w') as f:
    for index, mol in enumerate(sdf):
        # SDMolSupplier yields None for records it cannot parse, and MolToSmiles
        # raises on None. Skip those records and report them, because the .smi
        # file then has fewer lines than the SDF has records.
        if mol is None:
            print(f"Skipping SDF record {index}: could not be parsed")
            continue
        smi = Chem.MolToSmiles(mol)
        f.write(f"{smi}\n")