As a first step in designing a new ligand for our target JNK-1, the protein sequence of JNK-1 was retrieved from UniProt (http://www.uniprot.org/; UniProt ID: P45983). Additional information was also obtained from UniProt: the sequence length (427 amino acids), the status (UniProtKB reviewed, Swiss-Prot), the protein existence level (evidence at protein level) and the mass (48,296 Da, i.e. about 48.3 kDa). Moreover, BLAST (Basic Local Alignment Search Tool, under ‘Advanced’) was used to find similar targets based on sequence in the same species (Homo sapiens), and the first hit that is not an isoform of JNK-1 was used for the alignment part of the study. This search identified ‘JNK2/3 in complex with 3-(4-{[(2-chlorophenyl)carbamoyl]amino}-1H-pyrazol-1-yl)-N-(2-methylpyridin-4-yl)benzamide [Homo sapiens]’ as the similar target, with sequence ID 4w4v and a length of 366 amino acids.

In [12]:
# First we import the right python modules.
import nglview
import os
import shutil
from Bio.PDB import PDBParser, PDBIO, Select,  PDBList, MMCIFParser, StructureAlignment
import Bio.Align
import os
from pathlib import Path
import rdkit

In [13]:
# and the local scripts
from scripts import viewer
from scripts import bio_align

ModuleNotFoundError: No module named 'scripts'

In [14]:
# Work inside ~/Bioinformatics so that every file this notebook downloads or
# writes ends up in one place.
HOMEDIR = str(Path.home())
os.chdir(HOMEDIR)
# Create the working directory on first use. Only the "already exists" case is
# tolerated; any other failure (e.g. missing permissions) is raised instead of
# being misreported as an existing directory.
try:
    os.mkdir('Bioinformatics')
except FileExistsError:
    print("Directory already exists")
os.chdir('Bioinformatics')

Directory already exists


In [15]:
# To retrieve the 3D structure of our protein we use the PDB code of JNK-1, 2GMX.

TARGET_PDB_ID = "2gmx" 

import nglview
# Build a viewer widget for this PDB entry.
view = nglview.show_pdbid(TARGET_PDB_ID)
# A bare expression at the end of a notebook cell displays the widget.
view

NGLWidget()

In [16]:
# Moreover, to design new drugs it is crucial to investigate the interactions of the co-crystallized ligand
# with JNK-1. For this, we looked up the RCSB code of the ligand, 877.

LIGAND_CODE = "877" 

# Center the existing view on the ligand.
# NOTE(review): the ligand code is passed as the selection string; a purely numeric
# string may be interpreted as a residue number rather than a residue name -
# confirm that the view really centers on the ligand.
view.center(LIGAND_CODE)
view

NGLWidget(n_components=1)

In [17]:
# To observe the most important interactions between the ligand and the protein,
# we visualize the interactions within 5 Angstrom of the ligand.

# For this step we download the coordinates of 2GMX from RCSB.

pdbl = PDBList()
# The file is stored in a sub-directory named after the PDB ID; the call returns
# the path of the local file (an mmCIF file, see the cell output).
pdbl.retrieve_pdb_file(TARGET_PDB_ID, pdir=TARGET_PDB_ID)

Structure exists: '2gmx/2gmx.cif' 




'2gmx/2gmx.cif'

In [18]:
# Parse the downloaded mmCIF file into a BioPython structure object, which the
# following cells use to write out the ligand and the protein.
parser = MMCIFParser()
structure = parser.get_structure(
    "TARGETPROT",
    f"{TARGET_PDB_ID}/{TARGET_PDB_ID}.cif",
)



In [19]:
# We had to capitalize "gmx" in the file in the Bioinformatics directory so that it runs.
# NOTE(review): the cells above use the lowercase ID "2gmx" - confirm which file this refers to.

# Show the current working directory to check where the files are read and written.
os. getcwd()

'/home/jovyan/Bioinformatics'

In [20]:
class ResSelect(Select):
    """PDBIO filter that keeps only residues named ``LIGAND_CODE``."""

    def accept_residue(self, residue):
        """Return 1 when the residue name equals ``LIGAND_CODE``, otherwise 0."""
        return int(residue.get_resname() == LIGAND_CODE)

class NonHetSelect(Select):
    """PDBIO filter that keeps residues whose first id field is a blank.

    Presumably this removes hetero residues (ligands, waters), as the class
    name suggests - verify against the Biopython residue id convention.
    """

    def accept_residue(self, residue):
        """Return 1 if ``residue.id[0]`` is a single space, otherwise 0."""
        hetero_flag = residue.id[0]
        if hetero_flag == " ":
            return 1
        return 0

# Write the ligand and the filtered protein of the parsed structure to
# separate PDB files.
io = PDBIO()
io.set_structure(structure)
io.save("ligand-{}.pdb".format(LIGAND_CODE), ResSelect())
io.save("protein-{}.pdb".format(TARGET_PDB_ID), NonHetSelect())

# Run the external lepro tool on the protein file. The command is passed as an
# argument list (no shell) and check=True raises if lepro is missing or exits
# with an error, so a stale 'pro.pdb' left over from an earlier run is never
# silently picked up by the move below.
import subprocess

command = ['../CBR_teaching/bin/lepro', 'protein-{}.pdb'.format(TARGET_PDB_ID)]
subprocess.run(command, check=True)
# lepro is expected to leave its result in 'pro.pdb'; give it a name that
# identifies the structure. As the last expression of the cell, the returned
# destination path is displayed.
shutil.move('pro.pdb', '{}_prepped.pdb'.format(TARGET_PDB_ID))

'2gmx_prepped.pdb'

In [24]:
# Combine the prepared protein and the ligand into a single PDB file.
filenames = [
    '{}_prepped.pdb'.format(TARGET_PDB_ID),
    "ligand-{}.pdb".format(LIGAND_CODE),
]
with open('{}-complex.pdb'.format(TARGET_PDB_ID), 'w') as outfile:
    for fname in filenames:
        with open(fname) as infile:
            # Skip only the terminator records (END / ENDMDL) so that the
            # second file is not ignored by readers. Testing for "END" anywhere
            # in the line would also drop unrelated lines that merely contain
            # those three letters.
            outfile.writelines(
                line for line in infile if not line.startswith("END")
            )


# Load the combined file into a new viewer widget.
with open('{}-complex.pdb'.format(TARGET_PDB_ID)) as f:
    view = nglview.show_file(f, ext="pdb")

view.center(LIGAND_CODE)
# `viewer` comes from the local `scripts` package imported at the top of the
# notebook; that import must have succeeded for this call to work.
viewer.show_residues_around(view, selection=LIGAND_CODE)
view

# As one of the final steps before the alignment, we looked up the off-target PDB ID 4w4v, which corresponds to
# the first sequence retrieved from BLAST that is not an isoform of JNK-1
# (JNK2/3 in complex with 3-(4-{[(2-chlorophenyl)carbamoyl]amino}-1H-pyrazol-1-yl)-N-(2-methylpyridin-4-yl)benzamide
# [Homo sapiens]).
# 4w4v

OFF_TARGET_PDB_ID = "4w4v"
OFF_TARGET_LIGAND = "3H8"

# Download the off-target entry into a sub-directory named after its PDB ID.
pdbl = PDBList()
pdbl.retrieve_pdb_file(OFF_TARGET_PDB_ID, pdir=OFF_TARGET_PDB_ID)

# Parse the mmCIF file; note that this rebinds `structure`, which until now
# held the target structure.
parser = MMCIFParser()
structure = parser.get_structure(
    "TARGETPROT",
    f"{OFF_TARGET_PDB_ID}/{OFF_TARGET_PDB_ID}.cif",
)

class ResSelect(Select):
    """PDBIO filter that keeps only residues named ``OFF_TARGET_LIGAND``.

    This redefines the earlier ``ResSelect`` so that it selects the off-target
    ligand instead of the target ligand.
    """

    def accept_residue(self, residue):
        """Return 1 when the residue name equals ``OFF_TARGET_LIGAND``, otherwise 0."""
        return int(residue.get_resname() == OFF_TARGET_LIGAND)

# Write the off-target ligand and the filtered off-target protein to separate
# PDB files.
io = PDBIO()
io.set_structure(structure)
io.save("ligand-{}.pdb".format(OFF_TARGET_LIGAND), ResSelect())
io.save("protein-{}.pdb".format(OFF_TARGET_PDB_ID), NonHetSelect())


# Run the external lepro tool on the protein file. The command is passed as an
# argument list (no shell) and check=True raises if lepro is missing or exits
# with an error, so a stale 'pro.pdb' left over from an earlier run is never
# silently picked up by the move below.
import subprocess

command = ['../CBR_teaching/bin/lepro', 'protein-{}.pdb'.format(OFF_TARGET_PDB_ID)]
subprocess.run(command, check=True)
# lepro is expected to leave its result in 'pro.pdb'; give it a name that
# identifies the structure.
shutil.move('pro.pdb', '{}_prepped.pdb'.format(OFF_TARGET_PDB_ID))


# Combine the prepared off-target protein and its ligand into a single PDB file.
filenames = [
    '{}_prepped.pdb'.format(OFF_TARGET_PDB_ID),
    "ligand-{}.pdb".format(OFF_TARGET_LIGAND),
]
with open('{}-complex.pdb'.format(OFF_TARGET_PDB_ID), 'w') as outfile:
    for fname in filenames:
        with open(fname) as infile:
            # Skip only the terminator records (END / ENDMDL) so that the
            # second file is not ignored by readers. Testing for "END" anywhere
            # in the line would also drop unrelated lines that merely contain
            # those three letters.
            outfile.writelines(
                line for line in infile if not line.startswith("END")
            )


# Load the combined file into a new viewer widget.
with open('{}-complex.pdb'.format(OFF_TARGET_PDB_ID)) as f:
    view = nglview.show_file(f, ext="pdb")

view.center(OFF_TARGET_LIGAND)
# `viewer` comes from the local `scripts` package imported at the top of the
# notebook; that import must have succeeded for this call to work.
viewer.show_residues_around(view, selection=OFF_TARGET_LIGAND)
view

# We also replaced the names of the first and second structures.

from Bio import pairwise2
from Bio.Seq import Seq 
from Bio.SeqRecord import SeqRecord
from Bio.Align import MultipleSeqAlignment

import requests

# Get the structures
PDBCODE_1 = '2gmx' # Name of the first structure
PDBCODE_2 = '4w4v' # Name of the second structure


def _fetch_first_sequence(pdb_code):
    """Return the sequence of the first molecule PDBe lists for *pdb_code*.

    The PDBe "molecules" endpoint is queried and the ``sequence`` field of the
    first entry is wrapped in a ``Seq``.
    NOTE(review): this assumes the first listed molecule is the protein chain
    of interest - confirm for entries with several molecules.

    Raises:
        requests.HTTPError: if the server answers with an error status.
        requests.Timeout: if the server does not answer within 30 seconds.
    """
    url = f'https://www.ebi.ac.uk/pdbe/api/pdb/entry/molecules/{pdb_code}'
    response = requests.get(url, timeout=30)
    # Fail here with a clear HTTP error instead of a confusing KeyError or
    # JSON decoding error further down.
    response.raise_for_status()
    molecules = response.json()[pdb_code.lower()]
    return Seq(molecules[0]['sequence'])


SEQ1 = _fetch_first_sequence(PDBCODE_1)
SEQ2 = _fetch_first_sequence(PDBCODE_2)

# Global alignment of the two sequences.
alignments = pairwise2.align.globalxx(SEQ1, SEQ2)

# Save the same alignment that is printed below. Looping over all alignments
# and reopening the file each time would leave only the last one on disk.
align1, align2, score, begin, end = alignments[0]
filename = "alignment.fasta"
with open(filename, "w") as handle:
    handle.write(">SEQ1\n%s\n>SEQ2\n%s\n" % (align1, align2))

print(alignments[0])

# `bio_align` comes from the local `scripts` package imported at the top of the
# notebook. NOTE(review): presumably this superimposes the second complex onto
# the first and writes '<id>-complex_transformed.pdb' - verify in the script.
bio_align.run('{}-complex.pdb'.format(TARGET_PDB_ID),'{}-complex.pdb'.format(OFF_TARGET_PDB_ID))

NameError: name 'viewer' is not defined

In [25]:
# Merge the target complex and the transformed off-target complex into one PDB
# file so that both can be shown in the same view.
# NOTE(review): the '-complex_transformed.pdb' file is presumably written by
# bio_align.run in the previous cell - verify.
filenames = [
    '{}-complex.pdb'.format(TARGET_PDB_ID),
    "{}-complex_transformed.pdb".format(OFF_TARGET_PDB_ID),
]
with open('{}-{}-merged.pdb'.format(TARGET_PDB_ID,OFF_TARGET_PDB_ID), 'w') as outfile:
    for fname in filenames:
        with open(fname) as infile:
            # Skip only the terminator records (END / ENDMDL) so that the
            # second file is not ignored by readers. Testing for "END" anywhere
            # in the line would also drop unrelated lines that merely contain
            # those three letters.
            outfile.writelines(
                line for line in infile if not line.startswith("END")
            )

# Load the merged file into a new viewer widget and display it.
with open('{}-{}-merged.pdb'.format(TARGET_PDB_ID,OFF_TARGET_PDB_ID)) as f:
    view = nglview.show_file(f, ext="pdb")

view

NameError: name 'OFF_TARGET_PDB_ID' is not defined