Initialisation for IsoMIF

In [13]:
import subprocess
from pathlib import Path
import os

def make_dir(dirList):
    """Create every directory in ``dirList`` that does not exist yet.

    Missing parent directories are created too, so nested paths work even
    when the parent has not been created beforehand. Paths that already
    exist are left untouched and produce no message.

    Parameters
    ----------
    dirList : iterable of str or pathlib.Path
        Directories to create.
    """
    for dirName in dirList:
        if not os.path.exists(dirName):
            # makedirs (not mkdir) so intermediate directories are created;
            # exist_ok covers the directory appearing between check and call.
            os.makedirs(dirName, exist_ok=True)
            print("Directory " , dirName ,  " Created ")

def cmd(command):
    """Run ``command`` through the shell and wait for it to finish.

    The command's standard output is captured and discarded; nothing is
    returned. A non-zero exit status raises
    ``subprocess.CalledProcessError``.
    """
    subprocess.run(command, shell=True, check=True, stdout=subprocess.PIPE)

def make_hive(ROOT):
    """
    Build the ``hive`` working tree below ``ROOT``.

    ``ROOT/hive`` is listed first, followed by its sub-directories
    (clefts, match, matchView, mifs, mifView, pdb); the whole list is
    handed to ``make_dir`` in that order.
    """
    hive_root = ROOT / 'hive'
    subfolders = ('clefts', 'match', 'matchView', 'mifs', 'mifView', 'pdb')
    make_dir([hive_root] + [hive_root / sub for sub in subfolders])
    
    
# Project root; every other location below is derived from it.
ROOT = Path('.')

# Working tree that holds inputs and all generated files.
HIVE = ROOT / 'hive'
# GSL installation prefix (its lib/ and include/ sub-directories are used when
# compiling isomif). The location is machine-specific, so it can be overridden
# with the GSL_PATH environment variable; the fallback is the original value.
GSL_PATH = Path(os.environ.get(
    'GSL_PATH',
    '/media/anton/b8150e49-6ff0-467b-ad66-40347e8bb188/anton/BACHELOR/gsl'))
PDB_PATH = HIVE / 'pdb'
GET_CLEFT_PATH = ROOT / 'Get_Cleft-master'
ISOMIF_PATH = ROOT / 'IsoMif-master'

# Platform tag embedded in the names of the compiled mif / isomif binaries.
SYSTEM_NAME = 'linux_x86_64'
# Path of the `reduce` program that is run to add hydrogens.
REDUCE_PATH = ISOMIF_PATH / 'reduce.3.23.130521'
MIF_NAME = 'mif_' + SYSTEM_NAME + '_compiled'
ISOMIF_NAME = 'isomif_' + SYSTEM_NAME + '_compiled'

In [3]:
# Create the hive/ working tree under ROOT; directories that already exist are skipped.
make_hive(ROOT)

Downloading the PDB files for each UniProt ID

In [52]:
import pandas as pd
import os
import requests
from pathlib import Path

def download_url(url, path, name):
    '''Download ``url`` and save the response body as ``path/name``.

    The ``path`` directory is created first if it does not exist yet.

    Parameters
    ----------
    url : str
        Address to fetch; redirects are followed.
    path : str or pathlib.Path
        Target directory.
    name : str
        File name inside ``path``.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status. Nothing is written in
        that case, so an error page never ends up saved under ``name``.
    '''
    r = requests.get(url, allow_redirects=True)
    # Fail loudly instead of storing an HTML error page as if it were data.
    r.raise_for_status()
    target_dir = str(path)
    make_dir([target_dir])
    # Context manager guarantees the handle is flushed and closed.
    with open(os.path.join(target_dir, name), 'wb') as out_file:
        out_file.write(r.content)
    
    
Uniprot_list = ('P00533', 'A0A023GPJ0')
for uniprot in Uniprot_list:
    # Fetch the list of PDB IDs mapped to this UniProt accession.
    # NOTE(review): this is UniProt's legacy "uploadlists" endpoint — confirm
    # that it is still served.
    path_uniprot = str(PDB_PATH / uniprot)
    list_name = uniprot + '_pdbs.txt'
    url = 'https://www.uniprot.org/uploadlists/?from=ID&to=PDB_ID&format=list&query=' + uniprot
    download_url(url, path_uniprot, list_name)
    list_file = Path(path_uniprot) / list_name

    # Read the IDs as plain text (first token of each non-blank line).
    # This keeps IDs such as "1E10" as strings instead of letting a CSV
    # parser turn them into floats, and an empty list simply yields no
    # downloads instead of raising.
    pdb_ids = [line.split()[0]
               for line in list_file.read_text().splitlines()
               if line.strip()]
    for pdb_id in pdb_ids:
        name = pdb_id + ".pdb"
        full_name = os.path.join(path_uniprot, name)
        url = "https://files.rcsb.org/download/" + pdb_id + ".pdb"

        # Download only the structures that are not on disk yet.
        if not Path(full_name).is_file():
            download_url(url, path_uniprot, name)

# IsoMIF part

## Compiling Get_Cleft, mif and isomif

In [2]:
# compile Get Cleft; the binary is written into GET_CLEFT_PATH because that is
# where the Get_Cleft invocations further down look for it
cmd('gcc ' + str(GET_CLEFT_PATH / "Get_Cleft.c") + ' -o ' + str(GET_CLEFT_PATH / 'Get_Cleft') + ' -O3 -lm')

# compile mif and isomif for linux_x86_64; isomif needs the path to gsl
# (the space in ' -O3' matters: without it the flag is appended to the output
# file name and the binary is not created as ISOMIF_PATH / MIF_NAME)
cmd('g++ ' + str(ISOMIF_PATH / 'mif.cpp') + ' -o ' + str(ISOMIF_PATH / MIF_NAME) + ' -O3 -lm')
cmd('g++ ' + str(ISOMIF_PATH / 'isomif.cpp') + ' -o ' + str(ISOMIF_PATH / ISOMIF_NAME) + \
    ' -O3 -lm -lgsl -lgslcblas -L' +  str(GSL_PATH / 'lib') + ' -I ' + str(GSL_PATH /'include'))

## Finding the N biggest cavities

In [9]:
# Value handed to Get_Cleft's -t option for every structure.
N_cavities = 3
pdbs = ['1E8X', '1RDQ']
for pdb in pdbs:
    structure_file = PDB_PATH / (pdb + '.pdb')
    cleft_prefix = HIVE / 'clefts' / pdb
    # Assemble the command line token by token; tokens are joined with single spaces.
    cmd(' '.join([
        str(GET_CLEFT_PATH / 'Get_Cleft'),
        '-p', str(structure_file),
        '-o', str(cleft_prefix),
        '-s',
        '-t', str(N_cavities),
    ]))

Variant: select the cleft through its contact residue (here the bound ligand, passed with `-a`)

In [34]:
pdbs = ['1E8X', '1RDQ']
ligands = ['ATP3000A-', 'ATP600EB']

# make clefts: one Get_Cleft run per (structure, ligand) pair, ligand passed via -a
# manual top-N equivalent for reference:
#./Get_Cleft-master/Get_Cleft -p ./hive/pdb/1E8X.pdb -o ./hive/clefts/1E8X -s -t 5 
for pdb, lig in zip(pdbs, ligands):
    print(pdb, lig)
    structure_file = PDB_PATH / (pdb + '.pdb')
    cleft_prefix = HIVE / 'clefts' / pdb
    cmd(' '.join([
        str(GET_CLEFT_PATH / 'Get_Cleft'),
        '-p', str(structure_file),
        '-o', str(cleft_prefix),
        '-s',
        '-a', lig,
    ]))

1E8X ATP3000A-
1RDQ ATP600EB


## Add hydrogens

In [36]:
# add hydrogens: run reduce on <pdb>.pdb and let the shell redirect its
# standard output into <pdb>h.pdb next to the input
for pdb in pdbs:
    source_pdb = PDB_PATH / (pdb + '.pdb')
    protonated_pdb = PDB_PATH / (pdb + 'h.pdb')
    cmd(' '.join([str(REDUCE_PATH), '-p', str(source_pdb), '>', str(protonated_pdb)]))

## Calculating MIF

If the ligand residue is known:

In [41]:
# calculate MIF, cropped around the ligand of each structure
k = 0 # make search by suffix?
# k numbers the (pdb, ligand) pairs from 1 and is used as the cleft file suffix
for k, (pdb, lig) in enumerate(zip(pdbs, ligands), start=1):
    protein_file = PDB_PATH / (pdb + 'h.pdb')
    cleft_file = HIVE / 'clefts' / (pdb + '_' + lig + '_sph_' + str(k) + '.pdb')
    cmd(' '.join([
        str(ISOMIF_PATH / MIF_NAME),
        '-p', str(protein_file),
        '-g', str(cleft_file),
        '-o', str(HIVE / 'mifs'),
        '-l', lig,
        '-r', '3',
        '-t', pdb,
    ]))

With an explicit grid resolution level (level - grid spacing):

0 - 2 Angstroms 

1 - 1.5 Angstroms 

2 - 1.0 Angstroms 

3 - 0.5 Angstroms

In [12]:
# calculate MIF for the numbered clefts of every structure
for pdb in pdbs:
    # NOTE(review): range(1, N_cavities) stops at N_cavities - 1, and every
    # pass reads the same <pdb>h.mif below, so later passes presumably
    # overwrite earlier ones — confirm that this is intended.
    for i in range(1, N_cavities):
        # ' -z 1' replaces the former ' z  1', which handed the program two
        # stray positional tokens instead of an option.
        # NOTE(review): -z is assumed to be the resolution-level switch
        # described in the text above this cell — confirm against mif's usage.
        cmd(str(ISOMIF_PATH / MIF_NAME) + ' -p ' + str(PDB_PATH / (pdb + 'h.pdb')) + ' -g ' + \
            str(HIVE / 'clefts' / (pdb + '_sph_' + str(i) + '.pdb')) + ' -o ' + \
            str(HIVE / 'mifs') + ' -z 1')
        # file for visualisation
        cmd('perl ' + str(ISOMIF_PATH / 'mifView.pl') + ' -m ' + str(HIVE / 'mifs' / (pdb + 'h.mif')) \
           + ' -o ' + str(HIVE / 'mifView'))

## Calculating IsoMIF

In [58]:
# calculate ISOMIF: compare the MIFs of the two hydrogenated structures
pdbh1 = '1E8Xh'
pdbh2 = '1RDQh'
cmd(' '.join([
    str(ISOMIF_PATH / ISOMIF_NAME),
    '-p1', str(HIVE / 'mifs' / (pdbh1 + '.mif')),
    '-p2', str(HIVE / 'mifs' / (pdbh2 + '.mif')),
    # joining with '' appends a trailing path separator to the output directory
    '-o', os.path.join(str(HIVE / 'match'), ''),
    '-c', '1',
    '-d', '2.0',
]))

In [59]:
# Match file written for the pdbh1 / pdbh2 pair; show it, then build its view files.
match_file = HIVE / 'match' / (pdbh1 + '_match_' + pdbh2 + '.isomif')
print(match_file)
cmd(' '.join([
    'perl', str(ISOMIF_PATH / 'isoMifView.pl'),
    '-m', str(match_file),
    # joining with '' appends a trailing path separator to the output directory
    '-o', os.path.join(str(HIVE / 'matchView'), ''),
    '-g', '1',
]))

hive/match/1E8Xh_match_1RDQh.isomif


Read the Tanimoto coefficient from the match file

In [73]:
import mmap

# Locate the first "TANIM" tag in the match file and parse the number after it.
tanim = None  # stays None when the file contains no TANIM tag
marker = b'TANIM'
with open(str(HIVE / 'match' / '1E8Xh_match_1RDQh.isomif'), 'rb', 0) as file, \
     mmap.mmap(file.fileno(), 0, access=mmap.ACCESS_READ) as s:
    pos = s.find(marker)  # single scan instead of searching twice
    if pos != -1:
        # Take the first whitespace-delimited token after the tag rather than
        # a fixed six-character slice, so values of any width parse correctly.
        window = s[pos + len(marker): pos + len(marker) + 64].decode("utf-8", errors="replace")
        tokens = window.lstrip(" \t:=").split()
        if tokens:
            tanim = float(tokens[0])

# Show the parsed coefficient as the cell result.
tanim

0.0369
