In [13]:
import os, errno
from sys import platform
import importlib
import re
import shutil
import tarfile
# import nglview as nv
from Bio import pairwise2
from Bio import SeqIO
import MDAnalysis as mda
from MDAnalysis.analysis import align
from modeller_script.evaluate_modeller import evaluate_modeller

In [2]:
def create_directory(directory):
    """Create ``directory`` (including missing parents) and return its path.

    Calling it for a directory that already exists is a no-op, so cells that
    use it can be re-run safely.

    Args:
        directory: Path of the directory to create.

    Returns:
        The ``directory`` argument, unchanged.

    Raises:
        OSError: If the directory cannot be created, including the case where
            the path already exists but is not a directory.
    """
    # exist_ok=True replaces the manual errno.EEXIST check.
    os.makedirs(directory, exist_ok=True)
    return directory

In [3]:
def move_files(source, dest, file_name):
    """Move every entry of ``source`` whose name matches ``file_name`` to ``dest``.

    Args:
        source: Directory to scan. A trailing path separator is optional.
        dest: Destination handed to ``shutil.move``.
        file_name: Regular expression applied with ``re.match``, i.e. it is
            anchored at the start of each entry name.
    """
    for entry in os.listdir(source):
        if re.match(file_name, entry):
            # os.path.join instead of source+entry: plain concatenation built a
            # wrong path whenever source had no trailing separator.
            shutil.move(os.path.join(source, entry), dest)

In [14]:
# Folder from which later cells pick up manually downloaded files
# (*_fill.pdb, charmm-gui.tgz). Resolved from the current user's home so the
# notebook is not tied to one machine's absolute path.
DOWNLOADS = os.path.join(os.path.expanduser('~'), 'Downloads')

# Prefix test so that platform strings such as 'linux2' are recognised too.
is_linux = platform.startswith('linux')

# Raw string: the backslash before the space is kept literally (same value as
# before, without the invalid-escape warning of a plain string literal).
vmd_mac = r'/Applications/VMD\ 1.9.3.app/Contents/Resources/VMD.app/Contents/MacOS/VMD'
vmd_linux = '/net/opt/bin/vmd'
vmd = vmd_linux if is_linux else vmd_mac

chimera_mac = '/Applications/Chimera.app/Contents/MacOS/chimera'
chimera_linux = '/home/bertalae93/.local/UCSF-Chimera64-1.13.1/bin/chimera'
chimra_linux = chimera_linux  # old, misspelled name kept as an alias
chimera = chimera_linux if is_linux else chimera_mac

### 1. prepare directory

In [6]:
PDB_CODE = '4N6H'  # PDB entry ID; used to build file names and the RCSB download URLs
CHAIN = 'A'  # chain identifier passed to the FASTA download and to get_seq.py
ID = 'Na'  # suffix appended to PDB_CODE to name the working folder

In [7]:
# Working folder for this system: <PDB_CODE><ID>, located one level above the
# directory the notebook runs in.
folder_name = PDB_CODE+ID
path = '../'+folder_name

In [8]:
# Create the working folder and keep its path; the later cells build every
# file location from `directory`.
directory = create_directory(path)

In [9]:
# Sub-folders for results and plots inside the working folder.
create_directory(directory+'/results')
create_directory(directory+'/results/plots')

'../4N6HNa/results/plots'

### 2. download pdb file and fasta sequence:

In [10]:
# Download <PDB_CODE>.pdb from RCSB into the working folder
# (curl -O saves it under the remote file name).
file_name = PDB_CODE+'.pdb'
! (cd $directory && curl -O https://files.rcsb.org/download/$file_name)

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  624k    0  624k    0     0   471k      0 --:--:--  0:00:01 --:--:--  471k


In [11]:
# Download the FASTA sequence of the selected chain to <directory>/<PDB_CODE>.fasta.
# The URL is wrapped in double quotes so the shell does not interpret '&'.
# NOTE(review): downloadFile.do looks like a legacy RCSB endpoint - confirm it is still served.
fasta_path = directory+'/'+PDB_CODE+'.fasta'
fasta_url = '"'+'https://www.rcsb.org/pdb/download/downloadFile.do?fileFormat=fastachain&compression=NO&structureId='+PDB_CODE+'&chainId='+CHAIN+'"'
! curl -o $fasta_path $fasta_url

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100   449    0   449    0     0    910      0 --:--:-- --:--:-- --:--:--   908


#### check pdb in chimera:

In [16]:
# Open the downloaded structure in Chimera for a visual check.
! $chimera $directory/$file_name

### 3. create sequence:

In [None]:
# Run get_seq.py with MODELLER (mod9.21) from inside the working folder.
# create_alignment later reads <PDB_CODE>.seq from that folder; presumably this
# script writes it - TODO confirm.
! (cd $directory && mod9.21 ../code/modeller_script/get_seq.py $PDB_CODE $CHAIN)

### 4. create alignment:

In [None]:
def create_alignment(pdb_code, chain, directory):
    """Write ``<directory>/alignment.ali`` from the ``.seq`` and ``.fasta`` files.

    The first three lines of ``<directory>/<pdb_code>.seq`` are copied verbatim
    (assumed to be the header of the template entry - TODO confirm). The
    remaining lines are globally aligned against the FASTA sequence, and the
    aligned form of the ``.seq`` sequence is written as the template. A second
    entry, ``>P1;<pdb_code>_fill``, holds the full FASTA sequence as the target.

    Args:
        pdb_code: PDB entry ID used to locate ``<pdb_code>.seq`` and
            ``<pdb_code>.fasta`` inside ``directory``.
        chain: Currently unused; kept so existing calls keep working.
        directory: Folder containing the input files; the alignment file is
            written there as well.
    """
    sequence_path = directory+'/'+pdb_code+'.seq'
    ali_path = directory+'/alignment.ali'

    # Use the pdb_code argument rather than the notebook-level PDB_CODE so the
    # function reads the FASTA file of the entry it was actually called for.
    fasta = SeqIO.read(directory+'/'+pdb_code+'.fasta', 'fasta').seq

    header_lines = []
    sequence = ''
    with open(sequence_path) as sequence_file:
        for i, line in enumerate(sequence_file):
            if i < 3:
                header_lines.append(line)
            else:
                # Line breaks of the .seq file are kept inside `sequence`.
                sequence += line

    # Drop the last two characters (presumably the terminating '*' and the
    # final newline of the .seq file - TODO confirm).
    sequence = sequence[:-2]
    alignments = pairwise2.align.globalxx(fasta, sequence)

    # The output is opened only after the alignment succeeded and is closed
    # even if a write fails.
    with open(ali_path, 'w') as alignment_file:
        alignment_file.writelines(header_lines)
        # Index 1 of the first alignment is the aligned .seq sequence.
        alignment_file.write(alignments[0][1])
        alignment_file.write('*\n')
        alignment_file.write('>P1;'+pdb_code+'_fill\n')
        alignment_file.write('sequence:::::::::\n')
        alignment_file.write(str(fasta)+'*')

In [None]:
create_alignment(PDB_CODE, CHAIN, directory)

### 5. compose template and target sequence:
	template: >P1;6b73
				structureX:6b73:
				- - - for missing residues
	target: >P1;6b73_fill
				sequence:::::::::
				insert the residues where they are missing;
				- - - for residues that are not needed

In [None]:
! open -e $ali_path
! /Applications/Chimera.app/Contents/MacOS/chimera $directory/$file_name

### 6. run modeller:
 * knowns = sys.argv[1]
 * sequence = sys.argv[2]
 * num_models = sys.argv[3]
 * num_loops = sys.argv[4]

In [None]:
# Run loopmodel.py with MODELLER from inside the working folder.
# Arguments, in order: knowns, sequence, num_models (20), num_loops (4).
pdb_fill = PDB_CODE+'_fill'
! (cd $directory && mod9.21 ../code/modeller_script/loopmodel.py $PDB_CODE $pdb_fill 20 4)

### 7. evaluate modeller result:

In [None]:
# file_name is reused from here on as the prefix '<PDB_CODE>_fill.' (it no
# longer holds the name of the downloaded PDB file).
file_name = pdb_fill+'.'
# NOTE(review): best_model / best_loop are used below as file-name patterns -
# confirm what evaluate_modeller returns for best_loop when loop=False.
best_model, best_loop = evaluate_modeller(file_name, loop=False)

In [None]:
! /Applications/Chimera.app/Contents/MacOS/chimera $directory/$best_model $directory/$best_loop

In [None]:
def copy_best(source, dest, file_name, best):
    """Copy the files in ``source`` that match ``best`` to ``dest`` as ``<file_name>pdb``.

    Args:
        source: Directory to scan.
        dest: Directory receiving the copy. A trailing path separator is optional.
        file_name: Name of the copy without the extension; ``'pdb'`` is
            appended as-is, so it is expected to end with a dot.
        best: Regular expression applied with ``re.match``, i.e. it is anchored
            at the start of each entry name. If several entries match, each is
            copied to the same target in turn.
    """
    # os.path.join instead of dest+file_name: plain concatenation required
    # dest to end with a separator.
    target = os.path.join(dest, file_name+'pdb')
    for entry in os.listdir(source):
        if re.match(best, entry):
            shutil.copy(os.path.join(source, entry), target)

In [None]:
# Move every '<PDB_CODE>_fill.*' file into <directory>/models, then copy the
# best model and best loop model back as <PDB_CODE>_fill.pdb and
# <PDB_CODE>_fill_loop.pdb.
model_folder = create_directory(directory+'/models')
# move_files takes (source, dest, file_name); the extra best_model argument
# that used to be passed here raised a TypeError.
move_files(directory+'/', model_folder, file_name)
copy_best(model_folder, directory+'/', file_name, best_model)
copy_best(model_folder, directory+'/', file_name[:-1]+'_loop.', best_loop)

In [None]:
# Keep a copy of the loop-modelling log next to the archived models.
! cp ./modeller_script/loopmodel.log $model_folder

### 8. upload to OPM:
PDB_CODE_fill.pdb
https://opm.phar.umich.edu/ppm_server

In [17]:
# Copy the *_fill.pdb file found in DOWNLOADS into the working folder as
# <PDB_CODE>_fill_opm.pdb.
# NOTE(review): the glob must match exactly one file, otherwise cp fails.
new_opm_name = directory+'/'+PDB_CODE+'_fill_opm.pdb'
! cp $DOWNLOADS/*_fill.pdb $new_opm_name

cp: cannot stat '/Users/evabertalan/Downloads/*_fill.pdb': No such file or directory


In [None]:
# Same as above for the loop model: copy *_fill_loop.pdb from DOWNLOADS to
# <PDB_CODE>_fill_loop_opm.pdb in the working folder.
new_loop_opm_name = directory+'/'+PDB_CODE+'_fill_loop_opm.pdb'
! cp $DOWNLOADS/*_fill_loop.pdb $new_loop_opm_name

In [None]:
# Remove the downloaded files from DOWNLOADS; run only after the copies above
# succeeded, because this deletes the only other copy.
! rm $DOWNLOADS/*_fill.pdb
! rm $DOWNLOADS/*_fill_loop.pdb

### 9. compare the oriented structure with the original opm:

In [18]:
original_opm = '../opm/'+PDB_CODE+'.pdb'

In [None]:
! /Applications/Chimera.app/Contents/MacOS/chimera $directory/$new_opm_name $original_opm

### 10. remove HETATMs from pdb:

In [23]:
# new_opm_name = '../4N6HNa/4N6HNa_opm.pdb'

In [24]:
# Copy the oriented structure to <PDB_CODE>_inp.pdb without the lines that
# start with 'HETATM', then append a final 'END'. Both files are handled by
# the same `with` statement so they are closed even if a write fails.
with open(new_opm_name) as opm_file, open(directory+'/'+PDB_CODE+'_inp.pdb', 'w') as input_file:
    for line in opm_file:
        if not line.startswith('HETATM'):
            input_file.write(line)
    input_file.write('END')

In [25]:
inp_file = PDB_CODE+'_inp.pdb'
! open -e $directory/$inp_file

Couldn't get a file descriptor referring to the console


In [None]:
# NOTE(review): `nv` refers to nglview, whose import in the first cell is
# commented out; this cell raises NameError unless `import nglview as nv` is restored.
view = nv.show_file(directory+'/'+PDB_CODE+'_inp.pdb')
view

### 11. upload to charmm-gui:
http://www.charmm-gui.org/?doc=input/membrane

#### after step 3 check packing:

In [None]:
# Extract only the step3_packing.pdb member(s) of the downloaded archive into
# ./temp. The `with` block closes the archive when extraction is done.
with tarfile.open(DOWNLOADS+'/charmm-gui.tgz', 'r:gz') as tar:
    for member in tar.getmembers():
        if re.search('step3_packing.pdb', member.name):
            tar.extract(member, 'temp')
# The archive unpacks into a folder whose name starts with 'charmm-gui';
# the first such folder in ./temp is used.
f = [i for i in os.listdir('temp') if re.match('charmm-gui', i)]
step3_pdb = 'temp/'+f[0]+'/step3_packing.pdb'

In [None]:
! /Applications/Chimera.app/Contents/MacOS/chimera $step3_pdb $original_opm

In [None]:
! rm $DOWNLOADS/charmm-gui.tgz

### 12. prepare charmm-gui output for NAMD

In [None]:
# The packing-check cells end by deleting charmm-gui.tgz from DOWNLOADS, so a
# newly downloaded archive (presumably the finished CHARMM-GUI job - TODO
# confirm) must be present there before this copy.
! cp $DOWNLOADS/charmm-gui.tgz $directory

In [None]:
# Unpack the whole CHARMM-GUI archive into the working folder; the `with`
# block closes the archive afterwards.
with tarfile.open(directory+'/charmm-gui.tgz', 'r:gz') as tar:
    tar.extractall(directory)
# First entry of the working folder whose name starts with 'charmm-gui-'.
charmm_folder = [i for i in os.listdir(directory) if re.match('charmm-gui-', i)][0]
# `directory` has no trailing separator, so one is added here; without it the
# path pointed to a non-existent '<directory>charmm-gui-...' folder.
namd_folder = directory+'/'+charmm_folder+'/namd/'
# Equilibration inputs (step6.*_equilibration.inp), sorted by name.
inp_files = sorted([namd_folder+i for i in os.listdir(namd_folder) if re.match(r'(step6.).*\_equilibration.inp$', i)])
prod_file = namd_folder+'step7.1_production.inp'

* create folder named: FOLDERNAME_inp
* and copy all required files for namd on cluster
* copy folder to cluster
* run simulation

In [None]:
# Values written into the NAMD input files by write_namd_input.
PMEGridSize = '120'  # used for PMEGridSizeX/Y/Z, which replace the PMEGridSpacing line
langevinDamping = '5.0'  # replaces the value on the langevinDamping line

In [None]:
def _namd_replacement(line):
    """Return the text that takes the place of ``line`` in a NAMD input file.

    Lines starting with one of the handled keywords are replaced; every other
    line is returned unchanged.
    """
    if re.match('wrapWater', line):
        return 'wrapWater   off \n'
    if re.match('wrapAll', line):
        return 'wrapAll   off \n'
    if re.match('wrapNearest', line):
        return 'wrapNearest   off \n'
    if re.match('PMEGridSpacing', line):
        # One grid-size line per axis replaces the single spacing line.
        return (
            'PMEGridSizeX   '+PMEGridSize+' \n'
            + 'PMEGridSizeY   '+PMEGridSize+' \n'
            + 'PMEGridSizeZ   '+PMEGridSize+' \n'
        )
    if re.match('langevinDamping', line):
        return 'langevinDamping   '+langevinDamping+' \n'
    return line


def write_namd_input(inp_files):
    """Rewrite each NAMD input file in ``inp_files`` in place.

    The wrapWater, wrapAll and wrapNearest options are switched off, the
    PMEGridSpacing line is replaced by PMEGridSizeX/Y/Z lines built from the
    notebook-level ``PMEGridSize``, and the langevinDamping value is set from
    the notebook-level ``langevinDamping``. Only lines that start with the
    keyword are touched. Every original line is also printed.

    Args:
        inp_files: Iterable of paths to the input files to modify.
    """
    for inp_path in inp_files:
        with open(inp_path, 'r+') as handle:
            rewritten = []
            for original_line in handle.readlines():
                rewritten.append(_namd_replacement(original_line))
                print(original_line)
            # Overwrite from the start and cut off whatever is left of the
            # old content.
            handle.seek(0)
            handle.write(''.join(rewritten))
            handle.truncate()

In [None]:
write_namd_input(inp_files)

In [None]:
# write_namd_input([prod_file])

### 13. create job.sh

In [None]:
def create_job_script(cluster, step):
    """Copy ``job_scripts/<cluster>_<step>.sh`` to ``<namd_folder><step>_job.sh``.

    The copy is made once for every entry of ``job_scripts`` whose name
    matches ``<cluster>_<step>.sh`` (used as a ``re.match`` pattern); nothing
    is copied when there is no match. ``namd_folder`` is the notebook-level
    variable.

    Args:
        cluster: Cluster name that prefixes the script file name.
        step: Simulation step; part of both the source and the target name.
    """
    available = os.listdir('job_scripts')
    script_name = cluster+'_'+step+'.sh'
    matching = [entry for entry in available if re.match(script_name, entry)]
    for _ in matching:
        shutil.copy('job_scripts/'+script_name, namd_folder+step+'_job.sh')

* cluster = 'hlrn', 'leonard'
* step = 'eq', 'prod'

In [None]:
 create_job_script('hlrn', 'eq')

### 14. upload charmm folder to cluster

* check input files
* set numsteps
* update job.sh
* scp to cluster
* run namd

In [26]:
print(len('SPGARSASSLALAIAITALYSAVCAVGLLGNVLVMFGIVRYTKMKTATNIYIFNLALADALATSTLPFQSAKYLMETWPFGELLCKAVLSIDYYNMFTSIFTLTMMSVDRYIAVCHPVKALDFRTPAKAKLINICIWVLASGVGVPIMVMAVTRPRDGAVVCMLQFPSPSWYWDTVTKICVFLFAFVVPILIITVCYGLMLLRLRSVRLLSGSKEKDRSLRRITRMVLVVVGAFVVCWAPIHIFVIVWTLVDIDRRDPLVVAALHLCIALGYANSSLNPVLYAFLD'))

286


In [27]:
print(len('SPGARSASSLALAIAITALYSAVCAVGLLGNVLVMFGIVRYTKMKTATNIYIFNLALADALATSTLPFQSAKYLMETWPFGELLCKAVLSIDYYNMFTSIFTLTMMSVDRYIAVCHPVKALDFRTPAKAKLINICIWVLASGVGVPIMVMAVTRPRDGAVVCMLQFPSPSWYWDTVTKICVFLFAFVVPILIITVCYGLMLLRLRSVRLLSGSKEKDRSLRRITRMVLVVVGAFVVCWAPIHIFVIVWTLVDIDRRDPLVVAALHLCIALGYANSSLNPVLYAFLD'))

286
