# Model crystallographic structures using Modeller

In [1]:
from pathlib import Path
from glob import glob
from prody import parsePDB
import sys
sys.path.append('../..')
from helper_modules.run_modeller import *
from helper_modules.get_pdb_ids_from_uniport import *

- Define some basic properties of the target protein.

In [2]:
# Short label for the target protein
prot_name = 'fxa'
# UniProt accession, passed to get_seq_from_uniprot in the next cell
uniprot_id = 'P00742'

In [3]:
# Retrieve the sequence for the UniProt accession through the project helper,
# then show it followed by a blank line and its residue count.
seq_prot = get_seq_from_uniprot(uniprot_id)
print(seq_prot, f'\nThere are {len(seq_prot)} residues.', sep = '\n')

MGRPLHLVLLSASLAGLLLLGESLFIRREQANNILARVTRANSFLEEMKKGHLERECMEETCSYEEAREVFEDSDKTNEFWNKYKDGDQCETSPCQNQGKCKDGLGEYTCTCLEGFEGKNCELFTRKLCSLDNGDCDQFCHEEQNSVVCSCARGYTLADNGKACIPTGPYPCGKQTLERRKRSVAQATSSSGEAPDSITWKPYDAADLDPTENPFDLLDFNQTQPERGDNNLTRIVGGQECKDGECPWQALLINEENEGFCGGTILSEFYILTAAHCLYQAKRFKVRVGDRNTEQEEGGEAVHEVEVVIKHNRFTKETYDFDIAVLRLKTPITFRMNVAPACLPERDWAESTLMTQKTGIVSGFGRTHEKGRQSTRLKMLEVPYVDRNSCKLSSSFIITQNMFCAGYDTKQEDACQGDSGGPHVTRFKDTYFVTGIVSWGEGCARKGKYGIYTKVTAFLKWIDRSMKTRGLPKAKSHAPEVITSSPLK

There are 488 residues.


<h3 style='color: black; background-color: #F9E5AB; padding: 5px;'>
    Important!
</h3>

- We will use a subsequence of the FXa protein, as we are only interested in modeling the protein's active site.

In [4]:
# Subsequence of FXa used for modelling. The literal is wrapped over several
# indented lines for readability; split()/join strips every line break and
# space so only the residue letters remain.
seq_prot = ''.join('''IVGGQECKDGECPWQALLINEENEGFCGGTILSEFYILTAAHCLYQAKR
              FKVRVGDRNTEQEEGGEAVHEVEVVIKHNRFTKETYDFDIAVLRLKTPI
              TFRMNVAPACLPERDWAESTLMTQKTGIVSGFGRTHEKGRQSTRLKMLE
              VPYVDRNSCKLSSSFIITQNMFCAGYDTKQEDACQGDSGGPHVTRFKDT
              YFVTGIVSWGEGCARKGKYGIYTKVTAFLKWIDRSMKTRGLPKAKS'''.split())
# Display the number of residues as the cell output
len(seq_prot)

242

## Start the Modelling process
### Define the input and output directories

In [5]:
# Root folder holding the input and output structure directories
OUT_MAIN   = './pdb_structures'

# Input PDB files to be modelled. The pattern requires a literal `.pdb`
# extension so that unrelated names that merely end in "pdb" are not picked up.
INPUT_DIR = f'{OUT_MAIN}/pdb_chains'
input_files = sorted(glob(f'{INPUT_DIR}/*.pdb'))
# Report how many inputs were found: an empty list would otherwise make the
# modelling loop below silently do nothing.
print(f'Found {len(input_files)} input PDB file(s) in {INPUT_DIR}')

# Output directory for the modelled structures (created if missing)
OUTPUT_DIR = f'{OUT_MAIN}/pdb_modeled'
Path(OUTPUT_DIR).mkdir(parents = True, exist_ok = True)

In [6]:
# Model every input structure against the target subsequence `seq_prot`,
# writing the results under OUTPUT_DIR.
for pdb_file in input_files:
    # run_modeller receives the file path directly, so the structure is not
    # parsed here beforehand.
    # NOTE(review): the meaning of the tuning arguments (num_res_window,
    # max_var_iterations, repeat_optimization) is defined in
    # helper_modules.run_modeller — confirm there before changing them.
    run_modeller(
        pdb_file = pdb_file,
        seq_prot = seq_prot,
        output_dir = OUTPUT_DIR,
        keep_original_resnum = True,
        num_res_window = 2,
        max_var_iterations = 500,
        repeat_optimization = 2,
        chid = 'A',
        verbose = False
    )

['ILE' 'ILE' 'ILE' ... 'LYS' 'LYS' 'LYS']
./pdb_structures/pdb_chains/1c5m_A.pdb
IVGGQECKDGECPWQALLINEENEGFCGGTILSEFYILTAAHCLYQAKRFKVRVGDRNTEQEEGGEAVHEVEVVIKHNRFTKETYDFDIAVLRLKTPITFRMNVAPACLPERDWAESTLMTQKTGIVSGFGRTHEKGRQSTRLKMLEVPYVDRNSCKLSSSFIITQNMFCAGYDTKQEDACQGDSGGPHVTRFKDTYFVTGIVSWGEGCARKGKYGIYTKVTAFLKWIDRSMKTRGLPKAK

                         MODELLER 10.1, 2021/03/12, r12156

     PROTEIN STRUCTURE MODELLING BY SATISFACTION OF SPATIAL RESTRAINTS


                     Copyright(c) 1989-2021 Andrej Sali
                            All Rights Reserved

                             Written by A. Sali
                               with help from
              B. Webb, M.S. Madhusudhan, M-Y. Shen, G.Q. Dong,
          M.A. Marti-Renom, N. Eswar, F. Alber, M. Topf, B. Oliva,
             A. Fiser, R. Sanchez, B. Yerkovich, A. Badretdinov,
                     F. Melo, J.P. Overington, E. Feyfant
                 University of California, San Francisco, USA
                    Rockefell

Finished!