# Model crystallographic structures using Modeller

- This notebook models the missing loops in the PDB structures of the target protein.

In [1]:
from pathlib import Path
from glob import glob
from prody import parsePDB
import sys
sys.path.append('../..')
from helper_modules.run_modeller import *
from helper_modules.get_pdb_ids_from_uniport import *

### Inputs
- Define some basic properties of the target protein.

In [2]:
# Name of the target protein
prot_name = "cdk2"
# UniProt accession of the target; used below to retrieve its sequence
uniprot_id = "P24941"

In [3]:
# Retrieve the sequence associated with the UniProt accession
# (helper comes from helper_modules.get_pdb_ids_from_uniport).
seq_prot = get_seq_from_uniprot(uniprot_id)

# Show the sequence, then a blank line, then its length.
print(
    seq_prot,
    f'\nThere are {len(seq_prot)} residues.',
    sep = '\n'
)

MENFQKVEKIGEGTYGVVYKARNKLTGEVVALKKIRLDTETEGVPSTAIREISLLKELNHPNIVKLLDVIHTENKLYLVFEFLHQDLKKFMDASALTGIPLPLIKSYLFQLLQGLAFCHSHRVLHRDLKPQNLLINTEGAIKLADFGLARAFGVPVRTYTHEVVTLWYRAPEILLGCKYYSTAVDIWSLGCIFAEMVTRRALFPGDSEIDQLFRIFRTLGTPDEVVWPGVTSMPDYKPSFPKWARQDFSKVVPPLDEDGRSLLSQMLHYDPNKRISAKAALAHPFFQDVTKPVPHLRL

There are 298 residues.


## Start the Modelling process
### Define the input and output directories

In [4]:
# Root folder that holds both the input and the output structures
OUT_MAIN = './pdb_structures'

# Input: collect every matching file inside the chains folder,
# ordered alphabetically so runs are processed in a stable order
INPUT_DIR = f'{OUT_MAIN}/pdb_chains'
input_files = glob(f'{INPUT_DIR}/*pdb')
input_files.sort()

# Output: folder for the modeled structures, created when it does not exist
OUTPUT_DIR = f'{OUT_MAIN}/pdb_modeled'
Path(OUTPUT_DIR).mkdir(exist_ok = True, parents = True)

In [None]:
# Model all molecules
# Every file collected in `input_files` is passed to `run_modeller` together
# with the full UniProt sequence (`seq_prot`); results are written under
# `OUTPUT_DIR`. `run_modeller` receives the file path directly, so the
# structure is not parsed separately in this loop.
for pdb_file in input_files:
    run_modeller(
        pdb_file = pdb_file,
        seq_prot = seq_prot,
        output_dir = OUTPUT_DIR,
        # NOTE(review): the settings below are forwarded unchanged to the
        # project helper; their exact meaning is defined in
        # helper_modules.run_modeller -- confirm there before tuning.
        keep_original_resnum = True,
        num_res_window = 2,
        max_var_iterations = 500,
        repeat_optimization = 2,
        # presumably the chain identifier of the input files
        # (e.g. 1aq1_A.pdb) -- TODO confirm
        chid = 'A',
        verbose = False
    )

['MET' 'MET' 'MET' ... 'LEU' 'LEU' 'LEU']
./pdb_structures/pdb_chains/1aq1_A.pdb
MENFQKVEKIGEGTYGVVYKARNKLTGEVVALKKIVPSTAIREISLLKELNHPNIVKLLDVIHTENKLYLVFEFLHQDLKKFMDASALTGIPLPLIKSYLFQLLQGLAFCHSHRVLHRDLKPQNLLINTEGAIKLADFGLEVVTLWYRAPEILLGCKYYSTAVDIWSLGCIFAEMVTRRALFPGDSEIDQLFRIFRTLGTPDEVVWPGVTSMPDYKPSFPKWARQDFSKVVPPLDEDGRSLLSQMLHYDPNKRISAKAALAHPFFQDVTKPVPHLRL

                         MODELLER 10.1, 2021/03/12, r12156

     PROTEIN STRUCTURE MODELLING BY SATISFACTION OF SPATIAL RESTRAINTS


                     Copyright(c) 1989-2021 Andrej Sali
                            All Rights Reserved

                             Written by A. Sali
                               with help from
              B. Webb, M.S. Madhusudhan, M-Y. Shen, G.Q. Dong,
          M.A. Marti-Renom, N. Eswar, F. Alber, M. Topf, B. Oliva,
             A. Fiser, R. Sanchez, B. Yerkovich, A. Badretdinov,
                     F. Melo, J.P. Overington, E. Feyfant
                 University of California, San Francisc

Finished!