# Prepare protein conformations using `pdb4amber`

- After modeling the missing regions of the PDB structures, we will use `pdb4amber` to add missing atoms.

In [1]:
from pathlib import Path
from glob import glob
import pdb4amber
import os

- List the input files.
- Define the output directory.

In [2]:
# Root folder shared by the input and output directories of this notebook
OUT_MAIN   = './pdb_structures'

# Inputs: every modeled pdb file found in the input folder, in sorted order
INPUT_DIR = f'{OUT_MAIN}/pdb_modeled'
input_files = glob(f'{INPUT_DIR}/*pdb')
input_files.sort()

# Outputs: make sure the destination folder exists (no error if it already does)
OUTPUT_DIR = f'{OUT_MAIN}/pdb_prepared'
os.makedirs(OUTPUT_DIR, exist_ok = True)

<h3 style='color: black; background-color: #F9E5AB; padding: 5px;'>
    Important!
</h3>

#### For EGFR only:

For this protein only, we remove the first 35 residues from all models, because this region is not present in every PDB entry and was not modeled correctly. We assume that removing this region has no impact on the docking simulations.

#### Visualize the region to be removed

In [3]:
import nglview as nv
import pytraj as pyt
import numpy as np
import prody

# Reference structure and the ligand used to locate the binding pocket
ref_struc_id    = '7a2a'
ref_ligand_name = '7G9'

# Parse the reference entry and keep chain A only
ref_prot = prody.parsePDB(ref_struc_id).select('chain A')

# Atoms selected around the reference ligand
# NOTE(review): the precedence of `within ... of ... and protein` in the
# ProDy selection grammar should be confirmed.
pocket_sel = ref_prot.select(
    f'within 7 of resname {ref_ligand_name} and protein')

# Shift the unique pocket residue numbers by a fixed offset and store them
# as strings.
# NOTE(review): presumably 675 maps the reference numbering onto the
# numbering of the modeled structures -- confirm.
start_position = 675
pocket_residues = list(
    map(lambda resnum: str(resnum - start_position),
        np.unique(pocket_sel.getResnums()))
)
pocket_residues_str = ' '.join(pocket_residues)

# Residue number of the first atom of the reference ligand
lig_resnum = ref_prot.select(f'resname {ref_ligand_name}').getResnums()[0]

# Load the models using pytraj; the first input file doubles as topology
crys_ensamble = pyt.iterload(input_files, top = input_files[0])

# Build the viewer: one cartoon for indices 0-274 in white, another for
# indices 0-34 (the region to be removed) in red, and the pocket residues
# as cyan licorice
view = nv.show_pytraj(crys_ensamble)
view.clear_representations()
view.add_cartoon(selection = str(list(range(275))), color = 'white')
view.add_cartoon(selection = str(list(range(35))), color = 'red')
view.add_licorice(selection = pocket_residues_str, color = 'cyan')
view



NGLWidget(max_frame=174)

In [7]:
# Temporary folder that receives the trimmed copy of every model
OUTPUT_DIR_TEMP = f'{OUT_MAIN}/pdb_prepared_temp'
Path(OUTPUT_DIR_TEMP).mkdir(parents = True, exist_ok = True)

# Apply the residue mask once instead of on every loop iteration: indexing
# the ensemble with a mask builds a new stripped trajectory each time, so
# doing it inside the loop repeats the same work for every file.
# NOTE(review): Amber masks number residues from 1, so ':35-275' appears to
# keep residue 35 and drop only the first 34 residues, while the text above
# says the first 35 are removed -- confirm whether ':36-275' was intended.
trimmed_ensamble = crys_ensamble[':35-275']

# Frame `i` of the ensemble corresponds to the i-th input file, so each
# frame is written under the name of the file it was loaded from.
for i, file in enumerate(input_files):
    name = Path(file).name
    pyt.write_traj(filename = f'{OUTPUT_DIR_TEMP}/{name}',
                   traj = trimmed_ensamble,
                   frame_indices = [i],
                   # allow the cell to be re-run when the files already exist
                   overwrite = True)

### Perform the preparation

In [8]:
# Give the saved structures as input
input_files = sorted(glob(f'{OUTPUT_DIR_TEMP}/*pdb'))

for pdb_file in input_files:
    pdb_id = Path(pdb_file).stem.rsplit('_')[0]
    print(F"Preparing the {pdb_id} structure.")

    ensamble_file = f'{OUTPUT_DIR}/{pdb_id}_ENS.pdb'
    pdb4amber.run(arg_pdbin = pdb_file,
                  arg_add_missing_atoms = True, 
                  arg_pdbout = ensamble_file)
    # Remove the unnecessary files
    os.remove(f'{OUTPUT_DIR}/{pdb_id}_ENS_nonprot.pdb')
    os.remove(f'{OUTPUT_DIR}/{pdb_id}_ENS_sslink')
    os.remove(f'{OUTPUT_DIR}/{pdb_id}_ENS_renum.txt')

Preparing the 1m14 structure.
Preparing the 1m17 structure.
Preparing the 1xkk structure.
Preparing the 2eb2 structure.
Preparing the 2eb3 structure.
Preparing the 2gs2 structure.
Preparing the 2gs6 structure.
Preparing the 2gs7 structure.
Preparing the 2itn structure.
Preparing the 2ito structure.
Preparing the 2itp structure.
Preparing the 2itq structure.
Preparing the 2itt structure.
Preparing the 2itu structure.
Preparing the 2itv structure.
Preparing the 2itw structure.
Preparing the 2itx structure.
Preparing the 2ity structure.
Preparing the 2itz structure.
Preparing the 2j5e structure.
Preparing the 2j5f structure.
Preparing the 2j6m structure.
Preparing the 2jit structure.
Preparing the 2jiu structure.
Preparing the 2jiv structure.
Preparing the 2rf9 structure.
Preparing the 2rfd structure.
Preparing the 2rfe structure.
Preparing the 2rgp structure.
Preparing the 3bel structure.
Preparing the 3gop structure.
Preparing the 3gt8 structure.
Preparing the 3ika structure.
Preparing 