In [1]:
import MDAnalysis as mda
import numpy as np
import os
from utils import *
import warnings 

# Suppress warnings specific to MDAnalysis
warnings.filterwarnings("ignore", category=UserWarning, module="MDAnalysis")

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
def atom_clash_objective(universe,atom_indices,threshold):
    """Count the atoms that lie within ``threshold`` of the given atoms.

    Parameters
    ----------
    universe : object exposing ``select_atoms(selection_string)``
        In this notebook, an MDAnalysis Universe.
    atom_indices : iterable of int
        Atom indices handed to the MDAnalysis ``index`` selection keyword,
        which is 0-based (``bynum`` is the 1-based variant).
    threshold : float
        Cutoff distance handed to the ``around`` selection keyword.

    Returns
    -------
    int
        Number of atoms returned by the ``around`` selection, or 0 when
        ``atom_indices`` is empty.
    """
    index_tokens = [str(index) for index in atom_indices]
    # Without reference atoms the selection string would end in a bare
    # "index" keyword, which cannot be parsed; nothing can clash, so report 0.
    if not index_tokens:
        return 0
    selection = "around " + str(threshold) + " index " + " ".join(index_tokens)
    return len(universe.select_atoms(selection))

In [3]:
def determine_index_shift(original_u,merged_u,original_indexes):
    """Return the offset of ``original_u``'s atoms inside ``merged_u``.

    Only the first atom of ``original_u`` is compared, by exact coordinate
    equality, against two candidate slots of ``merged_u``: slot 0 (original
    atoms were merged first) and the slot equal to the difference in atom
    counts (original atoms were merged last).

    ``original_indexes`` is accepted for call compatibility but not used.

    Returns 0, the atom-count difference, or None when neither slot matches.
    """
    first_original_position = original_u.atoms[0].position

    # original atoms were added first
    if np.all(merged_u.atoms[0].position == first_original_position):
        return 0

    # original atoms were added after the other group
    offset = len(merged_u.atoms) - len(original_u.atoms)
    if np.all(merged_u.atoms[offset].position == first_original_position):
        return offset

    return None


In [4]:
# Read in each of the substrate files.
directory = 'substrates_initial/'
# os.listdir returns entries in arbitrary order and also lists anything else
# that lives in the folder (hidden files, checkpoint directories, ...). Keep
# only regular .pdb files and sort them so every run processes the same
# substrates in the same order.
file_names = sorted(
    f for f in os.listdir(directory)
    if f.lower().endswith('.pdb') and os.path.isfile(os.path.join(directory, f))
)
# To debug a single substrate, override with e.g. file_names = ['0.pdb']

# Target distances (Angstroms) between ThDP atoms and substrate atoms, taken
# from the optimized structure of substrate 6; they are used to place the aka
# head atoms.
# Columns are the ThDP atoms C1, N1, N2, S1; rows are the substrate atoms
# C2, C3, O1. For example, radii[0][0] is the C1-C2 distance.
radii = [
    [1.539,2.562,3.389,2.880],
    [2.533,3.205,4.764,3.784],
    [2.393,2.973,2.592,3.893]
]

In [5]:
# Load the receptor universe and split it into the ThDP atoms (resname TPP)
# and everything else.
receptor = mda.Universe('int1_receptor.pdb')
ThDP_residue = receptor.select_atoms("resname TPP")
receptor_only = receptor.select_atoms("not resname TPP")

# Write the ThDP atoms on their own to a PDB file. The output folder is
# created first so the cell also works on a fresh checkout.
output_filename = 'substrates_aligned/ThDP_alone.pdb'
os.makedirs(os.path.dirname(output_filename), exist_ok=True)
ThDP_residue.atoms.write(output_filename)

ThDP_important_indexes = get_ThDP_indexes(ThDP_residue)

# get the coordinates of important atoms in ThDP
C1_coords = get_atom_position(ThDP_residue,ThDP_important_indexes['C1'])
N1_coords = get_atom_position(ThDP_residue,ThDP_important_indexes['N1'])
N2_coords = get_atom_position(ThDP_residue,ThDP_important_indexes['N2'])
S1_coords = get_atom_position(ThDP_residue,ThDP_important_indexes['S1'])

# ThDP C1, N1, N2, S1 atom coords that will be treated as sphere centers
# (same order as the columns of `radii`)
centers = np.array([C1_coords,N1_coords,N2_coords,S1_coords])

# Use the average of the S1->C1 and N1->C1 vectors as the direction in which
# to guess the location of C2.
vector_S1_to_C1 = C1_coords - S1_coords
vector_N1_to_C1 = C1_coords - N1_coords
avg_vector = (vector_S1_to_C1 + vector_N1_to_C1)/2
unit_vector = avg_vector / np.linalg.norm(avg_vector)
guess_C2 = C1_coords + unit_vector * 1.54 # C2 should be located 1.54 A away from C1


In [7]:
# Iterate through all substrates: align each one onto the int1 geometry, merge
# it with ThDP and the receptor, rotate the tail away from clashes, and write
# the results under substrates_aligned/ and complexes/<name>/.
for curr_file_name in file_names:
    print(curr_file_name)
    # load substrate universe
    file_start = curr_file_name.split('.')[0]
    substrate = mda.Universe(directory+curr_file_name)

    # identify the atoms that comprise the aka head of the substrate
    substrate_important_indexes = get_substrate_aka_indexes(substrate.atoms)
    # initial coords of C2, C3, and O1 (same order as the rows of `radii`)
    initial_positions = [
        get_atom_position(substrate,substrate_important_indexes[atom_name])
        for atom_name in ('C2','C3','O1')
    ]

    # Two rounds of optimization. The first uses the guessed location of C2 as
    # the starting position for each atom we are trying to place (C2,C3,O1).
    initial_guess = np.hstack([guess_C2 for _ in range(3)])
    C2_optimized, C3_optimized, O1_optimized = optimize_points(centers, initial_guess, radii)
    all_optimized = [C2_optimized, C3_optimized, O1_optimized]

    # get the final error for each atom's position
    C2_err = atom_objective(C2_optimized, centers, radii[0])
    C3_err = atom_objective(C3_optimized, centers, radii[1])
    O1_err = atom_objective(O1_optimized, centers, radii[2])

    # The second round restarts all three atoms from the position of whichever
    # atom had the smallest error in the first round.
    all_errors = [C2_err,C3_err,O1_err]
    min_error_index = all_errors.index(min(all_errors))
    redo_initial_guess = np.hstack([all_optimized[min_error_index] for _ in range(3)])
    C2_reoptimized, C3_reoptimized, O1_reoptimized = optimize_points(centers, redo_initial_guess, radii)

    final_positions = [C2_reoptimized, C3_reoptimized, O1_reoptimized]

    # Get the rotation and translation from our initial substrate to our int1 geometry
    R, t = kabsch_algorithm(initial_positions,final_positions)

    # make a copy of the substrate object and move every atom with the
    # transform that aligns the aka head atoms
    substrate_aka_aligned = substrate.copy()
    for atom in substrate_aka_aligned.atoms:
        atom.position = np.dot(R, atom.position) + t

    # get the updated coords for important atoms
    C2_coords = substrate_aka_aligned.atoms.positions[substrate_important_indexes['C2']]
    O1_coords = substrate_aka_aligned.atoms.positions[substrate_important_indexes['O1']]
    C3_coords = substrate_aka_aligned.atoms.positions[substrate_important_indexes['C3']]
    R_coords =  substrate_aka_aligned.atoms.positions[substrate_important_indexes['R']]

    # target angles represent the optimized angles of C1-C2-R, O1-C2-R, C3-C2-R;
    # we use these to reposition the first atom of the R group
    target_angles = [111.1,110.2,107.5]
    R_coords_opt = optimize_angles(R_coords,C1_coords,C2_coords,O1_coords,C3_coords,target_angles)

    # get a rotation and translation from the original to the optimized R location
    R_tail, t_tail = kabsch_algorithm([R_coords,C2_coords],[R_coords_opt,C2_coords])

    # The tail is every atom that is not one of the important atoms, plus R itself.
    substrate_tail_atom_indexes = [i for i in range(0,len(substrate.atoms)) if i not in substrate_important_indexes.values()]
    substrate_tail_atom_indexes.append(substrate_important_indexes['R'])

    # make a copy of the substrate object and apply the tail transform to the
    # tail atoms only
    substrate_aligned = substrate_aka_aligned.copy()
    for i in substrate_tail_atom_indexes:
        tail_atom = substrate_aligned.atoms[i]
        tail_atom.position = np.dot(R_tail, tail_atom.position) + t_tail

    # add the substrate and ThDP to a single INI universe
    ini_universe = mda.Merge(substrate_aligned.atoms,ThDP_residue.atoms)
    for atom in ini_universe.atoms:
        atom.residue.resid = 1
        atom.residue.resname = "INI"
        atom.record_type = "HETATM"

    # determine how the indexes of the substrate and ThDP have shifted inside
    # the merged universe and consolidate them into one dictionary
    ini_substrate_shift = determine_index_shift(substrate_aligned,ini_universe,substrate_tail_atom_indexes)
    ini_ThDP_shift = determine_index_shift(ThDP_residue,ini_universe,list(ThDP_important_indexes.values()))
    # determine_index_shift returns None when it cannot locate the atoms; fail
    # here with a clear message instead of a TypeError on the additions below.
    if ini_substrate_shift is None or ini_ThDP_shift is None:
        raise ValueError("Could not locate substrate/ThDP atoms in the merged INI universe for " + curr_file_name)

    ini_tail_atom_indexes = [i + ini_substrate_shift for i in substrate_tail_atom_indexes]
    ini_important_indexes = {}
    for key in substrate_important_indexes:
        ini_important_indexes[key] = substrate_important_indexes[key] + ini_substrate_shift

    for key in ThDP_important_indexes:
        ini_important_indexes[key] = ThDP_important_indexes[key] + ini_ThDP_shift

    # write ini to a new file
    ini_universe.atoms.write("substrates_aligned/"+file_start+"_ini.pdb")

    # add ini and receptor to the same universe to form the full complex
    # (named full_complex so the builtin `complex` is not shadowed)
    full_complex = mda.Merge(receptor_only.atoms,ini_universe.atoms)

    # determine how much the indexing has changed and update the dictionary
    complex_shift = determine_index_shift(ini_universe,full_complex,substrate_tail_atom_indexes)
    if complex_shift is None:
        raise ValueError("Could not locate INI atoms in the merged complex for " + curr_file_name)
    complex_tail_atom_indexes = [i + complex_shift for i in ini_tail_atom_indexes]

    # From here on ini_important_indexes holds indexes into the full complex.
    for key in ini_important_indexes:
        ini_important_indexes[key] = ini_important_indexes[key] + complex_shift

    # get the atoms involved in the bond that we are going to rotate around
    # (carbonyl carbon to first atom in the tail)
    atom1_index = ini_important_indexes['C2']
    atom2_index = ini_important_indexes['R']
    # keep the R atom bonded to C2 fixed; only the remaining tail atoms rotate
    complex_tail_atom_indexes.remove(atom2_index)
    atoms_to_rotate = complex_tail_atom_indexes

    # Rotate the tail until no atom lies within `threshold` of it.
    # atom_clash_objective builds an MDAnalysis `index` selection, which is
    # 0-based, so the 0-based tail indexes are passed directly (adding 1 would
    # test the neighbouring atoms instead of the tail).
    degrees_rotated = 10
    rotated_complex = full_complex.copy()
    threshold = 1.2
    num_clash = atom_clash_objective(full_complex,complex_tail_atom_indexes,threshold)

    while num_clash > 0:
        # after a full sweep without success, relax the cutoff and start over
        if degrees_rotated >= 360:
            degrees_rotated = 10
            threshold -= 0.1

        # NOTE(review): each call rotates the already-rotated copy by a growing
        # angle; confirm against rotate_atoms whether the angle is meant to be
        # absolute or incremental.
        rotated_complex = rotate_atoms(rotated_complex, atom1_index, atom2_index, atoms_to_rotate, degrees_rotated)
        num_clash = atom_clash_objective(rotated_complex,complex_tail_atom_indexes,threshold)
        degrees_rotated += 10

    # make sure the per-substrate output folder exists before writing into it
    os.makedirs("complexes/"+file_start, exist_ok=True)

    # Use a dedicated name so the notebook-level `receptor` universe is not overwritten.
    receptor_atoms = rotated_complex.select_atoms("not resname INI and not resname WAT")
    receptor_file_name = "complexes/"+file_start+"/receptor.pdb"
    receptor_atoms.atoms.write(receptor_file_name)

    water = rotated_complex.select_atoms("resname WAT")
    water_file_name = "complexes/"+file_start+"/water.pdb"
    water.atoms.write(water_file_name)

    # Rewrite the receptor file, adding a TER record after every line that
    # contains the atom name OXT.
    with open(receptor_file_name , 'r') as infile:
        lines = infile.readlines()

    with open(receptor_file_name , 'w') as outfile:
        for line in lines:
            outfile.write(line)
            if " OXT " in line:
                outfile.write("TER\n")

    ini_final = rotated_complex.select_atoms("resname INI")
    ini_final.atoms.write("complexes/"+file_start+"/ini.pdb")



0.pdb
NOT CONVERGED
CONVERGED
NOT CONVERGED
1.pdb
NOT CONVERGED
CONVERGED
CONVERGED
10.pdb
NOT CONVERGED
CONVERGED
CONVERGED
11.pdb
NOT CONVERGED
CONVERGED
NOT CONVERGED
12.pdb
NOT CONVERGED
CONVERGED
CONVERGED
13.pdb
NOT CONVERGED
CONVERGED
CONVERGED
14.pdb
NOT CONVERGED
CONVERGED
CONVERGED
15.pdb
NOT CONVERGED
CONVERGED
CONVERGED
16.pdb
NOT CONVERGED
CONVERGED
CONVERGED
17.pdb
NOT CONVERGED
CONVERGED
CONVERGED
18.pdb
NOT CONVERGED
CONVERGED
CONVERGED
19.pdb
NOT CONVERGED
CONVERGED
CONVERGED
2.pdb
NOT CONVERGED
CONVERGED
CONVERGED
3.pdb
NOT CONVERGED
CONVERGED
CONVERGED
4.pdb
NOT CONVERGED
CONVERGED
NOT CONVERGED
5.pdb
NOT CONVERGED
CONVERGED
CONVERGED
6.pdb
NOT CONVERGED
CONVERGED
NOT CONVERGED
7.pdb
NOT CONVERGED
CONVERGED
CONVERGED
8.pdb
NOT CONVERGED
CONVERGED
CONVERGED
9.pdb
NOT CONVERGED
CONVERGED
CONVERGED


In [None]:
# Select the C1 atom and every atom within 10.0 Angstroms of it.
c1_atom = rotated_complex.select_atoms('index ' + str(ini_important_indexes['C1']))
nearby_residues = rotated_complex.select_atoms("around 10.0 index " + str(ini_important_indexes['C1']))
# Unique residues that have at least one atom inside that sphere.
unique_residues = set(nearby_residues.residues)
# Expand to whole residues through the residue objects themselves. Re-selecting
# by "resid N" would match every residue carrying that number, and the INI
# residues were all assigned resid 1, so any other residue numbered 1 would be
# pulled in as well.
atoms_in_nearby_residues = nearby_residues.residues.atoms
print('Num QM Atoms:',len(atoms_in_nearby_residues))

Num QM Atoms: 326


In [None]:
# Print the set of residues collected in `unique_residues`.
print(unique_residues)

{<Residue GLY, 1028>, <Residue ILE, 1030>, <Residue PHE, 1031>, <Residue LEU, 1034>, <Residue WAT, 1177>, <Residue INI, 1>, <Residue INI, 1>, <Residue PRO, 30>, <Residue GLY, 31>, <Residue SER, 32>, <Residue ASN, 946>, <Residue SER, 947>, <Residue GLH, 55>, <Residue ARG, 951>, <Residue ARG, 969>, <Residue SER, 972>, <Residue GLY, 973>, <Residue THR, 78>, <Residue ILE, 974>, <Residue GLN, 118>}


In [None]:
# Display the coordinates of the selected C1 atom.
c1_atom.positions

array([[-34.894, -35.257,  23.488]], dtype=float32)

In [None]:
# Write the rotated complex to disk. `rotated_complex` and `file_start` are
# whatever the substrate loop left behind, i.e. they belong to the last file
# that loop processed.
rotated_complex.atoms.write("complexes/"+file_start+"/rotated_complex.pdb")