This code is based on the optimized structure of a homo-coupled 6-6 (akg) reaction in MenD (5EJ5) The idea is to keep the ThDP-bound product structure and use this code to only replace the R groups. 

In [1]:
import MDAnalysis as mda
from MDAnalysis.core.universe import Merge
import numpy as np
import os
from utils import *
import warnings 
import matplotlib.pyplot as plt

# Suppress warnings specific to MDAnalysis
warnings.filterwarnings("ignore", category=UserWarning, module="MDAnalysis")

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# calculate vectors for the direction of each R group
# P denotes prime, the atom of the acceptor molecule 
C2_coords = np.array([-35.281,-36.298,21.222])
C2P_coords = np.array([-35.221,-37.893,21.065])
R_coords = np.array([-36.642,-35.850,20.640])
RP_coords = np.array([-36.528,-38.655,21.359])

C2_R_vec = R_coords - C2_coords
C2P_RP_vec = RP_coords - C2P_coords
C2R_unit = C2_R_vec / np.linalg.norm(C2_R_vec)
C2RP_unit = C2P_RP_vec / np.linalg.norm(C2P_RP_vec)

In [50]:
# define the coordinates of the heavy atom R groups for the donor and acceptor from the optimized geometry
# We will find the average plane through these points and try to match the normal of that plane when we replace 
# the new R groups for donor and acceptor

donor_heavy_atom_R_coords = np.array([[-35.281,-36.298,21.222],
                                      [-36.642, -35.850, 20.640],
                                      [-36.649, -34.402, 20.172],
                                      [-38.045, -34.224, 19.630],
                                      [-38.831, -33.511, 20.301],
                                      [-38.401, -34.829, 18.588]])

acc_heavy_atom_R_coords =   np.array([[-35.221,-37.893,21.065],
                                      [-36.528, -38.655, 21.359],
                                      [-36.652, -40.020, 20.681],
                                      [-38.123, -40.440, 20.288],
                                      [-39.005, -39.560, 20.254],
                                      [-38.263, -41.647, 20.032]])

# Calculate the average plane of our template R groups 
donor_normal, donor_centroid = calculate_average_plane(donor_heavy_atom_R_coords)
acc_normal, acc_centroid = calculate_average_plane(acc_heavy_atom_R_coords)

In [52]:
# load receptor universe and extract the different parts of the protein int1 receptor 
head_dir = '5EJ5/int3/'
receptor = mda.Universe(head_dir+'template.pdb')

output_dir = '/Users/gbonn/OneDrive - Northwestern University/Bonnanzio Geoffrey/04 Raw Data/Generate_QMMM_Geometries/5EJ5/int3/'

# output the ThDP cofactor
INP = receptor.select_atoms("resname INP")
#write_universe(output_dir +'receptor/','INP.pdb',ThDP)

# output just the protein and edit the file so it is Amber readable 
protein = receptor.select_atoms(f"protein or resname MG")
write_universe(output_dir  + 'receptor/','protein.pdb',protein)
edit_protein_files(output_dir + 'receptor/','protein.pdb')

# output the water
try:
    water = receptor.select_atoms(f"resname WAT")
    write_universe(output_dir + 'receptor/','water.pdb',water)
except:
    print('No water to write')

# output the receptor (everything besides ThDP)
protein_MG_water = receptor.select_atoms("not resname INP")
write_universe(output_dir  + 'receptor/','receptor.pdb',protein_MG_water)
edit_protein_files(output_dir  + 'receptor/','receptor.pdb')

File 'protein.pdb' has been written in '/Users/gbonn/OneDrive - Northwestern University/Bonnanzio Geoffrey/04 Raw Data/Generate_QMMM_Geometries/5EJ5/int3/receptor/'.
Edited  /Users/gbonn/OneDrive - Northwestern University/Bonnanzio Geoffrey/04 Raw Data/Generate_QMMM_Geometries/5EJ5/int3/receptor/protein.pdb  for Amber
File 'water.pdb' has been written in '/Users/gbonn/OneDrive - Northwestern University/Bonnanzio Geoffrey/04 Raw Data/Generate_QMMM_Geometries/5EJ5/int3/receptor/'.
File 'receptor.pdb' has been written in '/Users/gbonn/OneDrive - Northwestern University/Bonnanzio Geoffrey/04 Raw Data/Generate_QMMM_Geometries/5EJ5/int3/receptor/'.
Edited  /Users/gbonn/OneDrive - Northwestern University/Bonnanzio Geoffrey/04 Raw Data/Generate_QMMM_Geometries/5EJ5/int3/receptor/receptor.pdb  for Amber


In [53]:
def diff_btwn_planes(normal_1,normal_2):
    # Normalize the normals
    n1 = normal_1 / np.linalg.norm(normal_1)
    n2 = normal_2 / np.linalg.norm(normal_2)
    
    # Calculate angle between normals
    dot_product = np.dot(n1, n2)
    angle = np.degrees(np.arccos(np.clip(dot_product, -1.0, 1.0))) 
    
    return angle

def rotation_objective(angle,md_universe,fixed_index_1,fixed_index_2,rotating_atom_indexes,reference_plane_normal):
    rotated_donor = rotate_atoms(md_universe, fixed_index_1,fixed_index_2,rotating_atom_indexes,angle)
    atom_positions = []
    for i in range(0,len(rotated_donor.atoms)):
        if rotated_donor.atoms[i].type != 'H':
            atom_positions.append(rotated_donor.atoms[i].position)
    plane_normal, plane_centroid = calculate_average_plane(np.array(atom_positions))
    angle_between_planes = diff_btwn_planes(plane_normal,reference_plane_normal)
    return angle_between_planes

def optimize_rotation(initial_angle,md_universe:mda.core.universe.Universe,fixed_index_1:int,fixed_index_2:int,rotating_atom_indexes:list,reference_plane_normal:np.ndarray):
    # Set up optimization
    tolerance = 1e-12
    result = minimize(
        rotation_objective,
        initial_angle,
        args=(md_universe.copy(),fixed_index_1,fixed_index_2,rotating_atom_indexes,reference_plane_normal),
        tol=tolerance,
        method='Nelder-Mead'
    )
    # Check for successful optimization
    if result.success:
        print('CONVERGED')
    else:
        print('NOT CONVERGED')
    return result.x

def find_atom_in_new_universe(md_universe,check_coords):
    min_dist = 10**6 
    for i in range(0,len(md_universe.atoms)):
        curr_pos = md_universe.atoms[i].position
        curr_dist = get_dist(curr_pos,check_coords)
        if curr_dist < min_dist:
            min_dist = curr_dist
            new_atom_index = i
    return new_atom_index

In [54]:
# read in each of the substrate files
directory = 'substrates_initial/'
file_names = [f for f in os.listdir(directory)]
file_names = ['6.pdb']

for curr_file_name in file_names:
    print(curr_file_name)
    # load substrate universe
    file_start = curr_file_name.split('.')[0]
    substrate = mda.Universe(directory+curr_file_name)
    # identify the atoms that comprise the aka substrates 
    substrate_important_indexes = get_substrate_aka_indexes(substrate.atoms)
        
    indices_to_remove = [substrate_important_indexes['O1'],
                         substrate_important_indexes['C3'],
                         substrate_important_indexes['O2'],
                         substrate_important_indexes['O3']]
    # S denotes unbound susbtrate we are trying to align 
    C2S_coords = get_atom_position(substrate,substrate_important_indexes['C2'])
    RS_coords = get_atom_position(substrate,substrate_important_indexes['R'])
    initial_S_positions = np.array([C2S_coords,RS_coords])
    if file_start == '7':
        R_dist = bond_dists['C-N']
    else:
        R_dist = bond_dists['C-C']

    guess_R_coords = C2_coords + C2R_unit * R_dist 
    final_donor_positions = np.array([C2_coords,guess_R_coords])
    guess_RP_coords = C2P_coords + C2RP_unit * R_dist 
    final_acc_positions = np.array([C2P_coords,guess_RP_coords])

    R_donor, t_donor = kabsch_algorithm(initial_S_positions,final_donor_positions)
    # make a copy of the substrate object and update atom positions by aligning aka head atoms
    donor_aligned = substrate.copy()
    for i in range(0,len(donor_aligned.atoms.positions)):
        atom_coords = donor_aligned.atoms[i].position
        new_coords = np.dot(R_donor, atom_coords) + t_donor
        donor_aligned.atoms[i].position = new_coords

    # Select atoms to keep (all atoms excluding the ones in indices_to_remove)
    mask = ~np.isin(donor_aligned.atoms.indices, indices_to_remove)
    atoms_to_keep = donor_aligned.atoms[mask]
    modified_donor = Merge(atoms_to_keep)
    #write_universe(output_dir+file_start+'/', 'donor.pdb', modified_donor)
    C2_new_index = find_atom_in_new_universe(modified_donor,C2_coords)
    R_new_index = find_atom_in_new_universe(modified_donor,R_coords)

    atoms_to_rotate = [i for i in range(0,len(modified_donor.atoms)) if i not in [C2_new_index,R_new_index]]
    final_donor_angle = optimize_rotation(np.array(100.0),modified_donor.copy(),C2_new_index, R_new_index, atoms_to_rotate,donor_normal)
    rotated_donor = rotate_atoms(modified_donor.copy(), C2_new_index, R_new_index, atoms_to_rotate, final_donor_angle[0])
    write_universe(output_dir+file_start+'/', 'rotated_donor.pdb', rotated_donor)

    R_acc, t_acc = kabsch_algorithm(initial_S_positions,final_acc_positions)
    # make a copy of the substrate object and update atom positions by aligning aka head atoms
    acc_aligned = substrate.copy()
    for i in range(0,len(acc_aligned.atoms.positions)):
        atom_coords = acc_aligned.atoms[i].position
        new_coords = np.dot(R_acc, atom_coords) + t_acc
        acc_aligned.atoms[i].position = new_coords

    # Select atoms to keep (all atoms excluding the ones in indices_to_remove)
    mask = ~np.isin(acc_aligned.atoms.indices, indices_to_remove)
    atoms_to_keep = acc_aligned.atoms[mask]
    modified_acc = Merge(atoms_to_keep)
    #write_universe(output_dir+file_start+'/', 'acc.pdb', modified_acc)
    C2P_new_index = find_atom_in_new_universe(modified_acc,C2P_coords)
    RP_new_index = find_atom_in_new_universe(modified_acc,RP_coords)

    atoms_to_rotate = [i for i in range(0,len(modified_acc.atoms)) if i not in [C2P_new_index,RP_new_index]]
    final_acc_angle = optimize_rotation(np.array(100.0),modified_acc.copy(),C2P_new_index, RP_new_index, atoms_to_rotate,acc_normal)
    rotated_acc = rotate_atoms(modified_acc.copy(), C2P_new_index, RP_new_index, atoms_to_rotate, final_acc_angle[0])
    write_universe(output_dir+file_start+'/', 'rotated_acc.pdb', rotated_acc)

6.pdb
CONVERGED
File 'rotated_donor.pdb' has been written in '/Users/gbonn/OneDrive - Northwestern University/Bonnanzio Geoffrey/04 Raw Data/Generate_QMMM_Geometries/5EJ5/int3/6/'.
CONVERGED
File 'rotated_acc.pdb' has been written in '/Users/gbonn/OneDrive - Northwestern University/Bonnanzio Geoffrey/04 Raw Data/Generate_QMMM_Geometries/5EJ5/int3/6/'.


In [39]:
print(final_angle)

[100.]


In [32]:
print(modified_donor.atoms[0].index)

0


In [None]:
rotated_donor = rotate_atoms(modified_donor.copy(), atom1_index, atom2_index, atoms_to_rotate, degrees_rotated)

    R_acc, t_acc = kabsch_algorithm(initial_S_positions,final_acc_positions)
    # make a copy of the substrate object and update atom positions by aligning aka head atoms
    acc_aligned = substrate.copy()
    for i in range(0,len(acc_aligned.atoms.positions)):
        atom_coords = acc_aligned.atoms[i].position
        new_coords = np.dot(R_acc, atom_coords) + t_acc
        acc_aligned.atoms[i].position = new_coords

    # Select atoms to keep (all atoms excluding the ones in indices_to_remove)
    mask = ~np.isin(acc_aligned.atoms.indices, indices_to_remove)
    atoms_to_keep = acc_aligned.atoms[mask]
    modified_acc = Merge(atoms_to_keep)
    #write_universe(output_dir+file_start+'/', 'acc.pdb', modified_acc)


In [None]:

    # initial coords of C2, C3, and O1 
    initial_positions = [get_atom_position(substrate,substrate_important_indexes['C2']),
                        get_atom_position(substrate,substrate_important_indexes['C3']),
                        get_atom_position(substrate,substrate_important_indexes['O1'])]
    # we go through two rounds of optimization, first using the guess location of 
    # C2 as the starting position for each atom we are trying to place (C2,C3,O1) 
    initial_guess = np.hstack([guess_C2 for i in range(3)])
    C2_optimized, C3_optimized, O1_optimized = optimize_coordinates(initial_guess, centers, radii)
    all_optimized = [C2_optimized, C3_optimized, O1_optimized]
    # get the final error for each atom's position 
    C2_err = atom_objective(C2_optimized, centers, radii[0])
    C3_err = atom_objective(C3_optimized, centers, radii[1])
    O1_err = atom_objective(O1_optimized, centers, radii[2])
    # use the atom with the minimum error for the next round of optimization
    all_errors = [C2_err,C3_err,O1_err]
    min_error_index = all_errors.index(min(all_errors))
    redo_initial_guess = np.hstack([all_optimized[min_error_index] for i in range(3)])
    C2_reoptimized, C3_reoptimized, O1_reoptimized = optimize_coordinates(redo_initial_guess,centers, radii)
    final_positions = [C2_reoptimized, C3_reoptimized, O1_reoptimized]
    # Get the rotation and translation matrix from our initial substrate to our int1 geoemtry
    R_int1, t_int1 = kabsch_algorithm(np.array(initial_positions),np.array(final_positions))
    
    # make a copy of the substrate object and update atom positions by aligning aka head atoms
    substrate_aka_aligned = substrate.copy()
    for i in range(0,len(substrate_aka_aligned.atoms.positions)):
        atom_coords = substrate_aka_aligned.atoms[i].position
        new_coords = np.dot(R_int1, atom_coords) + t_int1
        substrate_aka_aligned.atoms[i].position = new_coords
    # get the updated coords for important atoms 
    C2_coords = substrate_aka_aligned.atoms.positions[substrate_important_indexes['C2']]
    O1_coords = substrate_aka_aligned.atoms.positions[substrate_important_indexes['O1']]
    C3_coords = substrate_aka_aligned.atoms.positions[substrate_important_indexes['C3']]
    R_coords =  substrate_aka_aligned.atoms.positions[substrate_important_indexes['R']]
    # we have placed an SP2 hydbridized substrate (around C2), move R to make it SP3
    R_coords_opt = optimize_angles(R_coords,C1_coords,C2_coords,O1_coords,C3_coords,target_angles)
    # get a rotation and translation matrix from the original to the optimized R location 
    R_tail, t_tail = kabsch_algorithm([R_coords,C2_coords],[R_coords_opt,C2_coords])
    
    # apply rotation and translation transformation to only the tail atoms 
    substrate_tail_atom_indexes = [i for i in range(0,len(substrate.atoms)) if i not in substrate_important_indexes.values()]
    substrate_tail_atom_indexes.append(substrate_important_indexes['R'])
    # make a copy of the substrate object and update tail atom positions
    substrate_aligned = substrate_aka_aligned.copy()
    for i in substrate_tail_atom_indexes:
        atom_coords = substrate_aligned.atoms[i].position
        new_coords = np.dot(R_tail, atom_coords) + t_tail
        substrate_aligned.atoms[i].position = new_coords
    # add the substrate and ThDP to a single INI universe
    ini_universe = mda.Merge(substrate_aligned.atoms,ThDP.atoms)
    for atom in ini_universe.atoms:
        atom.residue.resid = 1
        atom.residue.resname = "INI"
        atom.record_type = "HETATM"
    
    # determine how much the indexes of INI and ThDP have shifted and consolidate dictionary
    ini_substrate_shift = determine_index_shift(substrate_aligned,ini_universe,substrate_tail_atom_indexes)
    ini_ThDP_shift = determine_index_shift(ThDP,ini_universe,list(ThDP_important_indexes.values()))
    ini_tail_atom_indexes = [i + ini_substrate_shift for i in substrate_tail_atom_indexes]
    ini_important_indexes = {}
    for key in substrate_important_indexes:
        ini_important_indexes[key] = substrate_important_indexes[key] + ini_substrate_shift
    for key in ThDP_important_indexes:
        ini_important_indexes[key] = ThDP_important_indexes[key] + ini_ThDP_shift    
    # add ini and receptor to the same universe to form the full complex
    complex = mda.Merge(protein_MG_water.atoms,ini_universe.atoms)
    
    # determine how much the indexing has changed and update dictionary
    complex_shift = determine_index_shift(ini_universe,complex,substrate_tail_atom_indexes)
    complex_tail_atom_indexes = [i + complex_shift for i in ini_tail_atom_indexes]
    for key in ini_important_indexes:
        ini_important_indexes[key] = ini_important_indexes[key] + complex_shift
    # get the atoms involved in the bond that we are going to rotate around (carbonyl carbon to first atom in the tail)
    atom1_index = ini_important_indexes['C2'] 
    atom2_index = ini_important_indexes['R']  
    complex_tail_atom_indexes.remove(atom2_index)
    atoms_to_rotate =  complex_tail_atom_indexes 
    
    # fix the R atom bonded to C2 and allow the tail atoms to rotate to minimize clashes 
    pdb_indexes = [i+1 for i in complex_tail_atom_indexes]
    degrees_rotated = 0
    all_degrees_rotated = [degrees_rotated]
    threshold = clash_threshold
    num_clash = get_atom_clashes(complex,pdb_indexes,threshold)
    all_clashes =[num_clash]
    min_num_clash = num_clash
    curr_angle_streak = [degrees_rotated]
    longest_angle_streak = [degrees_rotated]
    all_zero_angles = []
    while degrees_rotated <= 360:
            
        # rotate the complex a specified number of degrees    
        rotated_complex = rotate_atoms(complex.copy(), atom1_index, atom2_index, atoms_to_rotate, degrees_rotated)
        num_clash = get_atom_clashes(rotated_complex,pdb_indexes,threshold)
        if num_clash == 0:
            all_zero_angles.append(degrees_rotated)
            
        all_degrees_rotated.append(degrees_rotated)
        all_clashes.append(num_clash)
        # if the number of clashes is equal to minimum number of clashes found 
        if num_clash == min_num_clash: 
            curr_angle_streak.append(degrees_rotated) # add the current angle to the current streak
        elif num_clash < min_num_clash: # if we have found a more favorable position
            curr_angle_streak = [degrees_rotated] # start over 
            min_num_clash = num_clash
        else: # if we increase from the current minimum record the current streak
            # if we have found a new longest streak 
            if len(longest_angle_streak) < len(curr_angle_streak):
                longest_angle_streak = curr_angle_streak
            curr_angle_streak = []
        degrees_rotated += 5
    # take the median value of the longest minimum streak to get the minimum rotated complex
    len_longest_streak = len(longest_angle_streak)
    median_min_angle = longest_angle_streak[len_longest_streak//2]
    min_rotated_complex = rotate_atoms(complex.copy(), atom1_index, atom2_index, atoms_to_rotate, median_min_angle)
    all_substrate_rotation_clashes.append(all_clashes)
    # output all necessary files
    receptor = min_rotated_complex.select_atoms("not resname INI and not resname WAT")
    receptor_dir = output_dir + "complexes/" + file_start + '/'
    receptor_file_name = "receptor.pdb"
    write_universe(receptor_dir, receptor_file_name, receptor)
    edit_protein_files(receptor_dir,receptor_file_name)
    # output the water
    try:
        water = min_rotated_complex.select_atoms(f"resname WAT")
        water_dir = output_dir + "complexes/" + file_start + '/'
        water_file_name = "water.pdb"
        #write_universe(water_dir,water_file_name,water)
    except:
        print('No water to write')
    ini_final = min_rotated_complex.select_atoms("resname INI")
    ini_dir = output_dir + "complexes/" + file_start
    ini_file_name = "ini.pdb"
    write_universe(ini_dir, ini_file_name, ini_final)
