In [11]:
import MDAnalysis as mda
import numpy as np
import os
import shutil
import csv
from utils import *
import warnings 

# Silence UserWarning messages issued from MDAnalysis code. The `module` argument is a
# regular expression matched against the start of the issuing module's name, so
# submodules whose names begin with "MDAnalysis" are covered as well.
warnings.filterwarnings("ignore", category=UserWarning, module="MDAnalysis")

In [12]:
# Net charge per residue name (Amber resname -> integer charge).
# Only the names listed here are looked up when a region's charge is summed;
# any residue name that is absent from this mapping adds nothing to the total.
residue_charge_dict = dict(
    ARG=1,
    LYS=1,
    ASP=-1,
    GLU=-1,
    MG=2,
)

In [13]:
def write_MM_orca_script(active_atoms_str,total_MM_charge,output_path):
    """Fill in the MM ORCA template and write it to ``<output_path>/mm_opt.inp``.

    The template ``template_MM_script.inp`` is read from the current working
    directory. Two substitutions are made, each on the first occurrence only:

    * ``{}`` becomes ``{<active_atoms_str>}``
    * ``*pdbfile`` becomes ``*pdbfile <total_MM_charge> 1 qm_complex.pdb``
      (the literal ``1`` is presumably the spin multiplicity -- TODO confirm)

    A trailing newline is always appended to the result.

    Parameters
    ----------
    active_atoms_str : str
        Text placed between the braces of the first ``{}`` placeholder.
    total_MM_charge : int
        Charge written on the ``*pdbfile`` line (converted with ``str``).
    output_path : str
        Directory that receives ``mm_opt.inp``. A trailing path separator is
        optional; an empty string means the current working directory. The
        directory is created when it does not exist.

    Raises
    ------
    FileNotFoundError
        If ``template_MM_script.inp`` is not present in the working directory.
    """
    # Define the input file path and read the template
    input_file = "template_MM_script.inp"
    with open(input_file, 'r') as file:
        content = file.read()

    # Replace the first {} placeholder with the active-atom selection
    content = content.replace("{}", "{" + active_atoms_str + "}" , 1)
    # Expand the first *pdbfile token of the template in place (this is a
    # replacement inside the template, not an append at the end)
    custom_line = "*pdbfile " + str(total_MM_charge) +" 1 qm_complex.pdb"
    content = content.replace("*pdbfile", custom_line , 1)
    content += '\n'

    # Build the destination with os.path.join so a directory given without a
    # trailing separator still yields <dir>/mm_opt.inp rather than <dir>mm_opt.inp.
    # A path that already ends with a separator produces the same string as before.
    file_name = "mm_opt.inp"
    output_file = os.path.join(output_path, file_name)
    # Skip directory creation for an empty path (current directory):
    # os.makedirs('') would raise.
    if output_path and not os.path.exists(output_path):
        print(f"Directory '{output_path}' does not exist. Creating it...")
        # exist_ok guards against the directory appearing between check and creation
        os.makedirs(output_path, exist_ok=True)
    with open(output_file, 'w') as file:
        file.write(content)

    print(f"MM File processed and saved as {output_file}")

def write_QMMM_orca_script(QM_atoms_str,active_atoms_str,total_QM_charge,output_path):
    """Fill in the QM/MM ORCA template and write it to ``<output_path>/opt.inp``.

    The template ``template_QMMM_script.inp`` is read from the current working
    directory. Its first ``{}`` placeholder receives ``QM_atoms_str`` and the
    next one receives ``active_atoms_str``. The line
    ``*pdbfile <total_QM_charge> 1 mm_opt.pdb`` is then appended on its own
    line (the literal ``1`` is presumably the spin multiplicity -- TODO confirm).

    Parameters
    ----------
    QM_atoms_str : str
        Text placed between the braces of the first ``{}`` placeholder.
    active_atoms_str : str
        Text placed between the braces of the second ``{}`` placeholder.
    total_QM_charge : int
        Charge written on the appended ``*pdbfile`` line (converted with ``str``).
    output_path : str
        Directory that receives ``opt.inp``. A trailing path separator is
        optional; an empty string means the current working directory. The
        directory is created when it does not exist.

    Raises
    ------
    FileNotFoundError
        If ``template_QMMM_script.inp`` is not present in the working directory.
    """
    # Define the input file path and read the template
    input_file = "template_QMMM_script.inp"
    with open(input_file, 'r') as file:
        content = file.read()

    # Replace the {} placeholders in order: QM atoms first, then active atoms.
    # Each call consumes the first remaining "{}" because the previous one has
    # already been turned into "{...}".
    content = content.replace("{}", "{" + QM_atoms_str + "}" , 1)
    content = content.replace("{}", "{" + active_atoms_str + "}", 1)

    # Make sure the appended coordinate line starts on its own line even when
    # the template does not end with a newline; otherwise it would be glued
    # onto the template's last line.
    if content and not content.endswith('\n'):
        content += '\n'
    custom_line = "*pdbfile " + str(total_QM_charge) +" 1 mm_opt.pdb\n"
    content += custom_line

    # Build the destination with os.path.join so a directory given without a
    # trailing separator still yields <dir>/opt.inp rather than <dir>opt.inp.
    file_name = "opt.inp"
    output_file = os.path.join(output_path, file_name)
    # Skip directory creation for an empty path (current directory):
    # os.makedirs('') would raise.
    if output_path and not os.path.exists(output_path):
        print(f"Directory '{output_path}' does not exist. Creating it...")
        # exist_ok guards against the directory appearing between check and creation
        os.makedirs(output_path, exist_ok=True)
    with open(output_file, 'w') as file:
        file.write(content)
    print(f"Complex QM/MM File processed and saved as {output_file}")
    
def write_resids_to_csv(output_path,file_name,QM_residue_list,active_residue_list):
    """Write the QM and active residue ids to a two-row CSV file.

    Row 1 is the label ``QM RESIDUES`` followed by the entries of
    ``QM_residue_list``; row 2 is the label ``ACTIVE RESIDUES`` followed by the
    entries of ``active_residue_list``.

    Parameters
    ----------
    output_path : str
        Directory that receives the file. A trailing path separator is
        optional; an empty string means the current working directory. The
        directory is created when it does not exist.
    file_name : str
        Name of the CSV file inside ``output_path``.
    QM_residue_list : iterable
        Values written after the ``QM RESIDUES`` label.
    active_residue_list : iterable
        Values written after the ``ACTIVE RESIDUES`` label.
    """
    # os.path.join keeps <dir>/<file> correct whether or not the directory
    # string ends with a separator (plain concatenation required one).
    output_file = os.path.join(output_path, file_name)
    # Skip directory creation for an empty path (current directory):
    # os.makedirs('') would raise.
    if output_path and not os.path.exists(output_path):
        print(f"Directory '{output_path}' does not exist. Creating it...")
        # exist_ok guards against the directory appearing between check and creation
        os.makedirs(output_path, exist_ok=True)
    # newline='' lets the csv module control line endings itself
    with open(output_file, mode='w', newline='') as file:
        writer = csv.writer(file)
        # Unpacking accepts any iterable (list, tuple, array, ...), whereas
        # `[label] + seq` only worked for real lists.
        writer.writerow(['QM RESIDUES', *QM_residue_list])  # Write first list with label
        writer.writerow(['ACTIVE RESIDUES', *active_residue_list])  # Write second list with label

    print(f"Data written to {output_file}")

In [14]:
# ---- Run configuration ----
# Every directory string ends with '/' because paths are assembled further down
# by plain string concatenation.

# Input tree that holds the prepared structures
enzyme_dir = '5EJ5/'

# Root of the output tree, plus the two sub-trees derived from it
output_dir = '/Users/gbonn/OneDrive - Northwestern University/Bonnanzio Geoffrey/04 Raw Data/5EJ5/dG_tests/'
complex_dir = f"{output_dir}int1/"
receptor_dir = f"{output_dir}receptor/"

# Names of the complex sub-directories to process
all_dirs = ['18']

# Selection radii handed to the distance-based atom selection
QM_sphere_r = 2  # Angstroms
active_sphere_r = 6  # presumably also Angstroms -- TODO confirm


In [17]:

# Driver loop: for every complex directory in `all_dirs`, load the complex, pick the
# QM / active regions around the substrate's C2 atom, sum the QM-region charge, and map
# the selected residues onto the receptor structure. All file-writing calls below are
# currently commented out, so running this cell only prints the QM residues.
for curr_dir in all_dirs:
    # Set the charge of the INI residue and the total MM charge for this complex.
    # Complexes 4, 6, 11 and 16 use -5 / -12; every other complex uses -4 / -11.
    # NOTE(review): the reason for the two charge states is not visible here -- confirm.
    # NOTE: this mutates the module-level residue_charge_dict, so the 'INI' entry
    # persists in the kernel after the loop with the value of the last iteration.
    if curr_dir in ['4','6','11','16']:
        residue_charge_dict['INI'] = -5
        MM_charge = -12
    else:
        residue_charge_dict['INI'] = -4
        MM_charge = -11

    # read in the system that you are about to model
    # NOTE(review): the name `complex` shadows Python's built-in complex type for the
    # rest of the kernel session.
    qm_complex_file_dir = enzyme_dir + "complexes/" + curr_dir 
    complex = mda.Universe(qm_complex_file_dir+ '/qm_complex.pdb')
    ini = complex.select_atoms("resname INI")
    # get the carbonyl carbon of the substrate:
    # C2_index is used as a position inside the `ini` selection, C2_id is that atom's
    # `.index` attribute, which is then used in an "index ..." selection on the universe.
    aka_atom_dict = get_substrate_aka_indexes(ini)
    C2_index = aka_atom_dict['C2']
    C2_id = ini.atoms[C2_index].index
    # C2_atom is not referenced again in this cell
    C2_atom = complex.select_atoms("index " +  str(C2_id))
    # get the QM atoms and residues
    # (presumably selected by distance from atom C2_id using the two radii -- the helper
    # lives in utils and is not visible here; fixed_atoms_indexes is unused below)
    QM_residues, active_residues, QM_atoms_indexes,active_atoms_indexes,fixed_atoms_indexes = get_atoms_by_distance(complex,QM_sphere_r,active_sphere_r,C2_id)
    
    # simplify lists to write to file
    QM_list = simplify_integer_list(QM_atoms_indexes)
    active_list = simplify_integer_list(active_atoms_indexes)
    # Fixed atoms are not written at the moment; enable the next line if they are needed.
    # fixed_list = simplify_integer_list(fixed_atoms_indexes) 

    # MM optimisation inputs (disabled): MM_output_dir is only consumed by the
    # commented-out calls below.
    MM_output_dir = complex_dir + curr_dir + '/MM_Active_' + str(active_sphere_r) + '/'
    #write_MM_orca_script(active_list,MM_charge,MM_output_dir)
    #shutil.copy(qm_complex_file_dir + '/qm_complex.pdb', MM_output_dir + 'qm_complex.pdb')
    #shutil.copy(qm_complex_file_dir + '/qm_complex.ORCAFF.prms', MM_output_dir + 'qm_complex.ORCAFF.prms')
    # calculate the charge of our system
    total_QM_charge = 0
    # get charge of QM region: sum the table entry of every QM residue; residue names
    # that are missing from residue_charge_dict are skipped (they add nothing)
    print(QM_residues)
    for residue in QM_residues:
        resname = residue.resname
        if resname in residue_charge_dict:
            total_QM_charge += residue_charge_dict[resname]

    # QM/MM input for the complex (disabled)
    #write_QMMM_orca_script(QM_list,active_list,total_QM_charge,complex_dir + curr_dir + '/QM_Active_' + str(active_sphere_r) + '/')
    
    # residue ids of both regions, used for the CSV record and for the receptor mapping
    QM_residues_resids = [residue.resid for residue in QM_residues]
    active_residues_resids = [residue.resid for residue in active_residues]
    
    # CSV record of the selected residues (disabled)
    #write_resids_to_csv(complex_dir + curr_dir + '/',f'QM_and_{active_sphere_r}A_Active_residues.csv',QM_residues_resids,active_residues_resids)
    
    # read in the receptor structure and look up the same residue ids in it
    # NOTE(review): this path does not depend on curr_dir, so the same file is re-read
    # on every iteration.
    receptor = mda.Universe(enzyme_dir + 'receptor/' + 'qm_receptor.pdb')
    active_atoms_in_receptor = get_atoms_by_reslist(receptor,active_residues_resids)
    QM_atoms_in_receptor = get_atoms_by_reslist(receptor,QM_residues_resids)
    # simplify lists to write to file
    receptor_QM_atoms_list = simplify_integer_list(QM_atoms_in_receptor)
    receptor_active_atoms_list = simplify_integer_list(active_atoms_in_receptor)

    # QM/MM input for the receptor (disabled); note the hard-coded charge of -3
    #write_QMMM_orca_script(receptor_QM_atoms_list,receptor_active_atoms_list,-3,receptor_dir + 'QM_Active_' + str(active_sphere_r) + '/')



{<Residue MG, 1113>, <Residue INI, 1114>}
