# In [1]:
import MDAnalysis as mda
import numpy as np
import shutil
import csv
from utils import *
import warnings 
import subprocess

# Suppress warnings specific to MDAnalysis
warnings.filterwarnings("ignore", category=UserWarning, module="MDAnalysis")

# Root directory of the run. The script reads the optimized int1 structure
# from '<head_dir>opt/' and the residue lists from '<head_dir>prep/', and
# writes the int2 inputs under '<head_dir>int2/'.
head_dir = '/expanse/lustre/projects/nwu118/gbonnanzio/sensitivity_testing/active_size/Active_10/'

# Charged residues: key is the Amber resname, value is the net charge of that
# residue. Residue names that are not listed are treated as neutral when the
# charges are summed. (This is the single definition; an earlier copy that
# lacked 'HIP' was immediately overwritten and has been dropped.)
residue_charge_dict = {'ARG':1,'LYS':1,'HIP':1,'ASP':-1,'GLU':-1,'MG':2}

# TODO calculate protein charge automatically (or read it in from leap.log file)
base_charge = -4 # This accounts for adding the proton to the decarboxylated intermediate
starting_intermediate = 'INI'  # resname of the intermediate in the input structure
ending_intermediate = 'INO'    # resname given to the intermediate that is written out

# Substrate labels whose INO residue carries one extra negative charge on top
# of base_charge.
substrates_with_extra_negative_charge = {'4', '6', '11', '16'}

# Names (keys of the dict returned by get_ini_indexes) of the INI atoms that
# are deleted to turn INI into INO: C3 plus the two CO2 atoms.
ini_atoms_to_delete = ('C3', 'CO2_0', 'CO2_1')

all_substrates = ['6']
for curr_substrate in all_substrates:
    # Build the int2 (INO) system for one substrate from the optimized int1
    # (INI) complex:
    #   1. delete the leaving atoms from INI and rename the residue to INO,
    #   2. write the INO-only and full-complex PDBs and run assemble_int2.sh,
    #   3. write the ORCA MM input and the ORCA QM/MM input for the result.

    # Per-substrate sub-directories are currently disabled; the disabled form
    # was: head_dir + curr_substrate + '_' + curr_substrate + '/'
    working_dir = head_dir

    # read in int1 pdb (named complex_universe so the builtin `complex` is
    # not shadowed)
    complex_universe = mda.Universe(working_dir + '/opt/opt.pdb')

    # The CSV is parsed once; the same second list is used both as the MM
    # residue list and as the active-region residue list.
    QM_resids, active_resids = read_resids_from_csv(working_dir + 'prep/QM_2_and_10A_Active_residues.csv')
    MM_resids = active_resids

    # Charge assigned to the INO residue for this substrate
    if curr_substrate in substrates_with_extra_negative_charge:
        additional_charge = -1
    else:
        additional_charge = 0
    residue_charge_dict[ending_intermediate] = base_charge + additional_charge

    # split the complex into the intermediate and everything else
    ini = complex_universe.select_atoms("resname " + starting_intermediate)
    ini_max_index = max(ini.atoms.indices)
    receptor = complex_universe.select_atoms("not resname " + starting_intermediate)
    atom_dict = get_ini_indexes(ini)

    # atom_dict holds positions inside `ini`; .index converts them to indices
    # in the context of the complex universe
    indices_to_remove = [ini.atoms[atom_dict[atom_key]].index
                         for atom_key in ini_atoms_to_delete]

    # NOTE: no hydrogen is added to the intermediate here. A disabled draft
    # placed one at bond_dists['O-H'] from O1 along the C1->C2 direction and
    # merged it into the INO universe.

    # keep every INI atom except the ones being deleted
    keep_mask = ~np.isin(ini.atoms.indices, indices_to_remove)
    ino_universe = mda.Merge(ini.atoms[keep_mask])

    # relabel the new molecule: same resid as INI, new resname
    for ino_residue in ino_universe.residues:
        ino_residue.resid = ini.resids[0]
        ino_residue.resname = ending_intermediate

    # write a pdb for just the INO molecule
    output_dir = working_dir + 'int2/prep/'
    write_universe(output_dir,'ino.pdb',ino_universe)

    # combine the receptor and the INO molecule in a single universe
    ino_complex = mda.Merge(receptor.atoms,ino_universe.atoms)
    write_universe(output_dir,'ino_complex.pdb',ino_complex)
    edit_protein_files(output_dir,'ino_complex.pdb')

    # Run the bash script from head_dir, passing the INO charge and the
    # directory that holds the PDBs written above
    result = subprocess.run(
        ["bash", "/expanse/lustre/projects/nwu118/gbonnanzio/Generate_QMMM_Geometries/scripts/assemble_int2.sh", str(residue_charge_dict[ending_intermediate]), output_dir],
        cwd=head_dir,
        capture_output=True,
        text=True
    )

    if result.returncode != 0:
        print('ERROR WITH tleap')
        print(result.stderr)  # Print the error output
        # Everything below reads the structure this script is expected to
        # produce; after a failure that file is missing or left over from an
        # earlier run, so skip this substrate instead of writing ORCA inputs
        # from it.
        continue
    print('tleap was successful')

    # read back in the amber generated structure that will fix resids
    mm_output_dir = working_dir + 'int2/mm_opt/'
    ino_complex = mda.Universe(mm_output_dir + 'qm_complex.pdb')

    # total charge: sum over every residue, unlisted resnames count as 0
    MM_charge = sum(residue_charge_dict.get(res.resname, 0)
                    for res in ino_complex.residues)
    print('Charge of entire system:',MM_charge)

    # get the MM atom indexes and simplify the list to write to file
    MM_atom_indexes = get_atoms_by_reslist(ino_complex,MM_resids)
    MM_list = simplify_integer_list(MM_atom_indexes)

    # output the MM script
    write_MM_orca_script(MM_list,MM_charge,mm_output_dir,'mm_opt.inp')
    # NOTE(review): relative source path, resolved against the current
    # working directory of the Python process — confirm the notebook is run
    # from the repository root.
    shutil.copy('scripts/template_orca_job_expanse.sh',mm_output_dir + 'orca_job_expanse.sh')

    # charge of the QM region
    total_QM_charge = 0
    for residue in QM_resids:
        print(residue)
        # residues[] is positional, so this assumes resid N is the N-th
        # residue of the reloaded structure — NOTE(review): confirm
        resname = ino_complex.residues[residue-1].resname
        print(resname)
        total_QM_charge += residue_charge_dict.get(resname, 0)

    QM_atoms_indexes = get_atoms_by_reslist(ino_complex,QM_resids)
    active_atoms_indexes = get_atoms_by_reslist(ino_complex,active_resids)

    QM_list = simplify_integer_list(QM_atoms_indexes)
    active_list = simplify_integer_list(active_atoms_indexes)

    # Re-use the fixed atoms of the int1 optimization, shifting indices down
    # by the number of deleted atoms for every atom past the threshold.
    # NOTE(review): this shift is only exact for atoms located after the INI
    # residue (or before it, which are left untouched); a fixed atom inside
    # INI would need a per-atom remap — confirm none are fixed there.
    fixed_atoms = extract_fixed_atoms(working_dir + '/opt/opt.out')
    n_removed = len(indices_to_remove)
    threshold_index = ini_max_index - n_removed
    fixed_atoms_modified = [
        fixed_atom - n_removed if fixed_atom > threshold_index else fixed_atom
        for fixed_atom in fixed_atoms
    ]
    fixed_atoms_list = simplify_integer_list(fixed_atoms_modified)

    qm_output_dir = working_dir + 'int2/opt/'
    write_QMMM_orca_script(QM_list,active_list,total_QM_charge,qm_output_dir,'opt.inp',fixed_atoms_list)


# [captured notebook stderr] from .autonotebook import tqdm as notebook_tqdm
