In [1]:
import MDAnalysis as mda
import numpy as np
import shutil
import csv
from utils import *
import warnings 

# Silence UserWarning messages issued from modules whose fully qualified name
# starts with "MDAnalysis" (the `module` argument is a regex matched against the
# start of the module name). Warnings from other packages are not affected.
warnings.filterwarnings("ignore", category=UserWarning, module="MDAnalysis")

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Cell purpose: for each substrate, trim the aligned last-frame structure, renumber
# its residues, pick QM / active regions around the intermediate's C2 atom, and
# write the ORCA QM/MM input plus supporting files.
# All helpers called without a module prefix come from `from utils import *`.
# load receptor universe and extract the different parts of the protein int1 receptor 
head_dir = 'temp_structures/'

# dictionary of charged protein residues with key being the Amber resname and the value being the net charge of that residue
residue_charge_dict = {'ARG':1,'LYS':1,'HIP':1,'ASP':-1,'GLU':-1,'MG':2}

# specify active atoms
# NOTE(review): by_dist is assigned but never read in the cells visible here.
by_dist = True 
QM_sphere_r = 2 # Angstroms (a value of 2 will give only the ThDP intermediate (auto adds MG))
active_sphere_r = 8
water_sphere_r = 12
    
# TODO calculate protein charge automatically (or read it in from leap.log file)
base_charge = -4 # inp without accounting for R groups, you will never need to change this for INP      
intermediate = 'INP'

all_substrates = ['6']
for curr_substrate in all_substrates:
    
    # The same substrate label is used for both the donor and the acceptor.
    donor_substrate = curr_substrate
    acceptor_substrate = curr_substrate
    
    # NOTE(review): the name `complex` shadows the Python builtin of the same name.
    complex = mda.Universe(head_dir+'aligned_last_frame.pdb')
    
    # get all atoms near the protein excluding NaCl
    # Keeps protein, INP and MG, plus WAT atoms selected by `around 3.0 protein`,
    # then expands the selection to whole residues via `.residues`.
    trimmed_complex_initial = complex.select_atoms("(protein or resname INP or resname MG) or ((around 3.0 protein) and resname WAT)").residues
    # Renumber residues manually
    for i, residue in enumerate(trimmed_complex_initial):
        residue.resid = i + 1  # Assign new resid starting from 1
    
    #output_dir = '/projects/p30041/gbf4422/sensitivity_testing/water_test/additional_active_shell/WATER_3.0/'
    output_dir = 'temp_structures/'
    # Per the recorded cell output, these two helpers write prep/initial.pdb and
    # then edit that file "for Amber"; their exact edits are not visible here.
    write_universe(output_dir + 'prep/','initial.pdb',trimmed_complex_initial)
    edit_protein_files(output_dir + 'prep/','initial.pdb')

    # Reload the trimmed, edited structure; everything below works on this universe.
    trimmed_complex = mda.Universe(output_dir + 'prep/initial.pdb')
    
    # Substrates '4', '6', '11' and '16' contribute -1 each time they appear,
    # once as acceptor and once as donor (so -2 in total when both match).
    if acceptor_substrate in ['4','6','11','16']:
        additional_charge = -1
    else:
        additional_charge = 0
    # add the charge of the intermediate complex
    if donor_substrate in ['4','6','11','16']:
        additional_charge += -1
    # Registers the intermediate's net charge in the shared dict (mutated in place).
    residue_charge_dict[intermediate] = base_charge + additional_charge
    # Sum the tabulated charges over every residue of the trimmed system.
    # NOTE(review): MM_charge is only consumed by the commented-out
    # write_MM_orca_script call below, so it is currently unused.
    MM_charge = 0
    for residue in trimmed_complex.residues:
        curr_resname = residue.resname
        if curr_resname in residue_charge_dict:
            MM_charge += residue_charge_dict[curr_resname]

    inp = trimmed_complex.select_atoms("resname " + intermediate)
    # get the carbonyl carbon of the substrate
    #atom_dict = get_substrate_aka_indexes(ini)
    atom_dict = get_inp_indexes(inp)
    # atom_dict['C2'] is used as a position inside `inp.atoms`; `.index` then gives
    # the atom's index in the whole universe, which the "index" selection expects.
    # NOTE(review): assumes get_inp_indexes returns positions relative to `inp` — confirm.
    C2_index = atom_dict['C2']
    C2_id = inp.atoms[C2_index].index
    C2_atom = trimmed_complex.select_atoms("index " +  str(C2_id))

    #QM_residues, active_residues, QM_atoms_indexes,active_atoms_indexes,fixed_atoms_indexes = get_atoms_by_distance(trimmed_complex,QM_sphere_r,active_sphere_r,C2_id)
    
    # Region selection around C2 using the three sphere radii defined above.
    # fixed_atoms_indexes is returned but not used later in this cell.
    QM_residues, active_residues, QM_atoms_indexes,active_atoms_indexes,fixed_atoms_indexes = get_water_by_distance(trimmed_complex,QM_sphere_r,active_sphere_r,water_sphere_r,C2_id)
    
    # Residue IDs of both regions, saved to CSV at the end of the cell.
    QM_residues_resids = [residue.resid for residue in QM_residues]
    active_residues_resids = [residue.resid for residue in active_residues]
    
    # simplify lists to write to file
    QM_list = simplify_integer_list(QM_atoms_indexes)
    active_list = simplify_integer_list(active_atoms_indexes)
    # calculate the charge of our system
    total_QM_charge = 0
    # get charge of QM region 
    # Residue names missing from residue_charge_dict contribute nothing.
    for residue in QM_residues:
        resname = residue.resname
        if resname in residue_charge_dict:
            total_QM_charge += residue_charge_dict[resname]

    # Output folder name encodes both radii, e.g. 'temp_structures/QM_2_Active_8/'.
    qm_complex_output_dir = output_dir + 'QM_' + str(QM_sphere_r) + '_Active_' + str(active_sphere_r) + '/'
    #write_MM_orca_script(active_list,MM_charge,output_dir+'MM_Opt_Active_10/')
    # Copy the intermediate's parameter files from the per-substrate prep folder.
    # NOTE(review): output_dir already ends with '/', so the second destination
    # contains a double slash ('temp_structures//prep/INP.frcmod').
    shutil.copy(head_dir + curr_substrate + '/prep/INP.mol2', output_dir + 'prep/INP.mol2')
    shutil.copy(head_dir + curr_substrate + '/prep/INP.frcmod', output_dir + '/prep/INP.frcmod')


    # Writes the ORCA QM/MM input; the later shutil.copy into the same folder
    # presumably relies on this helper having created it — TODO confirm.
    write_QMMM_orca_script(QM_list,active_list,total_QM_charge,qm_complex_output_dir)
    #shutil.copy(qm_complex_file_dir + '/qm_complex.ORCAFF.prms', qm_complex_output_dir + 'qm_complex.ORCAFF.prms')
    #shutil.copy(qm_complex_file_dir + '/qm_complex.pdb', qm_complex_output_dir + 'qm_complex.pdb')
    shutil.copy('scripts/template_orca_job_expanse.sh', qm_complex_output_dir + 'orca_job_expanse.sh')
    # Persist the residue IDs of both regions next to the ORCA input.
    write_resids_to_csv(qm_complex_output_dir,f'QM_{QM_sphere_r}_and_{active_sphere_r}A_Active_residues.csv',QM_residues_resids,active_residues_resids)

File 'initial.pdb' has been written in 'temp_structures/prep/'.
Edited  temp_structures/prep/initial.pdb  for Amber
Taking best guess at ring N
Taking best guess at carbanion
C2 coords: [[54.607 60.293 69.703]]
C1 coords: [[55.618 59.733 70.757]]


In [6]:
# Show the coordinates of the atom(s) held in the C2_atom selection.
# Depends on C2_atom having been created by an earlier cell in this kernel session.
print(C2_atom.atoms.positions)


[[56.8   58.293 74.32 ]]


In [3]:
def read_resids_from_csv(file_path):
    """Read the QM and active residue IDs from a residue CSV file.

    The first row holds the QM residues and the second row the active
    residues. In each row the first column is skipped and the remaining cells
    are parsed as integers. Duplicates are dropped while keeping the order in
    which the IDs appear in the file, so the result is deterministic. Blank
    cells (e.g. from trailing commas or padded rows) are ignored. Rows after
    the second are not parsed.

    Parameters
    ----------
    file_path : str or path-like
        Path of the CSV file to read.

    Returns
    -------
    tuple of (list of int, list of int)
        ``(QM_resids, active_resids)``.

    Raises
    ------
    ValueError
        If the file has fewer than two rows, or a non-blank cell is not an
        integer.
    """
    # newline='' is the mode the csv module documents for files given to csv.reader.
    with open(file_path, 'r', newline='') as csv_file:
        rows = list(csv.reader(csv_file))

    if len(rows) < 2:
        raise ValueError(
            f"Expected at least two rows (QM and active residues) in "
            f"{file_path!r}, found {len(rows)}."
        )

    # dict.fromkeys removes duplicates but, unlike set(), preserves file order.
    QM_resids, active_resids = (
        list(dict.fromkeys(int(cell) for cell in row[1:] if cell.strip()))
        for row in rows[:2]
    )
    return QM_resids, active_resids

In [5]:
# Cell purpose: for each substrate, load the prepared int2 structure and write the
# intermediate (INP) and everything else (receptor) to separate PDB files.
# The globals set here (head_dir, residue_charge_dict, base_charge, intermediate)
# are also read by the later cell that rebuilds the ORCA input from a residue CSV.
# load receptor universe and extract the different parts of the protein int1 receptor 
head_dir = '/projects/p30041/gbf4422/5EJ5/int3_R/'

# dictionary of charged protein residues with key being the Amber resname and the value being the net charge of that residue
residue_charge_dict = {'ARG':1,'LYS':1,'HIP':1,'ASP':-1,'GLU':-1,'MG':2}

# specify active atoms
# NOTE(review): by_dist and the two sphere radii are assigned but not read in this cell.
by_dist = True 
QM_sphere_r = 2 # Angstroms (a value of 2 will give only the ThDP intermediate (auto adds MG))
active_sphere_r = 10
    
# TODO calculate protein charge automatically (or read it in from leap.log file)
base_charge = -3 # ino without accounting for R groups, you will never need to change this for INP      
intermediate = 'INP'

all_substrates = ['6']
for curr_substrate in all_substrates:
    
    donor_substrate = curr_substrate
    # NOTE(review): the name `complex` shadows the Python builtin of the same name.
    complex = mda.Universe(head_dir+curr_substrate+'/QMMM/int2/prep/initial.pdb')
    output_dir = head_dir + curr_substrate + '/QMMM/int2/prep/'
    
    # Donor substrates '4', '6', '11' and '16' add -1 to the intermediate's charge.
    if donor_substrate in ['4','6','11','16']:
        additional_charge = -1
    else:
        additional_charge = 0

    # Registers the intermediate's net charge in the shared dict (mutated in place).
    residue_charge_dict[intermediate] = base_charge + additional_charge
    # Sum the tabulated charges over every residue of the loaded structure.
    # NOTE(review): MM_charge is computed but not used anywhere in this cell.
    MM_charge = 0
    for residue in complex.residues:
        curr_resname = residue.resname
        if curr_resname in residue_charge_dict:
            MM_charge += residue_charge_dict[curr_resname]

    # get the QM atoms and residues (automatically includes Mg2+)
    # get INP atoms 
    inp = complex.select_atoms("resname " + intermediate)
    write_universe(output_dir,'inp.pdb',inp)
    
    # Everything that is not the intermediate is written as the receptor, then
    # passed to edit_protein_files (recorded output: "Edited ... receptor.pdb for Amber").
    receptor = complex.select_atoms("not resname " + intermediate)
    write_universe(output_dir,'receptor.pdb',receptor)
    edit_protein_files(output_dir  ,'receptor.pdb')

    

File 'inp.pdb' has been written in '/projects/p30041/gbf4422/5EJ5/int3_R/1/QMMM/int2/prep/'.
File 'receptor.pdb' has been written in '/projects/p30041/gbf4422/5EJ5/int3_R/1/QMMM/int2/prep/'.
Edited  /projects/p30041/gbf4422/5EJ5/int3_R/1/QMMM/int2/prep/receptor.pdb  for Amber
File 'inp.pdb' has been written in '/projects/p30041/gbf4422/5EJ5/int3_R/2/QMMM/int2/prep/'.
File 'receptor.pdb' has been written in '/projects/p30041/gbf4422/5EJ5/int3_R/2/QMMM/int2/prep/'.
Edited  /projects/p30041/gbf4422/5EJ5/int3_R/2/QMMM/int2/prep/receptor.pdb  for Amber
File 'inp.pdb' has been written in '/projects/p30041/gbf4422/5EJ5/int3_R/3/QMMM/int2/prep/'.
File 'receptor.pdb' has been written in '/projects/p30041/gbf4422/5EJ5/int3_R/3/QMMM/int2/prep/'.
Edited  /projects/p30041/gbf4422/5EJ5/int3_R/3/QMMM/int2/prep/receptor.pdb  for Amber
File 'inp.pdb' has been written in '/projects/p30041/gbf4422/5EJ5/int3_R/5/QMMM/int2/prep/'.
File 'receptor.pdb' has been written in '/projects/p30041/gbf4422/5EJ5/int3_

In [12]:
# Cell purpose: rebuild the ORCA QM/MM input for each substrate from a previously
# saved list of QM / active residue IDs instead of a distance-based selection.
# NOTE(review): relies on head_dir, residue_charge_dict, base_charge and
# intermediate still being defined by an earlier cell in this kernel session.
all_substrates = ['0']
for curr_substrate in all_substrates:
    
    donor_substrate = curr_substrate
    # NOTE(review): the name `complex` shadows the Python builtin of the same name.
    complex = mda.Universe(head_dir+curr_substrate+'/QMMM/int2/prep/qm_complex.pdb')
    
    output_dir = head_dir + curr_substrate + '/QMMM/int2/QM_2_Active_6/'

    # Donor substrates '4', '6', '11' and '16' add -1 to the intermediate's charge.
    if donor_substrate in ['4','6','11','16']:
        additional_charge = -1
    else:
        additional_charge = 0

    # Registers the intermediate's net charge in the shared dict (mutated in place).
    residue_charge_dict[intermediate] = base_charge + additional_charge
      
    # Residue IDs come from a CSV (first row QM, second row active).
    # NOTE(review): the CSV is read from the int3 folder while the structure and
    # the output folder are under int2 — confirm this mix is intended.
    file_path = head_dir+curr_substrate+'/QMMM/int3/QM_2_and_6A_Active_residues.csv'  # Replace with your file path
    QM_resids, active_resids = read_resids_from_csv(file_path)

    # Builds a selection string of the form 'resid A or resid B or ...'.
    # NOTE(review): with an empty ID list the string is just 'resid ', which is
    # presumably not a valid selection — TODO confirm / guard.
    QM_atoms = complex.select_atoms('resid ' + ' or resid '.join([str(i) for i in QM_resids]))
    # Distinct residues owning the selected atoms; used for the charge sum below.
    QM_residues = set([i.residue for i in QM_atoms])
    active_atoms = complex.select_atoms('resid ' + ' or resid '.join([str(i) for i in active_resids]))
    # NOTE(review): active_residues, QM_residues_resids and active_residues_resids
    # are assigned but not read again in this cell.
    active_residues = set([i.residue for i in active_atoms])
    QM_atoms_indexes = get_atoms_by_reslist(complex,QM_resids)
    active_atoms_indexes = get_atoms_by_reslist(complex,active_resids)
    QM_residues_resids = QM_resids
    active_residues_resids = active_resids

    # simplify lists to write to file
    QM_list = simplify_integer_list(QM_atoms_indexes)
    active_list = simplify_integer_list(active_atoms_indexes)
    # calculate the charge of our system
    total_QM_charge = 0
    # get charge of QM region 
    # Residue names missing from residue_charge_dict contribute nothing.
    for residue in QM_residues:
        resname = residue.resname
        if resname in residue_charge_dict:
            total_QM_charge += residue_charge_dict[resname]


    # Per the recorded output, this helper creates output_dir when missing and
    # saves the input as opt.inp; the copies below then place the force-field
    # parameters, the structure and the job script alongside it.
    write_QMMM_orca_script(QM_list,active_list,total_QM_charge,output_dir)
    shutil.copy(head_dir+curr_substrate+'/QMMM/int2/prep/' + 'qm_complex.ORCAFF.prms', output_dir  + 'qm_complex.ORCAFF.prms')
    shutil.copy(head_dir+curr_substrate+'/QMMM/int2/prep/' + 'qm_complex.pdb', output_dir  + 'qm_complex.pdb')
    shutil.copy('scripts/template_orca_job_expanse.sh', output_dir + 'orca_job_expanse.sh')

Directory '/projects/p30041/gbf4422/5EJ5/int3_R/0/QMMM/int2/QM_2_Active_6/' does not exist. Creating it...
Complex QM/MM File processed and saved as /projects/p30041/gbf4422/5EJ5/int3_R/0/QMMM/int2/QM_2_Active_6/opt.inp
