In [1]:
import os
import datetime
import logging
import shutil
import pandas as pd
import glob

'''
from make_top_box import make_top_protein
from FillWater_AddIons import fill_water_ions
from Energy_Minimization import energy_min
from Nvt_Npt import make_new_minim_nvt_npt
from SAMD import make_new_minim_config_samd
from SAMD import run_md
'''
from FUNCTION import make_top_protein, fill_water_ions, energy_min, make_new_minim_nvt_npt, make_new_minim_config_samd, run_md
from FUNCTION import files_gmxmmpbsa, gmx_mmpbsa

# Resolve the project root only once per kernel session. Later cells call
# os.chdir(), so re-running this cell must not re-derive the root from the
# (by then different) working directory.
# (os.path.dirname(os.path.abspath(__file__)) would only work in a .py script.)
PROJECT_ROOT = globals().get("PROJECT_ROOT") or os.getcwd()

# Sub-folders of the project that the pipeline reads from.
DATA_DIR = os.path.join(PROJECT_ROOT, "DATA")
VMD_DIR = os.path.join(PROJECT_ROOT, "VMD_FUNCTION")
FUNCTION_DIR = os.path.join(PROJECT_ROOT, "FUNCTION")
FORCE_FIELD_PATH = os.path.join(PROJECT_ROOT, "FORCE_FIELD")
MMPBSA_INFILE_PATH = os.path.join(PROJECT_ROOT, "gmx_mmpbsa_in")

# Input structure: file name (without extension) of the PDB file in DATA_DIR.
# Alternative input kept for reference: "HLA_BiAB_protein_50ns"
protein_infile = "mtbind"
protein_file_path = os.path.join(DATA_DIR, f"{protein_infile}.pdb")

# MDP files: base names (without the .mdp extension) looked up in DATA_DIR.
ions_mdp_file = "ions"
minim_mdp_file = "minim"
nvt_mdp_file = "NVT"
npt_mdp_file = "NPT"
samd_mdp_file = "SAMD"
md_mdp_file = "EngComp_ff14sb_custom"
only_protein_md_mdp_file = "Protein_EngComp_ff14sb_custom"

# Full paths of the MDP files above.
ions_mdp_path = os.path.join(DATA_DIR, f"{ions_mdp_file}.mdp")
minim_mdp_path = os.path.join(DATA_DIR, f"{minim_mdp_file}.mdp")
nvt_mdp_path = os.path.join(DATA_DIR, f"{nvt_mdp_file}.mdp")
npt_mdp_path = os.path.join(DATA_DIR, f"{npt_mdp_file}.mdp")
samd_mdp_path = os.path.join(DATA_DIR, f"{samd_mdp_file}.mdp")
md_mdp_path = os.path.join(DATA_DIR, f"{md_mdp_file}.mdp")
only_protein_md_mdp_path = os.path.join(DATA_DIR, f"{only_protein_md_mdp_file}.mdp")



def create_output_directory():
    """Create a timestamped ``output_YYYYmmdd_HHMMSS`` folder and move into it.

    The folder is created inside the current working directory, its path is
    printed, and the process working directory is then changed to it.

    Returns:
        str: Path of the newly created output directory.

    Raises:
        FileExistsError: Propagated from ``os.mkdir`` if a folder with the
            same timestamped name already exists.
    """
    stamp = datetime.datetime.now().strftime('%Y%m%d_%H%M%S')
    output_dir_path = os.path.join(os.getcwd(), "output_" + stamp)

    os.mkdir(output_dir_path)
    print(f"Created directory: {output_dir_path}")

    # Side effect: relative paths used afterwards resolve against the new folder.
    os.chdir(output_dir_path)
    return output_dir_path

In [2]:
# Create the timestamped output folder for this run (this also changes the
# working directory to it).
ROOT_OUTPUT = create_output_directory()

# logging.basicConfig() silently does nothing when the root logger already has
# handlers (for example when this cell is re-run in the same kernel), which
# would keep writing to the OUTPUT.out of a previous output folder. Drop any
# stale handlers first so the log always lands in the folder created above.
_root_logger = logging.getLogger()
for _handler in list(_root_logger.handlers):
    _root_logger.removeHandler(_handler)
    _handler.close()

logging.basicConfig(
    # Explicit path: same location as before, but independent of the cwd.
    filename=os.path.join(ROOT_OUTPUT, "OUTPUT.out"),
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s -%(message)s",
)

logging.info("PATH: %s", ROOT_OUTPUT)

Created directory: /home/bio/Desktop/jupyter_test/antibody_test/output_20241129_101159


In [3]:
# Create the "configuration" working folder inside this run's output directory
# and make it the working directory. Anchoring on ROOT_OUTPUT (rather than on
# the current working directory) and tolerating an existing folder keeps this
# cell safe to re-run after later cells have changed directory.
configuration_path = os.path.join(ROOT_OUTPUT, "configuration")
os.makedirs(configuration_path, exist_ok=True)
print(f"Create directory: {configuration_path}")
os.chdir(configuration_path)

Create directory: /home/bio/Desktop/jupyter_test/antibody_test/output_20241129_101159/configuration


In [4]:
# Build the working folder tree for the run.
# TODO: running_num is fixed at 1 for now; it must become configurable for multi-cycle runs.

def build_folders(current_dir, running_num = 1):
    """Create the working folder tree and the results table under ``current_dir``.

    Existing folders are reused. The results table ``MoleculesResults.dat``
    (tab-separated, header only) is written only when it does not exist yet,
    so calling this function again never wipes rows that were added to it.

    Args:
        current_dir: Directory in which all folders are created.
        running_num: Number of cycles; one ``cycle{n}_MD`` folder is created
            for every n from 1 to ``running_num``.

    Returns:
        dict: Folder key -> path, containing ``cycle{n}_MD`` for every cycle
        followed by ``repository``, ``TEMP_FILES_FOLDER``,
        ``REMOVED_FILES_FOLDER`` and ``results``.
    """
    folders = {}

    # One folder per cycle; the dictionary key doubles as the folder name.
    for cycle_n in range(1, running_num + 1):
        cycle_key = f"cycle{cycle_n}_MD"
        folders[cycle_key] = os.path.join(current_dir, cycle_key)

    folders["repository"] = os.path.join(current_dir, "REPOSITORY")
    folders["TEMP_FILES_FOLDER"] = os.path.join(current_dir, "TEMP_FILES_FOLDER")
    folders["REMOVED_FILES_FOLDER"] = os.path.join(current_dir, "REMOVED_FILES_FOLDER")
    folders["results"] = os.path.join(current_dir, "RESULTS")

    # Single pass creates every folder, cycle folders included.
    for folder in folders.values():
        os.makedirs(folder, exist_ok=True)

    header = [
        "#RUNnumber", "DeltaG(kJ/mol)", "Coul(kJ/mol)", "vdW(kJ/mol)",
        "PolSol(kJ/mol)", "NpoSol(kJ/mol)", "ScoreFunct", "ScoreFunct2",
        "Canonica_AVG", "MedianDG", "DeltaG_2s", "dG_PotEn"]

    results_file_path = os.path.join(folders["results"], "MoleculesResults.dat")
    # Seed the table with its header only once; rewriting it on every call
    # would truncate results collected so far.
    if not os.path.exists(results_file_path):
        pd.DataFrame(columns=header).to_csv(
            results_file_path, sep='\t', index=False, header=True
        )
    return folders
    

In [5]:
# Anchor the folder tree on the configuration folder created earlier instead
# of on whatever the working directory happens to be: later cells chdir into
# cycle folders, so re-running this cell with os.getcwd() would build the
# tree in the wrong place.
current_dir = configuration_path
folders = build_folders(current_dir)

In [6]:
# generating a topology and build box
# Generate the topology and build the simulation box from the input PDB.
# NOTE(review): the positional arguments look like force field, water model,
# output structure base name and topology base name — confirm against
# FUNCTION.make_top_protein.
make_top_protein(protein_file_path, "amber99sb-ildn", "tip3p", "system", "topol")

In [7]:
# Copy system.pdb to {protein_infile}.pdb inside the configuration folder.
source = os.path.join(current_dir, "system.pdb")
destination = os.path.join(current_dir, f"{protein_infile}.pdb")
try:
    shutil.copy(source, destination)
except OSError as copy_error:
    # The following cells read the copied file, so report the actual cause and
    # stop here instead of failing later with a less obvious error.
    print(f"Copy system.pdb failed. ({copy_error})")
    raise

In [8]:
# NOTE: "OC2" is the terminal-oxygen atom name this pipeline currently relies
# on. Other naming schemes (e.g. the PDB-standard "OXT") can be supplied
# through ``terminal_atom_names`` without touching the function body.
def add_ter_to_pdb(pdb_file_name, terminal_atom_names=("OC2",)):
    """Insert a ``TER`` record after each terminal atom of a PDB file, in place.

    A ``TER`` line is added after every ``ATOM``/``HETATM`` record whose atom
    name (PDB columns 13-16) is one of ``terminal_atom_names``, unless the
    next line already starts with ``TER``. Only the atom-name field is
    inspected, so the name appearing elsewhere (remarks, titles, other
    columns) does not trigger an insertion.

    The result is written to ``<pdb_file_name>_temp`` and then moved over the
    original file.

    Args:
        pdb_file_name: Path of the PDB file to rewrite.
        terminal_atom_names: Atom name, or iterable of atom names, that mark
            the last atom of a chain. Defaults to ``("OC2",)``.
    """
    if isinstance(terminal_atom_names, str):
        # A bare string would otherwise be iterated character by character.
        terminal_atom_names = (terminal_atom_names,)
    terminal_names = {name.strip() for name in terminal_atom_names}

    temp_file_name = f"{pdb_file_name}_temp"
    with open(pdb_file_name, 'r') as f:
        lines = f.readlines()

    new_lines = []
    for i, line in enumerate(lines):
        new_lines.append(line)
        is_atom_record = line.startswith(("ATOM", "HETATM"))
        if not is_atom_record or line[12:16].strip() not in terminal_names:
            continue
        already_terminated = i + 1 < len(lines) and lines[i + 1].startswith("TER")
        if already_terminated:
            continue
        if not line.endswith("\n"):
            # Last line of a file without trailing newline: keep TER on its own line.
            new_lines[-1] = line + "\n"
        new_lines.append("TER\n")

    with open(temp_file_name, 'w') as f:
        f.writelines(new_lines)

    # os.replace overwrites the destination on every platform (os.rename
    # raises on Windows when the target exists).
    os.replace(temp_file_name, pdb_file_name)

In [9]:
# Add the missing TER records to the copied PDB. The path is relative, so this
# relies on the working directory still being the configuration folder.
add_ter_to_pdb(f"{protein_infile }.pdb")

In [10]:
def replace_his_(input_pdb, output_pdb):
    """Write a copy of a PDB file with HISD/HISE/HISP renamed to HIS.

    Each four-character name is replaced by ``"HIS "`` (padded with a space)
    so that every line keeps its length and the fixed-width PDB columns that
    follow the residue name (chain, residue number, coordinates, ...) stay in
    place.

    Args:
        input_pdb: Path of the PDB file to read (per the original note: the
            PDB in the configuration / mutant folder).
        output_pdb: Path of the PDB file to write (per the original note:
            placed in the ROOT_OUTPUT folder).
    """
    with open(input_pdb, 'r') as infile:
        data = infile.read()

    for variant in ("HISD", "HISE", "HISP"):
        # Same-width replacement: dropping a character would shift all later columns.
        data = data.replace(variant, "HIS ")

    with open(output_pdb, 'w') as outfile:
        outfile.write(data)

In [11]:
# Write a copy of the PDB with the HISD/HISE/HISP residue names normalised to
# HIS into the run's root output folder. The input path is relative, i.e. it
# is read from the current working directory.
output_pdb = os.path.join(ROOT_OUTPUT, f"{protein_infile}.pdb")
replace_his_(f"{protein_infile}.pdb",output_pdb)

In [12]:
# Adding water and ions
# NOTE(review): "system" / "topol" look like the structure and topology base
# names produced by make_top_protein — confirm against FUNCTION.fill_water_ions.
fill_water_ions("system", "topol", ions_mdp_path)
# Energy minimization
# NOTE(review): presumably reads "system_ions" and writes "system_compl"-prefixed
# files (system_compl_minim.gro is used by a later cell) — confirm.
energy_min(minim_mdp_path, "system_ions", "topol", "system_compl")

In [13]:
# RESIDUAL SELECTION

In [14]:
# NVT and NPT equilibration, starting from system_compl_minim.gro.
# NOTE(review): `sequence` is assigned here but the call below passes a literal
# 0 as its last argument — presumably that argument was meant to be
# `sequence`; confirm against FUNCTION.make_new_minim_nvt_npt.
sequence = 0
make_new_minim_nvt_npt("system_compl_minim.gro", nvt_mdp_path, npt_mdp_path, "system_equil", 0)


Statistics over 50001 steps [ 0.0000 through 50.0000 ps ], 1 data sets
All statistics are over 501 points

Energy                      Average   Err.Est.       RMSD  Tot-Drift
-------------------------------------------------------------------------------
Temperature                  293.18        6.5     19.427    35.1084  (K)

Statistics over 50001 steps [ 0.0000 through 100.0000 ps ], 2 data sets
All statistics are over 501 points

Energy                      Average   Err.Est.       RMSD  Tot-Drift
-------------------------------------------------------------------------------
Pressure                   0.333853         --    91.1532    10.3582  (bar)
Density                     1003.21         --    1.02463   -1.15174  (kg/m^3)


                      :-) GROMACS - gmx energy, 2024.4 (-:

Executable:   /opt/gromacs-2024.4/bin/gmx
Data prefix:  /opt/gromacs-2024.4
Working dir:  /home/bio/Desktop/jupyter_test/antibody_test/output_20241129_101159/configuration
Command line:
  gmx energy -f NVT.edr -o temp_NVT.xvg

Opened NVT.edr as single precision energy file

Select the terms you want from the following list by
selecting either (part of) the name or the number or a combination.
End your selection with an empty line or a zero.
-------------------------------------------------------------------
  1  Bond             2  Angle            3  Proper-Dih.      4  Per.-Imp.-Dih.
  5  LJ-14            6  Coulomb-14       7  LJ-(SR)          8  Disper.-corr. 
  9  Coulomb-(SR)    10  Coul.-recip.    11  Position-Rest.  12  Potential     
 13  Kinetic-En.     14  Total-Energy    15  Conserved-En.   16  Temperature   
 17  Pres.-DC        18  Pressure        19  Constr.-rmsd    20  Vir-XX        
 21  Vir-XY          22  Vi

In [15]:
# Move .cpt, .top, and .itp files to repository folder
for file_pattern in [f"{current_dir}/*.cpt", f"{current_dir}/*.top", f"{current_dir}/*.itp"]:
    for file in glob.glob(file_pattern):
        shutil.move(file, folders["repository"])

# Move specific files to repository folder
shutil.move(f"{current_dir}/{protein_infile}.pdb", folders["repository"])
shutil.move(f"{current_dir}/system_compl_minim.gro", folders["repository"])
shutil.move(f"{current_dir}/system_equil.gro", folders["repository"])


# Move temp* and *out files to removed files folder
for file in glob.glob("./*temp*.*") + glob.glob("./*.temp") + glob.glob("./*out"):
    shutil.move(file, folders["REMOVED_FILES_FOLDER"])

# Remove files with # in their name
for file in glob.glob("./#*"):
    os.remove(file)


In [16]:
# TODO: build the folder name from the cycle number (cycle{cycle_number}_MD)
# instead of hard-coding the first cycle.
cycle_MD_path = os.path.join(current_dir, "cycle1_MD")
os.chdir(cycle_MD_path)

In [17]:
# Bring the files needed for this cycle from the repository folder into the
# current working directory (shutil and glob are imported in the first cell).
repository_dir = folders["repository"]

# Equilibrated structure and topology are copied by exact name.
for file_name in ("system_equil.gro", "topol.top"):
    shutil.copy(os.path.join(repository_dir, file_name), "./")

# Chain topologies, position restraints and the NPT checkpoint are picked up
# by pattern, in the same order as before.
for pattern in ("*rotein_chain_*.itp", "posres_*.itp", "*NPT*.cpt"):
    for matched_file in glob.glob(os.path.join(repository_dir, pattern)):
        shutil.copy(matched_file, "./")

In [18]:
# Run the SAMD step on the equilibrated structure using the SAMD .mdp file.
# NOTE(review): "system_Compl_MDstart" is presumably the output base name and
# the trailing 0 a sequence/run index — confirm against
# FUNCTION.make_new_minim_config_samd.
make_new_minim_config_samd("system_equil.gro", samd_mdp_path, "system_Compl_MDstart", 0)

                      :-) GROMACS - gmx energy, 2024.4 (-:

Executable:   /opt/gromacs-2024.4/bin/gmx
Data prefix:  /opt/gromacs-2024.4
Working dir:  /home/bio/Desktop/jupyter_test/antibody_test/output_20241129_101159/configuration/cycle1_MD
Command line:
  gmx energy -f SAMD.edr -o press_SAMD.xvg

Opened SAMD.edr as single precision energy file

Select the terms you want from the following list by
selecting either (part of) the name or the number or a combination.
End your selection with an empty line or a zero.
-------------------------------------------------------------------
  1  Bond             2  Angle            3  Proper-Dih.      4  Per.-Imp.-Dih.
  5  LJ-14            6  Coulomb-14       7  LJ-(SR)          8  Disper.-corr. 
  9  Coulomb-(SR)    10  Coul.-recip.    11  Potential       12  Kinetic-En.   
 13  Total-Energy    14  Conserved-En.   15  Temperature     16  Pres.-DC      
 17  Pressure        18  Constr.-rmsd    19  Box-X           20  Box-Y         
 21  Box-Z   


Statistics over 175001 steps [ 0.0000 through 350.0000 ps ], 3 data sets
All statistics are over 1751 points

Energy                      Average   Err.Est.       RMSD  Tot-Drift
-------------------------------------------------------------------------------
Temperature                 340.073         13    27.2198   -51.3383  (K)
Pressure                   0.331511         10    97.3112   -43.9179  (bar)
Density                     976.488         13    28.1665    48.6103  (kg/m^3)


Selected 1: 'Protein'
Selected 1: 'Protein'
SAMD completed successfully!


Reading frame      50 time  350.000   Last frame         50 time  350.000   

GROMACS reminds you: "Proceed, With Fingers Crossed" (TeX)



In [19]:
# Run the production MD with the EngComp .mdp file.
# NOTE(review): "system_Compl_MD" and "traj_MD" are reused below as structure
# and trajectory base names; the meaning of the trailing 0 and 1 is not
# visible here — confirm against FUNCTION.run_md.
run_md(md_mdp_path,"system_Compl_MD", "traj_MD", 0, 1)

10:13:25 -- Running MD 


                      :-) GROMACS - gmx energy, 2024.4 (-:

Executable:   /opt/gromacs-2024.4/bin/gmx
Data prefix:  /opt/gromacs-2024.4
Working dir:  /home/bio/Desktop/jupyter_test/antibody_test/output_20241129_101159/configuration/cycle1_MD
Command line:
  gmx energy -f PROD.edr -o PROD0.xvg

Opened PROD.edr as single precision energy file

Select the terms you want from the following list by
selecting either (part of) the name or the number or a combination.
End your selection with an empty line or a zero.
-------------------------------------------------------------------
  1  Bond             2  Angle            3  Proper-Dih.      4  Per.-Imp.-Dih.
  5  LJ-14            6  Coulomb-14       7  LJ-(SR)          8  Disper.-corr. 
  9  Coulomb-(SR)    10  Coul.-recip.    11  Potential       12  Kinetic-En.   
 13  Total-Energy    14  Conserved-En.   15  Temperature     16  Pres.-DC      
 17  Pressure        18  Constr.-rmsd    19  Box-X           20  Box-Y         
 21  Box-Z        


Statistics over 2500001 steps [ 0.0000 through 5000.0000 ps ], 3 data sets
All statistics are over 25001 points

Energy                      Average   Err.Est.       RMSD  Tot-Drift
-------------------------------------------------------------------------------
Temperature                 309.971      0.042    1.29192  -0.239254  (K)
Pressure                    1.99385          1     99.977    4.94281  (bar)
Density                     1006.85      0.077    2.08503   0.328583  (kg/m^3)


Reading frame      80 time 1600.000   Reading frame      90 time 1800.000   Reading frame     100 time 2000.000   Reading frame     110 time 2200.000   Reading frame     120 time 2400.000   Reading frame     130 time 2600.000   Reading frame     140 time 2800.000   Reading frame     150 time 3000.000   Reading frame     160 time 3200.000   Reading frame     170 time 3400.000   Reading frame     180 time 3600.000   Reading frame     190 time 3800.000   Reading frame     200 time 4000.000   

Selected 1: 'Protein'
Selected 1: 'Protein'




GROMACS reminds you: "We cannot wait for Nature's good graces - to take them from her is our goal" (Ivan Michurin)



In [20]:
# Names used for the gmx_MMPBSA step of the first cycle.
# TODO: derive these from the cycle number once several cycles are run:
#   ConfName = f"cycle{cycle_number}"
#   RootName = f"cycle{cycle_number}_BE"
ConfName = "cycle1"
RootName = "cycle1_BE"
# Folder of the first MD cycle, as created by build_folders().
cycle_number_MD_FOLDER = folders["cycle1_MD"]

In [21]:
# Work inside the cycle folder: the following cells use relative file names.
os.chdir(cycle_number_MD_FOLDER )

In [22]:
# PDB file that was archived in the repository folder after equilibration.
repository_pdb_file = os.path.join(folders["repository"], f"{protein_infile}.pdb")
#startingFrameGMXPBSA="2000"
# Prepare the input files for gmx_MMPBSA. Argument order, as noted by the author:
# files_gmxmmpbsa(starting_gro_file, repository_pdb_file, trj_file, tpr_file, top_file, mdp_name, root_name, conf_name, vmd_function_folder, temp_files_folder)

files_gmxmmpbsa("system_Compl_MD", repository_pdb_file, "traj_MD", "system_Compl_MD", "topol", only_protein_md_mdp_path, RootName, ConfName, VMD_DIR, folders["TEMP_FILES_FOLDER"])


                     :-) GROMACS - gmx make_ndx, 2024.4 (-:

Executable:   /opt/gromacs-2024.4/bin/gmx
Data prefix:  /opt/gromacs-2024.4
Working dir:  /home/bio/Desktop/jupyter_test/antibody_test/output_20241129_101159/configuration/cycle1_MD
Command line:
  gmx make_ndx -f system_Compl_MD.gro -o index.ndx


Reading structure file

GROMACS reminds you: "We cannot wait for Nature's good graces - to take them from her is our goal" (Ivan Michurin)



		--running MAKE_NDX to create index.ndx..
Going to read 0 old index file(s)
Analysing residue names:
There are:   338    Protein residues
There are: 21382      Water residues
There are:   131        Ion residues
Analysing Protein...

  0 System              : 69313 atoms
  1 Protein             :  5036 atoms
  2 Protein-H           :  2563 atoms
  3 C-alpha             :   338 atoms
  4 Backbone            :  1014 atoms
  5 MainChain           :  1355 atoms
  6 MainChain+Cb        :  1653 atoms
  7 MainChain+H         :  1685 atoms
  8 SideChain           :  3351 atoms
  9 SideChain-H         :  1208 atoms
 10 Prot-Masses         :  5036 atoms
 11 non-Protein         : 64277 atoms
 12 Water               : 64146 atoms
 13 SOL                 : 64146 atoms
 14 non-Water           :  5167 atoms
 15 Ion                 :   131 atoms
 16 Water_and_ions      : 64277 atoms

 nr : group      '!': not  'name' nr name   'splitch' nr    Enter: list groups
 'a': atom       '&': and  'del' nr    

GRO to PDB completed successfully: mtbind
		--running TRJCONV to remove the pbc from the trajectory..


		--TRJCONV completed successfully!
		--running MAKE_NDX to make index with only receptor, ligand and complex..
		--MAKE_NDX completed successfully!
		--Creating a protein-only topology file...
		--Protein topology file created successfully!
		--running GROMPP to make a protein tpr..


		--GROMPP completed successfully!
		--Counting HIS residues in the PDB file..
		--Found HIS residues: 0
Files created successfully!


In [23]:
# get number of frames
def read_number_of_frames(check_file="trj_check.out"):
    """Return the frame count recorded in a trajectory check file.

    The value is the second whitespace-separated field of the first line that
    starts with ``Step`` and has such a field. ``Step`` lines without a second
    field are skipped instead of raising ``IndexError``.

    Args:
        check_file: Path of the text file to scan.

    Returns:
        str or None: The field exactly as written in the file (not converted
        to ``int``), or ``None`` when the file is missing or has no usable
        ``Step`` line. A missing file is also reported on stdout.
    """
    try:
        with open(check_file, "r") as file:
            for line in file:
                if not line.startswith("Step"):
                    continue
                fields = line.split()
                if len(fields) > 1:
                    return fields[1]
    except FileNotFoundError:
        print(f"Error: File {check_file} not found.")
    return None


number_of_frames = read_number_of_frames("trj_check.out")
print(number_of_frames)

151


In [24]:
# Locate the conda executable on PATH; its folder is passed on to gmx_mmpbsa().
conda_path = shutil.which("conda")
if conda_path is None:
    # shutil.which() returns None when nothing is found; fail with a clear
    # message instead of the TypeError os.path.dirname(None) would raise.
    raise FileNotFoundError(
        "conda executable not found on PATH; add it to PATH or set "
        "conda_activate_path manually."
    )
conda_activate_path = os.path.dirname(conda_path)

In [25]:
# Manual override, if conda is not on PATH (machine-specific example):
#conda_activate_path="/home/bio/ls/bin"
# Name handed to gmx_mmpbsa() — presumably the conda environment that provides
# gmx_MMPBSA; confirm against FUNCTION.gmx_mmpbsa.
conda_gmxmmpbsa_name="gmxMMPBSA"

In [26]:
# Settings passed to gmx_mmpbsa() below.
# Same force-field name as the one given to make_top_protein().
forcefield="amber99sb-ildn"
# FORCE_FIELD_PATH and MMPBSA_INFILE_PATH are defined in the first cell; the
# commented-out lines below are old machine-specific values kept for reference.
#FORCE_FIELD_PATH = "/home/bio/Desktop/jupyter_test/antibody_test/FORCE_FIELD"
mmpbsa_inFILE="mmpbsa_LinearPB_amber99SB_ILDN.in"
#MMPBSA_INFILE_PATH = "/home/bio/Desktop/jupyter_test/antibody_test/gmx_mmpbsa_in"
# Requested process count. NOTE(review): the captured output reports
# "NP_value=32 ... NP_used=16", so gmx_mmpbsa() may use fewer — confirm its rule.
np_value = 32

In [27]:
# Run gmx_MMPBSA for the first cycle.
# NOTE(review): the leading 1 is presumably the cycle number — confirm against
# FUNCTION.gmx_mmpbsa. number_of_frames is passed as read from trj_check.out
# (a string, or None when it could not be read).
gmx_mmpbsa(1, conda_activate_path, conda_gmxmmpbsa_name, cycle_number_MD_FOLDER, ConfName, RootName, forcefield, FORCE_FIELD_PATH, 
                 mmpbsa_inFILE, MMPBSA_INFILE_PATH , np_value, number_of_frames)

NP_value=32 	 number_of_frames=151 	 NP_used=16


Finished gmx_MMPBSA on cycle1


In [28]:
        # Hard-coded gmx_MMPBSA command line for cycle1 with 16 MPI processes.
        # NOTE(review): `command` is only assigned here and never executed in
        # the visible cells, and the indentation suggests this was pasted from
        # inside a function — presumably kept as a reference of what
        # gmx_mmpbsa() runs; confirm or remove.
        command = f"mpirun -np 16 gmx_MMPBSA MPI -O -i mmpbsa_LinearPB_amber99SB_ILDN.in -cs cycle1_newGRO.tpr -ci index.ndx " \
          f"-cg 0 1 -ct cycle1_noPBC.xtc -cr ./cycle1_starting_protein.pdb -cp topol_protein.top " \
          f"-eo gmx_MMPBSA_plot.csv -deo FINAL_DECOMP_MMPBSA.csv -nogui"