In [None]:
import os
import datetime
import logging
import shutil
import pandas as pd
import glob
import multiprocessing
import re
import subprocess
import numpy as np
import math
import random
import yaml

from FUNCTION import make_top_protein, fill_water_ions, energy_min, make_new_minim_nvt_npt, run_md
from FUNCTION import files_gmxmmpbsa, gmx_mmpbsa, Data_Analysis_Pre, Data_Analysis_Cal, clean_for_each_cycle, GRO_to_PDB
from FUNCTION import Data_Analysis_Cal_child


def load_config(config_file="infile.yaml"):
    """Read a YAML configuration file and return its parsed content.

    Args:
        config_file: Path of the YAML file to read. A relative path is
            resolved against the current working directory.

    Returns:
        The object produced by ``yaml.safe_load`` for the file content.
    """
    with open(config_file, "r") as handle:
        return yaml.safe_load(handle)
# Load the run configuration once; the values below are read by the rest of the notebook.
config = load_config()
# Locations of the external tools, taken from the 'Basic_setting' section of the configuration.
conda_path = config['Basic_setting']['conda_activate_script_path']
VMD_path = config['Basic_setting']['VMD_path']
gmx_path = config['Basic_setting']['GROMACS_executable_path']

# Names of the conda environments that are looked up with check_conda_env().
conda_gmxmmpbsa_name = config['Basic_setting']['conda_gmx_MMPBSA_name']
conda_modeller_name = config['Basic_setting']['conda_Modeller_name']
# Path of the "activate" script inside conda_path. The variable keeps its
# spelling ("actiavte") because other code in this file refers to that name.
conda_actiavte_path = os.path.join(conda_path,"activate")


def get_version(command):
    """Return the output of ``<command> --version``, or ``None`` on failure.

    Args:
        command: Name or path of the executable to query.

    Returns:
        The stripped standard output of ``command --version``. ``None`` when
        the executable cannot be started (for example it is not installed)
        or when it exits with a non-zero status.
    """
    try:
        results = subprocess.run([command,"--version"],capture_output=True, text=True, check=True)
        return results.stdout.strip()
    except (subprocess.CalledProcessError, OSError):
        # OSError covers FileNotFoundError / PermissionError, which are raised
        # when the executable is missing or cannot be executed; without it a
        # missing tool would abort the whole environment check.
        return None

def which_program(command):
    """Locate an executable on the current ``PATH``.

    The lookup is done with ``shutil.which`` instead of spawning the external
    ``which`` utility, so it does not fail when that utility is unavailable.

    Args:
        command: Name of the program to look up.

    Returns:
        The path of the executable, or ``None`` when it is not found.
    """
    return shutil.which(command)
        
# check conda
if not os.path.isfile(conda_actiavte_path):
    print(f"ERROR: cannot find conda activate scrpt path as {conda_actiavte_path}")
else:
    # The inner quotes differ from the f-string's own quotes: reusing the same
    # quote character inside a replacement field is only accepted from
    # Python 3.12 on and is a SyntaxError on older interpreters.
    print(f"conda activate path --> {conda_actiavte_path} version: {get_version('conda')}")

# check python
python_version = get_version("python")
print(f"Python --> {which_program('python')} version: {python_version}")

# check VMD: a configured path must be an executable file; with no configured
# path, report whatever `vmd` resolves to on PATH.
if VMD_path:
    if os.path.isfile(VMD_path) and os.access(VMD_path, os.X_OK):
        print (f"VMD path --> {VMD_path} ")
    else:
        print(f"ERROR: cannot find VMD path as {VMD_path}")
else:
    print(f"VMD path --> {which_program('vmd')} ")

# check gromacs: same rule as for VMD, using `gmx` as the PATH fallback.
if gmx_path:
    if os.path.isfile(gmx_path) and os.access(gmx_path, os.X_OK):
        print (f"gmx path --> {gmx_path} ")
    else:
        print(f"ERROR: cannot find gmx path as {gmx_path}")
else:
    print(f"gmx path --> {which_program('gmx')} ")


# check gmx_mmpbsa

# check modeller
def check_conda_env(env):
    """Report whether a conda environment is listed by ``conda info --envs``.

    The environment is matched against whole fields of the listing (the name
    column, or the location column) instead of searching the raw text, so a
    name that is merely a fragment of another name or of a path is not
    reported as installed.

    Args:
        env: Name of the environment to look for; its full location as shown
            in the listing is accepted as well.

    Returns:
        ``True`` when a listed environment matches ``env``. ``False`` when
        there is no match, when ``env`` is empty, or when ``conda`` cannot be
        run or exits with a non-zero status.
    """
    if not env:
        return False

    try:
        check_env = subprocess.run(['conda', 'info', '--envs'], stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=True, text=True)
    except (subprocess.CalledProcessError, OSError):
        # OSError: the conda executable is missing or cannot be executed.
        return False

    for line in check_env.stdout.splitlines():
        # Skip blank lines and the '#' header lines of the listing.
        if not line.strip() or line.lstrip().startswith('#'):
            continue
        fields = line.split()
        # Lines that start with whitespace have an empty name column (the
        # environment is listed by location only).
        name = None if line[0].isspace() else fields[0]
        location = fields[-1]
        if env == name or env == location:
            return True
    return False

# Report, for each required conda environment in turn, whether it was found.
for _required_env in (conda_gmxmmpbsa_name, conda_modeller_name):
    if not check_conda_env(_required_env):
        print(f"No {_required_env} founded, please check that")
    else:
        print(f"{_required_env} is installed")


# check parameters
# gmx_MMPBSA settings from the configuration; the first two are coerced to strings.
receptorFRAG = str(config['gmx_mmpbsa']['receptorFRAG'])
ABchains = str(config['gmx_mmpbsa']['ABchains'])
startingFrameGMXPBSA = config['gmx_mmpbsa']['startingFrameGMXPBSA']
#protein_infile = config['input_files']['structure_infile_name']
protein_file_path = config['input_files']['structure_file_path']

# Base name of the input structure file with its extension removed.
protein_infile = os.path.basename(protein_file_path)
protein_infile, _ =os.path.splitext(protein_infile)

# The project root is the working directory at the time this cell runs, so the
# cell has to be executed from the project folder.
# NOTE(review): create_output_directory() later changes the working directory,
# so executing this cell again in the same kernel yields a different root.
#PROJECT_ROOT = os.path.dirname(os.path.abspath(__file__))
PROJECT_ROOT = os.getcwd()

# Sub-folders of the project root used by the pipeline.
DATA_DIR = os.path.join(PROJECT_ROOT, "DATA")
VMD_DIR = os.path.join(PROJECT_ROOT, "VMD_FUNCTION")
FUNCTION_DIR = os.path.join(PROJECT_ROOT, "FUNCTION")
FORCE_FIELD_PATH = os.path.join(PROJECT_ROOT, "FORCE_FIELD")
MMPBSA_INFILE_PATH = os.path.join(PROJECT_ROOT, "gmx_mmpbsa_in")
# pdb file
#protein_infile = "HLA_BiAB_protein_50ns" 
#protein_infile = "mtbind"
#protein_infile = "antibody_zixuan"

#protein_file_path = os.path.join(DATA_DIR, f"{protein_infile}.pdb")

# Script that is launched as a separate `python` process to build a mutant.
make_mutation_modeller_py = os.path.join(FUNCTION_DIR,"MakeNewMutant_Modeller.py") 
# MDP files (base names without the ".mdp" extension)
ions_mdp_file = "ions"
minim_mdp_file = "minim"
nvt_mdp_file = "NVT"
npt_mdp_file = "NPT"
#samd_mdp_file = "SAMD"
md_mdp_file = "EngComp_ff14sb_custom"
only_protein_md_mdp_file = "Protein_EngComp_ff14sb_custom"

# Full paths of the MDP files inside DATA_DIR.
ions_mdp_path = os.path.join(DATA_DIR, f"{ions_mdp_file}.mdp")
minim_mdp_path = os.path.join(DATA_DIR, f"{minim_mdp_file}.mdp")
nvt_mdp_path = os.path.join(DATA_DIR, f"{nvt_mdp_file}.mdp")
npt_mdp_path = os.path.join(DATA_DIR, f"{npt_mdp_file}.mdp")
#samd_mdp_path = os.path.join(DATA_DIR, f"{samd_mdp_file}.mdp")
md_mdp_path = os.path.join(DATA_DIR, f"{md_mdp_file}.mdp")
only_protein_md_mdp_path = os.path.join(DATA_DIR, f"{only_protein_md_mdp_file}.mdp")



def create_output_directory():
    """Create a timestamped output folder and make it the working directory.

    The folder is named ``output_<YYYYmmdd_HHMMSS>`` and is created inside the
    current working directory. Its path is printed, and the process then
    changes into it.

    Returns:
        The path of the newly created folder.
    """
    stamp = datetime.datetime.now().strftime('%Y%m%d_%H%M%S')
    output_dir_path = os.path.join(os.getcwd(), "output_" + stamp)

    os.mkdir(output_dir_path)
    print(f"Created directory: {output_dir_path}")

    # All later relative paths resolve inside the new folder.
    os.chdir(output_dir_path)
    return output_dir_path

# Create the timestamped run folder; the working directory is inside it from here on.
ROOT_OUTPUT = create_output_directory()

# Log to OUTPUT.out. The relative file name resolves against the current working
# directory, which create_output_directory() has just switched to the run folder.
# NOTE(review): basicConfig does nothing when the root logger already has
# handlers, e.g. when this cell is executed a second time in the same kernel.
logging.basicConfig(
    filename = "OUTPUT.out",
    level = logging.INFO,
    format="%(asctime)s - %(levelname)s -%(message)s"
)

logging.info(f"ROOT FOLDER PATH: {ROOT_OUTPUT}")

conda activate path --> /home/bio/ls/bin/activate version: conda 24.11.1
Python --> /home/bio/ls/bin/python version: Python 3.12.7
VMD path --> /usr/local/bin/vmd 
gmx path --> /opt/gromacs-2024.4/bin/gmx 
gmxMMPBSA is installed
modeller is installed
Created directory: /home/bio/Desktop/jupyter_test/antibody_test/output_20241218_164659


In [2]:
def build_folders(current_dir, cycle_num):
    """Create the working folders of one configuration and an empty results table.

    Args:
        current_dir: Folder in which all sub-folders are created.
        cycle_num: Number of cycles; one ``cycle<N>_MD`` folder is created for
            every N from 1 to ``cycle_num``.

    Returns:
        A dict mapping folder keys to paths: ``cycle<N>_MD`` for every cycle,
        followed by ``repository``, ``TEMP_FILES_FOLDER``,
        ``REMOVED_FILES_FOLDER`` and ``results``.

    Side effects:
        Creates the folders (existing ones are kept) and writes
        ``MoleculesResults.dat`` with only a tab-separated header line into
        the results folder.
    """
    # One MD folder per cycle, keyed by the folder name itself.
    folders = {
        f"cycle{index}_MD": os.path.join(current_dir, f"cycle{index}_MD")
        for index in range(1, cycle_num + 1)
    }
    # Folders shared by all cycles of the configuration.
    folders.update(
        repository=os.path.join(current_dir, "REPOSITORY"),
        TEMP_FILES_FOLDER=os.path.join(current_dir, "TEMP_FILES_FOLDER"),
        REMOVED_FILES_FOLDER=os.path.join(current_dir, "REMOVED_FILES_FOLDER"),
        results=os.path.join(current_dir, "RESULTS"),
    )

    for path in folders.values():
        os.makedirs(path, exist_ok=True)

    # Column names of the results table (the former "dG_PotEn" column is no longer written).
    columns = [
        "#RUNnumber",
        "DeltaG(kJ/mol)",
        "Coul(kJ/mol)",
        "vdW(kJ/mol)",
        "PolSol(kJ/mol)",
        "NpoSol(kJ/mol)",
        "ScoreFunct",
        "ScoreFunct2",
        "Canonica_AVG",
        "MedianDG",
        "DeltaG_2s",
    ]

    results_file_path = os.path.join(folders["results"], "MoleculesResults.dat")
    pd.DataFrame(columns=columns).to_csv(results_file_path, sep='\t', index=False, header=True)

    return folders

def add_ter_to_pdb(pdb_file_name):
    """Insert ``TER`` records at the end of every chain of a PDB file, in place.

    A ``TER`` line is written after an ``ATOM`` line when that line is the
    last line of the file, when the next ``ATOM`` line has a different chain
    ID (column 22), or when the next line is not an ``ATOM`` line. Nothing is
    added when the next line already is a ``TER`` record, so running the
    function again on its own output leaves the file unchanged.

    The result is first written to ``<pdb_file_name>_temp`` and then renamed
    over the original file.

    Args:
        pdb_file_name: Path of the PDB file to rewrite.
    """
    temp_file_name = f"{pdb_file_name}_temp"

    with open(pdb_file_name, 'r') as f:
        lines = f.readlines()

    new_lines = []
    last_index = len(lines) - 1

    for i, line in enumerate(lines):
        if not line.startswith("ATOM"):
            new_lines.append(line)
            continue

        # A final line without a newline would otherwise be glued to "TER".
        if not line.endswith("\n"):
            line += "\n"
        new_lines.append(line)

        next_line = lines[i + 1] if i < last_index else None
        if next_line is None:
            chain_ends = True
        elif next_line.startswith("TER"):
            # The chain is already terminated; do not add a duplicate record.
            chain_ends = False
        elif next_line.startswith("ATOM"):
            # Slices instead of indexing so that short lines cannot raise IndexError.
            chain_ends = next_line[21:22] != line[21:22]
        else:
            chain_ends = True

        if chain_ends:
            new_lines.append("TER\n")

    with open(temp_file_name, 'w') as f:
        f.writelines(new_lines)

    os.rename(temp_file_name, pdb_file_name)

def replace_his_residues_flexible(input_pdb, output_pdb):
    """Copy a PDB file, renaming HISE/HISD/HISP residues to HIS on ATOM lines.

    Only lines that start with ``ATOM`` and carry one of the three names in
    the four characters after the first 17 are changed; the name is replaced
    by ``HIS`` padded to the same width. Every line is written with exactly
    one trailing newline.

    Args:
        input_pdb: Path of the PDB file to read (UTF-8).
        output_pdb: Path of the PDB file to write (UTF-8).
    """
    protonation_variants = ("HISE", "HISD", "HISP")

    with open(input_pdb, "r", encoding="utf-8") as source, open(output_pdb, "w", encoding="utf-8") as target:
        for record in source:
            if record.startswith("ATOM") and record[17:21] in protonation_variants:
                # Four characters in, four characters out: the column layout is preserved.
                record = record[:17] + "HIS " + record[21:]
            target.write(record.rstrip('\n') + '\n')

def MD_for_each_cycle(work_dir, cycle_number,sequence, md_mdp_path, tpr_file, trj_name, gmx_path):
    """Stage the equilibrated system in a cycle folder and run the MD for that cycle.

    Reads the module-level ``folders`` dict for the cycle and repository paths.

    Args:
        work_dir: Folder that becomes the working directory and receives the
            copied input files.
        cycle_number: Number of the cycle; used for the folder key and the
            name of the saved last frame.
        sequence: Passed through to ``run_md``.
        md_mdp_path: Passed through to ``run_md``.
        tpr_file: Passed through to ``run_md``.
        trj_name: Passed through to ``run_md``.
        gmx_path: Passed through to ``run_md``.
    """
    print("start MD in MD function")

    # Looked up but not used afterwards; raises KeyError for an unknown cycle.
    cycle_MD_path = folders[f"cycle{cycle_number}_MD"]
    os.chdir(work_dir)
    repository = folders["repository"]

    # Files that must exist in the repository.
    for fixed_name in ("system_equil.gro", "topol.top"):
        shutil.copy(os.path.join(repository, fixed_name), "./")

    # Chain topologies, position restraints and NPT checkpoints: whatever matches.
    for pattern in ("*rotein_chain_*.itp", "posres_*.itp", "*NPT*.cpt"):
        for matched_file in glob.glob(os.path.join(repository, pattern)):
            shutil.copy(matched_file, "./")

    run_md(md_mdp_path, tpr_file, trj_name, sequence, cycle_number, gmx_path)

    # Keep the final structure under a cycle-specific name.
    shutil.copy("system_Compl_MD.gro", f"LastFrame_cycle{cycle_number}.gro")


def gmx_mmpbsa_for_each_cycle(work_dir, cycle_number,only_protein_md_mdp_path,VMD_DIR,temp_files_folder, FORCE_FIELD_PATH, MMPBSA_INFILE_PATH, REMOVED_FILES_FOLDER, results_folder, repository_folder, current_conf_path):
    """Run the gmx_MMPBSA stage of one cycle: prepare inputs, compute, analyse, clean up.

    Besides its parameters, the function reads these module-level names:
    ``folders``, ``protein_infile``, ``startingFrameGMXPBSA``, ``receptorFRAG``,
    ``ABchains``, ``gmx_path``, ``config``, ``conda_actiavte_path`` and
    ``conda_gmxmmpbsa_name``.

    Args:
        work_dir: Folder that becomes the working directory.
        cycle_number: Number of the cycle; used to build the configuration
            name ``cycle<N>``, the root name ``cycle<N>_BE`` and the folder key.
        only_protein_md_mdp_path: Passed to ``files_gmxmmpbsa``.
        VMD_DIR: Passed to ``files_gmxmmpbsa``.
        temp_files_folder: Passed to ``files_gmxmmpbsa``.
        FORCE_FIELD_PATH: Passed to ``gmx_mmpbsa``.
        MMPBSA_INFILE_PATH: Passed to ``gmx_mmpbsa``.
        REMOVED_FILES_FOLDER: Passed to ``Data_Analysis_Pre`` and
            ``clean_for_each_cycle``.
        results_folder: Passed to ``Data_Analysis_Cal``.
        repository_folder: Folder holding ``<protein_infile>.pdb``; also passed
            to ``clean_for_each_cycle``.
        current_conf_path: Passed to ``clean_for_each_cycle``.
    """
    #cycle_number = 1
    #while cycle_number <= cycle_num:
    ConfName = f"cycle{cycle_number}"
    RootName = f"cycle{cycle_number}_BE"
    cycle_number_MD_FOLDER = folders[f"cycle{cycle_number}_MD"]
    # Print the relevant information
    print(f"Cycle Number: {cycle_number}")
    print(f"Configuration Name: {ConfName}")
    print(f"Root Name: {RootName}")
    print(f"MD Folder Path: {cycle_number_MD_FOLDER}")
    #os.chdir(cycle_number_MD_FOLDER )
    os.chdir(work_dir)

    repository_pdb_file = os.path.join(repository_folder, f"{protein_infile}.pdb")
    #startingFrameGMXPBSA="2000"
    # make files for gmx_mmpbsa
    # files_gmxmmpbsa(starting_gro_file, repository_pdb_file, trj_file, tpr_file, top_file, mdp_name, root_name, conf_name, vmd_function_folder, temp_files_folder)

    files_gmxmmpbsa("system_Compl_MD", repository_pdb_file, "traj_MD", "system_Compl_MD", "topol", only_protein_md_mdp_path, RootName, ConfName, VMD_DIR, temp_files_folder, cycle_number, startingFrameGMXPBSA, receptorFRAG, ABchains,gmx_path)
    # get number of frames: second whitespace-separated field of the first line
    # of trj_check.out that starts with "Step"; None when no such line exists.
    # NOTE(review): trj_check.out is presumably written by files_gmxmmpbsa - confirm.
    try:
        with open("trj_check.out", "r") as file:
            number_of_frames = next(
                (line.split()[1] for line in file if line.startswith("Step")), None
            )
    except FileNotFoundError:
        # A missing file is only reported; the calculation below still runs
        # with number_of_frames set to None.
        print(f"Error: File trj_check.out not found.")
        number_of_frames = None
    #conda_activate_path="/home/bio/ls/bin"

    #conda_gmxmmpbsa_name="gmxMMPBSA"
    forcefield="amber99sb-ildn"
    #FORCE_FIELD_PATH = "/home/bio/Desktop/jupyter_test/antibody_test/FORCE_FIELD"
    mmpbsa_inFILE="mmpbsa_LinearPB_amber99SB_ILDN.in"
    #MMPBSA_INFILE_PATH = "/home/bio/Desktop/jupyter_test/antibody_test/gmx_mmpbsa_in"
    np_value = config['run']['num_processors']
    #gmx_mmpbsa(1, conda_activate_path, conda_gmxmmpbsa_name, cycle_number_MD_FOLDER, ConfName, RootName, forcefield, FORCE_FIELD_PATH, 
    #             mmpbsa_inFILE, MMPBSA_INFILE_PATH , np_value, number_of_frames)
    gmx_mmpbsa(cycle_number, conda_actiavte_path, conda_gmxmmpbsa_name, cycle_number_MD_FOLDER, ConfName, RootName, forcefield, FORCE_FIELD_PATH, mmpbsa_inFILE, MMPBSA_INFILE_PATH, np_value, number_of_frames)
    # data analysis
    NUMframe = "all"
    Data_Analysis_Pre(cycle_number_MD_FOLDER, REMOVED_FILES_FOLDER, NUMframe)
    Data_Analysis_Cal(cycle_number, results_folder)
    # clean and move files
    clean_for_each_cycle(cycle_number, repository_folder, cycle_number_MD_FOLDER, RootName, REMOVED_FILES_FOLDER, current_conf_path)
    #cycle_number += 1
    #conf_name = f"cycle{cycle_number}"
    #root_name = f"cycle{cycle_number}_BE"
    #cycle_number_md_folder = os.path.join(current_conf_path, f"cycle{cycle_number}_MD")

def run_cycle(cycle_number, cycle_num, md_args, gmx_args):
    """
    Run gmx_MMPBSA for the current cycle and, in parallel, MD for the next cycle.

    Both tasks are started as separate processes and the function waits for
    all of them. Reads the module-level ``folders`` dict for the cycle folders.

    Args:
        cycle_number: Cycle whose trajectory is analysed with gmx_MMPBSA.
        cycle_num: Total number of cycles; MD for cycle ``cycle_number + 1``
            is only started when ``cycle_number < cycle_num``.
        md_args: Arguments appended after the work folder and cycle number
            when calling ``MD_for_each_cycle``.
        gmx_args: Arguments appended after the work folder and cycle number
            when calling ``gmx_mmpbsa_for_each_cycle``.

    Raises:
        RuntimeError: If any child process ends with a non-zero exit code.
            Without this check a crashed worker went unnoticed and the
            pipeline carried on with missing results.
    """
    process = []

    # current cycle gmx_mmpbsa
    gmx_process = multiprocessing.Process(
        target=gmx_mmpbsa_for_each_cycle,
        name=f"gmx_mmpbsa_cycle{cycle_number}",
        args=(folders[f"cycle{cycle_number}_MD"],cycle_number, *gmx_args),
    )
    process.append(gmx_process)
    gmx_process.start()

    # if there is a next cycle, run its MD at the same time
    if cycle_number < cycle_num:
        next_md_process = multiprocessing.Process(
            target=MD_for_each_cycle,
            name=f"MD_cycle{cycle_number + 1}",
            args=(folders[f"cycle{cycle_number+1}_MD"], cycle_number + 1, *md_args),
        )
        process.append(next_md_process)
        next_md_process.start()

    # wait for every process before judging the outcome, so none is left running
    for p in process:
        p.join()

    failed = [f"{p.name} (exit code {p.exitcode})" for p in process if p.exitcode != 0]
    if failed:
        raise RuntimeError("Child process failed in run_cycle: " + ", ".join(failed))

In [3]:
# Number of mutants to generate; the main loop runs for sequence 0..max_mutant,
# where sequence 0 is the starting configuration.
max_mutant = config['modeller']['max_mutant']
cycle_num = 2 # the run cycle numbers for each configuration  Default:10
# False for the first pass, so that no mutation is made for the starting configuration.
MUTANT_signal = False
#Stored Average BE from the last configuration. - Default: no
#Stored_AVG= -92.8

#Stored BE standard deviation from the last configuration. - Default: no
#Stored_STD= 4.3
#Metropolis Temperature - Default: 2
Metropolis_temp = 1.5
#Metropolis Temperature top limit - Default: 4
Metropolis_Temp_cap= 3

#Metropolis Temperature Used during the calculations. It could change. - Default: 
Eff_Metropolis_Temp= Metropolis_temp

#Number of consecutive discarded results. - Default: 0
# NOTE(review): the value set here (4) differs from the default stated above - confirm it is intended.
Consecutive_DISCARD_Count= 4

for sequence in range (0,max_mutant+1):
    # Every iteration starts from the root output folder of the run.
    try:
        os.chdir(ROOT_OUTPUT)
    except OSError:
        logging.error(f"Cannot enter {ROOT_OUTPUT} folder")
        # Stop with an error status. The interactive exit() helper used here
        # before is meant for the interactive shell and, called without an
        # argument, reports success.
        raise SystemExit(f"Cannot enter {ROOT_OUTPUT} folder")

    if sequence == 0:

        # create configuration folder (sequence 0 is the starting configuration)
        configuration_path = os.path.join(ROOT_OUTPUT,"configuration")
        os.mkdir(configuration_path)
        current_path_store = configuration_path

        print(f"Create directory: {configuration_path}")
        os.chdir(configuration_path)
        logging.info(f"#### Begin with configuration{sequence} ####")
        logging.info(f"PATH : {configuration_path}")
    else:
        # every later sequence gets its own Mutant<N> folder
        mutant_folder_path = os.path.join(ROOT_OUTPUT,f"Mutant{sequence}")
        os.mkdir(mutant_folder_path)
        current_path_store = mutant_folder_path

        print(f"Create directort: {mutant_folder_path}")
        os.chdir(mutant_folder_path)
        logging.info(f"#### Begin with Mutant{sequence} ####")
        logging.info(f"PATH : {mutant_folder_path}")

    # Build the mutant structure for this sequence (skipped for the starting configuration).
    if MUTANT_signal:
        #attempts = 1
        # The loop currently runs exactly once; the flag is kept as the hook
        # for retrying when a sequence has to be rejected.
        new_mutant = True
        while new_mutant:
            # Starting structure: the PDB stored in the root output folder
            # under the current protein_infile name.
            pdb_file = os.path.join(ROOT_OUTPUT,f"{protein_infile}.pdb") #LastFRame_xxxx.pdb
            res_pos_list = config['modeller']['res_pos_list']

            # NO CYS MET GLY PRO
            # NOTE(review): new_restype_list and output_name are not passed to
            # the mutation script below.
            new_restype_list = ['LEU', 'VAL', 'ILE', 'PHE', 'TYR', 'TRP','GLU', 'ASP','ARG', 'LYS','SER', 'THR', 'ASN', 'GLN', 'HIS']
            output_name = f"Mutant{sequence}"

            logging.info("Making a new mutation.")
            # A space separates the residue list from "-v"; without it the
            # flag was glued onto the last residue entry.
            command_mutant = (f"python {make_mutation_modeller_py} {pdb_file} -o ./Mutant{sequence} -rl {res_pos_list} -v")
            subprocess.run(command_mutant, shell =True, check = True)
            #attempts += 1
            new_mutant = False

        # Check that the sequence hasn't be tested already (self avoiding walk)
        #os.remove(pdb_file)
        # From here on the mutant PDB in the current folder is the input structure.
        protein_infile= f"Mutant{sequence}"

        protein_file_path= os.path.join(current_path_store, f"{protein_infile}.pdb")
        #destination_file = os.path.join(current_path_store, f"{protein_infile}_noH.pdb")
        #shutil.copy(protein_file_path, destination_file)


    Metropolis_flag= 0
    # Working folder of this configuration/mutant and its sub-folder layout.
    current_dir = os.getcwd()
    folders = build_folders(current_dir,cycle_num)

    # generating a topology and build box
    make_top_protein(protein_file_path, "amber99sb-ildn", "tip3p", "system", "topol", gmx_path)

    # cp system.pdb {protein_infile}.pdb in current folder
    source = os.path.join(current_dir, "system.pdb")
    destination = os.path.join(current_dir, f"{protein_infile }.pdb")
    # A failed copy is only reported; the following steps still expect
    # {protein_infile}.pdb to exist in the current folder.
    try:
        shutil.copy(source,destination)
    except Exception:
        print("Copy system.pdb failed.")

    # Add TER records in place, then write a copy with plain HIS residue names
    # to the root output folder.
    add_ter_to_pdb(f"{protein_infile }.pdb")
    output_pdb = os.path.join(ROOT_OUTPUT, f"{protein_infile}.pdb")
    replace_his_residues_flexible(f"{protein_infile}.pdb",output_pdb)
    # Adding water and ions
    fill_water_ions("system", "topol", ions_mdp_path, gmx_path)
    # Energy minimization
    energy_min(minim_mdp_path, "system_ions", "topol", "system_compl",gmx_path)

    
    # Nvt and Npt
    make_new_minim_nvt_npt("system_compl_minim.gro", nvt_mdp_path, npt_mdp_path, "system_equil", 0, gmx_path)

    # Move .cpt, .top, and .itp files to repository folder
    for file_pattern in [f"{current_dir}/*.cpt", f"{current_dir}/*.top", f"{current_dir}/*.itp"]:
        for file in glob.glob(file_pattern):
            shutil.move(file, folders["repository"])

    # Move specific files to repository folder
    shutil.move(f"{current_dir}/{protein_infile}.pdb", folders["repository"])
    shutil.move(f"{current_dir}/system_compl_minim.gro", folders["repository"])
    shutil.move(f"{current_dir}/system_equil.gro", folders["repository"])


    # Move temp* and *out files to removed files folder
    for file in glob.glob("./*temp*.*") + glob.glob("./*.temp") + glob.glob("./*out"):
        shutil.move(file, folders["REMOVED_FILES_FOLDER"])

    # Remove files with # in their name
    for file in glob.glob("./#*"):
        os.remove(file)

    # Argument tuples that are appended after (work_dir, cycle_number) when
    # MD_for_each_cycle / gmx_mmpbsa_for_each_cycle are called.
    # NOTE(review): gmx_path is passed as a formatted string here but as the
    # plain value in the calls above - the two differ if it is not a str.
    md_args = (sequence, md_mdp_path, "system_Compl_MD", "traj_MD", f"{gmx_path}")
    gmx_args = (only_protein_md_mdp_path,VMD_DIR,folders["TEMP_FILES_FOLDER"], FORCE_FIELD_PATH, MMPBSA_INFILE_PATH, folders["REMOVED_FILES_FOLDER"], folders["results"], folders["repository"], current_path_store)
    # 1st cycle MD
    MD_for_each_cycle(folders["cycle1_MD"],1, *md_args)

    # each cycle: gmx_mmpbsa and next MD
    for cycle_number in range(1, cycle_num + 1):
        run_cycle(cycle_number, cycle_num, md_args, gmx_args)

    
    # cycle_number still holds the value of the last cycle that was run.
    repository_dir = folders["repository"]
    last_frame_stem = f"LastFrame_cycle{cycle_number}"

    # Copy the final frame of the last cycle from its cycle folder inside the
    # repository to the top level of the repository.
    shutil.copy(
        os.path.join(repository_dir, f"cycle{cycle_number}_MD", f"{last_frame_stem}.gro"),
        os.path.join(repository_dir, f"{last_frame_stem}.gro"),
    )
    logging.info(f"Making the starting PDB for the next Mutation from LastFrame_cycle{cycle_number}.gro")

    os.chdir(current_path_store)

    # Folder and extension-less name of the reference PDB kept in the repository.
    reference_pdb_dir, reference_pdb_name = os.path.split(
        os.path.join(repository_dir, f"{protein_infile}.pdb")
    )
    reference_pdb_stem = os.path.splitext(reference_pdb_name)[0]

    # Convert the last frame (.gro) into a PDB with the same base name.
    GRO_to_PDB(
        repository_dir,
        last_frame_stem,
        reference_pdb_dir,
        reference_pdb_stem,
        last_frame_stem,
        VMD_DIR,
        folders["TEMP_FILES_FOLDER"],
    )

    # Add TER records in place, then store a copy with plain HIS residue names
    # in the root output folder.
    converted_pdb = os.path.join(repository_dir, f"{last_frame_stem}.pdb")
    add_ter_to_pdb(converted_pdb)
    next_start_name = f"Mutant{sequence}_cycle{cycle_number}_LastFrameMD"
    replace_his_residues_flexible(converted_pdb, os.path.join(ROOT_OUTPUT, f"{next_start_name}.pdb"))

    # The stored copy is the candidate starting structure of the next mutation.
    protein_infile = next_start_name
    logging.info("Making the average of the cycles results.")
    
    # File names used by this step; the two plain names are relative to the
    # current working directory.
    all_cycle_data = "All_cycle_data.out"
    MoleculesResults_data = os.path.join(folders["results"], "MoleculesResults.dat")
    data_analysis_temp = "DataAnalysis_temp.csv"

    if os.path.exists(all_cycle_data):
        os.remove(all_cycle_data)

    flag_header = True

    # Copy the results table and put an extra first column in front of every
    # line: "#configNum" on the header line, "avg" on every data line.
    with open(MoleculesResults_data, 'r') as infile, open(all_cycle_data, 'w') as outfile:
        for line in infile:
            if flag_header == True:
                # head row
                outfile.write(f"#{'configNum':<10} \t{line}")
                flag_header = False
            else:
                # data row
                outfile.write(f"{'avg':<10} \t{line}")
    # Read the file back (first line as header) and drop the extra first column.
    df = pd.read_csv(all_cycle_data, sep='\t')  
    df_filtered = df.iloc[:, 1:]  # get the data from the second column

    # Save the data rows without header as input for the analysis helper.
    df_filtered.to_csv(data_analysis_temp, sep = '\t', index=False,header = False)
    #Data_Analysis_Signal = False
    Data_Analysis_Cal_child(data_analysis_temp, "AllData.temp", False)

    frame_count = 0
    # Append selected lines of AllData.temp to All_cycle_data.out: the second
    # line starting with "#frame" and every "#AVG" / "#STD" line.
    with open("AllData.temp", 'r') as temp_file, open(all_cycle_data, 'a') as outfile:
        for line in temp_file:

            if line.startswith("#frame"):
                frame_count +=1
                if frame_count ==2:
                    outfile.write(line)
            if line.startswith("#AVG") or line.startswith("#STD"):
                outfile.write(line)
    '''
    # remove temp file
    for temp_file in [all_data_temp, data_analysis_temp]:
        shutil.move(temp_file, os.path.join(removed_files_folder, os.path.basename(temp_file)))
    '''
    # Parse the appended lines; the first token of each line is its tag and is
    # dropped. When a tag occurs more than once the last occurrence wins.
    frame = []
    avg = []
    std = []
    with open(all_cycle_data, 'r') as infile:
        for line in infile:
            if line.startswith("#frame"):
                frame = line.strip().split()[1:]
            elif line.startswith("#AVG"):
                avg = line.strip().split()[1:]
            elif line.startswith("#STD"):
                std = line.strip().split()[1:]
    # Log one "name: average +- deviation" line per column.
    if frame and avg and std:
        output_lines = ["Results for Configuation"]
        output_lines += [f"{frame[i]}: {avg[i]} +- {std[i]} kJ/mol"
                        for i in range(len(frame))
                       ]
        logging.info("\n".join(output_lines))
    shutil.move("AllData.temp", folders["REMOVED_FILES_FOLDER"])
    shutil.move(data_analysis_temp, folders["REMOVED_FILES_FOLDER"])
    # First value of the "#AVG" / "#STD" lines; raises IndexError when the
    # corresponding line was not found above.
    AVG = float(avg[0])
    STD = float(std[0])
    if sequence == 0:
        MUTANT_signal = True
        Stored_AVG = float(avg[0]) # DeltaG(kJ/mol)
        Stored_STD = float(std[0])
        Stored_system_file = protein_infile
        logging.info(f"Finished with Configuration{sequence}")
        sequence+=1
        
        os.chdir(ROOT_OUTPUT)
        # if FAST == TRue delete removed files folder
        continue
    logging.info("Metropolis algorithm")
    Prob = None
    if not Prob:
        RandNum = random.uniform(0,1)
    else:
        RandNum = float(Prob)
    # Metropolis
    MP = math.exp(-(AVG+STD/2-Stored_AVG) / Eff_Metropolis_Temp)
    # new G < old G
    if MP >= 1:
        MP = 1
    logging.info(f"Random Number: {RandNum}  Metropolis Prob: {MP}  AVG: {AVG}  Stored AVG: {Stored_AVG}")
    Metropolis_flag = 1 if RandNum < MP else 0
    if Metropolis_flag == 1:
        logging.info("New Configuration Accepted")
        Stored_AVG = AVG
        Stored_STD = STD
        Stored_system_file = protein_infile
        Consecutive_DISCARD_Count = 0
        Eff_Metropolis_Temp = Metropolis_temp
    else:
        logging.info("New Configuration Declined")
        protein_infile = Stored_system_file
        Consecutive_DISCARD_Count += 1
        
        if Consecutive_DISCARD_Count > 5:
            if Eff_Metropolis_Temp < Metropolis_Temp_cap:
                Eff_Metropolis_Temp += 0.5*(Consecutive_DISCARD_Count - 5)
        if Eff_Metropolis_Temp > Metropolis_Temp_cap:
            Eff_Metropolis_Temp = Metropolis_Temp_cap
    
    MUTANT_signal = True
    logging.info(f"Finished Mutant{sequence}")
    sequence += 1
    os.chdir(ROOT_OUTPUT)



logging.info("ALL DONE.")
    

    

Create directory: /home/bio/Desktop/jupyter_test/antibody_test/output_20241218_164659/configuration


                      :-) GROMACS - gmx energy, 2024.4 (-:

Executable:   /opt/gromacs-2024.4/bin/gmx
Data prefix:  /opt/gromacs-2024.4
Working dir:  /home/bio/Desktop/jupyter_test/antibody_test/output_20241218_164659/configuration
Command line:
  gmx energy -f NVT.edr -o temp_NVT.xvg

Opened NVT.edr as single precision energy file

Select the terms you want from the following list by
selecting either (part of) the name or the number or a combination.
End your selection with an empty line or a zero.
-------------------------------------------------------------------
  1  Bond             2  Angle            3  Proper-Dih.      4  Per.-Imp.-Dih.
  5  LJ-14            6  Coulomb-14       7  LJ-(SR)          8  Disper.-corr. 
  9  Coulomb-(SR)    10  Coul.-recip.    11  Position-Rest.  12  Potential     
 13  Kinetic-En.     14  Total-Energy    15  Conserved-En.   16  Temperature   
 17  Pres.-DC        18  Pressure        19  Constr.-rmsd    20  Vir-XX        
 21  Vir-XY          22  Vi


Statistics over 50001 steps [ 0.0000 through 50.0000 ps ], 1 data sets
All statistics are over 501 points

Energy                      Average   Err.Est.       RMSD  Tot-Drift
-------------------------------------------------------------------------------
Temperature                 293.389        6.4    19.4315    34.7259  (K)

Statistics over 50001 steps [ 0.0000 through 100.0000 ps ], 2 data sets
All statistics are over 501 points

Energy                      Average   Err.Est.       RMSD  Tot-Drift
-------------------------------------------------------------------------------
Pressure                   -2.13388         --    95.8376     13.662  (bar)
Density                     1010.44         --    1.13536    0.35172  (kg/m^3)
start MD in MD function
16:47:43 -- Running MD 


                      :-) GROMACS - gmx energy, 2024.4 (-:

Executable:   /opt/gromacs-2024.4/bin/gmx
Data prefix:  /opt/gromacs-2024.4
Working dir:  /home/bio/Desktop/jupyter_test/antibody_test/output_20241218_164659/configuration/cycle1_MD
Command line:
  gmx energy -f PROD.edr -o PROD0.xvg

Opened PROD.edr as single precision energy file

Select the terms you want from the following list by
selecting either (part of) the name or the number or a combination.
End your selection with an empty line or a zero.
-------------------------------------------------------------------
  1  Bond             2  Angle            3  Proper-Dih.      4  Per.-Imp.-Dih.
  5  LJ-14            6  Coulomb-14       7  LJ-(SR)          8  Disper.-corr. 
  9  Coulomb-(SR)    10  Coul.-recip.    11  Potential       12  Kinetic-En.   
 13  Total-Energy    14  Conserved-En.   15  Temperature     16  Pres.-DC      
 17  Pressure        18  Constr.-rmsd    19  Box-X           20  Box-Y         
 21  Box-Z        


Statistics over 2500001 steps [ 0.0000 through 5000.0000 ps ], 3 data sets
All statistics are over 25001 points

Energy                      Average   Err.Est.       RMSD  Tot-Drift
-------------------------------------------------------------------------------
Temperature                 309.983      0.021    1.22074 -0.0793555  (K)
Pressure                   0.946915       0.72    99.8929    2.67832  (bar)
Density                     1012.09      0.085    2.06868  -0.215468  (kg/m^3)


Reading frame     170 time 3400.000   

Selected 1: 'Protein'
Selected 1: 'Protein'


Reading frame     200 time 4000.000   

GROMACS reminds you: "You Will Be Surprised At What Resides In Your Inside" (Arrested Development)



Cycle Number: 1
Configuration Name: cycle1start MD in MD function

Root Name: cycle1_BE
MD Folder Path: /home/bio/Desktop/jupyter_test/antibody_test/output_20241218_164659/configuration/cycle1_MD
		--running MAKE_NDX to create index.ndx..
16:59:00 -- Running MD 


                     :-) GROMACS - gmx make_ndx, 2024.4 (-:

Executable:   /opt/gromacs-2024.4/bin/gmx
Data prefix:  /opt/gromacs-2024.4
Working dir:  /home/bio/Desktop/jupyter_test/antibody_test/output_20241218_164659/configuration/cycle1_MD
Command line:
  gmx make_ndx -f system_Compl_MD.gro -o index.ndx


Reading structure file

GROMACS reminds you: "You Will Be Surprised At What Resides In Your Inside" (Arrested Development)



Going to read 0 old index file(s)
Analysing residue names:
There are:   413    Protein residues
There are: 23061      Water residues
There are:   141        Ion residues
Analysing Protein...

  0 System              : 75722 atoms
  1 Protein             :  6398 atoms
  2 Protein-H           :  3292 atoms
  3 C-alpha             :   413 atoms
  4 Backbone            :  1239 atoms
  5 MainChain           :  1656 atoms
  6 MainChain+Cb        :  2035 atoms
  7 MainChain+H         :  2063 atoms
  8 SideChain           :  4335 atoms
  9 SideChain-H         :  1636 atoms
 10 Prot-Masses         :  6398 atoms
 11 non-Protein         : 69324 atoms
 12 Water               : 69183 atoms
 13 SOL                 : 69183 atoms
 14 non-Water           :  6539 atoms
 15 Ion                 :   141 atoms
 16 Water_and_ions      : 69324 atoms

 nr : group      '!': not  'name' nr name   'splitch' nr    Enter: list groups
 'a': atom       '&': and  'del' nr         'splitres' nr   'l': list residues
 't

  df = pd.read_csv(input_file, delim_whitespace=True, header=None)
  DeltaG_temp = row[1]  # 使用列名访问 DeltaG


Cycle Number: 2
Configuration Name: cycle2
Root Name: cycle2_BE
MD Folder Path: /home/bio/Desktop/jupyter_test/antibody_test/output_20241218_164659/configuration/cycle2_MD
		--running MAKE_NDX to create index.ndx..


                     :-) GROMACS - gmx make_ndx, 2024.4 (-:

Executable:   /opt/gromacs-2024.4/bin/gmx
Data prefix:  /opt/gromacs-2024.4
Working dir:  /home/bio/Desktop/jupyter_test/antibody_test/output_20241218_164659/configuration/cycle2_MD
Command line:
  gmx make_ndx -f system_Compl_MD.gro -o index.ndx


Reading structure file

GROMACS reminds you: "You should never bet against anything in science at odds of more than about 10^12 to 1." (Ernest Rutherford)



Going to read 0 old index file(s)
Analysing residue names:
There are:   413    Protein residues
There are: 23061      Water residues
There are:   141        Ion residues
Analysing Protein...

  0 System              : 75722 atoms
  1 Protein             :  6398 atoms
  2 Protein-H           :  3292 atoms
  3 C-alpha             :   413 atoms
  4 Backbone            :  1239 atoms
  5 MainChain           :  1656 atoms
  6 MainChain+Cb        :  2035 atoms
  7 MainChain+H         :  2063 atoms
  8 SideChain           :  4335 atoms
  9 SideChain-H         :  1636 atoms
 10 Prot-Masses         :  6398 atoms
 11 non-Protein         : 69324 atoms
 12 Water               : 69183 atoms
 13 SOL                 : 69183 atoms
 14 non-Water           :  6539 atoms
 15 Ion                 :   141 atoms
 16 Water_and_ions      : 69324 atoms

 nr : group      '!': not  'name' nr name   'splitch' nr    Enter: list groups
 'a': atom       '&': and  'del' nr         'splitres' nr   'l': list residues
 't

  df = pd.read_csv(input_file, delim_whitespace=True, header=None)
  DeltaG_temp = row[1]  # 使用列名访问 DeltaG


GRO to PDB completed successfully: HLA_BiAB_protein_50ns


  df = pd.read_csv(input_file, delim_whitespace=True, header=None)
  DeltaG_temp = row[1]  # 使用列名访问 DeltaG
