In [None]:
# --- LAMMPS MD SIMULATION SCHEDULER ---
# 1. Specify the structure from Random_Carbon_Structures
# 2. Specify the potential and simulation_type this simulation is using. Found in "in_nvt" and "lammps_scheduler.sh"
#     a) e.g. NVT, NVE, NPT, etc...
#     b) e.g. GAP17, GAP20, MACE, etc...

# This script will run n repeat MD simulations on the given structure 
# Melt/Quench/Anneal parameters are hard-coded in "lammps_scheduler.sh": 
#   Set to the standard GAP17 melt/quench/anneal protocol

# Run this cell for each structure you want to simulate before running the next cell, which organises files. 

import subprocess, sys
from pathlib import Path
import re

# ------ SIMULATION PARAMETERS ------

# This determines the file input to LAMMPS
structure = "Random_Carbon_Structures/216_atoms/1.5_gcm"   # Relative filepath to the structure

# These are part of the unique key used to identify each simulation directory
potential_name = "mace-mp-0b3"
simulation_type = "NVT"

# The unique key is underscore-delimited and the file-organiser regex parses
# potential_name and simulation_type as "[^_]+", so an underscore (or an empty
# value) in either would produce run directories that can never be matched.
for _label, _value in (("potential_name", potential_name), ("simulation_type", simulation_type)):
    if not _value or "_" in _value:
        raise ValueError(
            f"ERROR: {_label}={_value!r} must be non-empty and must not contain '_' "
            "(the unique key uses '_' as its field separator)."
        )


def _structure_field(path, pattern, description):
    """Return capture group 1 of ``pattern`` searched in ``path``.

    Raises:
        ValueError: if ``pattern`` does not occur in ``path``. This names the
            missing field instead of letting the failed search surface as
            ``AttributeError: 'NoneType' object has no attribute 'group'``.
    """
    found = re.search(pattern, path)
    if found is None:
        raise ValueError(
            f"ERROR: Could not find {description} in {path!r} (expected pattern {pattern!r})."
        )
    return found.group(1)


# Compiling unique key: every field is read back out of the structure path
num_atoms = int(_structure_field(structure, r"(\d+)_atoms", "number of atoms"))   # e.g. "216_atoms" -> 216
density = float(_structure_field(structure, r"([\d.]+)_gcm", "density"))          # e.g. "1.5_gcm"   -> 1.5
element = _structure_field(structure, r"_(\w+)_Structures", "element name")       # e.g. "Random_Carbon_Structures" -> "Carbon"

# Map the element name to its chemical symbol (only Carbon is defined so far)
if element == "Carbon":
    element_symbol = "C"
else:
    raise ValueError(f"ERROR: Element name undefined in {structure}. Ensure regex matching parses the correct element name for the given file.")

unique_key = f"{element_symbol}_{potential_name}_{simulation_type}_{num_atoms}_{density}"

# Number of repeats
repeats = 1

#-----------------------------------

def submit_lammps(structure, unique_key, run, script="lammps_scheduler_mace-mp-0b3.sh"):
    """Submit one LAMMPS run to the batch queue with ``qsub``.

    Args:
        structure: Path to the input structure. It is resolved to an absolute
            path and exported to the job as ``STRUCTURE``.
        unique_key: Identifier exported to the job as ``UNIQUE_KEY``.
        run: Run number, passed to the job script as its positional argument.
        script: Job script handed to ``qsub``.

    Returns:
        The job ID (first integer in qsub's output) as a string, or ``None``
        if the output contains no number.

    Exits via ``sys.exit(1)`` after printing qsub's error output when qsub
    returns a non-zero exit code.
    """
    # "qsub -v" exports variables to the job, which allows programmatic file naming.
    # Joining the entries avoids a trailing comma (an empty entry) in the list.
    structure = Path(structure).resolve()
    simulation_variables = ",".join((
        f"STRUCTURE={structure}",
        f"UNIQUE_KEY={unique_key}",
    ))

    cmd = ["qsub", "-v", simulation_variables, script, str(run)]
    submission = subprocess.run(cmd, capture_output=True, text=True)

    # Job submission failure checks
    if submission.returncode != 0:
        print("qsub failed:", submission.stderr or submission.stdout, file=sys.stderr)
        sys.exit(1)

    # Echo the scheduler's confirmation, then parse the job ID out of it
    # (e.g. 'Your job 8248788 ("...") has been submitted' -> "8248788")
    out = submission.stdout.strip()
    print(out)
    job_id = re.search(r"\d+", out)
    return job_id.group(0) if job_id else None

# Submit `repeats` MD simulations of the same structure; run numbers start at 1
for run in range(1, repeats + 1):
    submit_lammps(structure, unique_key, run)


Your job 8248788 ("lammps_scheduler.sh") has been submitted
Your job 8248789 ("lammps_scheduler.sh") has been submitted
Your job 8248790 ("lammps_scheduler.sh") has been submitted
Your job 8248791 ("lammps_scheduler.sh") has been submitted
Your job 8248792 ("lammps_scheduler.sh") has been submitted
Your job 8248793 ("lammps_scheduler.sh") has been submitted
Your job 8248794 ("lammps_scheduler.sh") has been submitted
Your job 8248795 ("lammps_scheduler.sh") has been submitted
Your job 8248796 ("lammps_scheduler.sh") has been submitted
Your job 8248797 ("lammps_scheduler.sh") has been submitted


In [None]:
# ------ FILE CLEANUP ------ 
# 1. Waits until all simulations are done
# 2. Moves all MD files into directories based on their "unique key"
#       a) The unique key consists of "element_symbol", "potential_name", "simulation_type", "num_atoms", "density" and an optional "run" suffix
# 3. Deletes empty log files


# ------ WAIT FUNCTION ------
import subprocess, time

WAIT_TIME = 300  # seconds between qstat polls
USER = "scat9451"
MAX_QSTAT_FAILURES = 3  # consecutive failed polls tolerated before giving up

# Poll the queue until qstat succeeds AND lists no jobs for USER.
qstat_failures = 0
while True:
    job_status = subprocess.run(["qstat", "-u", USER], capture_output=True, text=True)

    if job_status.returncode != 0:
        # A failed qstat can also leave stdout empty; that must not be read as
        # "queue is empty", or files would be moved while jobs are still running.
        qstat_failures += 1
        if qstat_failures >= MAX_QSTAT_FAILURES:
            details = job_status.stderr.strip() or job_status.stdout.strip()
            raise RuntimeError(f"qstat failed {qstat_failures} times in a row: {details}")
        print(f"WARNING: qstat failed (exit code {job_status.returncode}) - retrying ...")
    else:
        qstat_failures = 0
        if not job_status.stdout.strip():  # empty means no jobs
            break
        print("Active jobs found - waiting ...")

    time.sleep(WAIT_TIME)

print("No active jobs found - proceeding ...")
# -----------------------------

# ------ FILE ORGANISER ------
import re, shutil
from pathlib import Path

# All sorted runs end up under <current directory>/LAMMPS_simulations
cwd = Path.cwd()
LAMMPS_dir = cwd.joinpath("LAMMPS_simulations")
LAMMPS_dir.mkdir(parents=True, exist_ok=True)

# A run directory is named after its unique key:
#   <element_symbol>_<potential_name>_<simulation_type>_<num_atoms>_<density>[_<run>]
simulation_run_dir_name = re.compile("".join((
    r'^(?P<element_symbol>[A-Za-z]{1,6})_',      # e.g. C
    r'(?P<potential_name>[^_]+)_',               # e.g. GAP17
    r'(?P<simulation_type>[^_]+)_',              # e.g. NVT
    r'(?P<num_atoms>\d+)_',                      # e.g. 64
    r'(?P<density>[\d.eE+-]+)',                  # e.g. 1.5 or 1.85e+00
    r'(?:_(?P<run>\d+))?',                       # optional run number, e.g. _1
    r'$',
)))

root_directory = Path("/u/vld/scat9451/main_project/")

# General function for moving a directory 
def directory_move(directory, destination_dir, overwrite=None):
    """Move ``directory`` into ``destination_dir``, keeping its name.

    Args:
        directory: Path (str or Path) of the directory or file to move.
        destination_dir: Directory that will contain the moved item. It is
            created, including parents, if it does not exist.
        overwrite: Whether an existing item of the same name in
            ``destination_dir`` is deleted first. ``None`` (the default) falls
            back to the module-level ``OVERWRITE`` flag, or to ``False`` if
            that flag has not been defined.

    Returns:
        True if the item was moved; False if it was skipped or the move
        failed. The reason is printed in both failure cases.
    """
    directory = Path(directory)
    destination_dir = Path(destination_dir)

    if overwrite is None:
        overwrite = globals().get("OVERWRITE", False)

    if not directory.exists():
        print(f"ERROR: {directory} does not exist")
        return False

    source = directory.resolve()

    # Don't move a directory into its own subtree
    if destination_dir.resolve().is_relative_to(source):
        print(f"ERROR: Destination {destination_dir} is inside source {directory}; skipping to avoid recursive move.")
        return False

    # New path the item will have after the move
    moved_dir = destination_dir / directory.name

    # Already in place: without this guard the overwrite branch below would
    # delete the source itself before trying to move it.
    if moved_dir.resolve() == source:
        print(f"WARNING: {directory} is already in {destination_dir}. Skipping move.")
        return False

    # Make destination directory
    destination_dir.mkdir(parents=True, exist_ok=True)

    # Existence check on the target path
    if moved_dir.exists():
        if not overwrite:
            print(f"WARNING: {moved_dir} already exists. Skipping move.")
            return False
        if moved_dir.is_dir():
            shutil.rmtree(moved_dir)
        else:
            moved_dir.unlink()

    # Move directory; filesystem errors are reported instead of aborting the caller
    try:
        shutil.move(str(directory), str(moved_dir))
    except (OSError, shutil.Error) as e:
        print(f"ERROR: Failed to move {directory.name} --> {moved_dir.name}: {e}")
        return False

    return True


# Robust, general function for sorting all files using the unique_key and directory_move()
def sort_directory(working_directory):
    """Move every run directory found in ``working_directory`` under ``LAMMPS_dir``.

    Only sub-directories whose name matches ``simulation_run_dir_name`` are
    considered. Each is moved with ``directory_move`` to

        LAMMPS_dir / Element / Potential / Type / Atoms / Density

    where the components are parsed from the directory name. A summary of
    moved and skipped directories is printed; nothing is returned.

    Args:
        working_directory: ``Path`` whose immediate children are scanned.
    """
    symbol_to_name = {"C": "Carbon"}  # element_symbol --> element_name mapping
    moved_count = 0
    skipped_count = 0

    for entry in working_directory.iterdir():
        # Only directories can be run directories
        if not entry.is_dir():
            continue

        parsed = simulation_run_dir_name.match(entry.name)
        if parsed is None:
            continue

        # Parse each element of the unique key
        fields = parsed.groupdict()
        element_name = symbol_to_name.get(fields["element_symbol"])
        if element_name is None:
            print(f"Unrecognized element symbol for {entry}. Skipping file. \nAdd element_symbol --> element_name mapping")
            skipped_count += 1
            continue

        potential = fields["potential_name"]
        sim_type = fields["simulation_type"]
        atom_count = int(fields["num_atoms"])
        density_text = fields["density"]

        # Destination: evaluated using the unique key
        target = LAMMPS_dir.joinpath(
            f"Element: {element_name}",
            f"Potential: {potential}",
            f"Type: {sim_type}",
            f"Atoms: {atom_count}",
            f"Density: {density_text}",
        )

        if directory_move(entry, target):
            moved_count += 1
        else:
            skipped_count += 1

    if moved_count:
        print(f"Sorted {moved_count} files")
    if skipped_count:
        print(f"Skipped {skipped_count} files")

    if not (moved_count or skipped_count):
        print(f"No matching run files found in {working_directory}. Change run file regex if required.")


# -----------------------
# Use carefully - will replace ALL existing files
OVERWRITE = True

# Overwriting only stays enabled if the user explicitly answers "y"
if OVERWRITE:
    confirm = input("Are you sure you want to overwrite existing files? (y/n): ").strip().lower()
    OVERWRITE = confirm == "y"
# -----------------------

# Sort the run directories found in the current working directory
sort_directory(cwd)

# Remove *.log files in the working directory that have no content
log_file_regex = re.compile(r'.*\.log$')

for file in cwd.iterdir():
    if log_file_regex.fullmatch(file.name) is None:
        continue
    if not file.is_file():
        continue
    if file.stat().st_size != 0:  # keep logs that actually contain output
        continue
    file.unlink()

No active jobs found - proceeding ...
Sorted 227 files
