# In [None]:
from pathlib import Path
from collections import defaultdict
from ase.io import read, write
import re

# ------ MAKE NEW DIRECTORIES ------

cwd = Path.cwd()

# Folder inside the working directory that later receives the generated cif
# files; exist_ok=True makes re-running this step harmless.
amorphous_dir = cwd.joinpath("Amorphous_Structures")
amorphous_dir.mkdir(exist_ok=True)
# ----------------------------------

# ------  IMPORT SIMULATION DATA ------
# Creates a dictionary sorted_imported_simulation_files = {unique_key: [sorted list of dump_file path objects]} 

# NOTE: The unique_key is the name of each dump file's grandparent directory.
# This function expects the following directory layout, with dump files named
# "dump_custom.C.<index>.dat" and <unique_key> matching the unique_key regex:
#
# <unique_key>/                     
#         └── NVT/                       
#               ├── dump_custom.C.00000.dat  
#               ├── dump_custom.C.00001.dat

def import_simulation_data(directory):
    """Collect LAMMPS dump files under *directory*, grouped by simulation.

    Recursively searches *directory* for files named
    ``dump_custom.C.<index>.dat``. A file is imported only when its parent
    directory is named ``NVT`` and the directory above that (the grandparent)
    has a name that starts with
    ``<element>_<potential>_<type>_<num_atoms>_<density>_<run>``. The
    grandparent name is used as the file's ``unique_key``. Dump files that
    fail one of these checks are reported with an ``ERROR:`` line and skipped.

    Args:
        directory: Root directory to search (``str`` or ``Path``).

    Returns:
        dict: ``{unique_key: [Path, ...]}`` where every list is sorted by the
        numeric index taken from the dump file name.
    """
    dump_file_name = re.compile(r"^dump_custom\.C\.(\d+)\.dat$") # Dump file regex
    # NOTE: this pattern is only anchored at the start and is applied with
    # .match(), so extra text after the run number is accepted.
    unique_key_pattern = re.compile(
        r'^(?P<element_symbol>[A-Za-z]{1,6})_'          # e.g. C
        r'(?P<potential_name>[^_]+)_'                   # e.g. GAP17
        r'(?P<simulation_type>[^_]+)_'                  # e.g. NVT
        r'(?P<num_atoms>\d+)_'                          # e.g. 64
        r'(?P<density>[\d.eE+-]+)_'                     # e.g. 1.5 or 1.85e+00
        r'(?P<run>\d+)'                                 # e.g. 1 (run number)
        )

    imported_simulation_files = defaultdict(list) # Imported files dictionary

    imported_files_counter = 0
    skipped_files_counter = 0

    for path in Path(directory).rglob("*"):

        # Cheap name test first so only candidate entries are stat()-ed.
        m = dump_file_name.match(path.name) # Enforce dump_file file naming
        if not m:
            continue

        if not path.is_file(): # Filters for files not directories
            continue

        parent = path.parent

        if parent.name != "NVT": # Enforce NVT file naming
            skipped_files_counter += 1
            print(f"ERROR: Parent directory for {path}, {parent} is not equal to NVT")
            continue

        unique_key = parent.parent.name

        # Must run before the regex check: an empty name (e.g. the NVT folder
        # sits directly in "." or at the filesystem root) can never match the
        # pattern, so testing the pattern first would hide this diagnostic.
        if not unique_key: # Protect against missing grandparent
            skipped_files_counter += 1
            print(f"ERROR: No grandparent directory for {path}")
            continue

        if not unique_key_pattern.match(unique_key): # Enforce unique_key file naming
            skipped_files_counter += 1
            print(f"ERROR: Invalid unique_key name format '{unique_key}'")
            continue

        numeric_index = int(m.group(1))
        imported_simulation_files[unique_key].append((numeric_index, path))
        imported_files_counter += 1

    # Sort each list by numeric index and drop the index in the final structure.
    sorted_imported_simulation_files = {
        key: [p for _, p in sorted(items, key=lambda pair: pair[0])]
        for key, items in imported_simulation_files.items()
    }

    if imported_files_counter:
        print(f"Imported {imported_files_counter} dump files")
    if skipped_files_counter:
        print(f"Skipped {skipped_files_counter} dump files due to errors")

    return sorted_imported_simulation_files

# Writes cif files from the imported LAMMPS dump files
def write_cif_files(data_dict, out_dir, timestep, element=None):
    """Write one cif file per simulation from a chosen LAMMPS dump file.

    For every ``unique_key`` in *data_dict*, the dump file at position
    *timestep* of its list is read, every atom is given the chemical symbol
    *element*, and the structure is written to ``<out_dir>/<unique_key>.cif``.
    A simulation whose list has no entry at *timestep* is reported with an
    ``ERROR:`` line and skipped instead of aborting the whole run.

    Args:
        data_dict: Mapping ``{unique_key: [dump file paths]}``.
        out_dir: Directory the cif files are written into (``str`` or
            ``Path``). It is not created here.
        timestep: Index into each list of dump files (0-based; the usual
            Python negative indexing also applies).
        element: Chemical symbol assigned to every atom. When omitted, the
            module-level variable ``element`` is used, which keeps existing
            three-argument calls working.

    Raises:
        ValueError: If a file has to be written but no element was passed and
            no module-level ``element`` exists.
    """
    # Normalise once so str and Path behave the same everywhere below
    # (previously the final summary line failed for a str out_dir).
    out_dir = Path(out_dir)

    if element is None:
        # Backward compatibility with callers that rely on the global.
        element = globals().get("element")

    files_written_counter = 0
    skipped_counter = 0

    for unique_key, dump_files in data_dict.items():

        try:
            dump_path = dump_files[timestep]
        except IndexError:
            skipped_counter += 1
            print(
                f"ERROR: '{unique_key}' has {len(dump_files)} dump files, "
                f"index {timestep} is out of range"
            )
            continue

        if element is None:
            raise ValueError(
                "No element given: pass element=... or define a module-level "
                "'element' variable"
            )

        atoms = read(dump_path)
        # LAMMPS dump files only contain a "type" index, so the chemical
        # symbol has to be assigned explicitly.
        atoms.set_chemical_symbols([element] * len(atoms))

        write(out_dir / f"{unique_key}.cif", atoms)

        files_written_counter += 1

    if files_written_counter:
        print(f"{files_written_counter} cif files written to {out_dir.name}")
    if skipped_counter:
        print(f"Skipped {skipped_counter} simulations due to errors")
# -----------------------------------------------

# LAMMPS dump files only contain a "type" index, this must be assigned to a given element
element = "C"

# Position (indexed at 0) of the dump file, within each sorted list, that is converted to cif
timestep = 95

# Root folder that is searched recursively for dump files
simulation_dir = "LAMMPS_simulations/Element: Carbon/Potential: GAP17/Type: NVT/Atoms: 216"

imported_simulation_files = import_simulation_data(simulation_dir)
write_cif_files(imported_simulation_files, amorphous_dir, timestep)

# In [None]:
# Energetics Calculator

