In [None]:
 # ------- STRUCTURE TEST SET GENERATOR -----------

from pathlib import Path
from collections import defaultdict
from ase.io import read, write
import re

def import_simulation_data(directory):
    """Collect LAMMPS dump files below *directory*, grouped by simulation run.

    A file is imported only when its path ends in
    ``<unique_key>/NVT/dump_custom.C.<index>.dat``, where ``<unique_key>``
    starts with ``element_potential_type_atoms_density_run`` (the pattern is
    anchored at the start of the name only). Dump files that sit in the wrong
    place are reported on stdout and counted as skipped; files whose name is
    not a dump file name are ignored silently.

    Args:
        directory: Root directory that is searched recursively.

    Returns:
        A dict mapping each ``unique_key`` directory name to the list of its
        dump file paths, ordered by the numeric index in the file name.
    """
    dump_file_name = re.compile(r"^dump_custom\.C\.(\d+)\.dat$") # Dump file regex
    unique_key_pattern = re.compile(
        r'^(?P<element_symbol>[A-Za-z]{1,6})_'          # e.g. C
        r'(?P<potential_name>[^_]+)_'                   # e.g. GAP17
        r'(?P<simulation_type>[^_]+)_'                  # e.g. NVT
        r'(?P<num_atoms>\d+)_'                          # e.g. 64
        r'(?P<density>[\d.eE+-]+)_'                     # e.g. 1.5 or 1.85e+00
        r'(?P<run>\d+)'                                 # e.g. 1 (run number) 
        )

    directory = Path(directory)

    imported_simulation_files = defaultdict(list) # unique_key -> [(index, path), ...]

    imported_files_counter = 0
    skipped_files_counter = 0

    for path in directory.rglob("*"):

        if not path.is_file(): # Filters for files not directories
            continue

        m = dump_file_name.match(path.name) # Enforce dump_file file naming
        if not m:
            continue

        parent = path.parent

        if parent.name != "NVT": # Enforce NVT file naming
            skipped_files_counter += 1
            print(f"ERROR: Parent directory for {path}, {parent} is not equal to NVT")
            continue

        grandparent = parent.parent

        # Must run before the pattern check: an empty name can never match the
        # pattern, so testing it afterwards would leave this branch unreachable.
        if not grandparent.name: # Protect against missing grandparent
            skipped_files_counter += 1
            print(f"ERROR: No grandparent directory for {path}")
            continue

        if not unique_key_pattern.match(grandparent.name): # Enforce unique_key file naming
            skipped_files_counter += 1
            print(f"ERROR: Invalid unique_key name format '{grandparent.name}'")
            continue

        numeric_index = int(m.group(1))
        imported_simulation_files[grandparent.name].append((numeric_index, path))
        imported_files_counter += 1

    # Sort each run by numeric index (not lexicographically, so 2 comes before
    # 10) and drop the index from the final structure.
    sorted_imported_simulation_files = {
        key: [p for _, p in sorted(items, key=lambda pair: pair[0])]
        for key, items in imported_simulation_files.items()
    }

    if imported_files_counter:
        print(f"Imported {imported_files_counter} dump files")
    if skipped_files_counter:
        print(f"Skipped {skipped_files_counter} dump files due to errors")

    return sorted_imported_simulation_files

# Writes one cif file per imported simulation run, taken from a single dump-file index
def write_cif_files(data_dict, out_dir, timestep):
    """Export the dump file at position *timestep* of every run as a CIF file.

    Each output file is named ``<unique_key>_<label>.cif``, where ``<label>``
    is ``timestep * 100`` zero-padded to five digits. Every atom is assigned
    the symbol held in the module-level name ``element``, which must be
    defined before this function is called.

    Args:
        data_dict: Mapping of unique_key -> ordered list of dump file paths.
        out_dir: Output directory; created (with parents) when missing.
        timestep: Integer index into each run's list of dump files.

    Returns:
        List of the CIF file paths that were written (empty if none).
    """
    # Label used in the file name (presumably the simulation time in fs,
    # going by the variable name - confirm against the dump interval).
    timestep_fs = timestep * 100
    padded_timestep = f"{timestep_fs:05d}"

    out_dir = Path(out_dir)
    # parents=True so a nested target such as "A/B" works on a fresh tree
    out_dir.mkdir(parents=True, exist_ok=True)

    written_paths = []

    for unique_key, dump_files in data_dict.items():

        try:
            dump_file = dump_files[timestep]
        except IndexError:
            # One short run should not abort the export of all other runs
            print(f"ERROR: {unique_key} has {len(dump_files)} dump files, no index {timestep}")
            continue

        atoms = read(dump_file)
        atoms.set_chemical_symbols([element] * len(atoms))

        file_path = out_dir / f"{unique_key}_{padded_timestep}.cif"
        write(file_path, atoms)

        written_paths.append(file_path)

    if written_paths:
        print(f"{len(written_paths)} cif files written to {out_dir.name}")

    return written_paths
# -----------------------------------------------

# LAMMPS dump files only carry a numeric "type" index, so the element symbol
# applied to every atom has to be supplied here.
element = "C"

# Root of the simulation tree that is searched for dump files
simulation_dir = "LAMMPS_simulations/Element: Carbon/Potential: GAP17/Type: NVT/Atoms: 216"
imported_simulation_files = import_simulation_data(simulation_dir)

# Dump-file index 95 of every run goes to the amorphous structure set
amorphous_structures = write_cif_files(
    imported_simulation_files,
    out_dir="Carbon_Structures/Amorphous",
    timestep=95,
)

# Dump-file index 60 of every run goes to the liquid structure set
liquid_structures = write_cif_files(
    imported_simulation_files,
    out_dir="Carbon_Structures/Liquid",
    timestep=60,
)

In [None]:
# Crystal Structure Analysis

# 1. import crystal structure
# 2. relax using BFGS algorithm with a model
# 3. calculate energy relative to isolated atom
# 4. calculate lattice params and avg bond lengths

from pathlib import Path
from ase import atoms
from ase.io import read
from ase.optimize import BFGS
from ase.filters import UnitCellFilter
from graph_pes.models import load_model
from graph_pes.utils.calculator import merge_predictions

# path_to_models: iterable of model file paths; every model is applied to every structure
def energetics_calculator(in_dir, relaxed_dir, path_to_models, path_to_reference_structure, OVERWRITE):
    """Relax every structure in *in_dir* with each model and print its energetics.

    For each (model, structure) pair the structure is relaxed with BFGS while
    the unit cell is allowed to change. The optimisation trajectory is written
    to ``<relaxed_dir>/Trajectories`` and the last frame to
    ``<relaxed_dir>/Final Trajectory Frame``. Lattice parameters, energies and
    forces of the final frame are printed.

    Args:
        in_dir: Directory searched recursively for structure files.
        relaxed_dir: Directory that receives the trajectory and CIF output.
        path_to_models: Iterable of model file paths.
        path_to_reference_structure: Structure file whose energy is subtracted
            from the relaxed structure's energy.
        OVERWRITE: When false, pairs whose trajectory file already exists are
            skipped and only counted.

    Returns:
        None. Results are printed and written to disk.
    """
    in_dir = Path(in_dir)

    # rglob("*") also yields directories, which cannot be parsed as
    # structures; sorting keeps the processing order reproducible.
    structure_files = sorted(p for p in in_dir.rglob('*') if p.is_file())
    downloaded_structures = [(file, read(file)) for file in structure_files]

    print(f"Imported {len(downloaded_structures)} structures from {in_dir}\n")

    traj_dir = Path(relaxed_dir) / "Trajectories"
    final_traj_dir = Path(relaxed_dir) / "Final Trajectory Frame"

    existing_traj_files_counter = 0

    for path_to_model in path_to_models:

        model_path = Path(path_to_model)
        model = load_model(model_path)
        calculator = model.ase_calculator()

        traj_dir.mkdir(parents=True, exist_ok=True)
        final_traj_dir.mkdir(parents=True, exist_ok=True)

        # The reference energy depends only on the model, so it is evaluated
        # at most once per model, and only if a structure is actually relaxed.
        ref_energy = None
        counter = 0

        # Loop over all structures
        for file, structure in downloaded_structures:

            output_stem = f"{model_path.stem}_relaxed_{file.stem}"
            traj_out_path = traj_dir / f"{output_stem}.traj"
            final_traj_out_path = final_traj_dir / f"{output_stem}.cif"

            if traj_out_path.exists() and not OVERWRITE:
                existing_traj_files_counter += 1
                continue

            # Relax a copy: the optimiser changes positions and cell in place,
            # and every model must start from the geometry that was imported.
            working_structure = structure.copy()
            working_structure.calc = calculator

            # Relax structure with BFGS (allowing for cell params to change)
            ucf = UnitCellFilter(working_structure)
            opt = BFGS(ucf,
                       logfile=None,
                       trajectory=traj_out_path)
            opt.run(fmax=0.02, steps=200)

            # Write final relaxed structure
            final_structure = read(traj_out_path, index=-1)
            write(final_traj_out_path, final_structure)

            # Calculate energy of final structure
            calculator.calculate(final_structure, properties=["energy", "forces"])
            raw_energy = calculator.results.get("energy", None)
            forces = calculator.results.get("forces", None)

            # Calculate energy of reference structure (first use per model only)
            if ref_energy is None:
                reference_calc = model.ase_calculator()
                ref_struct = read(Path(path_to_reference_structure))
                reference_calc.calculate(ref_struct, properties=["energy"])
                ref_energy = reference_calc.results.get("energy", None)

            # Relative energy
            # NOTE(review): the reference energy is subtracted once from the
            # energy of the whole cell, not once per atom - confirm intended.
            relative_energy = raw_energy - ref_energy

            # Lattice params
            a, b, c, alpha, beta, gamma = final_structure.cell.cellpar()

            # Print results
            print(
                  f"{model_path.name} Results for {file.stem}:"
                  f"\nLattice parameters = ({a},{b},{c}) ({alpha},{beta},{gamma})"
                  f"\nReference energy = {ref_energy}"
                  f"\nRaw energy = {raw_energy}"
                  f"\nRelative energy = {relative_energy}"
                  f"\nForces = {forces}\n"
                    )

            counter += 1

        if counter:
            print(f"Relaxed and analyzed {counter} structures with {model_path.name}\n")

    print(f"Skipped {existing_traj_files_counter} existing traj files")

# Model files that are each applied to every downloaded crystal structure
models_to_analyse = [
    "MACE_Models/medium-0b3.pt",
    "MACE_Models/medium-mpa-0.pt",
    "MACE_Models/medium-omat-0.pt",
]

# When False, structures whose trajectory file already exists are skipped
set_OVERWRITE = False

energetics_calculator(
    in_dir="Carbon_Structures/Crystalline/Downloaded",
    relaxed_dir="Carbon_Structures/Crystalline/Relaxed",
    path_to_models=models_to_analyse,
    path_to_reference_structure="Carbon_Structures/isolated_C.cif",
    OVERWRITE=set_OVERWRITE,
)


Imported 2 structures from Carbon_Structures/Crystalline/Downloaded

Skipped 6 existing traj files


In [None]:
# Create an isolated carbon atom
from ase import Atoms
from ase.io import write

# Single carbon atom at the centre of a 30 x 30 x 30 non-periodic cubic box
atoms = Atoms(
    "C",
    positions=[[15, 15, 15]],
    cell=[30, 30, 30],
    pbc=False,
)

# NOTE(review): the crystal analysis reads its reference structure from
# "Carbon_Structures/isolated_C.cif", whereas this writes into the current
# working directory - confirm the file ends up where it is read.
write("isolated_C.cif", atoms)
