In [None]:
# ------  CASTEP INPUT FILE GENERATOR ------

# 1. Generates .cell and .param files for all specified densities, num_atoms and timesteps
# 2. Generates a list of file prefixes for job-array submission on SLURM
# 3. Generates a SLURM run script

from pathlib import Path
import re
from ase.io import read
from ase.calculators.castep import Castep
from ase.io.castep import write_castep_cell, write_param
from pymatgen.io.vasp import Kpoints
from pymatgen.io.ase import AseAtomsAdaptor
import warnings

#-------------------------------------------------------------------------------------------
#  Filter out noisy warnings from the ASE Castep interface
# Register one "ignore" filter per known-noisy message. The messages are
# registered in this order so the resulting filter list matches two
# consecutive filterwarnings() calls.
for _noisy_message in (
    "Generating CASTEP keywords JSON file",
    "Could not determine the version of your CASTEP binary",
):
    warnings.filterwarnings(
        "ignore",
        message=_noisy_message,
        category=UserWarning,
        module="ase.calculators.castep",
    )
del _noisy_message  # do not leave the loop variable in the module namespace
#-------------------------------------------------------------------------------------------

#-------------------------------------------------------------------------------------------
# Generate CASTEP file name using unique_key + dump file timestep 
#   e.g. C_GAP17_NVT_64_1.5_1_01000
#   Note: assumes file path structure ".../C_GAP17_NVT_64_1.5_1/NVT/dump_custom.C.01000.dat"

# Layout of the simulation directory name, e.g. C_GAP17_NVT_64_1.5_1
_UNIQUE_KEY_PATTERN = re.compile(
    r'^(?P<element_symbol>[A-Za-z]{1,6})_'          # e.g. C
    r'(?P<potential_name>[^_]+)_'                   # e.g. GAP17
    r'(?P<simulation_type>[^_]+)_'                  # e.g. NVT
    r'(?P<num_atoms>\d+)_'                          # e.g. 64
    r'(?P<density>[\d.eE+-]+)_'                     # e.g. 1.5 or 1.85e+00
    r'(?P<run>\d+)'                                 # e.g. 1 (run number)
)

# First run of digits in the dump file name, e.g. "01000" in dump_custom.C.01000.dat
_TIMESTEP_PATTERN = re.compile(r'(\d+)')


def Castep_input_file_generator(lammps_input_file_path, calc, out_dir):
    """Write a CASTEP .cell/.param pair for one LAMMPS dump file.

    The output file stem is "<unique_key>_<timestep>", where ``unique_key`` is
    the third-from-last path component (e.g. ``C_GAP17_NVT_64_1.5_1``) and
    ``timestep`` is the first run of digits in the dump file name
    (e.g. ``01000``).

    Args:
        lammps_input_file_path: Path to the dump file; expected to look like
            ".../<unique_key>/NVT/dump_custom.C.01000.dat".
        calc: Calculator object providing ``.cell`` and ``.param``; it is
            attached to the atoms and used to write both files.
        out_dir: Directory for the generated files; created if missing.

    Raises:
        ValueError: If the path has fewer than three components, the
            unique_key does not match the expected naming scheme, or the dump
            file name contains no digits.
    """
    path_components = Path(lammps_input_file_path).parts
    if len(path_components) < 3:
        raise ValueError(
            "Expected a path of the form '<unique_key>/<type>/<dump file>', "
            f"got: {lammps_input_file_path}"
        )

    unique_key = path_components[-3]
    m = _UNIQUE_KEY_PATTERN.match(unique_key)
    if not m:
        raise ValueError(f"Invalid unique_key name format: {unique_key}")
    element_symbol = m.group("element_symbol")

    dump_file_name = path_components[-1]
    timestep_match = _TIMESTEP_PATTERN.search(dump_file_name)
    if timestep_match is None:
        # Previously this surfaced as an opaque AttributeError on None.
        raise ValueError(f"No timestep found in dump file name: {dump_file_name}")
    castep_unique_key = f"{unique_key}_{timestep_match.group(1)}"

    # Read lammps dump file; specorder supplies the element symbol parsed
    # from the directory name.
    atoms = read(lammps_input_file_path, format='lammps-dump-text', specorder=[element_symbol])
    atoms.calc = calc

    CASTEP_input_dir = Path(out_dir)
    CASTEP_input_dir.mkdir(parents=True, exist_ok=True)

    cell_file_path = CASTEP_input_dir / f"{castep_unique_key}.cell"
    param_file_path = CASTEP_input_dir / f"{castep_unique_key}.param"

    # Seed every atom with an initial magnetic moment of 0.6; written to the
    # .cell file through magnetic_moments="initial".
    atoms.set_initial_magnetic_moments([0.6] * len(atoms))

    write_castep_cell(cell_file_path, atoms, positions_frac=False,
                      force_write=True, precision=6, magnetic_moments="initial",
                      castep_cell=calc.cell)

    write_param(param_file_path, calc.param, force_write=True)

def k_points_generator(file_path):
    """Build a k-point mesh for the structure stored in ``file_path``.

    The file is read with ``ase.io.read`` (no explicit format or species
    order is passed), converted to a pymatgen structure and handed to
    ``Kpoints.automatic_density`` with kppa = 1000 (MP-style KPOINTS:
    Gamma centered meshes for hexagonal and face-centered cells,
    Monkhorst-Pack otherwise).

    NOTE(review): Castep_input_file_generator reads the same dump with
    format='lammps-dump-text' and a specorder; this reader does not.
    Confirm the species assignment is irrelevant for the mesh.

    Returns:
        A 2-tuple ``(ase_kpts, vasp_kpoints)`` where ``ase_kpts`` is a dict
        ``{"size": (n1, n2, n3), "gamma": bool}`` and ``vasp_kpoints`` is the
        pymatgen ``Kpoints`` object it was derived from.
    """
    structure = AseAtomsAdaptor.get_structure(read(Path(file_path)))
    vasp_kpoints = Kpoints.automatic_density(structure, kppa=1000)

    # Mesh subdivisions along b1, b2, b3
    div_b1, div_b2, div_b3 = vasp_kpoints.kpts[0]

    # kp.style may be an enum member (use its name) or already string-like.
    mesh_style = vasp_kpoints.style
    style_label = str(getattr(mesh_style, "name", mesh_style)).lower()

    ase_kpts = {
        "size": (div_b1, div_b2, div_b3),
        "gamma": "gamma" in style_label,  # True for Gamma-centred meshes
    }
    return ase_kpts, vasp_kpoints

# File Generator
def _format_density_label(density, num_atoms):
    """Return the density string used in the LAMMPS directory names."""
    if num_atoms == 64:
        # 64-atom runs use two decimals in their directory names (e.g. 1.50).
        return f"{float(density):.2f}"
    return str(density)


def _build_castep_calculator(kpoints, num_atoms):
    """Create a Castep calculator carrying the single-point settings."""
    calc = Castep(kpts=kpoints)

    # set CASTEP parameters
    calc.param.task                = 'singlepoint'
    calc.param.cut_off_energy      = 520
    calc.param.xc_functional       = 'PBE'

    calc.param.SPIN_POLARIZED      = 'TRUE' # need to set 0.6 for all atoms seed

    calc.param.MAX_SCF_CYCLES      = 100

    # NOTE(review): the tolerance is scaled by the atom count here; if CASTEP
    # already interprets ELEC_ENERGY_TOL per atom this is looser than
    # intended. Confirm against the CASTEP keyword documentation.
    calc.param.ELEC_ENERGY_TOL     = 5e-5 * num_atoms

    calc.param.SMEARING_SCHEME     = 'GAUSSIAN'
    calc.param.SMEARING_WIDTH      = '0.05 eV'
    return calc


def _update_job_list(out_dir):
    """Merge the stems of all .cell/.param files into list_of_castep_jobs.txt."""
    # Make sure the directory exists even when no input files were generated.
    out_dir.mkdir(parents=True, exist_ok=True)
    list_path = out_dir / "list_of_castep_jobs.txt"

    existing = []
    if list_path.exists():
        with open(list_path) as f:
            existing = [line.strip() for line in f if line.strip()]

    found_stems = set()
    for file in out_dir.rglob('*'):
        if not file.is_file():
            continue
        if file.suffix not in (".cell", ".param"):
            if file != list_path:
                print(f"Unrecognized file format: {file.name}. Must be .cell or .param")
            continue
        found_stems.add(file.stem)

    # Existing entries keep their position so previously assigned job-array
    # indices stay valid; new names are appended in sorted order so the
    # result does not depend on directory-listing order.
    merged = list(dict.fromkeys(existing + sorted(found_stems)))

    # One name per line; an empty job list yields an empty file.
    with open(list_path, "w") as f:
        f.writelines(f"{name}\n" for name in merged)


def multi_castep_generator(number_of_runs, densities, timesteps, num_atoms, potential, out_dir):
    """Generate CASTEP inputs for every density/run/timestep combination.

    For each combination the matching LAMMPS dump file is located under
    "LAMMPS_simulations/...", a k-point mesh is derived from it with
    k_points_generator, and a .cell/.param pair is written to ``out_dir`` by
    Castep_input_file_generator. Afterwards the job list
    ``<out_dir>/list_of_castep_jobs.txt`` is refreshed with the stems of all
    .cell/.param files found below ``out_dir``, and a summary is printed.

    Args:
        number_of_runs: Runs are numbered 1..number_of_runs.
        densities: Iterable of densities; formatted with two decimals when
            ``num_atoms == 64``, otherwise with ``str``.
        timesteps: Iterable of integer timesteps (zero-padded to 5 digits).
        num_atoms: Atom count used in the directory names and to scale
            ELEC_ENERGY_TOL.
        potential: Potential name used in the directory names (e.g. "GAP17").
        out_dir: Output directory for the CASTEP input files and job list.

    Returns:
        None.
    """
    counter = 0
    all_kpoints = []
    for density in densities:
        # Format once per density instead of overwriting the loop variable
        # inside the innermost loop.
        density_label = _format_density_label(density, num_atoms)

        for run in range(1, number_of_runs + 1):
            for timestep in timesteps:
                padded_timestep_str = f"{timestep:05d}"

                lammps_input_file_path = (
                    f"LAMMPS_simulations/Element: Carbon/Potential: {potential}/Type: NVT/"
                    f"Atoms: {num_atoms}/Density: {density_label}/"
                    f"C_{potential}_NVT_{num_atoms}_{density_label}_{str(run)}/"
                    f"NVT/dump_custom.C.{padded_timestep_str}.dat")

                # The pymatgen Kpoints object is not needed here.
                kpoints, _ = k_points_generator(lammps_input_file_path)
                calc = _build_castep_calculator(kpoints, num_atoms)

                Castep_input_file_generator(lammps_input_file_path, calc, out_dir)

                counter += 1
                all_kpoints.append(kpoints)

    # Create list of file stems for CASTEP submission script
    _update_job_list(Path(out_dir))

    print(f"Created {counter} CASTEP input files")
    unique_kpoints = [dict(t) for t in {tuple(sorted(d.items())) for d in all_kpoints}]
    print(f"Unique kpoints: {unique_kpoints}")
#-------------------------------------------------------------------------------------------

# Density grids: quarter steps for fine-tuning, tenth steps for testing.
# Integer division by 4 / 10 reproduces the decimal literals exactly.
fine_tuning_densities = [quarters / 4 for quarters in range(5, 15)]   # 1.25, 1.50, ..., 3.50
fine_tuning_timesteps = [
    1000, 2000, 3000,
    4000, 5000, 6000,
    6100, 6300, 6500,
    7500, 8500, 9500,
]
test_densities = [tenths / 10 for tenths in range(15, 36)]            # 1.5, 1.6, ..., 3.5
test_timesteps = [6000, 9500]

# Fine-tuning set (64-atom cells)
fine_tuning_set = multi_castep_generator(
    number_of_runs=10,
    densities=fine_tuning_densities,
    timesteps=fine_tuning_timesteps,
    num_atoms=64,
    potential="GAP17",
    out_dir="CASTEP/Input_files",
)

# Test set (216-atom cells)
test_set = multi_castep_generator(
    number_of_runs=10,
    densities=test_densities,
    timesteps=test_timesteps,
    num_atoms=216,
    potential="GAP17",
    out_dir="CASTEP/Input_files",
)



Created 1200 CASTEP input files
Unique kpoints: [{'gamma': False, 'size': (2, 2, 2)}]
Created 420 CASTEP input files
Unique kpoints: [{'gamma': True, 'size': (1, 1, 1)}]
