# Benchmarking MACE

In [39]:
%load_ext autoreload
%reload_ext autoreload
%autoreload 2

from pymatgen.core.structure import Structure
from pymatgen.symmetry.analyzer import SpacegroupAnalyzer
from pymatgen.io.ase import AseAtomsAdaptor
import copy
import numpy as np
import pandas as pd

import sys
import os
import json
import shutil as sh

from janus_core.calculations.single_point import SinglePoint
from janus_core.calculations.geom_opt import GeomOpt

current_dir = os.path.dirname(os.path.abspath("__file__"))
sys.path.append(current_dir)

from structure_generation import get_all_configurations_pmg, write_extended_xyz, generate_random_structures, \
    write_CRYSTAL_gui_from_data

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


### AlGaN

In [2]:
# Load the bulk AlN cell from its CIF file.
AlN_bulk = Structure.from_file('data/bulk_structures/AlN.cif')

# Diagonal 3x3x3 expansion matrix.
supercell_matrix = 3 * np.eye(3)

# Expand a deep copy so that AlN_bulk itself is left untouched.
AlN_super3 = copy.deepcopy(AlN_bulk)
AlN_super3.make_supercell(supercell_matrix)

# Last expression of the cell: shows the number of sites in the supercell.
AlN_super3.num_sites



108

In [3]:
# Load the bulk GaN cell from its CIF file.
GaN_bulk = Structure.from_file('data/bulk_structures/GaN.cif')

# Diagonal 3x3x3 expansion matrix.
supercell_matrix = 3 * np.eye(3)

# Expand a deep copy so that GaN_bulk itself is left untouched.
GaN_super3 = copy.deepcopy(GaN_bulk)
GaN_super3.make_supercell(supercell_matrix)

# Last expression of the cell: shows the number of sites in the supercell.
GaN_super3.num_sites

108

In [3]:
# atom_indices_aln = get_all_configurations_pmg(AlN_super3)
# np.savetxt('data/symmetry/aln_108_atom_indices.csv',atom_indices_aln,delimiter=',',fmt='%s')

In [5]:
# Read the stored atom-index table from CSV and cast it to integers.
atom_indices_aln = np.genfromtxt(
    fname='data/symmetry/aln_108_atom_indices.csv',
    delimiter=',',
).astype(int)

In [None]:
# Indices of the sites with atomic number 13 (Al): these are handed to the
# generator as the sites on which substitution is allowed.
active_sites = np.flatnonzero(np.array(AlN_super3.atomic_numbers) == 13)
num_active_sites = active_sites.size

N_atom = 31

# Maps str(N_atoms) -> list of configurations, each stored as a plain list
# of atomic numbers so that the dictionary is JSON-serialisable.
all_config_atom_number = {}

for N_atoms in np.arange(1, 54):

    structures_random = generate_random_structures(
        AlN_super3,
        atom_indices=atom_indices_aln,
        N_atoms=N_atoms,
        new_species=31,
        N_config=500,
        DFT_config=20,
        active_sites=active_sites,
    )

    all_config_atom_number[str(N_atoms)] = [
        list(random_structure.atomic_numbers)
        for random_structure in structures_random
    ]

# with open('data/supercell_structures/AlGaN/AlGaN_super3.json', 'w') as json_file:
#     json.dump(all_config_atom_number, json_file)

In [6]:
# Reload the stored AlGaN configurations from JSON.
# NOTE(review): presumably the file written by the commented-out json.dump
# of all_config_atom_number - confirm.
with open(
    'data/supercell_structures/AlGaN/AlGaN_super3.json',
    mode='r',
    encoding='utf-8',
) as config_handle:
    AlGaN_super3_all_config = json.load(config_handle)


In [None]:
# Generate the Extended XYZ files: one folder per dictionary key (N_atoms)
# and one file per configuration stored under that key.

lattice = AlN_super3.lattice.matrix
positions = AlN_super3.frac_coords
for N_atoms, configurations in AlGaN_super3_all_config.items():

    folder_name = f'data/supercell_structures/AlGaN/AlGaN_super3_{N_atoms}'
    if not os.path.exists(folder_name):
        os.makedirs(folder_name)

    for i, config in enumerate(configurations):
        # Every configuration reuses the AlN supercell lattice and fractional
        # positions; only the list of species changes.
        xyz_path = os.path.join(folder_name, f'AlGaN_super3_{N_atoms}_{i}.xyz')
        structure = Structure(lattice, config, positions)
        write_extended_xyz(structure, xyz_path)

### Write CRYSTAL input files

In [None]:
def generate_slurm_file(file_names_list, project_code='e05-algor-smw'):
    """
    Build the lines of a SLURM batch script that launches one CRYSTAL job per input file.

    The script requests one node per entry of ``file_names_list``, starts every
    job in the background under ``timeout 1430m`` and ends with ``wait`` so the
    batch job stays alive until all background jobs have finished.

    Parameters:
        file_names_list (list of str): Input file names (e.g. ``*.gui``). The
            extension is stripped to obtain the job name passed to the runner.
        project_code (str): Value written to ``#SBATCH --account``.

    Returns:
        list of str: Script lines. Every line except the final ``'wait'``
        already ends with a newline, so the list can be written as-is.
    """
    bash_script = [
        '#!/bin/bash\n',
        f'#SBATCH --nodes={len(file_names_list)}\n',
        '#SBATCH --ntasks-per-node=128\n',
        '#SBATCH --cpus-per-task=1\n',
        '#SBATCH --time=24:00:00\n\n',
        '# Replace [budget code] below with your full project code\n',
        f'#SBATCH --account={project_code}\n',
        '#SBATCH --partition=standard\n',
        '#SBATCH --qos=standard\n',
        '#SBATCH --export=none\n\n',
        'module load epcc-job-env\n',
        'module load other-software\n',
        'module load crystal\n\n',
        '# Address the memory leak\n',
        'export FI_MR_CACHE_MAX_COUNT=0\n',
        'export SLURM_CPU_FREQ_REQ=2250000\n\n',
        '# Run calculations\n',
    ]

    for file_name in file_names_list:
        # splitext removes whatever extension is present; a fixed [:-4] slice
        # would mangle names whose extension is not exactly four characters.
        job_name = os.path.splitext(file_name)[0]
        bash_script.append(
            f'timeout 1430m /work/e05/e05/bcamino/runCRYSTAL/Pcry_slurm_multi {job_name} &\n'
        )

    bash_script.append('wait')

    return bash_script
    



In [75]:
# Lattice matrices of both supercells, rounded to 6 decimals.
AlN_lattice_matrix = np.round(AlN_super3.lattice.matrix[:3], decimals=6)
GaN_lattice_matrix = np.round(GaN_super3.lattice.matrix[:3], decimals=6)

# Element-wise mean of the two lattices, used as the AlGaN cell.
AlGaN_lattice_matrix = 0.5 * (AlN_lattice_matrix + GaN_lattice_matrix)

In [None]:
from structure_generation import write_CRYSTAL_gui_from_data

# Every configuration shares the averaged AlGaN cell and the AlN supercell
# Cartesian positions (rounded to 8 decimals); only the species list differs.
lattice_matrix = AlGaN_lattice_matrix
cart_coords = np.round(AlN_super3.cart_coords, 8)

for N_atoms, configurations in AlGaN_super3_all_config.items():

    for i, atomic_numbers in enumerate(configurations):

        # Files are grouped by configuration index: config_<i> collects the
        # i-th configuration stored under every N_atoms key.
        folder_name = f'data/crystal/AlGaN/super3/config_{i}/'
        file_name = f'AlGaN_super3_{N_atoms}_{i}_0.gui'
        full_name = os.path.join(folder_name, file_name)
        os.makedirs(folder_name, exist_ok=True)

        # Write each .gui file exactly once, with the species of this
        # configuration.
        write_CRYSTAL_gui_from_data(lattice_matrix, atomic_numbers,
                                    cart_coords, full_name, dimensionality=3)


In [9]:
folder_path = 'data/crystal/AlGaN/super3/'

# Every sub-directory of folder_path gets its own SLURM script.
folders = [
    entry for entry in os.listdir(folder_path)
    if os.path.isdir(os.path.join(folder_path, entry))
]

for folder in folders:

    folder_path_new = os.path.join(folder_path, folder)
    slurm_file_name = os.path.join(folder_path_new, f'{folder}_0.slurm')

    # Geometry files (.gui) found directly inside this sub-directory.
    files = []
    for entry in os.listdir(folder_path_new):
        if entry.endswith('.gui') and os.path.isfile(os.path.join(folder_path_new, entry)):
            files.append(entry)

    # copy .d12: each geometry gets a copy of the shared input template,
    # named after the geometry file with its '.gui' suffix removed.
    for gui_name in files:
        input_file = os.path.join(folder_path_new, f'{gui_name[:-4]}.d12')
        sh.copy('data/crystal/AlGaN/super3/super3_input.d12', input_file)

    # The script lines already carry their own newlines.
    bash_script = generate_slurm_file(files)
    with open(slurm_file_name, 'w') as slurm_handle:
        slurm_handle.writelines(bash_script)



### Read CRYSTAL output files

In [None]:
# Load one CRYSTAL output file as a list of lines (line endings kept).
with open('data/crystal/AlGaN/super3/output_files/AlGaN_super3_1_0_0.out', mode='r') as output_handle:
    file_content = list(output_handle)

In [65]:
def parse_extended_xyz(file_content, num_atoms):
    """
    Parse the lines of a CRYSTAL output file into per-structure records.

    The lines are scanned for fixed marker strings. Whenever a record holds
    all six fields it is appended to the result and a fresh record is started.

    Parameters:
        file_content (list of str): Lines of the file to parse.
        num_atoms (int): Number of atoms per structure; this many rows are
            read after the coordinate and force headers.

    Returns:
        list of dict: One dict per complete structure with the keys
        'lattice_matrix', 'fractional_coordinates', 'cartesian_coordinates',
        'energy_ev', 'forces' and 'stress'. The energy is the parsed value
        multiplied by HARTREE_TO_EV; forces and stress are stored as read
        (the marker lines label them Hartree/Bohr and Hartree/Bohr^3).

    Raises:
        KeyError: If the line three above a coordinate header does not yield
            exactly six numbers, so no lattice matrix is available.
        IndexError: If the file ends before all expected rows are present.

    Note:
        Relies on the module-level names ``np``, ``HARTREE_TO_EV`` and
        ``lattice_params_to_matrix``, which are not defined in this function.
    """
    # Imported locally so the function does not depend on a module-level
    # ``import re`` being present.
    import re

    # A number counts as a float only if it has a decimal point, so integer
    # columns on the same line are skipped. Handles scientific notation.
    float_pattern = re.compile(r"[-+]?\d*\.\d+(?:[Ee][-+]?\d+)?")

    def extract_floats(line):
        """Return every decimal number found in ``line`` as a float."""
        return [float(token) for token in float_pattern.findall(line)]

    def read_rows(first, count):
        """Return the floats of ``count`` consecutive lines starting at index ``first``."""
        return [extract_floats(file_content[first + offset]) for offset in range(count)]

    required_keys = (
        'lattice_matrix', 'fractional_coordinates', 'cartesian_coordinates',
        'energy_ev', 'forces', 'stress',
    )

    results = []
    structure_data = {}

    for i, line in enumerate(file_content):
        line = line.strip()

        # Geometry: lattice parameters, fractional and Cartesian coordinates
        if "ATOM                 X/A                 Y/B                 Z/C" in line:
            # The six cell parameters are read from three lines above the header.
            lattice_params = extract_floats(file_content[i - 3])
            if len(lattice_params) == 6:
                a, b, c, alpha, beta, gamma = lattice_params
                structure_data['lattice_matrix'] = lattice_params_to_matrix(a, b, c, alpha, beta, gamma)

            # Atom rows start two lines below the header.
            fractional_coords = read_rows(i + 2, num_atoms)
            structure_data['fractional_coordinates'] = fractional_coords

            # Cartesian = fractional . lattice matrix, one array per atom.
            lattice_matrix = structure_data['lattice_matrix']
            structure_data['cartesian_coordinates'] = [
                np.dot(coord, lattice_matrix) for coord in fractional_coords
            ]

        # Energy: first float on the SCF convergence line
        if "== SCF ENDED - CONVERGENCE ON ENERGY      E(AU)" in line:
            energy_hartree = extract_floats(line)[0]
            structure_data['energy_ev'] = energy_hartree * HARTREE_TO_EV

        # Forces: rows start two lines below the marker
        if "CARTESIAN FORCES IN HARTREE/BOHR (ANALYTICAL)" in line:
            structure_data['forces'] = read_rows(i + 2, num_atoms)

        # Stress tensor: three rows starting four lines below the marker
        if "STRESS TENSOR, IN HARTREE/BOHR^3:" in line:
            structure_data['stress'] = read_rows(i + 4, 3)

        # Store the structure if all required fields are found
        if all(key in structure_data for key in required_keys):
            results.append(structure_data.copy())
            structure_data = {}  # Reset for the next structure

    return results

In [66]:
num_atoms = 108
# Extract every complete structure record from the loaded output lines.
parsed_structures_matrix = parse_extended_xyz(file_content=file_content, num_atoms=num_atoms)

# Tabulate the records for inspection: one row per structure.
df_structures_matrix = pd.DataFrame(data=parsed_structures_matrix)

In [68]:
# Display the first parsed structure record.
df_structures_matrix.iloc[0]

lattice_matrix            [[9.476278, 0.0, 0.0], [-4.738138933233883, 8....
fractional_coordinates    [[0.2200996653749, 0.1100498775573, 0.16356962...
cartesian_coordinates     [[1.5642940073476619, 0.9031460038402501, 2.50...
energy_ev                                                    -482435.890019
forces                    [[-0.001149886825162, -0.01423317605655, -0.01...
stress                    [[0.00012142961089, 2.56922452916e-05, 9.87521...
Name: 0, dtype: object

## MACE geometry optimisation

In [43]:
def mace_geom_opt(atoms):
    """
    Run a geometry optimisation on a copy of ``atoms`` with the "mace_mp" architecture.

    A SinglePoint object is built on a copy of the input (CPU device,
    'small' model, float64) and its ``struct`` is handed to GeomOpt, which
    is then run with ``fmax=0.001``.

    Parameters:
        atoms: Structure to optimise; only ``atoms.copy()`` is used here.
            NOTE(review): presumably an ASE Atoms object - confirm.

    Returns:
        GeomOpt: The optimiser object after ``run()`` has been called.
    """
    calculator_options = {'model_paths': 'small', 'default_dtype': 'float64'}

    # The SinglePoint calculation itself is never run; only its ``struct``
    # is used. NOTE(review): presumably this attaches the calculator to the
    # structure - confirm against janus-core.
    single_point = SinglePoint(
        struct=atoms.copy(),
        arch="mace_mp",
        device='cpu',
        calc_kwargs=calculator_options,
    )

    optimiser = GeomOpt(struct=single_point.struct, fmax=0.001)
    optimiser.run()

    return optimiser

In [54]:
# Display all atomic positions of the stored structure, rounded to 6 decimals.
# NOTE(review): `sAlN_super3_mace_opt` is not assigned in the visible cells -
# confirm the name (possibly `AlN_super3_mace_opt`, a mace_geom_opt result).
np.round(sAlN_super3_mace_opt.struct.positions[0:],6)


array([[ 1.561583,  0.90158 ,  2.512633],
       [ 1.561583,  0.90158 ,  7.536496],
       [ 1.561583,  0.90158 , 12.56036 ],
       [ 0.      ,  3.606321,  2.512633],
       [ 0.      ,  3.606321,  7.536496],
       [ 0.      ,  3.606321, 12.56036 ],
       [-1.561583,  6.311062,  2.512633],
       [-1.561583,  6.311062,  7.536496],
       [-1.561583,  6.311062, 12.56036 ],
       [ 4.684749,  0.90158 ,  2.512633],
       [ 4.684749,  0.90158 ,  7.536496],
       [ 4.684749,  0.90158 , 12.56036 ],
       [ 3.123166,  3.606321,  2.512633],
       [ 3.123166,  3.606321,  7.536496],
       [ 3.123166,  3.606321, 12.56036 ],
       [ 1.561583,  6.311062,  2.512633],
       [ 1.561583,  6.311062,  7.536496],
       [ 1.561583,  6.311062, 12.56036 ],
       [ 7.807914,  0.90158 ,  2.512633],
       [ 7.807914,  0.90158 ,  7.536496],
       [ 7.807914,  0.90158 , 12.56036 ],
       [ 6.246332,  3.606321,  2.512633],
       [ 6.246332,  3.606321,  7.536496],
       [ 6.246332,  3.606321, 12.5