In [None]:
from pathlib import Path
import re
import pandas as pd
from IPython.display import display

# ------ SIMULATION VALIDATOR ------
# Before running analysis it's important to check that all simulations have run as expected
# 1. Check expected number of dump files
# 2. Checks log files for ERRORs and WARNINGs
# 3. Check that all expected simulations exist
# ----------------------------------

# Number of files simulation_validator() requires in each simulation's NVT/ directory.
expected_number_of_dump_files_per_sim = 96             # =(total number of timesteps + 1)/100
# Every (element, potential, type, atoms) combination found on disk is expected
# to have one simulation for each pairing of these densities and run numbers.
expected_densities = [1.25, 1.5, 1.75, 2.0, 2.25, 2.5, 2.75, 3.0, 3.25, 3.5]
expected_runs = [1,2,3,4,5,6,7,8,9,10]
# When True, simulation_validator() lists each simulation whose log contains "WARNING";
# when False only the number of such simulations is printed.
list_files_with_warnings = False

# This expects file structure:
# <unique_key>/                     
#         └── NVT/                       
#               ├── dump_custom.C.00000.dat  
#               ├── dump_custom.C.00001.dat

def simulation_validator(directory):
    """Check a tree of LAMMPS simulation outputs and print a validation report.

    Three checks are run over everything below ``directory``:

    1. every ``NVT`` directory holds exactly
       ``expected_number_of_dump_files_per_sim`` files, all of them named
       ``dump_custom*``;
    2. no ``*.log`` file contains the text ``ERROR`` (``WARNING`` is reported
       separately and does not invalidate a simulation);
    3. for every (element, potential, type, atoms) combination found on disk,
       a directory exists for each pairing of ``expected_densities`` and
       ``expected_runs``.

    A simulation that fails several of checks 1-2 is counted once in the
    "Invalid simulations" total.

    Args:
        directory: Root directory to search recursively (str or Path).

    Returns:
        None. The report is printed; missing simulations are shown with
        ``display``.
    """
    directory = Path(directory)

    errors = set()
    warnings = set()
    # Directories (not a counter) so that a simulation failing more than one
    # check is only counted once.
    invalid_sim_dirs = set()

    # ------ Dump file count and naming ------
    for nvt_dir in directory.rglob("NVT"):
        if not nvt_dir.is_dir():
            continue

        sim_dir = nvt_dir.parent
        unique_key = sim_dir.name

        # All files in the NVT directory
        all_files = [f for f in nvt_dir.iterdir() if f.is_file()]
        total = len(all_files)

        # Check number of files matches expected number
        if total != expected_number_of_dump_files_per_sim:
            print(f"{total} files found in {unique_key}")
            invalid_sim_dirs.add(sim_dir)

        # Files that do NOT start with 'dump_custom'
        unrecognized_files = [f for f in all_files if not f.name.startswith("dump_custom")]
        if unrecognized_files:
            print(f"Unrecognized files found in {unique_key}")
            print(unrecognized_files)
            invalid_sim_dirs.add(sim_dir)

    # ------ ERROR and WARNING messages in .log files ------
    for log_file in directory.rglob("*.log"):

        has_error = False
        has_warning = False

        # NOTE(review): the log's parent directory name is used as the
        # unique_key, i.e. logs are presumed to sit directly inside the
        # <unique_key>/ directory - confirm against the run scripts.
        unique_key = log_file.parent.name

        with log_file.open("r", encoding="utf-8", errors="ignore") as f:
            for line in f:
                if "ERROR" in line:
                    has_error = True
                    break  # stop reading; error takes priority
                elif "WARNING" in line:
                    has_warning = True

        if has_error:
            errors.add(unique_key)
            invalid_sim_dirs.add(log_file.parent)
        elif has_warning:
            warnings.add(unique_key)

    if errors:
        print(f"{len(errors)} Simulations displaying errors")
        for e in sorted(errors):
            print(f"   - {e}")
    else:
        print("\nNo simulations displaying errors.")

    if warnings:
        print(f"\n{len(warnings)} Simulations displaying warnings")
        if list_files_with_warnings:
            for w in sorted(warnings):
                print(f"   - {w}")
    else:
        print("\nNo simulations displaying warnings.")

    print(f"\n{len(invalid_sim_dirs)} Invalid simulations")

    # ------ Missing Simulation Check -----
    columns = ['element_symbol', 'potential_name', 'simulation_type', 'num_atoms', 'density', 'run']
    numeric_dtypes = {'num_atoms': 'int64', 'density': 'float64', 'run': 'int64'}

    unique_key_pattern = re.compile(
        r'^(?P<element_symbol>[A-Za-z]{1,6})_'          # e.g. C
        r'(?P<potential_name>[^_]+)_'                   # e.g. GAP17
        r'(?P<simulation_type>[^_]+)_'                  # e.g. NVT
        r'(?P<num_atoms>\d+)_'                          # e.g. 64
        r'(?P<density>[\d.eE+-]+)_'                     # e.g. 1.5 or 1.85e+00
        r'(?P<run>\d+)$'                                 # e.g. 1 (run number)
        )

    # One row per directory whose name is a well-formed unique_key.
    # The six regex groups are in the same order as `columns`.
    simulations = []
    for candidate in directory.rglob('*'):
        if not candidate.is_dir():
            continue
        m = unique_key_pattern.fullmatch(candidate.name)
        if m:
            simulations.append(m.groups())

    simulations_df = pd.DataFrame(simulations, columns=columns).astype(numeric_dtypes)

    if simulations_df.empty:
        # With nothing on disk there is no parameter combination to build
        # expectations from, so "No missing simulations" would be misleading.
        print("\nWARNING: No simulation directories matching the unique_key naming pattern were found")
        return

    # Expected simulations: every found parameter combination x density x run.
    # Built in one pass rather than appending to a DataFrame row by row.
    unique_sim_params_df = simulations_df[['element_symbol', 'potential_name', 'simulation_type', 'num_atoms']].drop_duplicates()
    expected_rows = [
        (params.element_symbol, params.potential_name, params.simulation_type,
         params.num_atoms, density, run)
        for params in unique_sim_params_df.itertuples(index=False)
        for density in expected_densities
        for run in expected_runs
    ]
    expected_sims_df = pd.DataFrame(expected_rows, columns=columns).astype(numeric_dtypes)

    # Rows present only in the expected table are the missing simulations
    diff = simulations_df.merge(expected_sims_df, how='outer', indicator=True)
    missing_sims = diff[diff['_merge'] == 'right_only']

    if missing_sims.empty:
        print("\nNo missing simulations")

    else:
        print(f"\nWARNING: {len(missing_sims)} Missing simulation(s) (based on expected number of runs and densities)")
        print("\nMissing Simulations:")
        display(missing_sims)

# Root of the simulation tree to validate; everything below it is searched recursively.
directory_to_validate = "LAMMPS_simulations/Element: Carbon/Potential: GAP17/Type: NVT/Atoms: 64"

# Prints the validation report (file counts, log errors/warnings, missing simulations).
simulation_validator(directory_to_validate)

In [None]:
#--------- ANALYSIS SCRIPT---------------

# This script searches the LAMMPS_simulations directory and 
# 1. Identifies simulations that have not yet been analysed
# 2. Creates ovito files and renderings
# 3. Runs the following analyses with Ovito:
#   a) % of sp, sp2, sp3 environments 
#   b) Radial Distribution Functions
#   c) Potential Energy
#   d) Bond Length
#   e) Force Magnitude
#   f) Angular Distribution functions
# 4. Runs the following analyses with MatSciPy
#   a) Ring size histogram
#   b) n-membered rings against density

import re
import numpy as np
from pathlib import Path
from collections import defaultdict

# Ovito
import ovito
from ovito.io import import_file
from ovito.modifiers import CreateBondsModifier, CoordinationAnalysisModifier, ColorCodingModifier, BondAnalysisModifier
from ovito.vis import Viewport, TachyonRenderer, ColorLegendOverlay, BondsVis
from ovito.qt_compat import QtCore

# MatSciPy
from matscipy.rings import ring_statistics
from ase.io import read

# ------ MAKE NEW DIRECTORIES ------

# Output tree, rooted at the current working directory:
#   Analysis/Amorphous Structural Analysis/Raw Data
#   Analysis/Amorphous Structural Analysis/Ovito
cwd = Path.cwd()

analysis_dir = cwd / "Analysis"
structural_analysis_dir = analysis_dir / "Amorphous Structural Analysis"
data_dir = structural_analysis_dir / "Raw Data"
ovito_dir = structural_analysis_dir / "Ovito"

# Parents are listed before their children so each plain mkdir() finds its parent in place.
for output_dir in (analysis_dir, structural_analysis_dir, data_dir, ovito_dir):
    output_dir.mkdir(exist_ok=True)
# ----------------------------------

# ------  IMPORT SIMULATION DATA ------
# 1. Searches recursively through the specified directory
# 2. Creates a dictionary sorted_imported_simulation_files = {unique_key: [sorted list of dump_file path objects]} 
# 3. This can be loaded like so: 
#   a) first item: unique_key, dump_file = next(iter(imported_simulation_files.items()))
#   b) loop through all items: for unique_key, dump_files in imported_simulation_files.items():

# NOTE: The unique_key is generated from the grandparent of the dumpfiles
# This function expects the following file structure, "dump_custom.C.00000" regex and unique_key regex:
#
# <unique_key>/                     
#         └── NVT/                       
#               ├── dump_custom.C.00000.dat  
#               ├── dump_custom.C.00001.dat

def import_simulation_data(directory):
    """Collect LAMMPS dump files below ``directory``, grouped by simulation.

    Only files named ``dump_custom.C.<digits>.dat`` are considered. Each must
    sit in a directory called ``NVT`` whose own parent directory name is a
    complete unique_key
    (``<element>_<potential>_<type>_<atoms>_<density>_<run>``). Dump files
    that break either rule are reported, counted as skipped and left out.

    The unique_key must match in full: a directory such as
    ``C_GAP17_NVT_64_1.5_1_old`` is rejected, matching the anchored pattern
    used by ``simulation_validator``.

    Args:
        directory: Root directory to search recursively (str or Path).

    Returns:
        dict mapping unique_key (str) to a list of dump file ``Path`` objects
        sorted by the numeric index in the file name.
    """
    dump_file_name = re.compile(r"^dump_custom\.C\.(\d+)\.dat$") # Dump file regex
    unique_key_pattern = re.compile(
        r'^(?P<element_symbol>[A-Za-z]{1,6})_'          # e.g. C
        r'(?P<potential_name>[^_]+)_'                   # e.g. GAP17
        r'(?P<simulation_type>[^_]+)_'                  # e.g. NVT
        r'(?P<num_atoms>\d+)_'                          # e.g. 64
        r'(?P<density>[\d.eE+-]+)_'                     # e.g. 1.5 or 1.85e+00
        r'(?P<run>\d+)'                                 # e.g. 1 (run number)
        )

    directory = Path(directory)

    imported_simulation_files = defaultdict(list) # unique_key -> [(numeric_index, path)]

    imported_files_counter = 0
    skipped_files_counter = 0

    for path in directory.rglob("*"):

        if not path.is_file(): # Filters for files not directories
            continue

        m = dump_file_name.match(path.name) # Enforce dump_file file naming
        if not m:
            continue

        parent = path.parent

        if parent.name != "NVT": # Enforce NVT file naming
            skipped_files_counter += 1
            print(f"ERROR: Parent directory for {path}, {parent} is not equal to NVT")
            continue

        grandparent = parent.parent

        # Must be tested before the pattern: an empty name can never match
        # the pattern, so in the other order this message is unreachable.
        if not grandparent.name: # Protect against missing grandparent
            skipped_files_counter += 1
            print(f"ERROR: No grandparent directory for {path}")
            continue

        # fullmatch (not match) so names with trailing text are rejected
        if not unique_key_pattern.fullmatch(grandparent.name): # Enforce unique_key file naming
            skipped_files_counter += 1
            print(f"ERROR: Invalid unique_key name format '{grandparent.name}'")
            continue

        imported_simulation_files[grandparent.name].append((int(m.group(1)), path))
        imported_files_counter += 1

    # Sort each list by numeric index (so 2 comes before 10) and drop the index
    sorted_imported_simulation_files = {
        key: [p for _, p in sorted(items, key=lambda pair: pair[0])]
        for key, items in imported_simulation_files.items()
    }

    if imported_files_counter:
        print(f"Imported {imported_files_counter} dump files")
    if skipped_files_counter:
        print(f"Skipped {skipped_files_counter} dump files due to errors")

    return sorted_imported_simulation_files

# {unique_key: [sorted dump file paths]} for every simulation under LAMMPS_simulations/
imported_simulation_files = import_simulation_data("LAMMPS_simulations")
# len() of the mapping is the number of simulations (unique_keys), not of dump files;
# import_simulation_data() already prints the dump file count.
print(f"Imported dump files for {len(imported_simulation_files)} LAMMPS simulations")

# Sets up an empty pipeline for each successive function to use
def empty_ovito_pipeline(imported_simulation_files):
    """Reset the OVITO scene and return a modifier-free pipeline.

    Every pipeline currently in ``ovito.scene`` is removed first. The new
    pipeline is created by importing the dump files of the first simulation
    in ``imported_simulation_files``; no modifiers are added here.

    Args:
        imported_simulation_files: Mapping of unique_key to a list of dump
            file paths, as returned by ``import_simulation_data``.

    Returns:
        The pipeline returned by ``import_file``.

    Raises:
        ValueError: If the mapping is empty or its first entry has no dump files.
    """
    # Iterate over a snapshot because removing a pipeline changes the scene's list
    for existing_pipeline in tuple(ovito.scene.pipelines):
        existing_pipeline.remove_from_scene()

    if not imported_simulation_files:
        raise ValueError("No datafiles provided to empty_ovito_pipeline()")

    # First simulation in the mapping seeds the pipeline
    first_key = next(iter(imported_simulation_files))
    first_dump_files = imported_simulation_files[first_key]

    if not first_dump_files:
        raise ValueError(f"No dump files found for simulation '{first_key}'")

    return import_file(first_dump_files)
# Shared pipeline passed to every analysis function below; each function appends
# the modifiers it needs and removes them again before returning.
pipeline = empty_ovito_pipeline(imported_simulation_files)

# Data visualisation in Ovito
def ovito_analysis(data_dict, pipeline):
    """Save an OVITO session, a still image and an animation for run 1 of each simulation.

    Bonds (cutoff 1.85), a coordination analysis (cutoff 1.85) and a colour
    coding of the "Coordination" property are added to ``pipeline`` for the
    duration of the call and removed again afterwards, even if rendering
    raises. For every simulation whose run number is exactly 1 the function
    writes ``<unique_key>.ovito``, ``<unique_key>.png`` (last frame) and
    ``<unique_key>.avi`` below ``ovito_dir``.

    As soon as an existing output is found anywhere under ``ovito_dir`` the
    rest of that simulation is skipped: an existing ``.ovito`` file skips the
    image and video too, and an existing image skips the video.

    Keys that are not a complete unique_key are reported and skipped.

    Args:
        data_dict: Mapping of unique_key to a sorted list of dump file paths.
        pipeline: OVITO pipeline to load each simulation into.

    Raises:
        ValueError: If ``data_dict`` is empty, or an element symbol other than
            "C" or "Si" is encountered.
    """
    if not data_dict:
        raise ValueError("No datafiles provided to ovito_analysis()")

    # ------- ANALYSIS OF IMPORTED FILES ------------
    # BUG: Image and video renderers error with:
    # "RuntimeError: Visual element 'Rings' reported an error:Failed to build non-periodic representation of periodic surface mesh. Periodic domain might be too small." if ring mod is included.

    # Compiled once here instead of once per simulation
    unique_key_pattern = re.compile(
        r'^(?P<element_symbol>[A-Za-z]{1,6})_'          # e.g. C
        r'(?P<potential_name>[^_]+)_'                   # e.g. GAP17
        r'(?P<simulation_type>[^_]+)_'                  # e.g. NVT
        r'(?P<num_atoms>\d+)_'                          # e.g. 64
        r'(?P<density>[\d.eE+-]+)_'                     # e.g. 1.5 or 1.85e+00
        r'(?P<run>\d+)'                                 # e.g. 1 (run number)
        )
    element_names = {"C": "Carbon", "Si": "Silicon"}

    # Bond Modifier and Visuals
    bond_modifier = CreateBondsModifier(cutoff=1.85)
    bond_modifier.vis.width = 0.15
    bond_modifier.vis.coloring_mode = BondsVis.ColoringMode.Uniform
    bond_modifier.vis.color = (0.5, 0.5, 0.5)

    # Coordination Modifier and Colour Coding
    coordination_mod = CoordinationAnalysisModifier(cutoff=1.85)
    colour_coding_mod = ColorCodingModifier(property="Coordination",start_value=1.0,end_value=4.0,gradient=ColorCodingModifier.Viridis(),discretize_color_map=True)

    pipeline.modifiers.append(bond_modifier)
    pipeline.modifiers.append(coordination_mod)
    pipeline.modifiers.append(colour_coding_mod)

    try:
        # Add to Scene
        pipeline.add_to_scene()

        # Viewing settings
        vp = Viewport()
        vp.type = Viewport.Type.Perspective

        # Coordination Legend
        legend = ColorLegendOverlay(
            title = "Coordination",
            modifier = colour_coding_mod,
            alignment = QtCore.Qt.AlignmentFlag.AlignHCenter | QtCore.Qt.AlignmentFlag.AlignBottom,
            orientation = QtCore.Qt.Orientation.Horizontal,
            font_size = 0.1,
            format_string = '%.0f'
            )
        vp.overlays.append(legend)

        # One renderer is reused for every image and animation
        tachyon = TachyonRenderer(shadows=False, direct_light_intensity=1.1)

        # Skipped/Created file counters
        skipped_ovito_files_counter = 0
        skipped_png_files_counter = 0
        skipped_avi_files_counter = 0
        created_ovito_files_counter = 0
        created_png_files_counter = 0
        created_avi_files_counter = 0

        for unique_key, dump_files in data_dict.items():

            m = unique_key_pattern.fullmatch(unique_key)
            if not m:
                # Without this `continue` the m.group() calls below would
                # raise AttributeError on None.
                print(f"Invalid unique_key name format: {unique_key}")
                continue

            element_symbol, potential_name, simulation_type, num_atoms, density, run = m.groups()

            if element_symbol not in element_names:
                raise ValueError (f"Unrecognized element_symbol: {element_symbol}. Update symbol --> element mapping")
            element = element_names[element_symbol]

            # File Name
            ovito_file_dir = ovito_dir / f"Element: {element}" / f"Potential: {potential_name}" / f"Type: {simulation_type}" / f"Atoms: {num_atoms}" / f"Density: {density}"
            ovito_file_dir.mkdir(parents=True, exist_ok=True)

            # Only the first repeat is rendered. The parsed run number is
            # compared so that runs 11, 21, ... are not mistaken for run 1.
            if int(run) != 1:
                continue

            ovito_save_file = ovito_file_dir / f"{unique_key}.ovito"

            # Ovito File Existence-Checker
            if any(ovito_dir.rglob(ovito_save_file.name)):
                skipped_ovito_files_counter += 1
                continue

            pipeline.source.load(dump_files)

            # Set particle scaling (datafile specific)
            n_frames = pipeline.source.num_frames
            final_frame = max(0, n_frames - 1)
            data = pipeline.compute(frame = final_frame)
            data.particles.vis.scaling = 0.3

            # Set Zoom
            vp.zoom_all()

            ovito.scene.save(ovito_save_file)
            created_ovito_files_counter += 1

            # Create Images
            img_save_file = ovito_file_dir / f"{unique_key}.png"

            # Image File Existence-Checker
            if any(ovito_dir.rglob(img_save_file.name)):
                skipped_png_files_counter += 1
                continue

            vp.render_image(size=(1920,1080),
                            filename=str(img_save_file),
                            background=(1,1,1),
                            frame=final_frame,
                            renderer=tachyon)
            created_png_files_counter += 1

            # Create Videos
            vid_save_file = ovito_file_dir / f"{unique_key}.avi"

            # Video File Existence-Checker
            if any(ovito_dir.rglob(vid_save_file.name)):
                skipped_avi_files_counter += 1
                continue

            vp.render_anim(size=(1920,1080),
                        filename=str(vid_save_file),
                        fps=10,
                        renderer=tachyon)
            created_avi_files_counter += 1

        # Print Skipped/Created files
        if skipped_ovito_files_counter:
            print(f"Skipped {skipped_ovito_files_counter} existing .ovito files")
        if skipped_png_files_counter:
            print(f"Skipped {skipped_png_files_counter} existing image files")
        if skipped_avi_files_counter:
            print(f"Skipped {skipped_avi_files_counter} existing .avi files")

        if created_ovito_files_counter:
            print(f"Created {created_ovito_files_counter} .ovito files")
        if created_png_files_counter:
            print(f"Created {created_png_files_counter} image files")
        if created_avi_files_counter:
            print(f"Created {created_avi_files_counter} .avi files")

    finally:
        # Remove the three modifiers added above so the shared pipeline is
        # left clean for later analyses, even when rendering failed.
        pipeline.modifiers.pop()
        pipeline.modifiers.pop()
        pipeline.modifiers.pop()


# ------ DATA GENERATION FUNCTIONS ------
# file_analysis(): 
#   1. uses imported_simulation_files from import_simulation_data()
#   2. uses the pipeline from empty_ovito_pipeline(): no modifiers by default
#   3. checks if files already exist in "Structural Analysis"
#   4. loads each file in datafiles into the existing pipeline
#   5. computes a specified data object for the given pipeline on each file and saves to a file name given by the "unique_key" + "data_tag"
#   NOTE:
#       a) requires "data_tag": e.g. "bond_length_data.txt" or "RDF_data.txt" (include file suffix, e.g. ".txt")
#       b) "data_function" refers to the ovito function that return the desired data object 
#               e.g. "data.particles['Coordination']" or "data.tables['coordination-rdf'].xy()" or "data.particles["c_pea"]" 
#       c) requires use of the "lambda data:" syntax for creating a throwaway function
#               e.g. file_analysis(data_dict, pipeline, "RDF.txt", lambda data: data.tables['coordination-rdf'].xy(), "ovito", set_frame, set_frame_temp)
#   6. Set_frame allows specification of which dump file to use (set_frame = 0 is the first file)
#   7. Set_frame_temp is used to name the files based on the temperature in the specified frame

analysis_tools = ["ovito", "matscipy"]
def file_analysis(data_dict, pipeline, data_tag, data_function, analysis_tool, set_frame, set_frame_temp):
    """Compute one quantity per simulation and save it as a comma-separated text file.

    For each simulation the frame ``set_frame`` is loaded (through
    ``pipeline`` for "ovito", through ``ase.io.read`` for "matscipy"),
    ``data_function`` is applied to it and the result is written with
    ``np.savetxt`` to
    ``<data_dir>/Element: .../Run: <run>/<unique_key>_<set_frame_temp>K_<data_tag>``.

    A simulation is skipped (and the rest still processed) when:
      * ``set_frame`` is outside its list of dump files,
      * its key does not start with a valid unique_key,
      * a file of the same name already exists anywhere under ``data_dir``
        and ``REPLACE_OLD_FILES`` is False.

    Args:
        data_dict: Mapping of unique_key to a sorted list of dump file paths.
        pipeline: OVITO pipeline; only used when ``analysis_tool`` is "ovito".
        data_tag: Output file name suffix including extension, e.g. "RDF.txt".
        data_function: Callable taking the loaded frame and returning data
            that ``np.savetxt`` can write.
        analysis_tool: "ovito" or "matscipy".
        set_frame: Zero-based index of the dump file / frame to analyse.
        set_frame_temp: Temperature label used in the output file name.

    Raises:
        ValueError: If ``data_dict`` is empty or an element symbol other than
            "C" or "Si" is encountered.
        AttributeError: If ``analysis_tool`` is not in ``analysis_tools``.
    """
    if not data_dict:
        raise ValueError("No datafiles provided")

    if analysis_tool not in analysis_tools:
        raise AttributeError(f"{analysis_tool} not recognized in analysis_tools")

    # Compiled once here instead of once per simulation
    unique_key_pattern = re.compile(
        r'^(?P<element_symbol>[A-Za-z]{1,6})_'          # e.g. C
        r'(?P<potential_name>[^_]+)_'                   # e.g. GAP17
        r'(?P<simulation_type>[^_]+)_'                  # e.g. NVT
        r'(?P<num_atoms>\d+)_'                          # e.g. 64
        r'(?P<density>[\d.eE+-]+)_'                     # e.g. 1.5 or 1.85e+00
        r'(?P<run>\d+)'                                 # e.g. 1 (run number)
        )
    element_names = {"C": "Carbon", "Si": "Silicon"}

    # Skipped/Created file counters
    skipped_files_counter = 0
    created_files_counter = 0
    out_of_range_counter = 0

    # ----- STRUCTURAL ANALYSIS -----
    for unique_key, dump_files in data_dict.items():

        # Check that set_frame is within range. Only this simulation is
        # skipped: returning here would silently drop every remaining
        # simulation and the summary below.
        if not 0 <= set_frame < len(dump_files):
            print(f"Specified frame out of range: 0 - {len(dump_files)-1} ({unique_key})")
            out_of_range_counter += 1
            continue

        m = unique_key_pattern.match(unique_key)
        if not m:
            # Without this `continue` the m.groups() call below would raise
            # AttributeError on None.
            print(f"Invalid unique_key name format: {unique_key}")
            continue

        element_symbol, potential_name, simulation_type, num_atoms, density, run = m.groups()

        if element_symbol not in element_names:
            raise ValueError (f"Unrecognized element_symbol: {element_symbol}. Update symbol --> element mapping")
        element = element_names[element_symbol]

        # File Name
        data_file_dir = data_dir / f"Element: {element}" / f"Potential: {potential_name}" / f"Type: {simulation_type}" / f"Atoms: {num_atoms}" / f"Density: {density}" / f"Run: {run}"
        data_file_dir.mkdir(parents=True, exist_ok=True)
        data_file_name = data_file_dir / f"{unique_key}_{set_frame_temp}K_{data_tag}"

        # Structural Analysis File Existence-Checker (searches the whole data_dir tree)
        data_exists = any(data_dir.rglob(data_file_name.name))
        if data_exists and not REPLACE_OLD_FILES:
            skipped_files_counter += 1
            continue

        # Read files based on which tool is being used:
        if analysis_tool == "ovito":
            # ovito favours LAMMPS: load the whole trajectory into the
            # pipeline and compute data for the specified frame
            pipeline.source.load(dump_files)
            data = pipeline.compute(frame = set_frame)
        else:
            # matscipy favours ase: read only the requested dump file
            data = read(dump_files[set_frame], format="lammps-dump-text")

        # Data function
        specific_data = data_function(data)
        np.savetxt(data_file_name, specific_data, delimiter=",", fmt="%.6f")
        created_files_counter += 1

    # Print Skipped/Created Files
    if out_of_range_counter:
        print(f"Skipped {out_of_range_counter} simulations without frame {set_frame}")
    if skipped_files_counter:
        print(f"Skipped {skipped_files_counter} existing {data_tag} files")
    if created_files_counter:
        print(f"Created {created_files_counter} {data_tag} files")

# ------ Ovito Analysis ------
def coordination_analysis(data_dict, pipeline, coordination_cutoff, set_frame, set_frame_temp):
    """Save the per-particle 'Coordination' property of each simulation to "..._coordination.txt".

    A ``CoordinationAnalysisModifier`` is appended to ``pipeline`` for the
    duration of the call and removed afterwards, even if the analysis raises,
    so the shared pipeline is not left with a stale modifier.

    Args:
        data_dict: Mapping of unique_key to a sorted list of dump file paths.
        pipeline: OVITO pipeline the modifier is temporarily added to.
        coordination_cutoff: Cutoff passed to ``CoordinationAnalysisModifier``.
        set_frame: Zero-based frame index to analyse.
        set_frame_temp: Temperature label used in the output file name.
    """
    # Coordination Analysis Modifier
    coord_mod = CoordinationAnalysisModifier(cutoff=coordination_cutoff)
    pipeline.modifiers.append(coord_mod)

    try:
        # Analysis
        file_analysis(data_dict, pipeline, "coordination.txt",
                      lambda data: data.particles['Coordination'],
                      "ovito", set_frame, set_frame_temp)
    finally:
        # Remove Modifier
        pipeline.modifiers.pop()
    
def energy_analysis(data_dict, pipeline, set_frame, set_frame_temp):
    """Save the per-particle "c_pea" property of each simulation to "..._potential_energy.txt".

    No modifier is added to ``pipeline``; the property is read directly from
    the frame computed by ``file_analysis``.

    Args:
        data_dict: Mapping of unique_key to a sorted list of dump file paths.
        pipeline: OVITO pipeline used to load and compute each simulation.
        set_frame: Zero-based frame index to analyse.
        set_frame_temp: Temperature label used in the output file name.
    """
    def extract_potential_energy(frame_data):
        # "c_pea" is the particle property name as it appears in the dump files
        return frame_data.particles["c_pea"]

    file_analysis(
        data_dict=data_dict,
        pipeline=pipeline,
        data_tag="potential_energy.txt",
        data_function=extract_potential_energy,
        analysis_tool="ovito",
        set_frame=set_frame,
        set_frame_temp=set_frame_temp,
    )

def RDF_analysis(data_dict, pipeline, RDF_cutoff, bins, set_frame, set_frame_temp):
    """Save the 'coordination-rdf' table of each simulation to "..._RDF.txt".

    A ``CoordinationAnalysisModifier`` is appended to ``pipeline`` for the
    duration of the call and removed afterwards, even if the analysis raises,
    so the shared pipeline is not left with a stale modifier.

    Args:
        data_dict: Mapping of unique_key to a sorted list of dump file paths.
        pipeline: OVITO pipeline the modifier is temporarily added to.
        RDF_cutoff: Cutoff passed to ``CoordinationAnalysisModifier``.
        bins: Passed as ``number_of_bins`` to the modifier.
        set_frame: Zero-based frame index to analyse.
        set_frame_temp: Temperature label used in the output file name.
    """
    # Coordination Analysis Modifier for RDF
    RDF_coord_mod = CoordinationAnalysisModifier(cutoff=RDF_cutoff, number_of_bins=bins)
    pipeline.modifiers.append(RDF_coord_mod)

    try:
        # Analysis
        file_analysis(data_dict, pipeline, "RDF.txt",
                      lambda data: data.tables['coordination-rdf'].xy(),
                      "ovito", set_frame, set_frame_temp)
    finally:
        # Remove Modifier
        pipeline.modifiers.pop()

def bond_length_analysis(data_dict, pipeline, bins, bond_length,
                         bond_length_analysis_cutoff, set_frame, set_frame_temp):
    """Save the "bond-length-distr" table of each simulation to "..._bond_length.txt".

    A ``CreateBondsModifier`` and a ``BondAnalysisModifier`` are appended to
    ``pipeline`` for the duration of the call and both removed afterwards,
    even if the analysis raises.

    Args:
        data_dict: Mapping of unique_key to a sorted list of dump file paths.
        pipeline: OVITO pipeline the modifiers are temporarily added to.
        bins: Passed as ``bins`` to ``BondAnalysisModifier``.
        bond_length: Cutoff passed to ``CreateBondsModifier``.
        bond_length_analysis_cutoff: Passed as ``length_cutoff`` to
            ``BondAnalysisModifier``.
        set_frame: Zero-based frame index to analyse.
        set_frame_temp: Temperature label used in the output file name.
    """
    # Create Bonds Modifier
    bond_modifier = CreateBondsModifier(cutoff=bond_length)
    # Bond Analysis Modifier
    bond_analysis_mod = BondAnalysisModifier(bins = bins, length_cutoff=bond_length_analysis_cutoff)

    pipeline.modifiers.append(bond_modifier)
    pipeline.modifiers.append(bond_analysis_mod)

    try:
        # Analysis
        file_analysis(data_dict, pipeline, "bond_length.txt",
                      lambda data: data.tables["bond-length-distr"].xy(),
                      "ovito", set_frame, set_frame_temp)
    finally:
        # Remove Modifiers
        pipeline.modifiers.pop()
        pipeline.modifiers.pop()
 
def force_analysis(data_dict, pipeline, set_frame, set_frame_temp):
    """Save the per-particle "Force" property of each simulation to "..._forces.txt".

    No modifier is added to ``pipeline``; the property is read directly from
    the frame computed by ``file_analysis``.

    Args:
        data_dict: Mapping of unique_key to a sorted list of dump file paths.
        pipeline: OVITO pipeline used to load and compute each simulation.
        set_frame: Zero-based frame index to analyse.
        set_frame_temp: Temperature label used in the output file name.
    """
    def extract_forces(frame_data):
        return frame_data.particles["Force"]

    file_analysis(
        data_dict=data_dict,
        pipeline=pipeline,
        data_tag="forces.txt",
        data_function=extract_forces,
        analysis_tool="ovito",
        set_frame=set_frame,
        set_frame_temp=set_frame_temp,
    )

def bond_angle_analysis(data_dict, pipeline, bins, bond_length, set_frame, set_frame_temp):
    """Save the "bond-angle-distr" table of each simulation to "..._bond_angle.txt".

    A ``CreateBondsModifier`` and a ``BondAnalysisModifier`` are appended to
    ``pipeline`` for the duration of the call and both removed afterwards,
    even if the analysis raises.

    Args:
        data_dict: Mapping of unique_key to a sorted list of dump file paths.
        pipeline: OVITO pipeline the modifiers are temporarily added to.
        bins: Passed as ``bins`` to ``BondAnalysisModifier``.
        bond_length: Cutoff passed to ``CreateBondsModifier``.
        set_frame: Zero-based frame index to analyse.
        set_frame_temp: Temperature label used in the output file name.
    """
    # Create Bonds Modifier
    bond_modifier = CreateBondsModifier(cutoff=bond_length)
    # Bond Analysis Modifier
    bond_analysis_mod = BondAnalysisModifier(bins = bins)

    pipeline.modifiers.append(bond_modifier)
    pipeline.modifiers.append(bond_analysis_mod)

    try:
        # Analysis
        file_analysis(data_dict, pipeline, "bond_angle.txt",
                      lambda data: data.tables["bond-angle-distr"].xy(),
                      "ovito", set_frame, set_frame_temp)
    finally:
        # Remove Modifiers
        pipeline.modifiers.pop()
        pipeline.modifiers.pop()
# --------------------------------------------

# ------ MatSciPy Analysis ------
def ring_analysis(data_dict, max_ring_size, bond_length, set_frame, set_frame_temp):
    """Save the ``ring_statistics`` result of each simulation to "..._ring.txt".

    The dump file at index ``set_frame`` is read by ``file_analysis`` through
    its "matscipy" branch, which does not use an OVITO pipeline. ``None`` is
    therefore passed for the pipeline instead of relying on a module-level
    ``pipeline`` variable happening to exist.

    Args:
        data_dict: Mapping of unique_key to a sorted list of dump file paths.
        max_ring_size: Passed as ``maxlength`` to ``ring_statistics``.
        bond_length: Passed as ``cutoff`` to ``ring_statistics``.
        set_frame: Zero-based index of the dump file to analyse.
        set_frame_temp: Temperature label used in the output file name.
    """
    # Analysis
    file_analysis(data_dict, None, "ring.txt",
                  lambda data: ring_statistics(data, cutoff=bond_length, maxlength=max_ring_size),
                  "matscipy", set_frame, set_frame_temp)

# -----------------------
# Use carefully - will regenerate ALL files (apart from renders)
# Read by file_analysis(): when False, data files that already exist are skipped;
# when True they are recomputed and overwritten.
REPLACE_OLD_FILES = True

# Ask for interactive confirmation; any answer other than "y" turns replacement off.
if REPLACE_OLD_FILES:
    confirm = input("Are you sure you want to replace old files? (y/n): ").strip().lower()
    if confirm != "y":
        REPLACE_OLD_FILES = False
# -----------------------

# Frame numbers are zero-based indices into each simulation's sorted dump files.
# The *_temp values are only used to label the output file names ("<...>_<temp>K_<tag>").
final_frame = 95
final_temp = 300

final_liquid_frame = 60
liquid_temp = 5000

# Each call below temporarily adds the modifiers it needs to the shared pipeline,
# writes one data file per simulation, then removes its modifiers again.
force_analysis(imported_simulation_files, pipeline, final_frame, final_temp)

bond_length_analysis(imported_simulation_files, pipeline, bins=1000, 
                     bond_length = 1.85, bond_length_analysis_cutoff=2.0,
                     set_frame=final_frame, set_frame_temp=final_temp)

# RDF at the final frame ...
RDF_analysis(imported_simulation_files, pipeline, 
             RDF_cutoff=6.0, bins=200, 
             set_frame=final_frame, set_frame_temp=final_temp)

# ... and at the frame labelled with liquid_temp
RDF_analysis(imported_simulation_files, pipeline, 
             RDF_cutoff=6.0, bins=200, set_frame=final_liquid_frame, 
             set_frame_temp=liquid_temp)

ring_analysis(imported_simulation_files, max_ring_size=24, 
              bond_length=1.85, set_frame=final_frame, 
              set_frame_temp=final_temp)

coordination_analysis(imported_simulation_files, pipeline, 
                      coordination_cutoff=1.85, set_frame=final_frame, 
                      set_frame_temp=final_temp)

energy_analysis(imported_simulation_files, pipeline,
                set_frame=final_frame, set_frame_temp=final_temp)

bond_angle_analysis(imported_simulation_files, pipeline, bins=40, 
                    bond_length = 1.85, set_frame=final_frame, 
                    set_frame_temp=final_temp)

bond_angle_analysis(imported_simulation_files, pipeline, bins=40, 
                    bond_length = 1.85, set_frame=final_liquid_frame, 
                    set_frame_temp=liquid_temp)

# Renders (.ovito / .png / .avi) are produced last; existing renders are never replaced.
ovito_analysis(imported_simulation_files, pipeline)

In [None]:
# ------ GRAPHICAL ANALYSIS --------

# Graphical data points are means of all repeat runs with errors given as 1 standard deviation
import pandas as pd
import numpy as np
import re

# Create Graphical Analysis Directories
from pathlib import Path

# ------ FIGURE FORMATTING ------
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
plt.style.use('1_column_fig.mplstyle')
# -------------------------------

# ------ IMPORT DATA FILES ------
# 1. Searches recursively through the specified directory
# 2. Creates a dataframe: 
#    imported_data_files_df, with columns:
#        element, potential, simulation type, atoms, density, run number, data tag, file_data
# 3. Only includes files with the specified "data_tag" 

def import_data_files(directory, data_tag):
    """Load every analysis data file below ``directory`` that carries ``data_tag``.

    File names are expected to look like
    ``<element>_<potential>_<type>_<atoms>_<density>_<run>_<temperature>K_<data_tag>``.
    Files whose names do not fit are reported and counted as skipped; files
    with a different tag are ignored silently; files that cannot be loaded
    with ``np.loadtxt`` or that are empty are reported and left out.

    Files are visited in sorted path order so the row order is reproducible.

    Args:
        directory: Root directory to search recursively (str or Path).
        data_tag: Tag to select, including extension, e.g. "RDF.txt".

    Returns:
        pandas.DataFrame with one row per loaded file and the columns
        element, potential, simulation_type, num_atoms, density, run_number,
        temperature, data_tag (all strings parsed from the file name) and
        file_data (the loaded numpy array). The columns are present even when
        no file was loaded, so callers can index them without a KeyError.
    """
    directory = Path(directory)

    # Regex pattern for reading "unique_key" + "data_tag"
    data_file_name = re.compile(
    r'^(?P<element_symbol>[A-Za-z]{1,6})_'          # e.g. C
    r'(?P<potential_name>[^_]+)_'                   # e.g. GAP17
    r'(?P<simulation_type>[^_]+)_'                  # e.g. NVT
    r'(?P<num_atoms>\d+)_'                          # e.g. 64
    r'(?P<density>[\d.eE+-]+)_'                     # e.g. 1.5 or 1.85e+00
    r'(?P<run>\d+)_'                                # e.g. 1 (run number)
    r'(?P<temperature>\d+)K'                       # e.g. 300 or 5000
    r'(?:_(?P<data_tag>.+)|(?P<data_tag2>\..+))$'   # e.g. ring.txt or .png (allows underscore after run_number or .avi etc...)   
    )

    columns = ["element", "potential", "simulation_type", "num_atoms",
               "density", "run_number", "temperature", "data_tag", "file_data"]

    imported_data_files_rows = []

    skipped_data_files_counter = 0
    imported_data_files_counter = 0

    for path in sorted(directory.rglob("*")):

        if not path.is_file(): # Filters for files not directories
            continue

        m = data_file_name.match(path.name) # Enforce data file naming
        if not m:
            print(f"ERROR: Skipped {path.name}. Invalid data file name")
            skipped_data_files_counter += 1
            continue

        # The tag is captured either after an underscore or as a bare extension
        file_data_tag = (m.group("data_tag") or m.group("data_tag2") or "").lstrip("._")

        # Only import files with the correct data_tag
        if file_data_tag != data_tag:
            continue

        # Load data from each file
        try:
            file_data = np.loadtxt(path, delimiter = ',')
        except Exception as e:
            print(f"Unable to load {path}: {e}, skipping...")
            continue

        # Check that data files aren't empty
        if file_data is None or getattr(file_data, "size", 0) == 0:
            print(f"No data found in {path}")
            continue

        imported_data_files_rows.append({"element": m.group("element_symbol"),
                                          "potential": m.group("potential_name"),
                                          "simulation_type": m.group("simulation_type"),
                                          "num_atoms": m.group("num_atoms"),
                                          "density": m.group("density"),
                                          "run_number": m.group("run"),
                                          "temperature": m.group("temperature"),
                                          "data_tag": file_data_tag,
                                          "file_data": file_data})

        imported_data_files_counter += 1

    # Explicit columns keep the schema stable when no rows were collected
    imported_data_files_df = pd.DataFrame(imported_data_files_rows, columns=columns)

    print()
    if imported_data_files_counter:
        print(f"Imported {imported_data_files_counter} {data_tag} files")
    if skipped_data_files_counter:
        print(f"Skipped {skipped_data_files_counter} {data_tag} files")

    return imported_data_files_df
#-------------------------------

# ------ DATA ANALYSIS ----------
# 1. Imports data using import_data_files()
# 2. If independent_var=False: independent variable is defaulted to float(density)
#       a) Applies unique_data_function to file_data to generate scalar values
#       b) Returns dataframe with scalar values in column: dep_var
#       c) Transforms density column into column: ind_var
#    If independent_var=True:
#       a) Creates 2 columns from file_data: ind_var, dep_var (explodes arrays into scalar values)
#       b) If the array is 1D (in the case of MatSciPy ring data), the index is taken as the ind_var
# 4. Takes the mean and std of dep_var for each run number
# 5. Returns analysed_data_files_df with 'mean', 'std' columns for dep var

# Note: Assumes no analysis is required when both independent and dependent variables are imported

def data_analysis(df, unique_data_function, independent_var):
    """Collapse imported data files into a per-group mean and standard deviation.

    Args:
        df: DataFrame of imported files with a ``file_data`` column. When
            ``independent_var`` is truthy it must also provide ``element``,
            ``potential``, ``simulation_type``, ``num_atoms``, ``density``,
            ``data_tag``, ``run_number`` and ``temperature``; otherwise only
            ``density`` is required in addition to ``file_data``.
            The frame passed in is never modified.
        unique_data_function: Callable applied to each ``file_data`` entry to
            produce the dependent value. Only used when ``independent_var`` is
            falsy.
        independent_var: When truthy, every ``file_data`` array is expanded
            into (``ind_var``, ``dep_var``) rows; 1D arrays use their index as
            ``ind_var``. When falsy, ``ind_var`` is ``density`` cast to float.

    Returns:
        A DataFrame holding the grouping columns plus ``mean`` and ``std`` of
        ``dep_var``, or ``None`` when nothing could be analysed (every array
        was empty, or ``unique_data_function`` raised).

    Raises:
        ValueError: If an array is neither 1D nor two-column 2D, or if none of
            the grouping columns exist in the analysed frame.
    """

    def describe_tags():
        # Name the affected data files instead of dumping a whole Series into the message
        if "data_tag" not in df.columns:
            return "unknown data_tag"
        return ", ".join(str(tag) for tag in df["data_tag"].unique())

    if independent_var:
        # Explode each array into scalar (ind_var, dep_var) rows
        pieces = []
        meta_cols = ['element','potential','simulation_type','num_atoms','density','data_tag','run_number', 'temperature']

        for _, row in df.iterrows():

            arr = np.asarray(row['file_data'])

            # Skip empty arrays
            if arr.size == 0:
                continue

            # Add an index to 1D arrays before expanding into 2 columns (for matsci ring files)
            # The index == ring size
            if arr.ndim == 1:
                arr = np.column_stack((np.arange(arr.size), arr))

            # Verify that 1D array has been converted to 2D, or input file was originally 2D
            if arr.ndim != 2 or arr.shape[1] != 2:
                raise ValueError("Imported dataframe not recognized as accepted 1D or 2D. Check data shape"
                                 "\n If importing 1D data, verify that the data is indexable (this generates the second column")

            piece = pd.DataFrame(arr, columns=['ind_var', 'dep_var'])

            # Attach the metadata of the source file to every exploded row
            for k in meta_cols:
                piece[k] = row[k]

            pieces.append(piece)

        if not pieces:
            print(f"No valid arrays to explode for {describe_tags()}")
            return None

        # Concatenate into one DataFrame
        analysed_data_files_df = pd.concat(pieces, ignore_index=True)

    else:
        # Work on a copy so the columns added below never leak into the caller's frame
        analysed_data_files_df = df.copy()

        # Perform unique_data_function to generate scalar values
        try:
            analysed_data_files_df['dep_var'] = analysed_data_files_df['file_data'].apply(unique_data_function)
            # ind_var is a copy of density by default (formatted as float)
            analysed_data_files_df['ind_var'] = analysed_data_files_df['density'].astype(float)
        except Exception as e:
            print(f"ERROR: Failed unique_data_function for {describe_tags()}: {e}")
            return None

    # Find mean/std of each scalar value in dep_var.
    # run_number is deliberately absent from the keys so that runs are averaged together.
    grouping_keys = ["element","potential","simulation_type","num_atoms","density", "data_tag", "temperature", "ind_var"]
    grouped_runs = [c for c in grouping_keys if c in analysed_data_files_df.columns]
    if not grouped_runs:
        raise ValueError("No grouping keys found in imported DataFrame. Check import_data_files output.")

    analysed_data_files_df = analysed_data_files_df.groupby(grouped_runs).agg(
        mean=('dep_var', 'mean'),
        std=('dep_var', 'std'),
    ).reset_index()

    return analysed_data_files_df
# ---------------------------------

# ------ PLOT ------ 

# Additional file tag: for adding sp/sp2/sp3 or 5/6/7 ring labels
def single_plot(df, plot_type, x_label, y_label, chart_title, independent_var, additional_file_tag):
    """Save one mean/std figure per simulation group found in ``df``.

    With ``independent_var`` truthy, a figure is written for every
    (element, potential, simulation type, atoms, density, data tag, temperature)
    group whose density is listed in ``key_analysis_densities``. Otherwise the
    density is left out of the grouping and each figure plots against it.

    Args:
        df: Analysed DataFrame providing ``ind_var``, ``mean``, ``std`` and the
            grouping columns.
        plot_type: ``"marker"`` for error bars, ``"line"`` for a line with a
            shaded std band. Any other value leaves the axes without data.
        x_label, y_label, chart_title: Axis labels and title.
        independent_var: Selects the grouping described above.
        additional_file_tag: Optional label appended to the file name
            (ignored when falsy).
    """

    def make_single_plot(group, plot_type, x_label, y_label, chart_title, graph_save_path):
        """Draw and save one figure; return False when an existing file is kept."""
        target = Path(graph_save_path)

        # An existing file is only replaced when overwriting is enabled
        if target.exists():
            if not OVERWRITE_GRAPH:
                return False
            target.unlink()

        fig, ax = plt.subplots()
        x = group["ind_var"]
        mean = group["mean"]
        std = group["std"]

        if plot_type == "marker":
            ax.errorbar(x, mean, yerr=std, capthick=0.5, elinewidth=0.5)
        elif plot_type == "line":
            ax.plot(x, mean, label="Mean", marker="")
            ax.fill_between(x, mean - std, mean + std, alpha=0.25)

        ax.xaxis.set_major_locator(ticker.MaxNLocator(number_of_x_axis_ticks))
        ax.set(xlabel=x_label, ylabel=y_label, title=chart_title)

        fig.savefig(target)
        plt.close(fig)  # release the figure's memory

        print(f"{target.name} created")
        return True

    # Optional trailing label on the file stem (e.g. sp/sp2/sp3 or a ring size)
    tag_suffix = f"_{additional_file_tag}" if additional_file_tag else ""
    skipped_graphs = 0

    if independent_var:
        # One figure per density
        keys = ['element', 'potential', 'simulation_type', 'num_atoms', 'density', 'data_tag', "temperature"]
        for (elem, pot, sim, atoms, dens, tag, temp), frame in df.groupby(keys):

            # Only the key analysis densities are plotted
            if float(dens) not in key_analysis_densities:
                continue

            stem = Path(tag).stem
            out_dir = (
                single_plot_dir
                / f"Element: {elem}"
                / f"Potential: {pot}"
                / f"Type: {sim}"
                / f"Atoms: {atoms}"
                / f"Plot Type: {stem}"
                / f"Temperature: {temp}"
            )
            out_dir.mkdir(parents=True, exist_ok=True)

            file_name = f"{elem}_{pot}_{sim}_{atoms}_{dens}_{temp}K_{stem}{tag_suffix}.{set_global_file_format}"

            # The title carries the density and temperature of this group
            title = f"{chart_title} {dens} g/cm³ {temp} K"

            if not make_single_plot(frame, plot_type, x_label, y_label, title, out_dir / file_name):
                skipped_graphs += 1

    else:
        # One figure per group, plotted against density
        keys = ['element', 'potential', 'simulation_type', 'num_atoms', 'data_tag', "temperature"]
        for (elem, pot, sim, atoms, tag, temp), frame in df.groupby(keys):

            stem = Path(tag).stem
            out_dir = (
                single_plot_dir
                / f"Element: {elem}"
                / f"Potential: {pot}"
                / f"Type: {sim}"
                / f"Atoms: {atoms}"
                / "Plot Type: Density"
                / f"Temperature: {temp}"
            )
            out_dir.mkdir(parents=True, exist_ok=True)

            file_name = f"{elem}_{pot}_{sim}_{atoms}_{temp}K_{stem}{tag_suffix}.{set_global_file_format}"

            if not make_single_plot(frame, plot_type, x_label, y_label, chart_title, out_dir / file_name):
                skipped_graphs += 1

    if skipped_graphs:
        print(f"Skipped single plots: {skipped_graphs}")

def potential_comparison_plot(df, plot_type, x_label, y_label, chart_title, independent_var, additional_file_tag):
    """Save figures that overlay every potential in ``potential_comparison_list``.

    With ``independent_var`` truthy, one figure is written per
    (element, simulation type, atoms, density, data tag, temperature) group
    whose density is listed in ``key_analysis_densities``. Otherwise density is
    left out of the grouping and each figure plots against it. Groups that do
    not contain data for every potential in ``potential_comparison_list`` are
    ignored (and not counted as skipped).

    Args:
        df: Analysed DataFrame providing ``ind_var``, ``mean``, ``std``,
            ``potential`` and the grouping columns.
        plot_type: ``"marker"`` for error bars, ``"line"`` for a line with a
            shaded std band.
        x_label, y_label, chart_title: Axis labels and title.
        independent_var: Selects the grouping described above.
        additional_file_tag: Optional label inserted into the file name
            (ignored when falsy).
    """

    specific_potential_comparison_dir_name = "_".join(potential_comparison_list)
    potentials_required = set(potential_comparison_list)

    # Optional label placed between the data tag and the potential list in the file name
    tag_suffix = f"_{additional_file_tag}" if additional_file_tag else ""

    def make_comparison_plot(group, plot_type, x_label, y_label, chart_title, graph_save_path):
        """Draw and save one comparison figure; return False when an existing file is kept."""
        graph_save_path = Path(graph_save_path)

        # Decide whether to skip before any figure is allocated
        if graph_save_path.exists():
            if not OVERWRITE_GRAPH:
                return False
            graph_save_path.unlink()

        fig, ax = plt.subplots()
        ax.xaxis.set_major_locator(ticker.MaxNLocator(number_of_x_axis_ticks))

        for potential, subdf in group.groupby('potential'):

            # Only use potentials in potential_comparison_list
            if potential not in potentials_required:
                continue

            x, mean, std = subdf["ind_var"], subdf["mean"], subdf["std"]

            if plot_type == "marker":
                ax.errorbar(x, mean, yerr=std, capthick=0.5, elinewidth=0.5, label=potential)
            elif plot_type == "line":
                ax.plot(x, mean, label=potential, marker="")
                ax.fill_between(x, mean - std, mean + std, alpha=0.15)

        ax.set(xlabel=x_label, ylabel=y_label, title=chart_title)
        ax.legend()

        # Save Plot
        fig.savefig(graph_save_path)
        plt.close(fig)  # Close figure to free memory
        print(f"{graph_save_path.name} created")
        return True

    def has_all_potentials(group):
        # A comparison is only meaningful when every requested potential has data
        return potentials_required.issubset(set(group['potential'].unique()))

    def save_comparison(group, graph_file_dir, file_stem, title):
        # Shared tail of both branches: create the folder, build the path, draw
        graph_file_dir.mkdir(parents=True, exist_ok=True)
        graph_file_name = f"{file_stem}{tag_suffix}_{specific_potential_comparison_dir_name}.{set_global_file_format}"
        return make_comparison_plot(group, plot_type, x_label, y_label, title, graph_file_dir / graph_file_name)

    skipped_graphs = 0

    if independent_var:

        # Group by unique_data_key including density (potential is left out so it can be compared)
        group_cols = ['element', 'simulation_type', 'num_atoms', 'density', 'data_tag', 'temperature']
        for (e, s, n, d, da, t), group in df.groupby(group_cols):

            # Only analyse key analysis densities
            if float(d) not in key_analysis_densities:
                continue

            if not has_all_potentials(group):
                continue

            da = Path(da).stem
            graph_file_dir = (
                potential_comparison_dir
                / f"Element: {e}"
                / f"Type: {s}"
                / f"Atoms: {n}"
                / specific_potential_comparison_dir_name
                / f"Plot Type: {da}"
                / f"Temperature: {t}"
            )

            # Add density and temperature to the chart title
            d_chart_title = f"{chart_title} {d} g/cm³ {t} K"

            if not save_comparison(group, graph_file_dir, f"{e}_{s}_{n}_{d}_{t}K_{da}", d_chart_title):
                skipped_graphs += 1

    else:

        # Group by unique_data_key excluding density and potential for each vs. density plot
        group_cols = ['element', 'simulation_type', 'num_atoms', 'data_tag', 'temperature']
        for (e, s, n, da, t), group in df.groupby(group_cols):

            if not has_all_potentials(group):
                continue

            da = Path(da).stem
            graph_file_dir = (
                potential_comparison_dir
                / f"Element: {e}"
                / f"Type: {s}"
                / f"Atoms: {n}"
                / specific_potential_comparison_dir_name
                / "Plot Type: Density"
                / f"Temperature: {t}"
            )

            if not save_comparison(group, graph_file_dir, f"{e}_{s}_{n}_{t}K_{da}", chart_title):
                skipped_graphs += 1

    if skipped_graphs:
        print(f"Skipped potential comparison plots: {skipped_graphs}")
# ------------------

# -------------------------------------------------------------------------------------------------
# Instructions: 
# 1. Assign data_tag, chart_title, save_file_name (RAW TEXT ONLY, NO PATHS), and y_label 
# 2. Create a function that returns a scalar from a given structure file (e.g. mean bond length)
# 3. Call import_analyse_plot, using your new function as its unique_data_function
# 4. Set independent_var=True to plot for only 1 density (e.g. RDF, ring histogram, bond angle distribution)
#   a) This requires both an independent and dependent variable, sometimes included in structural_analysis_file, otherwise must be generated
#   b) Default generation for a 1D numpy array returns the array index as the independent variable
# --------------------------------------------------------------------------------------------------

# Function Wrapper
def import_analyse_plot(directory, data_tag, unique_data_function, 
                        plot_type, x_label, y_label, chart_title, 
                        independent_var, additional_file_tag):
    """Import the ``data_tag`` files under ``directory``, analyse them and plot the result.

    Args:
        directory: Passed to ``import_data_files``.
        data_tag: Passed to ``import_data_files`` to select the files.
        unique_data_function: Callable reducing one file's data to a scalar;
            passed to ``data_analysis``.
        plot_type, x_label, y_label, chart_title: Forwarded to the plot helpers.
        independent_var: Forwarded to ``data_analysis`` and the plot helpers.
        additional_file_tag: Optional label forwarded to the plot helpers.

    Returns:
        None. Nothing is plotted when no files were imported or the analysis
        produced no data.
    """

    # Import Data Files
    df = import_data_files(directory, data_tag)
    if df is None or df.empty:
        print(f"No data files imported for data_analysis()")
        return None

    # Analyse Data Files
    df = data_analysis(df, unique_data_function, independent_var)

    # data_analysis() returns None when it cannot analyse the files; stop here
    # instead of letting the plot helpers fail on None.groupby
    if df is None or df.empty:
        print(f"No analysed data to plot for {data_tag}")
        return None

    # Plot
    single_plot(df, plot_type, x_label, y_label, chart_title, independent_var, additional_file_tag)
    potential_comparison_plot(df, plot_type, x_label, y_label, chart_title, independent_var, additional_file_tag)

# Coordination analysis 
def coordination_analysis(directory, coordination_number):
    """Plot the fraction of atoms with ``coordination_number`` neighbours vs. density.

    Coordination numbers 2, 3 and 4 are labelled sp, sp2 and sp3. Any other
    value prints an error and falls back to generic labels before plotting.
    """

    hybridisation_names = {2: "sp", 3: "sp2", 4: "sp3"}
    env = hybridisation_names.get(coordination_number)

    if env is not None:
        y_label = f"{env} Carbon Proportion"
    else:
        print("ERROR: Coordination number should be between 2 and 4")
        env = f"{coordination_number}_coordinate"
        y_label = f"{coordination_number} coordinate atoms"

    # Scalar reducer: share of entries equal to the requested coordination number
    def coord_function(data: np.ndarray):
        matching = np.count_nonzero(data == coordination_number)
        return float(matching / data.size)

    import_analyse_plot(
        directory,
        "coordination.txt",
        coord_function,
        plot_type="line",
        x_label="Density (g/cm³)",
        y_label=y_label,
        chart_title="Coordination vs. Density",
        independent_var=False,
        additional_file_tag=env,
    )

# Ring Size analysis
def ring_analysis(directory, ring_size):
    """Plot the number of ``ring_size``-membered rings against density.

    The ring count is read from position ``ring_size`` of each ``ring.txt``
    array; arrays too short to contain that position contribute NaN.
    """

    # Scalar reducer: entry at index ring_size, or NaN when the array is too short
    def ring_function(data: np.ndarray):
        return float(data[ring_size]) if ring_size < len(data) else float("nan")

    import_analyse_plot(
        directory,
        "ring.txt",
        ring_function,
        plot_type="line",
        x_label="Density (g/cm³)",
        y_label=f"{ring_size} Membered Rings",
        chart_title=f"Number of {ring_size} Membered Rings vs. Density",
        independent_var=False,
        additional_file_tag=ring_size,
    )
   
# Potential energy analysis 
def potential_energy_analysis(directory):
    """Plot the mean of each ``potential_energy.txt`` array against density."""

    # Scalar reducer: arithmetic mean over the whole array
    def PE_function(data: np.ndarray):
        return np.mean(data)

    import_analyse_plot(
        directory,
        "potential_energy.txt",
        PE_function,
        plot_type="line",
        x_label="Density (g/cm³)",
        y_label='Mean Potential Energy (eV)',
        chart_title="Mean Potential Energy vs. Density",
        independent_var=False,
        additional_file_tag=None,
    )

# Bond Length analysis
def bond_length_analysis(directory):
    """Plot the weighted mean of each ``bond_length.txt`` array against density."""

    data_tag = "bond_length.txt"
    chart_title = f"Mean Bond Length vs. Density"
    x_label = "Density (g/cm³)"
    y_label = 'Mean Bond Length (Å)'

    # Scalar reducer: average of column 0 weighted by column 1.
    # NOTE(review): presumably column 0 holds bond lengths and column 1 their
    # frequencies — confirm against the file writer.
    def bond_length_function(data: np.ndarray):
        return np.average(data[:, 0], weights=data[:, 1])

    import_analyse_plot(directory, data_tag, bond_length_function, 
                        "line", x_label, y_label, chart_title, 
                        independent_var=False, additional_file_tag=None)

# Force Analysis
def force_analysis(directory):
    """Plot the mean row-wise norm of each ``forces.txt`` array against density."""

    # Scalar reducer: norm of every row (axis=1), then the mean of those norms
    def force_function(data: np.ndarray):
        magnitudes = np.linalg.norm(data, axis=1)
        return np.mean(magnitudes)

    import_analyse_plot(
        directory,
        "forces.txt",
        force_function,
        plot_type="line",
        x_label="Density (g/cm³)",
        y_label="Mean Force Magnitude (eV/Å)",
        chart_title="Mean Force Magnitude vs. Density",
        independent_var=False,
        additional_file_tag=None,
    )

# Bond Angle Analysis
def bond_angle_analysis(directory):
    """Plot the weighted mean of each ``bond_angle.txt`` array against density."""

    data_tag = "bond_angle.txt"
    chart_title = f"Mean Bond Angle"
    x_label = "Density (g/cm³)"
    y_label = "Degrees (°)"

    # Scalar reducer: average of column 0 weighted by column 1.
    # NOTE(review): presumably column 0 holds angles and column 1 their
    # frequencies — confirm against the file writer.
    def bond_angle_function(data: np.ndarray):
        return np.average(data[:, 0], weights=data[:, 1])

    import_analyse_plot(directory, data_tag, bond_angle_function, 
                        "line", x_label, y_label, chart_title, 
                        independent_var=False, additional_file_tag=None)

# RDF Analysis
def RDF_analysis(directory):
    """Plot the ``RDF.txt`` data as line charts, using the file's own x values.

    No scalar reducer is supplied because ``independent_var=True``.
    """

    import_analyse_plot(
        directory,
        "RDF.txt",
        None,
        plot_type="line",
        x_label="r (Å)",
        y_label="g(r)",
        chart_title="Radial Distribution Function",
        independent_var=True,
        additional_file_tag=None,
    )

# Ring Histogram Analysis
def ring_size_distribution_analysis(directory):
    """Plot the ``ring.txt`` data as marker charts of frequency against ring size.

    No scalar reducer is supplied because ``independent_var=True``.
    """

    import_analyse_plot(
        directory,
        "ring.txt",
        None,
        plot_type="marker",
        x_label="Ring Size",
        y_label="Frequency",
        chart_title="Ring Size Distribution",
        independent_var=True,
        additional_file_tag=None,
    )

# Bond Angle distribution analysis
def bond_angle_distribution_analysis(directory):
    """Plot the ``bond_angle.txt`` data as line charts of frequency against angle.

    No scalar reducer is supplied because ``independent_var=True``.
    """

    import_analyse_plot(
        directory,
        "bond_angle.txt",
        None,
        plot_type="line",
        x_label="Degrees (°)",
        y_label="Frequency",
        chart_title="Bond Angle Distribution",
        independent_var=True,
        additional_file_tag=None,
    )

# Wrapper for all functions 
# (excluding energy/forces because these conflate the structural and energetic differences between the models)
def graphical_analysis_wrapper(OVERWRITE_GRAPH, set_data_dir):
    """Run every enabled structural analysis on ``set_data_dir``.

    Args:
        OVERWRITE_GRAPH: When truthy, the user is asked to confirm before
            existing graphs are replaced; any answer other than "y" disables
            overwriting for this run.
        set_data_dir: Directory handed to each analysis function.

    Side effects:
        Updates the module-level ``OVERWRITE_GRAPH`` flag with the final
        decision, because that flag is what the plot helpers consult.
    """

    # Troubleshooting: "AttributeError: 'NoneType' object has no attribute 'groupby'"
    #   means data_analysis() returned None, typically from passing
    #   independent_var=False for data that needs independent_var=True.

    if OVERWRITE_GRAPH:
        confirm = input("Are you sure you want to replace graphs? (y/n): ").strip().lower()
        if confirm != "y":
            OVERWRITE_GRAPH = False

    # The plot helpers read the module-level flag, not this parameter, so the
    # decision must be published there or declining the prompt has no effect.
    # (A `global` statement is not allowed for a name that is also a parameter.)
    globals()["OVERWRITE_GRAPH"] = OVERWRITE_GRAPH

    for i in range(2, 5):
        coordination_analysis(set_data_dir, coordination_number=i)

    bond_length_analysis(set_data_dir)

    # potential_energy_analysis(set_data_dir)

    for i in range(5, 8):
        ring_analysis(set_data_dir, ring_size=i)

    # force_analysis(set_data_dir)

    RDF_analysis(set_data_dir)

    ring_size_distribution_analysis(set_data_dir)

    bond_angle_distribution_analysis(set_data_dir)

    bond_angle_analysis(set_data_dir)
# -------------------------------------------------------------------------------------------------

# ------------------------- ANALYSIS PARAMETERS ------------------------------------
# Module-level settings read by the plotting functions above.
all_potentials = ["GAP17", "mace-mp-0b3", "mace-mpa-0", "mace-omat-0"]

# Potentials overlaid in each comparison plot. potential_comparison_plot() ignores any
# group that lacks data for one of these, so non-existent potentials produce no graphs.
potential_comparison_list = ["GAP17", "mace-mp-0b3", "mace-mpa-0", "mace-omat-0"]

# Densities to analyse when independent_var=True (one graph per density).
# Densities not listed here are skipped; non-existent densities produce no graphs.
key_analysis_densities = [1.5, 2.0, 2.5, 3.0, 3.5]

number_of_x_axis_ticks = len(key_analysis_densities) # Passed to ticker.MaxNLocator for the x axis

# When False, graphs whose file already exists are kept and counted as skipped
OVERWRITE_GRAPH = False

# File format: png for viewing, pdf for saving and publishing
# (used as the file extension and in the name of the graphs folder)
set_global_file_format = "pdf"
# ------------------------------------------------------------------------------------

# ------------------------------ FILE STORAGE ----------------------------------------
# Output folders, all rooted at the current working directory
cwd = Path.cwd()
analysis_dir = cwd / "Analysis"
structural_analysis_dir = analysis_dir / "Amorphous Structural Analysis"
graphs_dir = structural_analysis_dir / "Graphs"
graph_dir = graphs_dir / f"{set_global_file_format} Graphs"
single_plot_dir = graph_dir / "Single Plots"
potential_comparison_dir = graph_dir / "Potential Comparison Plots"
# -------------------------------------------------------------------------------------

# Entry point. data_dir is not defined in this section — NOTE(review): presumably set earlier in the file; confirm.
graphical_analysis_wrapper(OVERWRITE_GRAPH, data_dir)