In [None]:
# ------ EXTXYZ DATASETs ------
import warnings
warnings.filterwarnings('ignore', message='.*OVITO.*PyPI')
from analysis_utils import *
from pathlib import Path
import matplotlib

# Output directory handed to every scatter_plot call in this script (out_dir=...)
out_dir = Path("Analysis/Dataset_Analysis")

# Function to collect all desired data

# Reference energies that have already been evaluated, keyed by
# (model path, structure path). The reference does not depend on the dataset
# being analysed, so caching it avoids reloading the model and re-predicting
# the same structure on every analyse_dataset call.
_reference_energy_cache = {}


def _reference_energy(path_to_model, structure):
    """Return the predicted energy of ``structure``, evaluating it at most once."""
    key = (path_to_model, structure)
    if key not in _reference_energy_cache:
        calc = create_calculator(path_to_model=path_to_model, eval=True)
        _reference_energy_cache[key] = predict_energies(calc, structure)
    return _reference_energy_cache[key]


def analyse_dataset(dataset, energy_offset):
    """Collect energy, force and structural distributions for one dataset.

    Args:
        dataset: Dataset path, passed unchanged to the analysis_utils helpers.
        energy_offset: Value added to the predicted graphite energy before that
            sum is subtracted from the energy histogram centres.

    Returns:
        A 13-tuple, in this order:
        ``(e_centres, e_prob, f_centers, f_prob, r, g_r, r_std,
        angle, a_prob, a_std, volume, v_prob, v_std)``.
    """
    # Reference structure for calculating formation energies
    graphite_structure = "Carbon_Structures/Relaxed_Reference_Structures/Final Trajectory Frame/medium-mpa-0/Graphite.cif"
    graphite_energy = _reference_energy("potentials/medium-mpa-0.pt", graphite_structure)
    offset_graphite_energy = graphite_energy + energy_offset

    # Extract energies and forces from labelled structures and calculate formation energies
    # NOTE(review): this assumes parse_energies and predict_energies report
    # energies with the same normalisation (e.g. both per atom) - confirm.
    e_centres, e_prob = parse_energies(dataset, histogram=True, bins=20)
    e_centres = e_centres - offset_graphite_energy
    f_centers, f_prob = parse_forces(dataset, histogram=True, bins=200)

    # Calculate mean rdf, adf, voronoi volume distribution for the dataset
    r, g_r, r_std = calculate_rdf_mean_std(dataset, rdf_bins=200)
    angle, a_prob, a_std = calculate_adf_mean_std(dataset, adf_bins=40, angle_min=45)
    volume, v_prob, v_std = voronoi_volume_mean_std(dataset, bins=200)

    return e_centres, e_prob, f_centers, f_prob, r, g_r, r_std, angle, a_prob, a_std, volume, v_prob, v_std


# Training sets to analyse, plus the offset applied to the graphite reference energy
amorphous_training_set = "Labelled_Datasets_New/Sets/Amorphous_64.extxyz"
crystalline_training_set = "Labelled_Datasets_New/Sets/Crystalline_64-100.extxyz"
liquid_training_set = "Labelled_Datasets_New/Sets/Liquid_64.extxyz"
energy_offset = -148.74354927842734

# One colour and linestyle per dataset file name; colours are sampled from the
# start, midpoint and end of the viridis colormap
cmap = matplotlib.colormaps['viridis']
colors = [cmap(position) for position in (0.0, 0.5, 1.0)]
dataset_styling = {
    file_name: {'color': colour, 'marker': '', 'linestyle': linestyle}
    for file_name, colour, linestyle in zip(
        ('Amorphous_64.extxyz', 'Crystalline_64-100.extxyz', 'Liquid_64.extxyz'),
        colors,
        ('-', '--', ':'),
    )
}

# Analyse datasets
# Field names listed in the same order as the tuple returned by analyse_dataset
result_fields = (
    'e_centres', 'e_prob',
    'f_centers', 'f_prob',
    'r', 'g_r', 'r_std',
    'angle', 'a_prob', 'a_std',
    'volume', 'v_prob', 'v_std',
)

data = {}
for dataset in (amorphous_training_set, crystalline_training_set, liquid_training_set):
    # Results are keyed by file name so they line up with dataset_styling
    dataset_name = Path(dataset).name
    result = analyse_dataset(dataset, energy_offset)
    data[dataset_name] = dict(zip(result_fields, result))

# Create multiplot datasets
def _multiplot_entries(x_key, y_key, std_key=None):
    """Build one plot-series dict per entry of ``data``, in its insertion order.

    Each dict holds 'x' and 'y' (plus 'std' when ``std_key`` is given), a
    'label' set to the dataset name, and that dataset's styling entries.
    """
    entries = []
    for name, fields in data.items():
        entry = {'x': fields[x_key], 'y': fields[y_key]}
        if std_key is not None:
            entry['std'] = fields[std_key]
        entry['label'] = name
        entry.update(dataset_styling[name])
        entries.append(entry)
    return entries


# Structural distributions carry a standard deviation alongside the mean curve
rdf_multiplot_dataset = _multiplot_entries('r', 'g_r', 'r_std')
adf_multiplot_dataset = _multiplot_entries('angle', 'a_prob', 'a_std')
voronoi_multiplot_dataset = _multiplot_entries('volume', 'v_prob', 'v_std')

# Energy and force histograms have no 'std' entry
energy_multiplot_dataset = _multiplot_entries('e_centres', 'e_prob')
forces_multiplot_dataset = _multiplot_entries('f_centers', 'f_prob')

# Plot graphs
def _plot_distribution(multiplot_dataset, xlabel, ylabel, plot_name, peak_pick):
    """Call scatter_plot with the options shared by every figure in this script."""
    return scatter_plot(
        multiplot_datasets=multiplot_dataset,
        xlabel=xlabel,
        ylabel=ylabel,
        plot_name=plot_name,
        out_dir=out_dir,
        pdf=True,
        legend=True,
        peak_pick=peak_pick,
        shading_alpha=0.1,
        lower_std_limit=0,
    )


rdf_plot = _plot_distribution(
    rdf_multiplot_dataset, r'r ($\AA$)', "g(r)", "rdf", peak_pick=2
)

adf_plot = _plot_distribution(
    adf_multiplot_dataset, r'Angle ($^\circ$)', "Probability Density", "adf", peak_pick=1
)

voronoi_plot = _plot_distribution(
    voronoi_multiplot_dataset,
    "Scaled Particle Voronoi Volume",
    "Probability",
    "voronoi_volume",
    peak_pick=1,
)

energy_plot = _plot_distribution(
    energy_multiplot_dataset,
    "Formation Energy (eV / Atom)",
    "Probability",
    "energy",
    peak_pick=1,
)

forces_plot = _plot_distribution(
    forces_multiplot_dataset, r'Force (eV / $\AA$)', "Probability", "forces", peak_pick=1
)
