In [None]:
import json
import pathlib

import matplotlib.ticker
import matplotlib.pylab as plt
import pandas as pd

from hubbardml import keys
from hubbardml import plots
from hubbardml import datasets

import predict_iterations

In [None]:
# File names looked up below: DATASET is read from the experiment directory and
# from its "iter=<n>" sub-directories, RESULTS_FILE from the experiment directory.
RESULTS_FILE = "hubbard_u_iterations.json"
DATASET = "dataset.json"

# Directory of the training experiment analysed by this notebook; change this
# to point the whole notebook at a different run.
EXPERIMENT_DIR = pathlib.Path(
    "outputs/predict_iterations/batch_size=16,max_epochs=6000/2024-04-04_16-18-31"
)

In [None]:
# Read the dataset stored at the top level of the experiment directory and
# display it as the cell output.
dataset_path = EXPERIMENT_DIR.joinpath(DATASET)
uv_data = pd.read_json(dataset_path)
uv_data

In [None]:
def create_plots(df: pd.DataFrame, logy=False, scale=1.0, include_train=False):
    """Plot the RMSE columns of ``df`` against ``keys.UV_ITER`` on a new figure.

    The model (``MODEL_RMSE``) and reference (``REF_RMSE``) columns are always
    drawn. The training column (``TRAIN_RMSE``) is added only when
    ``include_train`` is set *and* the column is present in ``df``; otherwise the
    two-series plot is produced.

    :param df: frame holding a ``keys.UV_ITER`` column and the RMSE columns named
        by ``predict_iterations.Keys``
    :param logy: forwarded to ``DataFrame.plot``; draws the y axis on a log scale
    :param scale: multiplier applied to the base figure size of 6 x 4 inches
    :param include_train: also draw the training RMSE if ``df`` provides it
    :return: the matplotlib axes the series were drawn on
    """
    rmse_keys = predict_iterations.Keys
    palette = plots.train_validate_colours

    if include_train and rmse_keys.TRAIN_RMSE in df:
        cols = [rmse_keys.TRAIN_RMSE, rmse_keys.MODEL_RMSE, rmse_keys.REF_RMSE]
        series_labels = ("ML (training)", "ML (validation)", "Reference")
        colours = (
            palette[keys.TRAIN],
            palette[keys.VALIDATE],
            palette[keys.REFERENCE],
        )
    else:
        cols = [rmse_keys.MODEL_RMSE, rmse_keys.REF_RMSE]
        series_labels = ("ML", "Reference")
        colours = (palette[keys.VALIDATE], palette[keys.REFERENCE])

    _, ax = plt.subplots(figsize=(6 * scale, 4 * scale))

    ax = df.plot(
        x=keys.UV_ITER,
        y=cols,
        ax=ax,
        color=colours,
        marker="o",
        linestyle="--",
        logy=logy,
        ylabel="RMSE (eV)",
        # Raw string: "\m" is not a valid escape sequence in a normal string literal.
        xlabel=r"$N_\mathrm{iter}$",  # training on all previous linear-response results
    )

    # Replace the column names in the legend with human-readable labels
    # (same order as ``cols``).
    ax.legend(series_labels)
    # Iteration numbers are integers, so only put ticks on whole numbers.
    ax.xaxis.set_major_locator(matplotlib.ticker.MaxNLocator(integer=True))

    if logy:
        # A log axis cannot start at 0 (matplotlib warns and ignores that limit),
        # so only the upper limit is fixed here.
        ax.set_ylim(top=1.1)
    else:
        ax.set_ylim(0, 1.1)

    return ax

# Data analysis

In [None]:
# Load the results file from the experiment directory into a DataFrame.
# The encoding is given explicitly so that reading the JSON does not depend on
# the platform's default locale encoding.
results_file = EXPERIMENT_DIR / RESULTS_FILE
with open(results_file, "r", encoding="utf-8") as file:
    hubbard_u_iterations = pd.DataFrame(json.load(file))
hubbard_u_iterations

In [None]:
# Plots are written to a "plots" folder inside the experiment directory.
output_dir = EXPERIMENT_DIR
plot_dir = output_dir.joinpath("plots")
plot_dir.mkdir(exist_ok=True)

# Model vs. reference RMSE per iteration; the saved file's absolute path is
# shown as the cell output.
comparison_pdf = plot_dir / "iteration_comparison.pdf"
ax = create_plots(hubbard_u_iterations, scale=0.7)
ax.get_figure().savefig(comparison_pdf, bbox_inches="tight")
comparison_pdf.absolute()

In [None]:
# Plots are written to a "plots" folder inside the experiment directory.
output_dir = EXPERIMENT_DIR
plot_dir = output_dir.joinpath("plots")
plot_dir.mkdir(exist_ok=True)

# Same comparison as before, additionally requesting the training RMSE series.
comparison_w_train_pdf = plot_dir / "iteration_comparison_w_train.pdf"
ax = create_plots(hubbard_u_iterations, scale=0.7, include_train=True)
ax.get_figure().savefig(comparison_w_train_pdf, bbox_inches="tight")

In [None]:
# Load the dataset saved in the "iter=<n>" sub-directory of one chosen iteration.
uv_iter = 2
iteration_dir = EXPERIMENT_DIR / f"iter={uv_iter}"
predictions_frame = pd.read_json(iteration_dir / DATASET)

In [None]:
# Keep every row whose training label is not "train" and plot it, split by label.
is_not_train = predictions_frame[keys.TRAINING_LABEL] != keys.TRAIN
frame = predictions_frame.loc[is_not_train]
plots.split_plot(
    frame,
    keys.LABEL,
    axis_label="Hubbard param. (eV)",
    # title=f"RMSE = {datasets.rmse(frame):.2f} eV",
);

In [None]:
# Series drawn for every iteration: a display label paired with the column that
# is passed to the plot as ``prediction_key``.
plot_info = {
    "label": ["ML", "Reference"],
    "key": [keys.PARAM_OUT_PREDICTED, keys.PARAM_IN]
}

# All iterations present in the loaded frame except the first one (after sorting).
uv_iterations = sorted(predictions_frame[keys.UV_ITER].unique())[1:]

for uv_iter in uv_iterations:
    # Use a separate name for the per-iteration dataset: rebinding
    # ``predictions_frame`` here would change the iteration list above whenever
    # this cell is re-run.
    iter_frame = pd.read_json(EXPERIMENT_DIR / f"iter={uv_iter}" / DATASET)

    # Validation rows belonging to this iteration; the mask is the same for
    # every series, so it is computed once per iteration.
    validation_mask = (
        (iter_frame[keys.TRAINING_LABEL] == keys.VALIDATE)
        & (iter_frame[keys.UV_ITER] == uv_iter)
    )

    for label, key in zip(plot_info["label"], plot_info["key"]):
        # Fresh selection for each call so the two plots never share a frame object.
        validation_frame = iter_frame[validation_mask]

        # Parity plot split by element
        fig = plots.split_plot(
            validation_frame,
            keys.LABEL,
            axis_label="Hubbard param. (eV)",
            # Raw f-string: "\m" is not a valid escape sequence in a normal string.
            title=rf"{label} ($N_\mathrm{{iter}}$)",
            prediction_key=key,
        )
        if label == "Reference":
            # The reference series compares consecutive iterations, so the
            # generic axis labels are replaced.
            axis = fig.gca()
            axis.set_xlabel("Hubbard param. (eV) iteration $N$")
            axis.set_ylabel("Hubbard param. (eV) iteration $N - 1$")

        # NOTE(review): disabled sketch kept for reference; it refers to a
        # ``subset`` frame that is not defined in this cell.
        # # Create a histogram of the relative errors
        # labels = {}
        # for species, frame in subset.groupby(keys.SPECIES):
        #     group_label = "-".join(species)
        #     mean = frame[keys.PARAM_OUT_RELATIVE_ERROR].mean()
        #     labels[group_label] = f"{group_label} {mean:.3f}"
        #
        # plots.plot_param_histogram(
        #     subset, param_col=keys.PARAM_OUT_RELATIVE_ERROR, x_label="Relative error", labels=labels
        # )

        fig.savefig(plot_dir / f"uv_iter={uv_iter}_{label}.pdf", bbox_inches='tight')
