In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from matplotlib.offsetbox import OffsetImage, AnnotationBbox

from oneqmc.analysis import HARTREE_TO_KCAL
from oneqmc.analysis.plot import set_defaults

# Called once, before any figure is created. The helper comes from
# oneqmc.analysis.plot and is not shown here -- presumably it installs the
# project's matplotlib styling defaults (TODO confirm).
set_defaults()

In [None]:
# When True, the plotting cell writes the finished figure to "licn.pdf" in the
# current working directory; set to False to only display it.
SAVE_FIGURES = True

In [None]:
# Energy table of the 04_licn experiment. The cells below read its "dataset",
# "pretraining", "finetuning" and "rmean" columns.
df = pd.read_csv("../../experiment_results/04_licn/energy.csv")

In [None]:
# "rmean" values of the rows tagged as "Reference", kept as a NumPy array in
# the row order of the CSV (later comparisons are positional).
ref_energy = np.asarray(df.loc[df["dataset"] == "Reference", "rmean"])

In [None]:
# Panel (e): mean absolute error of the mean-centred ("relative") energies
# against the reference, plotted against the number of training / fine-tuning
# steps, with one curve per training setup.
fig, ax = plt.subplots(figsize=(5, 5))

# Step counts at which results are looked up: 1, 2, 4, ..., 4096.
fts = 2 ** np.arange(0, 13)

# One entry per curve: (rows of `df` for that setup, legend label, line colour).
subdfs = [
    (df[df["dataset"] == "Scratch"], "From scratch", "#4e938d"),
    (df[(df["pretraining"] == 512000) & (df["dataset"] == "TM")], "TinyMol", "#8661c5"),
    (
        df[(df["pretraining"] == 200000) & (df["dataset"] == "OC0")],
        "LAC Phase 1",
        "#0078d4",
    ),
    (
        df[(df["pretraining"] == 400000) & (df["dataset"] == "OC1")],
        "LAC full",
        "#27474a",
    ),
]

# Each curve is shifted by a factor exp(eps) in x, i.e. by a constant distance
# on the logarithmic axis, so overlapping error bars stay distinguishable.
eps = 0  # To slightly offset the plots
for subdf, label, colour in subdfs:
    vals, yerrs = [], []
    for ft in fts:
        energy = np.asarray(subdf[subdf["finetuning"] == ft]["rmean"])
        # The comparison below is positional, so both arrays must have the same
        # length. Without this check NumPy would silently broadcast a length-1
        # selection against the reference (or yield NaN for an empty selection
        # when the reference has a single row) and plot a meaningless point.
        if energy.shape != ref_energy.shape:
            raise ValueError(
                f"{label}: {energy.shape[0]} energies at finetuning={ft}, "
                f"but the reference has {ref_energy.shape[0]}"
            )
        # Centre both sets on their own mean so only relative energies are compared.
        diff = (energy - energy.mean()) - (ref_energy - ref_energy.mean())
        abs_diff = np.abs(diff)
        # Scaled by HARTREE_TO_KCAL to match the kcal/mol axis label.
        mae = HARTREE_TO_KCAL * abs_diff.mean()
        # Error bar: spread (population std) of the absolute errors.
        yerr = HARTREE_TO_KCAL * abs_diff.std()
        vals.append(mae)
        yerrs.append(yerr)
    ax.errorbar(fts * np.exp(eps), vals, yerr=yerrs, c=colour, label=label, marker="o")
    eps += 0.02

ax.set_yscale("log")
ax.set_xscale("log")
ax.set_xlabel("Scratch training or finetuning steps")
ax.set_ylabel("MARE (kcal/mol)")
ax.legend()

# Molecule picture, anchored at a position given in data coordinates.
arrimg = mpimg.imread("../../experiment_results/04_licn/licn-molecule.png")
imagebox = OffsetImage(arrimg, zoom=0.25)
ab = AnnotationBbox(imagebox, (800, 23), frameon=False)
ax.add_artist(ab)

# Grey band below 5 kcal/mol and a dotted guide line at 1 kcal/mol.
ax.axhspan(0, 5, color="grey", alpha=0.2, lw=0)
ax.axhline(1, color="k", ls=":", zorder=1)
# Panel label, also positioned in data coordinates.
ax.annotate("(e)", (0.9, 1100))

fig.tight_layout()
if SAVE_FIGURES:
    fig.savefig("licn.pdf")