# BBMEP create figure notebook

In [None]:
import matplotlib.pyplot as plt
from matplotlib import rcParams
import numpy as np
import pandas as pd
from matplotlib.offsetbox import OffsetImage, AnnotationBbox
import matplotlib.image as mpimg
from scipy.ndimage import rotate

from oneqmc.analysis.plot import set_defaults
from oneqmc.analysis import colours

# Apply the plotting defaults from oneqmc.analysis.plot before any figure is made.
set_defaults()

# Colour assigned to each electronic-structure method; the keys are the method
# labels used for lookups and legends later in the notebook.
colormaps_es = dict(
    [
        ("UM06-2X-D3/Dev2-QZVPP", colours.BRAND_BLUE),
        ("UB3LYP-D3BJ/Dev2-QZVPP", colours.BRAND_MAGENTA),
        (r"U$\omega$B97X-V-D3BJ/Dev2-QZVPP", colours.BRAND_TEAL),
        ("UCCSD(T)/CBS", colours.BRAND_PURPLE),
        ("NEVPT2(10,10)/CBS", colours.BRAND_DARK_ORANGE),
        ("NEVPT2(10,10)/aug-cc-pVDZ", colours.BRAND_ORANGE),
        ("DF-MRAQCC(10,10)/aug-cc-pVDZ", colours.BRAND_YELLOW),
        ("DF-MRCISD+Q(10,10)/aug-cc-pVDZ", colours.BRAND_BROWN),
    ]
)

# Marker for each method, grouped by the methods that share a marker shape.
# Key order matches colormaps_es.
marker_es = {
    **dict.fromkeys(
        [
            "UM06-2X-D3/Dev2-QZVPP",
            "UB3LYP-D3BJ/Dev2-QZVPP",
            r"U$\omega$B97X-V-D3BJ/Dev2-QZVPP",
        ],
        "<",
    ),
    "UCCSD(T)/CBS": "p",
    **dict.fromkeys(
        [
            "NEVPT2(10,10)/CBS",
            "NEVPT2(10,10)/aug-cc-pVDZ",
        ],
        "v",
    ),
    **dict.fromkeys(
        [
            "DF-MRAQCC(10,10)/aug-cc-pVDZ",
            "DF-MRCISD+Q(10,10)/aug-cc-pVDZ",
        ],
        "^",
    ),
}

# Print numpy values with 12 digits of precision.
np.set_printoptions(precision=12)

In [None]:
# When True, the figure cells below also write their figure to a PDF under
# data_dir; the figures are shown with plt.show() either way.
save_figures = False

In [None]:
def relative(energy, relative_to_0=False):
    """Return ``energy`` expressed relative to a reference value.

    Args:
        energy: energies to shift; passed unchanged to the selected helper.
        relative_to_0: when truthy, delegate to ``relative_chem``;
            otherwise (the default) delegate to ``relative_mean``.

    Returns:
        Whatever the selected helper returns for ``energy``.
    """
    shift = relative_chem if relative_to_0 else relative_mean
    return shift(energy)


def relative_mean(energy):
    """Subtract from ``energy`` its mean taken along the last axis.

    Args:
        energy: array exposing ``.mean``; the last axis is the one averaged.

    Returns:
        Array of the same shape as ``energy`` whose mean along the last
        axis is zero.
    """
    # keepdims leaves a length-1 last axis so the mean broadcasts back.
    centre = energy.mean(axis=-1, keepdims=True)
    return energy - centre


def relative_chem(energy):
    """Subtract from ``energy`` its first entry along the last axis.

    The reference is always the first element of the last axis, the same
    axis ``relative_mean`` averages over, so 1-D profiles and batches of
    profiles are handled by one expression. The previous try/except version
    only reached the batched formula when broadcasting raised, so a batch
    with one row, or with as many rows as columns, silently produced wrong
    values.

    Args:
        energy: array-like of energies with at least one dimension.

    Returns:
        ``numpy.ndarray`` of the same shape as ``energy`` with a zero in the
        first position of the last axis.
    """
    energy = np.asarray(energy)
    # The ``:1`` slice keeps the last axis so the reference broadcasts back.
    return energy - energy[..., :1]

In [None]:
# Load the BBMEP result tables and build, per system, the mean absolute error
# of relative energies against the reference energies:
#   MAE[system][ansatz]    -> array with one [training step, error] row per step
#   MAE_es[system][method] -> [mean time in hours, error]
# Errors are in Hartree, as read from the "E(Hartree)" columns.
data_dir = "../../experiment_results/02_bbmep"
system = [
    "Ethane",
    "Formamide",
    "1-Propanol",
    "2-Aminopropan-2-ol",
    "L-Alanine",
]
raw_energy = pd.read_csv(f"{data_dir}/BBMEP_orbformer_energy.csv")
ref = pd.read_csv(f"{data_dir}/BBMEP_reference_energy.csv")
es_energies = pd.read_csv(f"{data_dir}/BBMEP_es_energy.csv")
# Give every system its own dict; dict.fromkeys(system, {}) would share a
# single dict object between all keys.
MAE = {name: {} for name in system}
MAE_es = {name: {} for name in system}
for i in system:
    reference_e = np.array(ref.loc[ref["MEP_system"] == i]["Reference_E(Hartree)"])
    # The reference profile is the same for every method of this system.
    reference_rel = relative(reference_e)

    # Electronic-structure methods: one [time, error] entry per method.
    es_e = es_energies.loc[es_energies["MEP_system"] == i]
    for j in colormaps_es:
        es_rows = es_e.loc[es_e["Method"] == j]
        e = np.array(es_rows["E(Hartree)"])
        mae = np.mean(np.abs(relative(e) - reference_rel))
        t = np.mean(es_rows["Time(s)"]) / 3600  ## hr
        MAE_es[i][j] = [t, mae]

    # QMC ansatzes: the Psiformer entry is only listed for the two smallest
    # systems and stays last, as in the original lists.
    orb_energies = raw_energy.loc[raw_energy["MEP_system"] == i]
    method = [
        "Orbformer scratch, single structures",
        "Orbformer scratch, all structures",
        "Orbformer fine-tune LAC (400k), all structures",
    ]
    if i in ("Ethane", "Formamide"):
        method.append("Psiformer scratch (large batch), single structures")
    for j in method:
        # Filter once per ansatz and build the step mask from this same
        # frame, so the mask and the frame share an index.
        a = orb_energies.loc[orb_energies["Ansatz"] == j]
        steps = sorted(set(a["Fine-tune/Train_steps"]))
        rows = []
        for k in steps:
            e = np.array(a.loc[a["Fine-tune/Train_steps"] == k]["E(Hartree)"])
            mae = np.mean(np.abs(relative(e) - reference_rel))
            rows.append([k, mae])
        MAE[i][j] = np.vstack(rows)

In [None]:
# MARE-versus-cost panels, one per system.
# Only "hour" can be plotted by this cell. The other historical options
# ("wallclock_hour", "energy") have no tick conversion defined here and are
# rejected explicitly inside the loop.
cost_option = "hour"
# One colour per QMC ansatz, indexed by the ansatz position in MAE[system].
qmc_colors = ["#74baae", "#4e938d", "#27474a", "#8de971"]


fig, ax = plt.subplots(3, 2, figsize=(18.5, 19), sharey=True)
compute_sec = {
    "Ethane": 16265.031 / 32000,
    "Formamide": 10323.08825 / 32000,
    "1-Propanol": 85435.938 / 80000,
    "2-Aminopropan-2-ol": 92369.6905 / 80000,
    "L-Alanine": 160420.138 / 100000,
}  # 4 GPU training costs
# Psiformer 2x timing 296013.404/200000 for single GPU, Psiformer 4x timing 178873.479/200000 for 4 GPU
# NOTE(review): the Ethane factor divides by 4 * compute_sec while the
# Formamide factor divides by compute_sec only; confirm this reflects the
# number of GPUs each Psiformer timing was measured on.
psiformer_factor = {
    "Ethane": 296013.404 / 200000 / (4 * 16265.031 / 32000),
    "Formamide": 182497.587 / 200000 / (10323.08825 / 32000),
}
rcParams["axes.spines.top"] = True
ax_top = {}
# Identical for every panel, so defined once outside the loop.
ticks = [10, 50, 250, 1000, 4000, 16000, 64000, 300000]
cost_factor = 300 / (280 / 64)
print(MAE.keys())
for m, i in enumerate(MAE.keys()):
    # Panels fill column by column: m = 0..2 -> left column, 3..4 -> right.
    # Computed up front so it is defined even if MAE[i] has no ansatz.
    h, v = np.divmod(m, 3)
    panel = ax[v][h]
    for n, j in enumerate(MAE[i].keys()):
        words = j.split()
        x = MAE[i][j][:, 0]
        if "Psiformer" in words:
            x = x * psiformer_factor[i]
        if "all" in words:
            # presumably 20 structures share each training step; TODO confirm
            x = x / 20
        y = 627.5 * MAE[i][j][:, 1]  # factor presumably Hartree -> kcal/mol, per the y label
        panel.scatter(x, y, marker="o", s=140, label=j, color=qmc_colors[n])
        # Straight-line fit in log-log space, drawn through the points.
        a, b = np.polyfit(np.log(x), np.log(y), 1)
        panel.plot(x, np.exp(a * np.log(x) + b), color=qmc_colors[n], linewidth=4)
        panel.set_xscale("log", base=4)
        panel.set_yscale("log", base=4)
    for k in MAE_es[i].keys():
        panel.scatter(
            MAE_es[i][k][0] / (4 * compute_sec[i] / 3600 * cost_factor),
            627.5 * MAE_es[i][k][1],
            label=k,
            s=180,
            color=colormaps_es[k],
            marker=marker_es[k],
        )
    panel.set_xticks(ticks, ticks, fontsize=16)
    panel.axhspan(0.2, 5, color="grey", alpha=0.2, lw=0)
    panel.set_xlim([6, 580000])
    # Twin axis on top shares the limits and carries the second cost scale.
    ax_top[m] = panel.twiny()
    ax_top[m].set_xscale("log", base=4)
    ax_top[m].set_xlim([6, 580000])
    if cost_option == "hour":
        panel.set_xticklabels(
            np.round(np.array(ticks) * 4 * compute_sec[i] / 3600 * cost_factor, 1)
        )
    else:
        # The original referenced an undefined name `cost` here and failed
        # with a NameError; raise a descriptive error instead of guessing
        # the missing conversion.
        raise NotImplementedError(
            f"cost_option={cost_option!r} has no bottom-axis tick conversion; "
            'only "hour" is implemented'
        )
    panel.axhline(1, color="k", ls=":", linewidth=4)
    panel.set_yticks(
        [0.5, 1, 2, 5, 10, 20, 50], [0.5, 1, 2, 5, 10, 20, 50], fontsize=16
    )
    panel.set_ylim([0.2, 99])
    if v == 0:
        if cost_option == "hour":
            ax_top[m].set_xlabel("Orbformer A100 GPU hr/structure", fontsize=22)
        else:
            ax_top[m].set_xlabel(
                "Scratch training or finetuning steps per structure", fontsize=22
            )
    if h == 0:
        panel.set_ylabel("MARE over entire MEP (kcal/mol)", fontsize=22)
    if cost_option == "hour":
        ax_top[m].set_xticks(ticks)
        ax_top[m].set_xticklabels(
            np.round(np.array(ticks) * 4 * compute_sec[i] / 3600, 2), fontsize=16
        )
    else:
        ax_top[m].set_xticks(ticks)
        ax_top[m].set_xticklabels(ticks)
# Five systems on a 3x2 grid: hide the unused last cell.
ax[2][1].set_axis_off()

# Legend built from the handles of the Ethane panel (ax[0][0]).
ax[0][0].legend(
    fontsize=22,
    bbox_to_anchor=(2.03, -1.28),
    labelspacing=0.4,
    borderpad=0.2,
    handletextpad=0.02,
)

# Per-panel decoration: caption, thumbnails of the first (_0) and last (_19)
# structure, and an arrow between them. Positions are in data coordinates.
# Each row: axis, caption, image folder / file stem,
#           (zoom, xy) of first image,
#           (zoom, xy, reorient) of last image (reorient is None or a callable),
#           (arrow tail, arrow head).
mep_panels = [
    (
        ax[0][0],
        "(g) Ethane",
        "Ethane",
        (0.12, (2500, 20)),
        (0.2, (35000, 15), None),
        ((5500, 20), (12000, 20)),
    ),
    (
        ax[1][0],
        "(h) Formamide",
        "Formamide",
        (0.12, (10000, 30)),
        (0.2, (100000, 25), None),
        ((22000, 30), (45000, 30)),
    ),
    (
        ax[2][0],
        "(i) 1-Propanol",
        "1-Propanol",
        (0.17, (12000, 38)),
        (0.25, (120000, 28), np.rot90),
        ((30000, 38), (55000, 38)),
    ),
    (
        ax[0][1],
        "(j) 2-Aminopropan-2-ol",
        "2-Aminopropan-2-ol",
        (0.18, (14000, 35)),
        (0.23, (120000, 28), lambda img: rotate(img, angle=60)),
        ((28000, 30), (60000, 30)),
    ),
    (
        ax[1][1],
        "(k) L-Alanine",
        "L-Alanine",
        (0.18, (28000, 40)),
        (0.16, (140000, 25), lambda img: rotate(img, angle=60)),
        ((44000, 30), (85000, 30)),
    ),
]
for panel, caption, mol, first, last, (tail, head) in mep_panels:
    panel.text(0.03, 0.9, caption, transform=panel.transAxes, fontsize=24)

    first_zoom, first_xy = first
    picture = mpimg.imread(f"{data_dir}/molecule_images/bbmep/{mol}/{mol}_0.png")
    panel.add_artist(
        AnnotationBbox(OffsetImage(picture, zoom=first_zoom), first_xy, frameon=False)
    )

    last_zoom, last_xy, reorient = last
    picture = mpimg.imread(f"{data_dir}/molecule_images/bbmep/{mol}/{mol}_19.png")
    if reorient is not None:
        picture = reorient(picture)
    panel.add_artist(
        AnnotationBbox(OffsetImage(picture, zoom=last_zoom), last_xy, frameon=False)
    )

    panel.annotate("", xytext=tail, xy=head, arrowprops=dict(arrowstyle="->"))

# Bottom x label of the lowest panel in each column depends on the cost unit.
bottom_label = {
    "wallclock_hour": "GPU or CPU hr/structure",
    "energy": "Energy cost/structure (KWh)",
}.get(cost_option, "Other methods AMD EPYC 7763 CPU hr/structure")
for lowest in (ax[2][0], ax[1][1]):
    lowest.set_xlabel(bottom_label, fontsize=22)

# Figure-wide title, positioned relative to the first panel.
ax[0][0].text(
    0.55,
    1.16,
    "Mean Absolute Relative Energy Error (MARE)",
    fontsize=26,
    transform=ax[0][0].transAxes,
)

plt.subplots_adjust(wspace=0.01, hspace=0.15)
plt.draw()
if save_figures:
    plt.savefig(f"{data_dir}/BBMEP_result_fig_hours.pdf", dpi=600)
plt.show()

In [None]:
## Reaction profile figure: relative energies (left column) and their
## deviation from the reference (right column), one row per system

fig, ax = plt.subplots(3, 2, figsize=(13, 17.5), sharex=True)

best_ansatz = "Orbformer fine-tune LAC (400k), all structures"
es_method = "DF-MRCISD+Q(10,10)/aug-cc-pVDZ"
image_ids = range(20)  # the plots assume 20 images per path

for row, mol in enumerate(["Ethane", "Formamide", "2-Aminopropan-2-ol"]):
    left, right = ax[row]

    # Use the step stored in the last row of the MAE table for this ansatz.
    last_step = int(MAE[mol][best_ansatz][-1, 0])
    qmc_mask = (
        (raw_energy["MEP_system"] == mol)
        & (raw_energy["Ansatz"] == best_ansatz)
        & (raw_energy["Fine-tune/Train_steps"] == last_step)
    )
    qmc_rel = relative(np.array(raw_energy.loc[qmc_mask]["E(Hartree)"]))

    es_mask = (es_energies["MEP_system"] == mol) & (es_energies["Method"] == es_method)
    es_rel = relative(np.array(es_energies.loc[es_mask]["E(Hartree)"]))

    ref_rel = relative(
        np.array(ref.loc[ref["MEP_system"] == mol]["Reference_E(Hartree)"])
    )
    ref_std = np.array(ref.loc[ref["MEP_system"] == mol]["Reference_E_std(Hartree)"])

    # (profile, line style) for the two compared methods, in drawing order.
    curves = (
        (
            qmc_rel,
            dict(
                color="#27474a",
                marker="o",
                label="Best Orbformer fine-tune LAC (400k), all structures",
                linestyle="-.",
            ),
        ),
        (
            es_rel,
            dict(
                color=colormaps_es["DF-MRCISD+Q(10,10)/aug-cc-pVDZ"],
                marker="^",
                label="DF-MRCISD+Q(10,10)/aug-cc-pVDZ",
                linestyle="--",
            ),
        ),
    )
    for profile, look in curves:
        left.plot(image_ids, 627.5 * profile, linewidth=3.5, markersize=14, **look)
        right.plot(
            image_ids,
            627.5 * (profile - ref_rel),
            linewidth=3.5,
            markersize=14,
            **look,
        )

    left.errorbar(
        x=image_ids,
        y=627.5 * ref_rel,
        yerr=627.5 * ref_std,
        label="Reference deep QMC",
        color="k",
        linestyle="-",
        linewidth=3,
        alpha=0.8,
    )

    # Dotted guides at -1 and +1 on the error axis.
    for level in (-1, 1):
        right.axhline(level, color="k", ls=":", linewidth=3)

    # The MARE read-outs sit one notch lower on the third row.
    heights = (0.9, 0.8) if row != 2 else (0.8, 0.7)
    for (profile, look), height in zip(curves, heights):
        right.text(
            0.56,
            height,
            f"MARE={627.5*np.mean(np.abs((profile-ref_rel))):.2f}",
            color=look["color"],
            transform=right.transAxes,
            fontsize=22,
        )

    left.set_ylabel("Relative E (kcal/mol)", fontsize=22)
    right.set_ylabel("Relative E Error (kcal/mol)", fontsize=22)
# Panel captions: (a)-(c) down the left column, (d)-(f) down the right one.
for row, (left_tag, right_tag, mol) in enumerate(
    [
        ("a", "d", "Ethane"),
        ("b", "e", "Formamide"),
        ("c", "f", "2-Aminopropan-2-ol"),
    ]
):
    for col, tag in ((0, left_tag), (1, right_tag)):
        ax[row][col].text(
            0.03,
            0.9,
            f"({tag}) {mol}",
            transform=ax[row][col].transAxes,
            fontsize=24,
        )

# Molecule thumbnails and pointer arrows on the left-column panels.
# Each row: axis, image folder / file stem,
#           thumbnails as (image id, zoom, xy, rotation angle or None),
#           arrows as (tail, head). Positions are in data coordinates.
profile_art = [
    (
        ax[0][0],
        "Ethane",
        [
            (0, 0.1, (1.1, -55), None),
            (8, 0.2, (7.6, 2), None),
            (19, 0.18, (17, -20), None),
        ],
        [
            ((0, -85), (0.7, -70)),
            ((8, 45), (8, 10)),
            ((19, 10), (18, -10)),
        ],
    ),
    (
        ax[1][0],
        "Formamide",
        [
            (0, 0.1, (1.3, -40), None),
            (6, 0.18, (8.8, 40), None),
            (10, 0.18, (9.2, -20), None),
            (19, 0.2, (17, 30), None),
        ],
        [
            ((0, -65), (0.7, -50)),
            ((6.4, 45), (7.5, 45)),
            ((10, -42), (10, -30)),
            ((19, 55), (18, 40)),
        ],
    ),
    (
        ax[2][0],
        "2-Aminopropan-2-ol",
        [
            (0, 0.18, (1.4, -25), None),
            (19, 0.22, (17, 0), 60),
        ],
        [
            ((0, -50), (0.7, -40)),
            ((19, 35), (17, 20)),
        ],
    ),
]
for panel, mol, thumbnails, arrows in profile_art:
    for image_id, zoom, xy, angle in thumbnails:
        picture = mpimg.imread(
            f"{data_dir}/molecule_images/bbmep/{mol}/{mol}_{image_id}.png"
        )
        if angle is not None:
            picture = rotate(picture, angle=angle)
        panel.add_artist(
            AnnotationBbox(OffsetImage(picture, zoom=zoom), xy, frameon=False)
        )
    for tail, head in arrows:
        panel.annotate("", xytext=tail, xy=head, arrowprops=dict(arrowstyle="->"))

# Column headings above the top row.
for heading_ax, x_pos, heading in (
    (ax[0][0], 0.28, "Relative Energy"),
    (ax[0][1], 0.18, "Relative Energy Error"),
):
    heading_ax.text(
        x_pos, 1.1, heading, transform=heading_ax.transAxes, fontsize=26
    )

ax[2][0].set_ylim([-65, 58])
plt.subplots_adjust(wspace=0.2, hspace=0.02)

# Even image ids as ticks; the limits are set after the ticks, as before.
even_ids = 2 * np.arange(11)
ax[2][0].set_xticks(even_ids)
ax[2][0].set_xlim([-0.5, 19.5])
ax[2][0].set_xticklabels(even_ids)
for bottom in (ax[2][0], ax[2][1]):
    bottom.set_xlabel("Image ID on MEP", fontsize=22)

plt.draw()
if save_figures:
    plt.savefig(f"{data_dir}/BBMEP_energy_profile.pdf", dpi=600)
plt.show()