In [1]:
# Notebook setup: locate the project root, make ROOT/src importable, import the
# numerical/plotting stack and apply the plotting style sheet.
import sys
from pathlib import Path
# ROOT is the parent of the current working directory (Path() is "."), so
# every path derived from it depends on where the kernel was started.
ROOT = Path().resolve().parents[0]
# Appended (not inserted first), so already-importable modules keep priority.
sys.path.append(str(ROOT / "src"))

import numpy as np
import matplotlib.pyplot as plt
import matplotlib.image as mpimg

from scipy.stats import gaussian_kde
from scipy.stats import norm

# The style is given as a bare file name; presumably it is resolved as a path
# relative to the current working directory -- TODO confirm the .mplstyle file
# sits next to this notebook.
plt.style.use("rotskoff_notebook.mplstyle")

In [2]:
############## LOAD EXPLICIT SOLVENT DATA - RAW, TWO-STEP and MULTICONFIGURATIONAL RESP ###################
#
# This cell fills six module-level dictionaries, each keyed by microstate name:
#   ex_nitro_dataset / ex_oxygen_ring_dataset / ex_carbon_ring_dataset
#       -> {"labels", "raw_charges", "tsresp_charges", "mcresp_charges"}
#   im_nitro_dataset / im_oxygen_ring_dataset / im_carbon_ring_dataset
#       -> {"labels", "charges", "weights"}
# Every array except "weights" is restricted (along axis 0) to the atoms whose
# label matches the group's selection rule.
#
# SECURITY NOTE: the files are read with allow_pickle=True, which can execute
# arbitrary code if a file is malicious. It is kept because the labels are
# presumably stored as object arrays (they are cast with .astype(str)) -- only
# load result files you produced yourself.

explicit_MICROSTATES = ["APP_explicit", "BPP_explicit", "CPP_explicit", "DPP_explicit", "PPP_explicit"]

INPUT_ROOT = ROOT / "data" / "microstates"
RESULTS_ROOT = ROOT / "results"

# Each cluster weight is repeated this many times along axis 1 of the implicit
# charge array (presumably the number of sampled configurations per cluster --
# TODO confirm against the sampling protocol).
SAMPLES_PER_CLUSTER = 500


def _ring_mask(labels, element):
    """Return a boolean mask of labels that start with `element` and whose
    second character exists and is not a digit (e.g. "OA" but not "O" or "O1").

    dtype=bool is forced so that an empty `labels` array still yields a valid
    (empty) boolean index instead of a float array.
    """
    return np.array(
        [lbl.startswith(element) and len(lbl) > 1 and not lbl[1].isdigit() for lbl in labels],
        dtype=bool,
    )


def _select_atoms(mask, labels, **charge_sets):
    """Build one dataset entry: the masked labels plus every keyword array
    masked along its first axis, stored under the keyword's own name."""
    entry = {"labels": labels[mask]}
    for name, values in charge_sets.items():
        entry[name] = values[mask]
    return entry


ex_nitro_dataset = {}
ex_oxygen_ring_dataset = {}
ex_carbon_ring_dataset = {}

for state in explicit_MICROSTATES:
    base = RESULTS_ROOT / state

    # Open each archive once and close it as soon as the arrays are extracted.
    with np.load(base / "rawESP" / "charges.npz", allow_pickle=True) as raw_npz:
        labels = raw_npz["labels"].astype(str)
        esp_charges = raw_npz["charges"]
    with np.load(base / "twostepRESP_basic" / "charges.npz", allow_pickle=True) as tsresp_npz:
        tsresp_charges = tsresp_npz["step2"]
    mcresp_charges = np.load(base / "multiconfRESP_reduced_basic" / "charges_final.npy", allow_pickle=True)

    explicit_charge_sets = {
        "raw_charges": esp_charges,
        "tsresp_charges": tsresp_charges,
        "mcresp_charges": mcresp_charges,
    }

    mask_n = np.char.startswith(labels, "N")
    mask_o_ring = _ring_mask(labels, "O")
    mask_c_ring = _ring_mask(labels, "C")

    ex_nitro_dataset[state] = _select_atoms(mask_n, labels, **explicit_charge_sets)
    ex_oxygen_ring_dataset[state] = _select_atoms(mask_o_ring, labels, **explicit_charge_sets)
    ex_carbon_ring_dataset[state] = _select_atoms(mask_c_ring, labels, **explicit_charge_sets)

################# LOAD IMPLICIT SOLVENT WEIGHTED RAW DATA ONLY FOR A COMPARISON #################

implicit_MICROSTATES = ["APP", "BPP", "CPP", "DPP", "PPP"]

im_nitro_dataset = {}
im_oxygen_ring_dataset = {}
im_carbon_ring_dataset = {}

for state in implicit_MICROSTATES:
    base_results = RESULTS_ROOT / state
    base_input = INPUT_ROOT / state

    with np.load(base_results / "rawESP" / "charges.npz", allow_pickle=True) as npz:
        labels = npz["labels"].astype(str)
        charges = npz["charges"]

    # ndmin=1 keeps a one-row weights file as a length-1 array rather than a
    # 0-d scalar array, which could not be expanded per cluster.
    cluster_weights = np.loadtxt(base_input / "weights.dat", usecols=1, ndmin=1)
    # One weight per column of `charges`: w0 x N, then w1 x N, ...
    weights = np.repeat(cluster_weights, SAMPLES_PER_CLUSTER)

    assert weights.shape[0] == charges.shape[1], f"weight mismatch for {state}"

    mask_n = np.char.startswith(labels, "N")
    mask_o_ring = _ring_mask(labels, "O")
    mask_c_ring = _ring_mask(labels, "C")

    # "weights" applies to the second axis of "charges", so it is stored
    # unmasked (and shared) in all three atom groups.
    im_nitro_dataset[state] = {**_select_atoms(mask_n, labels, charges=charges), "weights": weights}
    im_oxygen_ring_dataset[state] = {**_select_atoms(mask_o_ring, labels, charges=charges), "weights": weights}
    im_carbon_ring_dataset[state] = {**_select_atoms(mask_c_ring, labels, charges=charges), "weights": weights}


In [3]:
############## SANITY-CHECK THE DATASETS LOADED BY THE PREVIOUS CELL ###################
#
# This cell used to be a byte-for-byte copy of the loading cell directly above
# it: it re-read every file from disk and rebuilt the very same dictionaries.
# The redundant loading has been dropped. The cell now only verifies that the
# previous cell has run and left internally consistent data behind, so a stale
# or partially executed kernel is caught here rather than in a later cell.
# It does not read any file and does not modify any dataset.


def _check_loaded_datasets():
    """Assert that the six dataset dictionaries exist and are self-consistent.

    Raises
    ------
    NameError
        If the loading cell has not been executed in this kernel.
    AssertionError
        If a dictionary misses a microstate or key, or array sizes disagree.
    """
    explicit_keys = {"labels", "raw_charges", "tsresp_charges", "mcresp_charges"}
    implicit_keys = {"labels", "charges", "weights"}

    explicit_groups = {
        "ex_nitro_dataset": ex_nitro_dataset,
        "ex_oxygen_ring_dataset": ex_oxygen_ring_dataset,
        "ex_carbon_ring_dataset": ex_carbon_ring_dataset,
    }
    implicit_groups = {
        "im_nitro_dataset": im_nitro_dataset,
        "im_oxygen_ring_dataset": im_oxygen_ring_dataset,
        "im_carbon_ring_dataset": im_carbon_ring_dataset,
    }

    for group_name, group in explicit_groups.items():
        assert set(group) == set(explicit_MICROSTATES), f"{group_name}: unexpected microstates {sorted(group)}"
        for state_name, entry in group.items():
            assert set(entry) == explicit_keys, f"{group_name}[{state_name}]: unexpected keys {sorted(entry)}"
            n_atoms = entry["labels"].shape[0]
            # Every charge set was masked with the same atom mask as the labels.
            for key in ("raw_charges", "tsresp_charges", "mcresp_charges"):
                assert entry[key].shape[0] == n_atoms, f"{group_name}[{state_name}][{key}]: atom count mismatch"

    for group_name, group in implicit_groups.items():
        assert set(group) == set(implicit_MICROSTATES), f"{group_name}: unexpected microstates {sorted(group)}"
        for state_name, entry in group.items():
            assert set(entry) == implicit_keys, f"{group_name}[{state_name}]: unexpected keys {sorted(entry)}"
            assert entry["charges"].shape[0] == entry["labels"].shape[0], (
                f"{group_name}[{state_name}]: atom count mismatch"
            )
            # One weight per column of the charge array.
            assert entry["weights"].shape[0] == entry["charges"].shape[1], (
                f"{group_name}[{state_name}]: weight mismatch"
            )


_check_loaded_datasets()
