# Sample Analysis

In [1]:
%load_ext autoreload
%autoreload 2
%load_ext autotime
%load_ext line_profiler

from joblib import Parallel, delayed

import matplotlib.pyplot as plt
import matplotlib
import numpy as np
import pandas as pd
import seaborn as sns
from dwave.system import DWaveSampler, FixedEmbeddingComposite
from matplotlib.patches import Rectangle
from numba import njit

matplotlib.rcParams.update({'font.size': 14})

from qbm.utils import (
    compute_stats_over_dfs,
    convert_bin_list_to_str,
    get_project_dir,
    get_rng,
    load_artifact,
    save_artifact,
)

project_dir = get_project_dir()

time: 1.61 s (started: 2022-03-09 10:08:05 +01:00)


## Analysis Functions

In [2]:
@njit(boundscheck=True)
def kl_divergence(
    p_exact,
    E_exact,
    E_samples,
    counts_samples,
    n_bins=32,
    prob_sum_tol=1e-6,
    ϵ_smooth=1e-6,
):
    """
    Computes the KL divergence of the theory w.r.t. the samples, i.e.,
    D_KL(p_exact || p_samples), after binning both distributions by energy.

    Both distributions are histogrammed over the same n_bins equal-width energy
    bins spanning the range of E_exact. The sample histogram is weighted by
    counts_samples and normalized by the total count.

    :param p_exact: Exact computed probability vector, i.e., the diagonal of ρ.
    :param E_exact: Exact computed energy vector, i.e., the diagonal of H.
    :param E_samples: Energies of the samples.
    :param counts_samples: Number of occurrences of each sample; entry i is the
        weight of E_samples[i].
    :param n_bins: Number of bins to compute over.
    :param prob_sum_tol: The tolerance for the probabilities to sum up to approx 1.
    :param ϵ_smooth: Smoothing parameter for the samples distribution.

    :returns: D_KL(p_exact || p_samples).
    :raises AssertionError: If the bin edges do not cover the range of E_exact, or
        if either binned distribution does not sum to 1 within prob_sum_tol.
    """
    # p: binned exact distribution, q: binned sample distribution
    p = np.zeros(n_bins)
    q = np.zeros(n_bins)

    # compute the bin edges, widening the range by a tiny amount relative to the
    # largest |E|
    buffer = np.abs(E_exact).max() * 1e-15
    bin_edges = np.linspace(E_exact.min() - buffer, E_exact.max() + buffer, n_bins + 1)

    # check that bin edges include all possible E values
    assert bin_edges.min() <= E_exact.min()
    assert bin_edges.max() >= E_exact.max()

    # bin the probabilities; every bin is half-open [a, b) except the last one,
    # which has no upper bound and therefore also collects the maximum energy
    sum_counts = counts_samples.sum()
    for i, (a, b) in enumerate(zip(bin_edges[:-1], bin_edges[1:])):
        if i < n_bins - 1:
            p[i] = p_exact[np.logical_and(E_exact >= a, E_exact < b)].sum()
            q[i] = (
                counts_samples[np.logical_and(E_samples >= a, E_samples < b)].sum()
                / sum_counts
            )
        else:
            p[i] = p_exact[E_exact >= a].sum()
            q[i] = counts_samples[E_samples >= a].sum() / sum_counts

    # smoothing of sample data: bins that carry exact probability but received no
    # samples get q = ϵ_smooth * p, and the mass added this way is subtracted in
    # equal parts from all remaining bins so that q.sum() is unchanged
    # NOTE(review): the remaining bins include those with p == 0 and q == 0, which
    # end up slightly negative; they are dropped by the `q > 0` filter below.
    smooth_mask = np.logical_and(p > 0, q == 0)
    not_smooth_mask = np.logical_not(smooth_mask)
    q[smooth_mask] = p[smooth_mask] * ϵ_smooth
    q[not_smooth_mask] -= q[smooth_mask].sum() / not_smooth_mask.sum()

    # check that p and q sum up to approx 1
    assert np.abs(p.sum() - 1) < prob_sum_tol
    assert np.abs(q.sum() - 1) < prob_sum_tol

    # take intersection of supports to avoid div zero errors
    support_intersection = np.logical_and(p > 0, q > 0)
    p = p[support_intersection]
    q = q[support_intersection]

    return (p * np.log(p / q)).sum()


@njit(boundscheck=True)
def get_state_energies(states, E_exact):
    """
    Looks up the exact computed energy of every provided state.

    :param states: Array of state numbers, each a value in 0, 1, ..., 2 ** n_qubits - 1;
        the values are used directly as indices into E_exact.
    :param E_exact: Array of exact computed energies, corresponds to the diagonal of H.

    :returns: Array where entry i is E_exact[states[i]].
    """
    n_states = len(states)
    energies = np.zeros(n_states)
    for idx in range(n_states):
        energies[idx] = E_exact[states[idx]]

    return energies


def convert_spin_vector_to_state_number(spins):
    """
    Converts the spins vector (e.g. all values ±1) to an integer corresponding to the state.
    For example, the spin vector [1, 1, 1, 1] corresponds to the state |0000⟩ which is the
    0th state. The spin vector [-1, -1, -1, -1] corresponds to the state |1111⟩ which is the
    15th state. The first spin is the most significant bit.

    :param spins: Vector of spin values (±1); any array-like (NumPy array, list, tuple).

    :returns: Integer (np.int64) corresponding to the state.
    """
    # accept plain sequences as well as arrays
    spins = np.asarray(spins)

    # map spin +1 -> bit 0 and spin -1 -> bit 1
    bit_vector = ((1 - spins) / 2).astype(np.int64)

    # Build the place values explicitly as int64: with the platform default integer
    # (32-bit on some platforms) 2 ** 31 overflows, corrupting states of >= 32 spins.
    place_values = 2 ** np.arange(len(spins) - 1, -1, -1, dtype=np.int64)

    return (bit_vector * place_values).sum()


def kl_divergence_df(exact_data, samples):
    """
    Compares each exact computed data distribution against the provided samples instance.

    :param exact_data: Dictionary with keys of the form (s, T) with s being the relative
        anneal time at which H and ρ were computed, and T being the effective temperature.
        Values are of the form {"E": [...], "p": [...]}
    :param samples: Instance of Ocean SDK SampleSet.

    :returns: Series of KL divergences with a two-level index (T_key, s), where T_key
        is T * 1000 rounded to the nearest integer. Despite the function name this is
        a pandas Series, not a DataFrame.
    """
    # convert spin vectors to state numbers
    states = np.array(
        [convert_spin_vector_to_state_number(x) for x in samples.record.sample]
    )
    # occurrence counts do not depend on (s, T), so look them up once
    counts = samples.record.num_occurrences

    dkl = {}
    for (s, T), exact in exact_data.items():
        E_exact = exact["E"]
        E_samples = get_state_energies(states, E_exact)

        # Round instead of truncating: floating point error can leave T * 1000 just
        # below the intended integer (e.g. 1.005 * 1000 == 1004.9999999999999), and
        # truncation would then shift the key by one and may merge two temperatures.
        T_key = int(round(T * 1000))
        dkl[T_key, s] = kl_divergence(exact["p"], E_exact, E_samples, counts)

    return pd.Series(dkl)


def process_run_gauge_dir(run, gauge_dir, exact_data):
    """
    Loads the samples of one run from one gauge directory and computes their
    KL divergences against the exact data. Kept as a standalone function so
    that it can be dispatched to parallel workers.

    :param run: Name of the run; the samples are read from "<run>.pkl".
    :param gauge_dir: Directory of the gauge data.
    :param exact_data: Exact computed data to compare against.

    :returns: The result of kl_divergence_df for the loaded samples.
    """
    samples_path = gauge_dir / f"{run}.pkl"

    return kl_divergence_df(exact_data, load_artifact(samples_path))

time: 19.7 ms (started: 2022-03-09 10:08:07 +01:00)


## Data Loading

In [3]:
# Selects the exact-analysis configuration and embedding to analyze, loads the
# matching artifacts and builds the list of runs used by the cells below.
config_id = 5
embedding_id = 10
n_jobs = 6

config_dir = project_dir / f"artifacts/exact_analysis/{config_id:02}/"
embedding_dir = config_dir / f"samples/embedding_{embedding_id:02}"

config = load_artifact(config_dir / "config.json")
exact_data = load_artifact(config_dir / "exact_data.pkl")

# one directory per gauge; the run names are taken from the first gauge directory
# only (presumably every gauge directory holds the same runs - TODO confirm)
gauge_dirs = sorted([x for x in embedding_dir.iterdir() if x.name.startswith("gauge_")])
run_names = sorted([x.stem for x in gauge_dirs[0].iterdir() if x.name != "gauge.pkl"])

run_infos = {}
t_as = []
s_pauses = []
# NOTE(review): anneal_durations is never appended to below, so it stays empty.
anneal_durations = []
pause_durations = []
for run_name in run_names:
    # a run name is a "-"-separated list of "key=value" pairs
    run_info = {x.split("=")[0]: x.split("=")[1] for x in run_name.split("-")}
    # numeric parameters are parsed to floats
    for k, v in run_info.items():
        if k in ("t_pause", "s_pause", "pause_duration", "quench_slope"):
            run_info[k] = float(v)
    # "True"/"False" flags are parsed to booleans; other values stay strings
    for k, v in run_info.items():
        if k in ("reverse", "reinit") and v == "True":
            run_info[k] = True
        elif k in ("reverse", "reinit") and v == "False":
            run_info[k] = False

    # derive t_a from the pause point
    # NOTE(review): the branch tests for the presence of the "reverse" key, not
    # its value, so a run named with reverse=False also uses the first formula.
    if "reverse" in run_info:
        run_info["t_a"] = round(run_info["t_pause"] / (1 - run_info["s_pause"]), 1)
    else:
        # runs without a "reverse" entry get reverse=False and reinit=True
        run_info["reverse"] = False
        run_info["reinit"] = True
        run_info["t_a"] = round(run_info["t_pause"] / run_info["s_pause"], 1)
    run_infos[run_name] = run_info

    # collect the distinct parameter values seen across all runs
    if run_info["t_a"] not in t_as:
        t_as.append(run_info["t_a"])

    if run_info["s_pause"] not in s_pauses:
        s_pauses.append(run_info["s_pause"])

    if run_info["pause_duration"] not in pause_durations:
        pause_durations.append(run_info["pause_duration"])

# NOTE(review): s_pauses is left in first-seen order, unlike the lists sorted here.
t_as = sorted(t_as)
pause_durations = sorted(pause_durations)
anneal_durations = sorted(anneal_durations)

# runs with t_a == 10 are excluded from the t_a values
t_as = [x for x in t_as if x != 10]
run_names = sorted(run_names, key=lambda run_name: run_infos[run_name]["s_pause"])

# keep only runs that are not reverse, whose s_pause is a multiple of 0.05 and
# whose pause_duration is not 1000
run_names_ = []
for run_name in run_names:
    run_info = run_infos[run_name]
    if (
        round(run_info["s_pause"] * 100) % 5 == 0
        and run_info["pause_duration"] != 1000
        and not run_info["reverse"]
    ):
        run_names_.append(run_name)
run_names = sorted(run_names_, key=lambda x: run_infos[x]["s_pause"])
# t_as and pause_durations above were collected before this filter was applied
run_infos = {k: v for k, v in run_infos.items() if k in run_names}

time: 383 ms (started: 2022-03-09 10:08:07 +01:00)


## KL Divergence Computations

In [4]:
# Set to True to recompute the KL divergences even if cached results exist.
compute_kl_divergences = False

dkl_path = embedding_dir / "kl_divergences.pkl"
if dkl_path.exists() and not compute_kl_divergences:
    # reuse the results saved by an earlier run of this cell
    dkls = load_artifact(dkl_path)
else:
    dkls = {}
    for run_name in run_names:
        # one job per gauge directory for this run
        jobs = (
            delayed(process_run_gauge_dir)(run_name, gauge_dir, exact_data)
            for gauge_dir in gauge_dirs
        )
        dkl_dfs = Parallel(n_jobs=n_jobs)(jobs)
        # combine the per-gauge results into statistics for the run
        dkls[run_name] = compute_stats_over_dfs(dkl_dfs)
    save_artifact(dkls, dkl_path)

time: 292 ms (started: 2022-03-09 10:08:07 +01:00)


## KL Divergence Min Value Plots

In [7]:
# Plots, for every (t_a, pause_duration) combination, the smallest mean KL
# divergence of each run against the run's s_pause, with a band of ± the
# matching std value, and saves the figure to plot_dir.
plot_dir = project_dir / f"results/plots/qbm/8x4/exact_analysis/config_{config_id:02}/embedding_{embedding_id:02}"
# creates missing parents and is a no-op when the directory already exists
plot_dir.mkdir(parents=True, exist_ok=True)

markers = ["o", "^", "v", "<", ">", "s", "p", "*", "P", "X"]
colors = [
    "tab:blue",
    "tab:orange",
    "tab:green",
    "tab:red",
    "tab:purple",
    "tab:brown",
    "tab:pink",
    "tab:gray",
    "tab:olive",
    "tab:cyan",
]
α_quench = 2.0  # not used in this cell; kept in case other cells read it
fig, ax = plt.subplots(figsize=(10, 6), dpi=300)

ax.set_xlabel(r"$s_{{quench}}$")
ax.set_ylabel(r"$\min_{s,T}\{D_{KL}(p_{exact} \ || \ p_{samples})\}$")

# ticks and limits depend only on the configuration, so set them once
if config_id == 1:
    ax.set_xticks(np.arange(0.25, 0.8, 0.05))
    ax.set_yticks(np.arange(0, 0.07, 0.01))
    ax.set_ylim(0, 0.06)
elif config_id in (2, 3, 5):
    ax.set_xticks(np.arange(0.2, 1.1, 0.1))
    ax.set_yticks(np.arange(0, 0.045, 0.005))
    ax.set_ylim(0, 0.025)

# running index of the plotted curve; selects its marker and color
i = 0
for t_a in sorted(t_as):
    run_names_plot = [
        run_name for run_name, run_info in run_infos.items() if run_info["t_a"] == t_a
    ]
    for pause_duration in pause_durations:
        x = []
        y = []
        y_err = []
        for run_name in run_names_plot:
            run_info = run_infos[run_name]
            if (
                run_info["pause_duration"] == pause_duration
                and not run_info["reverse"]
                and round(run_info["s_pause"] * 100) % 5 == 0
            ):
                dkls_run = dkls[run_name]

                # position of the smallest mean KL divergence of this run; the
                # std reported with it is the one at the same position
                means = dkls_run["means"]
                argmin = np.argmin(means)
                stds = dkls_run["stds"]

                x.append(run_info["s_pause"])
                y.append(means.iloc[argmin])
                y_err.append(stds.iloc[argmin])

        if x and y:
            sort_indices = np.argsort(x)
            x = np.array(x)[sort_indices]
            y = np.array(y)[sort_indices]
            y_err = np.array(y_err)[sort_indices]
            label = fr"$t_a = {int(t_a)}$ μs, $\Delta_{{pause}} = {int(pause_duration)}$ μs"
            # wrap around instead of raising IndexError when there are more
            # curves than predefined markers/colors
            color = colors[i % len(colors)]
            marker = markers[i % len(markers)]
            ax.fill_between(x, y - y_err, y + y_err, interpolate=True, color=color, alpha=0.10)
            ax.plot(
                x,
                y,
                marker=marker,
                markersize=10,
                linewidth=1.2,
                label=label,
                color=color,
            )
            i += 1

ax.grid(True, alpha=0.7)
ax.legend(ncol=2)
plt.tight_layout()
plt.savefig(plot_dir / "kl_divergence_mins.png")

<Figure size 3000x1800 with 1 Axes>

time: 1.11 s (started: 2022-03-09 10:17:12 +01:00)


## Anneal Schedule Plots

In [6]:
# load anneal schedule
# Reads the annealing schedule CSV that belongs to the configured solver into
# df_anneal (indexed by s) and adds the ratio Q(s) = A(s) / B(s).
qpu_params = config["qpu_params"]
if qpu_params["solver"] == "Advantage_system4.1":
    artifacts_dir = project_dir / "artifacts/qbm/log_returns/Advantage_4.1"
    csv_name = "09-1263A-A_Advantage_system4_1_annealing_schedule.csv"
elif qpu_params["solver"] == "Advantage_system5.1":
    artifacts_dir = project_dir / "artifacts/qbm/log_returns/Advantage_5.1"
    csv_name = "09-1265A-A_Advantage_system5_1_annealing_schedule.csv"
else:
    # fail loudly instead of hitting a NameError below, or silently reusing a
    # csv_name left over from an earlier run of this cell
    raise ValueError(
        f"No anneal schedule is configured for solver {qpu_params['solver']!r}"
    )
# NOTE(review): artifacts_dir is not used in this cell; the CSV is read from the
# data directory below.
df_anneal = pd.read_csv(
    project_dir
    / f"data/anneal_schedules/csv/{csv_name}",
    index_col="s",
)
# fill in s = 0.5 as the mean of its two neighbours when it is missing
if 0.5 not in df_anneal.index:
    df_anneal.loc[0.5] = (df_anneal.loc[0.499] + df_anneal.loc[0.501]) / 2
# rebind rather than sort in place so the result does not depend on mutation
df_anneal = df_anneal.sort_index()
df_anneal["Q(s)"] = df_anneal["A(s) (GHz)"] / df_anneal["B(s) (GHz)"]

time: 9.21 ms (started: 2022-03-04 18:00:53 +01:00)


In [10]:
# Plots A(s) and B(s) from df_anneal against time for a schedule that anneals
# up to s_pause over s_pause * t_a and then quenches with slope α_quench; one
# figure is saved per s_pause value.
plot_dir = project_dir / "results/plots/qbm/anneal_schedules/"
# creates missing parents and is a no-op when the directory already exists
plot_dir.mkdir(parents=True, exist_ok=True)

# schedule parameters shared by all figures
t_a = 20
pause_duration = 0
α_quench = 2 # Advantage systems
n_s_steps = 1000  # the s grid runs from 0 to 1 in steps of 1e-3

for s_pause in [0.55, 1.0]:
    t_pause = s_pause * t_a
    quench_duration = (1 - s_pause) / α_quench

    # Build the s grid from integer step counts: np.arange with a float step
    # can return one element more or less than expected, which would break the
    # positional pairing of t with the rows of df_anneal below.
    n_left = int(round(s_pause * n_s_steps)) + 1
    s_left = np.arange(n_left) / n_s_steps
    s_right = np.arange(n_left, n_s_steps + 1) / n_s_steps
    s = np.round(np.concatenate((s_left, s_right)), 3)
    # time axis: linear anneal up to the pause point, then the quench segment
    t_left = np.linspace(0, t_pause, len(s_left))
    t_right = np.linspace(
        t_pause + pause_duration,
        t_pause + pause_duration + quench_duration,
        len(s_right),
    )
    t = np.round(np.concatenate((t_left, t_right)), 3)

    # t is paired with the schedule rows by position, so the lengths must agree
    if len(t) != len(df_anneal):
        raise ValueError(
            f"time grid has {len(t)} points but the anneal schedule has "
            f"{len(df_anneal)} rows"
        )

    fig, ax = plt.subplots(figsize=(10, 6), dpi=300)

    if s_pause == 1:
        ax.set_xticks(np.arange(0, 22, 2))
    ax.plot(
        t,
        df_anneal["A(s) (GHz)"],
        color="tab:blue",
        linewidth=2,
        label="A(s)",
    )
    ax.plot(
        t,
        df_anneal["B(s) (GHz)"],
        color="tab:red",
        linewidth=2,
        label="B(s)",
    )
    ax.set_xlabel(r"$t$ [μs]")
    ax.set_ylabel(r"$E$ [GHz]")
    ax.grid(alpha=0.7)
    ax.legend()

    plt.tight_layout()
    plt.savefig(plot_dir / f"{qpu_params['solver']}-s_pause={s_pause:.2f}-pause_duration={pause_duration}.png")

<Figure size 3000x1800 with 1 Axes>

<Figure size 3000x1800 with 1 Axes>

time: 1.19 s (started: 2022-03-04 23:08:36 +01:00)
