# Data Analysis

This notebook is devoted to the analysis of the DWAVE data and to their comparison with the data generated by the neural network (MADE for the moment).

In [None]:
import glob

import numpy as np
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from tqdm import tqdm

from src.utils.utils import plot_hist, block_mean, block_std, get_energy
from src.utils.montecarlo import neural_mcmc

## Check energy

Load the dataset as it is saved by the [DWAVE system](https://cloud.dwavesys.com/leap/login/?next=/leap/). Since there is a maximum time for the annealing process, the data are saved in files with a maximum size of 10k samples each. The same folder also contains two energy files, one computed directly by the DWAVE annealer and the other with our custom algorithm. Both should give the same result. Here we load the dataset and split it into two files, namely the train and validation datasets.

In [None]:
# Collect every configuration chunk of the export. glob returns matches in
# arbitrary (filesystem-dependent) order, so sort them to keep the concatenated
# dataset identical from run to run.
files = sorted(glob.glob('484spins-1nn-uniform-10mus/configs_*'))
if not files:
    # Fail with a clear message instead of np.concatenate's generic ValueError.
    raise FileNotFoundError("No files match '484spins-1nn-uniform-10mus/configs_*'")

# Stack all chunks along axis 0 into a single array.
dataset = np.concatenate([np.load(file) for file in files], axis=0)
print(dataset.shape)

# 85% / 15% split; the fixed seed makes the saved train/test files reproducible.
train_data, test_data = train_test_split(dataset, test_size=0.15, random_state=12345)

# Persist both splits (np.save appends ".npy" to the names below).
# Comment these two lines out to skip saving.
np.save('train-484spins-1nn-uniform-10mus', train_data)
np.save('test-484spins-1nn-uniform-10mus', test_data)


Here we load a small part of the original dataset in order to check whether the energy is computed correctly.

In [None]:
# Load the first chunk of the raw export together with both energy files.
data_dir = "../data/1-50k_open-simple_10mu"
sample = np.load(data_dir + "/configs_0.npy")
dwave_eng = np.load(data_dir + "/dwave-engs_0.npy")
eng = np.loadtxt(data_dir + "/energies_0.txt")

# Our energies are rescaled by -484 before being shown next to the D-Wave ones
# (presumably `eng` is stored per spin with the opposite sign, 484 being the
# number of spins -- TODO confirm).
ours = -eng[:4]*484
print(f"Energy (from DWAVE) {dwave_eng[:4]}\nEnergy (our algo) {ours}")

## Plot histograms

Here we want to check whether the neural network has been trained well. A good measure could also be the comparison between the mean energy of the DWAVE data and that of the generated data.

In [None]:
# Inputs for the energy histogram: one file of generated samples and the
# D-Wave training set it is compared against.
path = [
    "data/generated/sample-400000_size-484_2022-01-24_14-48-32.npz",
]
labels = [r"MADE", r"DWave $100\mu$s"]
truth = "data/datasets/484-1nn-100mu/DWAVE-train-484spins-1nn-100mu.npy"
ground_state = -1.2769078780
couplings_path = "data/couplings/484spins_open-1nn.txt"

# NOTE(review): presumably the two return values are the energies of the
# generated samples and of the `truth` dataset -- confirm against plot_hist.
engs, eng_truth = plot_hist(
    path,
    couplings_path,
    truth,
    ground_state=ground_state,
    labels=labels,
    save=True,
)


## Results with Trained MADE on DWave Data

Computing the acceptance rate for some $\beta \in (0,5)$, we notice that there exists an effective $\beta$, let's call it $\beta_{eff}$, that increases with the annealing time selected on the D-Wave machine.

In [None]:
# Generated-sample files used for the acceptance-rate scan. They differ only in
# their timestamp (presumably one file per annealing time -- TODO confirm).
generated_dir = "data/generated"
run_stamps = ("2022-01-28_11-09-48", "2022-01-28_11-10-12", "2022-01-24_14-48-32")
datasets = [f"{generated_dir}/sample-400000_size-484_{stamp}.npz" for stamp in run_stamps]

# Inverse temperatures to scan: 0.1, 0.2, ... in steps of 0.1, stopping before 2.
betas = np.arange(0.1, 2., step=0.1)

In [None]:
# Acceptance rate of the neural MCMC for every (generated file, beta) pair:
# acc_rates[i][j] belongs to datasets[i] and betas[j].
# The loop variable is deliberately not called `dataset`: that name already
# holds the concatenated configuration array built when the data were loaded,
# and rebinding it to a path string would silently break a re-run of that code.
acc_rates = []
for dataset_path in tqdm(datasets, leave=True):
    acc_rate = []
    for beta in betas:
        # Only the third return value is kept (stored as the acceptance rate);
        # 10000 is presumably the number of MCMC steps -- TODO confirm.
        _, _, ar = neural_mcmc(beta, 10000, dataset_path, couplings_path, "made")
        acc_rate.append(ar)
    acc_rates.append(acc_rate)

In [None]:
# Acceptance rate versus beta, one curve per annealing time.

# Set the fonts before the figure exists so they are already in effect when the
# axes and their text artists are created.
plt.rcParams['mathtext.fontset'] = "stix"
plt.rcParams['font.family'] = 'STIXGeneral'

fig, ax = plt.subplots(figsize=(8,8), facecolor='white')

stringfont = 'serif'

# Inward ticks on all four sides, labels only on the left and bottom.
ax.tick_params(axis='both', labelsize=10)
ax.tick_params(top=True, right=True, labeltop=False, labelright=False, direction='in')

# Kept under its own name so the histogram `labels` list is not overwritten.
annealing_labels = [r"10 $\mu$s", r"50 $\mu$s", r"100 $\mu$s"]

for acc_rate, label in zip(acc_rates, annealing_labels):
    # Plot against the actual beta values: without an explicit x the curve
    # would be drawn against the list index, which does not match the
    # beta axis label and limits below.
    ax.plot(betas, acc_rate, "-.", label=label, linewidth=3.)

ax.set_xlim(0, 3)
ax.set_ylim(0, 7)

ax.set_ylabel(r"$\mathrm{A_r}$[%]", fontsize=18, fontfamily=stringfont)
ax.set_xlabel(r"$\mathrm{\beta}$", fontsize=18, fontfamily=stringfont)

ax.legend(loc='best', fontsize=18, labelspacing=0.4, borderpad=0.2)

# Uncomment to write the figure to disk.
#plt.savefig("ar-vs-beta.png", edgecolor='white', facecolor=fig.get_facecolor(), bbox_inches='tight')

## Energy versus Step

It is useful to compare the thermalization time, i.e., the time that the simulation needs to converge, of the single spin flip MCMC and of the neural MCMC.

In [None]:
# Energy per spin versus Monte Carlo step on a logarithmic step axis.

# rcParams are set before the figure is created: 'axes.linewidth' is only
# picked up by axes created afterwards.
plt.rcParams['mathtext.fontset'] = "stix"
plt.rcParams['font.family'] = 'STIXGeneral'
plt.rcParams['axes.linewidth'] = 1.5

fig, ax = plt.subplots(figsize=(8,8), facecolor='white')

stringfont = 'serif'

# Convert once and reuse: mean and sample standard deviation (ddof=1) across
# the first axis of `engs`, i.e. one value per step.
engs_arr = np.asarray(engs)
eng_mean = engs_arr.mean(axis=0)
eng_std = engs_arr.std(axis=0, ddof=1)
steps = np.arange(engs_arr.shape[1])

# Shaded band of one standard deviation around the mean curve.
ax.fill_between(steps, eng_mean + eng_std, eng_mean - eng_std, alpha=0.1, color="b")
ax.plot(steps, eng_mean, label=r"MCMC Single Spin Flip $\beta=2$", color="b", linewidth=1.5)
ax.plot(eng_truth, label=r"Neural MCMC $\beta=2$", color='orange', alpha=0.7)

ax.set_xscale("log")

ax.tick_params(axis='both', labelsize=18)
ax.tick_params(which='both', top=True, right=True, direction='in')

# axhline spans the full width of the axes, so no data-space x limits (which
# must be strictly positive on a log axis) are needed for the reference line.
# NOTE(review): this value differs from the ground_state passed to plot_hist
# (-1.2769078780) -- confirm which system it refers to.
ax.axhline(-1.2210371388, color='red', linestyle='dashed', label="Ground State", linewidth=3.)

# A log-scaled axis cannot start at 0; begin at the first positive step.
ax.set_xlim(1, 1000000)
ax.set_ylim(-1.25, 0.3)

ax.set_ylabel(r"$\mathrm{\frac{E}{N}}$", fontsize=26, fontfamily=stringfont, fontweight='bold')
ax.set_xlabel(r"$\mathrm{\tau}$", fontsize=26, fontfamily=stringfont, fontweight='bold')

ax.legend(loc='best', fontsize=18, labelspacing=0.4, borderpad=0.2)

fig.savefig("energy-vs-steps.png", edgecolor='white', facecolor=fig.get_facecolor(), bbox_inches='tight')

## Block Mean and STD

Given the long thermalization time shown in the previous plot, we must compute the energy mean in a proper way, using the block average procedure. The first step is to choose a suitable block length.

In [None]:

# Candidate block lengths: 1, 6, 11, ... (all below 10000).
len_blocks = range(1, 10000, 5)

# Blocked error and blocked mean of `engs` for every candidate length
# (explicitly without skipping any initial steps).
mcmc_stats = [
    (block_std(engs, size, skip=0), block_mean(engs, size, skip=0))
    for size in len_blocks
]
block_stds = np.asarray([err for err, _ in mcmc_stats])
block_means = np.asarray([avg for _, avg in mcmc_stats])

# Same statistics for the single `eng_truth` series, wrapped in a list; here
# the helpers' own default for `skip` applies.
truth_stats = [
    (block_std([eng_truth], size), block_mean([eng_truth], size))
    for size in len_blocks
]
truth_std = np.asarray([err for err, _ in truth_stats])
truth_mean = np.asarray([avg for _, avg in truth_stats])

fig, ax = plt.subplots(figsize=(8,8), facecolor='white')

plt.rcParams['mathtext.fontset'] = "stix"
plt.rcParams['font.family'] = 'STIXGeneral'
plt.rcParams['axes.linewidth'] = 1.5

stringfont = 'serif'

# Inward ticks on all four sides, labels only on the left and bottom.
plt.tick_params(axis='both', labelsize=18)
plt.tick_params(top=True, right=True, labeltop=False, labelright=False, direction='in')

# Third-to-last column of the blocked errors versus block length
# (presumably the beta = 1.8 series, as the legend states -- TODO confirm).
plt.plot(np.asarray(len_blocks), block_stds[:, -3], "o", label=r"Error MCMC (2000 flip) $\beta=1.8$")

plt.ylabel(r"$\mathrm{\frac{\sigma}{\sqrt{n}}}$", fontsize=26, fontfamily=stringfont)
plt.xlabel(r"Block Size", fontsize=22, fontfamily=stringfont)

plt.legend(loc='best', fontsize=18, labelspacing=0.4, borderpad=0.2)

plt.savefig("std-vs-blocksize_beta.png", edgecolor='white', facecolor=fig.get_facecolor(), bbox_inches='tight')

In [None]:
# NOTE: this rebinds `couplings_path`, which pointed to the 484-spin couplings
# in the histogram section, to the 100-spin couplings.
couplings_path = "data/couplings/100spins_open-1nn.txt"

# Beta values exactly as they are spelled inside the file names.
beta_tags = ["0.8", "1.0", "1.2", "1.4", "1.6", "1.8", "2.0", "2.2"]

# The neural-MCMC runs are told apart only by the step count in the file name.
neural_template = "100spins-neuralMCMC/100spins_beta{}_neural-mcmc_{}steps.npz"
mu100_paths = [neural_template.format(tag, 199997) for tag in beta_tags]
mu10_paths = [neural_template.format(tag, 199998) for tag in beta_tags]
mu1_paths = [neural_template.format(tag, 199999) for tag in beta_tags]

# Single-spin-flip MCMC runs, one file per beta.
single_spin = [
    f"100spins-open1nn-single/100spins-seed12345-sample200000-beta{tag}.npy"
    for tag in beta_tags
]

# Energies of every run; the meaning of the leading 10 is defined by
# get_energy -- TODO document.
eng_1mu = get_energy(10, mu1_paths, couplings_path)
eng_10mu = get_energy(10, mu10_paths, couplings_path)
eng_100mu = get_energy(10, mu100_paths, couplings_path)
eng_single = get_energy(10, single_spin, couplings_path)

In [None]:
# Blocked mean energy (with blocked error bars) versus beta for the three
# neural-MCMC runs and the single-spin-flip MCMC.
fig, ax = plt.subplots(figsize=(8,8), facecolor='white')

plt.rcParams['mathtext.fontset'] = "stix"
plt.rcParams['font.family'] = 'STIXGeneral'
plt.rcParams['axes.linewidth'] = 1.5

stringfont = 'serif'

# Inward ticks on all four sides, labels only on the left and bottom.
plt.tick_params(axis='both', labelsize=18)
plt.tick_params(top=True, right=True, labeltop=False, labelright=False, direction='in')

beta_values = [0.8, 1, 1.2, 1.4, 1.6, 1.8, 2., 2.2]
shared_style = dict(elinewidth=1.5, linewidth=.1, fillstyle='none', markersize=8, markeredgewidth=2)

# (energies, block length, extra kwargs for block_mean, extra kwargs for block_std, marker, legend label)
# NOTE(review): the single-spin series skips 800 samples for the mean but 1000
# for the error bar -- confirm whether this mismatch is intended.
series = [
    (eng_1mu, 2000, {}, {}, 'o', r"Neural MCMC $1\mu$s"),
    (eng_10mu, 2000, {}, {}, '^', r"Neural MCMC $10\mu$s"),
    (eng_100mu, 2000, {}, {}, 'd', r"Neural MCMC $100\mu$s"),
    (eng_single, 3000, {'skip': 800}, {'skip': 1000}, 's', r"MCMC (2000 flips)"),
]
for energies, block_len, mean_kw, std_kw, marker, label in series:
    plt.errorbar(
        beta_values,
        block_mean(energies, block_len, **mean_kw),
        yerr=block_std(energies, block_len, **std_kw),
        marker=marker,
        label=label,
        **shared_style,
    )

# Reference line drawn slightly wider than the visible beta range.
plt.hlines(-1.2210371388, xmin=0.6, xmax=2.4, colors='red', linestyles='dashed', label="Ground State", linewidth=3)

plt.xlim(0.7, 2.3)

plt.ylabel(r"$\mathrm{\frac{E}{N}}$", fontsize=26, fontfamily=stringfont, fontweight='bold')
plt.xlabel(r"$\mathrm{\beta}$", fontsize=26, fontfamily=stringfont, fontweight='bold')

plt.legend(loc='best', fontsize=18, labelspacing=0.4, borderpad=0.2)

plt.savefig("energy_neural-vs-mcmc.png", edgecolor='white', facecolor=fig.get_facecolor(), bbox_inches='tight')