# Data Analysis

This notebook analyses the D-Wave data and compares them with the data generated by the neural network (MADE, for the moment).

In [None]:
import glob
from math import sqrt
from typing import List, Optional, Sequence, Tuple

import matplotlib.pyplot as plt
import numpy as np
from numba import jit
from sklearn.model_selection import train_test_split
from tqdm import tqdm

from src.utils.smart_montecarlo import mcmc

In [None]:
@jit(nopython=True)
def econf(Lx: int, J: np.ndarray, S0: np.ndarray) -> float:
    """Energy per spin of one configuration on an open ``Lx`` x ``Lx`` lattice.

    Every site ``(ix, iy)`` interacts with the site at ``(ix + 1, iy)`` through
    column 0 of ``J`` and with the site at ``(ix, iy + 1)`` through column 1 of
    ``J``. A neighbour that would lie outside the lattice contributes nothing,
    i.e. the boundaries are open.

    Args:
        Lx: Linear size of the lattice.
        J: Couplings, read as ``J[bond_index, direction]``.
        S0: Spin configuration, read as ``S0[ix, iy]``.

    Returns:
        The accumulated bond energy divided by ``Lx**2``.
    """
    total = 0.
    for ix in range(Lx):
        for iy in range(Lx):
            # Flattened index of the current site.
            site = ix + (Lx * iy)

            # Indices of the two forward neighbours along each axis.
            nx = ix + 1
            ny = iy + 1

            # First-axis bonds are numbered without the missing last bond of
            # each line, hence the ``- iy`` offset; second-axis bonds reuse the
            # site index.
            bond_x = site - iy
            bond_y = site

            # The condition is evaluated first, so the out-of-range neighbour
            # is never read on the last line of the lattice.
            term_x = S0[nx, iy] * J[bond_x, 0] if nx % Lx != 0 else 0
            term_y = S0[ix, ny] * J[bond_y, 1] if ny % Lx != 0 else 0

            total += -S0[ix, iy] * (term_x + term_y)
    return total / (Lx**2)


In [None]:
def get_coupling_open(Lx: int = 22, seed: int = 12345):
    """Draw the random couplings of an open ``Lx`` x ``Lx`` lattice.

    NumPy's global random generator is re-seeded with ``seed`` before the
    draw, so the same ``(Lx, seed)`` pair always yields the same couplings.

    Args:
        Lx: Linear size of the lattice.
        seed: Seed passed to ``np.random.seed``.

    Returns:
        Array of shape ``(Lx**2 - Lx, 2)`` with standard-normal entries.
    """
    np.random.seed(seed)
    n_bonds = Lx**2 - Lx
    couplings = np.random.normal(loc=0.0, scale=1.0, size=(n_bonds, 2))
    return couplings

In [None]:
def _load_spins(path: str):
    """Load spin configurations from ``path``.

    Returns the ``"sample"`` entry when the loaded object provides one,
    otherwise the loaded object itself.
    """
    loaded = np.load(path)
    try:
        return loaded["sample"]
    except (KeyError, IndexError, ValueError):
        # A plain array rejects a string index (IndexError, or ValueError for a
        # structured array without that field); an archive without the entry
        # raises KeyError. Anything else is a real error and must propagate.
        print(f"No sample subdir found in {path} \nLoading from path...")
        return loaded


def _lattice_side(spins: np.ndarray) -> int:
    """Infer the linear lattice size from flat or already-square samples."""
    if spins.ndim >= 3 and spins.shape[-1] == spins.shape[-2]:
        # Samples are already stored as square lattices.
        return int(spins.shape[-1])
    n_spins = spins.shape[-1]
    side = int(round(sqrt(n_spins)))
    if side * side != n_spins:
        raise ValueError(
            f"Cannot arrange {n_spins} spins on a square lattice "
            f"(array shape {spins.shape})."
        )
    return side


def _lattice_energies(spins: np.ndarray, side: int, J: np.ndarray) -> np.ndarray:
    """Reshape ``spins`` to ``(-1, side, side)`` and return ``econf`` of each lattice."""
    # Fortran order keeps the sample index on the first axis and maps the flat
    # spin index k to (k % side, k // side), matching the site numbering
    # ``kx + Lx * ky`` used by ``econf``. Size-1 axes do not affect the result.
    lattices = np.reshape(spins, (-1, side, side), order='F')
    return np.asarray([econf(side, J, lattice) for lattice in lattices])


def plot_hist(gen_paths: Sequence[str], truth_path: str = "/home/scriva/pixel-cnn/data/100-v1/train_100_lattice_2d_ising_spins.npy", seed: int = 12345, labels: Optional[List[str]] = None, save: bool = False, truth_label: str = r"DWave 2000$\mu$s", save_path: str = "hist.png") -> Tuple[List[np.ndarray], np.ndarray]:
    """Plot log-scale energy histograms of generated samples against a reference set.

    The energy per spin of every configuration is computed with ``econf``
    using the couplings returned by ``get_coupling_open(Lx, seed)``. All
    histograms share the same bins, which span the full energy range of every
    dataset, and each dataset is truncated to the size of the smallest one
    before being histogrammed. Mean and minimum energy of each (untruncated)
    dataset are printed.

    Args:
        gen_paths: Paths of the sample files to compare (``.npy`` arrays or
            archives with a ``"sample"`` entry).
        truth_path: Path of the reference sample file.
        seed: Seed used to draw the couplings.
        labels: Legend labels, one per entry of ``gen_paths``. Defaults to
            ``eng0``, ``eng1``, ...
        save: When true, the figure is written to ``save_path``.
        truth_label: Legend label of the reference histogram.
        save_path: Output file used when ``save`` is true.

    Returns:
        ``(engs, eng_truth)``: the list of energy arrays, one per entry of
        ``gen_paths``, and the energy array of the reference set.

    Raises:
        ValueError: If the reference samples cannot be arranged on a square
            lattice.
    """
    if labels is None:
        labels = [f"eng{i}" for i, _ in enumerate(gen_paths)]

    truth = _load_spins(truth_path)
    square_spin = _lattice_side(truth)

    # get_coupling_open seeds the global generator itself, so no separate
    # np.random.seed call is needed here.
    J = get_coupling_open(Lx=square_spin, seed=seed)

    eng_truth = _lattice_energies(truth, square_spin, J)
    # Count configurations after the reshape so a single flat sample counts as
    # one configuration, not as one per spin.
    max_len_sample = len(eng_truth)

    engs = []
    for path in gen_paths:
        eng = _lattice_energies(_load_spins(path), square_spin, J)
        max_len_sample = min(max_len_sample, len(eng))
        engs.append(eng)

    # Font settings are applied before the figure is created so that text
    # created for this figure picks them up.
    plt.rcParams['mathtext.fontset'] = "stix"
    plt.rcParams['font.family'] = 'STIXGeneral'
    stringfont = 'serif'

    fig, ax = plt.subplots(figsize=(8, 8), facecolor='white')
    ax.tick_params(axis='y', labelsize=12)
    ax.tick_params(axis='x', labelsize=12)

    # Common bins covering every dataset, so that no histogram is clipped and
    # an empty gen_paths still works.
    all_engs = [eng_truth] + engs
    lowest = min(e.min() for e in all_engs)
    highest = max(e.max() for e in all_engs)
    bins = np.linspace(lowest, highest).tolist()

    for i, eng in enumerate(engs):
        ax.hist(eng[:max_len_sample], bins=bins, log=True, label=f"{labels[i]}", alpha=0.8)
        print(f"\neng {i}\nmean: {eng.mean()}\nmin: {eng.min()} ({np.sum(eng==eng.min())} time(s))")

    ax.hist(eng_truth[:max_len_sample], bins=bins, log=True, label=truth_label, histtype='bar', ec='black', alpha=0.2)
    print(f"\nDWave data eng\nmean: {eng_truth.mean()}\nmin: {eng_truth.min()}  ({np.sum(eng_truth==eng_truth.min())} time(s))")

    ax.set_ylabel("Count", fontsize=18, fontfamily=stringfont)
    ax.set_xlabel(r"$\frac{E}{N}$", fontsize=18, fontfamily=stringfont)

    ax.set_ylim(1, max_len_sample)
    ax.legend(loc='best', labelspacing=0.4, fontsize=14, borderpad=0.2)

    if save:
        fig.savefig(save_path, edgecolor='white', facecolor=fig.get_facecolor(), bbox_inches='tight')

    return engs, eng_truth

## Check energy

Load the dataset as saved by the [D-Wave system](https://cloud.dwavesys.com/leap/login/?next=/leap/). Since the annealing process has a maximum run time, the data are saved in files of at most 10k samples each. The same folder also contains two energy files: one computed directly by the D-Wave annealer and one computed with our custom algorithm. Both should give the same result. Here we load the data files, merge them, and split them into two datasets, namely the train and validation datasets.

In [None]:
# Collect every configuration file in the working directory. glob returns
# matches in arbitrary order, so sort them to make the concatenated dataset
# (and everything derived from it) reproducible.
files = sorted(glob.glob('configs_*'))
#print(files)
if not files:
    raise FileNotFoundError("No 'configs_*' files found in the current working directory.")

arrs = [np.load(file) for file in files]
dataset = np.concatenate(arrs, axis=0)
print(dataset.shape)
# A fixed random_state makes the train/test split identical on every run.
train_data, test_data = train_test_split(dataset, test_size=0.15, random_state=12345)

# Uncomment the following lines to save the datasets.
# np.save('DWAVE-train-484spins-1nn-100mu', train_data)
# np.save('DWAVE-test-484spins-1nn-100mu', test_data)
np.save("1000mu", dataset)


Here we load a small part of the original dataset to check that the energy is computed correctly.

In [None]:
# Configurations, annealer-reported energies and our own energies for the
# first file of the 10 microsecond run.
sample = np.load("../data/1-50k_open-simple_10mu/configs_0.npy")
dwave_eng = np.load("../data/1-50k_open-simple_10mu/dwave-engs_0.npy")
eng = np.loadtxt("../data/1-50k_open-simple_10mu/energies_0.txt")

# Compare the first four entries. Our values are multiplied by 484 (= 22**2,
# the normalisation econf applies for Lx = 22) and sign-flipped.
# NOTE(review): presumably the text file stores per-spin energies with the
# opposite sign convention to the annealer - confirm.
dwave_head = dwave_eng[:4]
ours_head = -eng[:4]*484
print(f"Energy (from DWAVE) {dwave_head}\nEnergy (our algo) {ours_head}")

## Plot histograms

Here we want to check whether the neural network has been trained well; a good measure could also be the mean energy of the D-Wave data and of the generated data.

In [None]:
# Sample files to histogram, one per annealing time.
# Alternative input: ["/home/beppe/neural-mcmc/sample-100000_size-484_2021-11-15_14_31_46.npz"]
path = [
    "data/datasets/484-1nn-10mu/DWAVE-test-484spins-10mu.npy",
    "data/datasets/484-1nn-50mu/DWAVE-test-484spins-50mu.npy",
    "data/datasets/484-1nn-100mu/DWAVE-test-484spins-1nn-100mu.npy",
]

# Legend labels, matched to `path` by position; with three paths the fourth
# label is not used by plot_hist.
labels = [
    r"Dwave 10$\mu$s",
    r"Dwave 50$\mu$s",
    r"Dwave 100$\mu$s",
    r"NN Re-Weighted",
]

# Reference dataset.
# Alternative: "/home/beppe/neural-mcmc/data/datasets/484-1nn-100mu/DWAVE-train-484spins-1nn-100mu.npy"
truth = "/home/beppe/neural-mcmc/2000mu.npy"

engs, eng_truth = plot_hist(gen_paths=path, truth_path=truth, labels=labels, save=True)


## Results with Trained MADE on DWave Data

Computing the acceptance rate for some $\beta \in (0,5)$, we notice that there exists an effective $\beta$, call it $\beta_{\mathrm{eff}}$, that increases with the annealing time selected on the D-Wave machine.

In [None]:
# Sample archives passed to mcmc in the acceptance-rate scan.
datasets = [
    "/home/beppe/neural-mcmc/data/generated/size-484_sample-100000_2.npz",
    "/home/beppe/neural-mcmc/data/generated/size-484_sample-100000_09-05-10.npz",
    # "size-10_sample-100001_nade.npz",
    # "size-100_sample-100001_1p8bmv7t.npz",
]

# Beta values scanned: 0.1, 0.2, ... in steps of 0.1, stopping before 3.
betas = np.arange(start=0.1, stop=3., step=0.1)

In [None]:
# Scan every beta for every dataset and collect what mcmc returns:
# acc_rates[d][b] is the result for datasets[d] at betas[b].
# NOTE(review): 50000 is mcmc's second positional argument, presumably the
# number of Monte Carlo steps - confirm against src.utils.smart_montecarlo.
#
# The loop variable is deliberately not called `dataset`: that name holds the
# concatenated configuration array built earlier in the notebook and must not
# be overwritten by a path string.
acc_rates = []
for dataset_path in tqdm(datasets, leave=True):
    acc_rates.append([mcmc(beta, 50000, dataset_path) for beta in betas])

In [None]:
# Font settings are applied before the figure is created so that text created
# for this figure picks them up.
plt.rcParams['mathtext.fontset'] = "stix"
plt.rcParams['font.family'] = 'STIXGeneral'

stringfont = 'serif'

# Legend entries, one per curve in acc_rates. A dedicated name is used so the
# `labels` list of the histogram cell is not overwritten.
curve_labels = [r"10 $\mu$s", r"50 $\mu$s"]
assert len(curve_labels) >= len(acc_rates), "one legend label is needed per dataset"

fig, ax = plt.subplots(figsize=(8, 8), facecolor='white')

for acc_rate, curve_label in zip(acc_rates, curve_labels):
    ax.plot(betas, acc_rate, "-.", label=curve_label)#, linewidth=3.)

ax.set_xlim(0, 3)
ax.set_ylim(0, 5)

ax.tick_params(axis='y', labelsize=10)
ax.tick_params(axis='x', labelsize=10)

# Fix the tick positions before assigning label strings; otherwise the strings
# are attached to whatever ticks the automatic locator happens to choose.
ax.set_xticks([0, 0.5, 1, 1.5, 2, 2.5, 3])
ax.set_xticklabels([r'0', r'0.5', r'1', r'1.5', r'2', r'2.5', r'3'], fontsize=12, fontfamily=stringfont)
ax.set_yticks([0, 1, 2, 3, 4, 5])
ax.set_yticklabels([r'0', r'1', r'2', r'3', r'4', r'5'], fontsize=12, fontfamily=stringfont)

# Inward ticks on all four sides, labelled only on the bottom and left.
ax.tick_params(top=True, right=True, labeltop=False, labelright=False, direction='in')

ax.set_ylabel(r"$\mathrm{A_r}$[%]", fontsize=18, fontfamily=stringfont)
ax.set_xlabel(r"$\mathrm{\beta}$", fontsize=18, fontfamily=stringfont)

ax.legend(loc='best', fontsize=18, labelspacing=0.4, borderpad=0.2)

fig.savefig("ar-vs-beta.png", edgecolor='white', facecolor=fig.get_facecolor(), bbox_inches='tight')