In [None]:
import numpy as np
import sys
import os
import scipy.stats as stats
from sklearn.metrics import mean_absolute_error as mae

# Make the repository root importable so the project-local `datagen` module
# can be imported from this notebook's sub-directory.
# os.getcwd() replaces the IPython-only `%pwd` magic: it yields the same
# working directory but keeps the cell valid plain Python.
current_dir = os.getcwd()
parent_dir = os.path.dirname(current_dir)
# Guard against piling up duplicate entries when the cell is re-run.
if parent_dir not in sys.path:
    sys.path.append(parent_dir)

from datagen import synthetic_bn, synthetic_mn

In [10]:
# Hyperparameters for results of different datasets
# These values select the synthetic dataset and fill in the placeholders of
# the result-file paths that the later cells load.

# Dataset family: "bn" or "mn" (selects the data-loading branch and the path prefix).
# NOTE(review): this name shadows the builtin `type` for the rest of the notebook.
type = "mn"
# First argument to synthetic_bn / synthetic_mn; also part of the result paths.
k = 4
# Second argument to synthetic_bn / synthetic_mn (presumably the random seed — confirm in datagen).
rseed = 42
# Fills the `epoch_{tr}` field of the result file names.
tr = 1000
# Fills the field after `hmc{L}_` in the PGM result file names; its meaning is not shown here — TODO confirm.
ft = 40

In [None]:
# Build `y_test`, the ground-truth targets that predictions are scored against.
if type == "bn":
    # Get y_true for BN data
    input_shape, train_ds, test_ds = synthetic_bn(k, rseed)

    # Collect every test input row into a single 2-D array (one row per sample).
    x_test = np.vstack(
        [row for features, _ in test_ds for row in features.numpy()]
    )

    # Positional weights, most significant bit first, so that `row @ bits`
    # turns a row of 0/1 values into its index in the probability table.
    bits = np.array([1 << i for i in reversed(range(input_shape))], dtype=np.int64)

    # The ground truth is the stored true probability of each test input.
    # NOTE(review): allow_pickle=True can execute arbitrary code when loading;
    # only use it on files produced by this project.
    with np.load(f"../synthetic_data/bn/bn_synthetic_{k}.npz", allow_pickle=True) as dat:
        true_p = dat['prob']

    # Vectorised lookup for all rows at once. The cast keeps the lookup valid
    # when the inputs are stored as floats (a float index raises IndexError).
    # Assumes the inputs are exactly 0/1 valued — TODO confirm against datagen.
    idx = np.rint(x_test @ bits).astype(np.int64)
    y_test = np.asarray(true_p[idx])

elif type == "mn":
    # Get y_true for MN data
    input_shape, train_ds, test_ds = synthetic_mn(k, rseed)
    y_test = np.concatenate([target.numpy() for _, target in test_ds])

else:
    # Fail loudly instead of leaving `y_test` undefined (or stale from an
    # earlier run of this cell with a different `type`).
    raise ValueError(f"Unknown dataset type {type!r}; expected 'bn' or 'mn'")

In [12]:
# Collect the MAE and run time of the NN baseline and of each PGM setting in
# `Ls`, for every run.
Ls = [-1, 10, 100, 1000]

# Run indices whose result files are loaded (1..99, as in the original loop).
runs = range(1, 100)
n_runs = len(runs)

# Size the arrays by the number of runs actually loaded. Allocating 100 slots
# with np.empty while filling only indices 1..99 left slot 0 as uninitialised
# memory, which then leaked into every mean and test computed from the arrays.
# Any later statistic should take its sample count from len(MAEs_nn).
MAEs_nn = np.empty(n_runs)
times_nn = np.empty(n_runs)
MAEs_pgm = np.empty((n_runs, len(Ls)))
times_pgm = np.empty((n_runs, len(Ls)))

for row, r in enumerate(runs):
    # NN baseline for this run. `with` closes the .npz handle straight away
    # instead of keeping hundreds of files open until garbage collection.
    with np.load(f"your_hist_path/{type}{k}/run_{r}/hist_{type}{k}_{r}_bp_-4_epoch_{tr}.npz", allow_pickle=True) as hist_nn:
        prob_nn = hist_nn['proba'].reshape(-1)
        times_nn[row] = hist_nn['time']
    MAEs_nn[row] = mae(y_test, prob_nn)

    # One PGM result per value of L (one column per entry of `Ls`).
    for col, L in enumerate(Ls):
        with np.load(f"your_res_path/{type}{k}/run_{r}/res_{type}{k}_{r}_epoch_{tr}_-4_hmc{L}_{ft}_-4.npz", allow_pickle=True) as hist_pgm:
            prob_pgm = hist_pgm['prob'].reshape(-1)
            times_pgm[row, col] = hist_pgm['time']
        MAEs_pgm[row, col] = mae(y_test, prob_pgm)

In [None]:
# Summarise the collected runs: average MAE / time per method, plus two
# one-sided paired tests of whether each PGM setting has lower MAE than the NN.
avg_mae_nn = np.mean(MAEs_nn)
avg_time_nn = np.mean(times_nn)
avg_mae_pgm = np.mean(MAEs_pgm, axis=0)
avg_time_pgm = np.mean(times_pgm, axis=0)

# Number of paired runs, taken from the data instead of a hard-coded 100 so
# the sign test always matches the number of rows actually compared.
n_runs = len(MAEs_nn)
# Per PGM setting: in how many runs its MAE is strictly below the NN's MAE.
cnt = (MAEs_pgm < MAEs_nn[:, None]).sum(axis=0)

print("NN")
print("MAE: ", avg_mae_nn)
print("Time: ", avg_time_nn)
print("-----------------------------------")
for i, L in enumerate(Ls):
    print(L)
    print("MAE: ", avg_mae_pgm[i])
    print("Time: ", avg_time_pgm[i])
    # Paired t-test, H1: mean NN MAE is greater than mean PGM MAE.
    print("T-Test p: ", stats.ttest_rel(MAEs_nn, MAEs_pgm[:, i], alternative='greater').pvalue)
    # Sign test, H1: PGM beats NN in more than half of the runs.
    print("Sign Test p: ", stats.binomtest(int(cnt[i]), n=n_runs, p=0.5, alternative='greater').pvalue)
    print("-----------------------------------")