In [None]:
%load_ext autoreload
%autoreload 2

import src.ksd.models as models
from src.ksd.find_modes import find_modes, pairwise_directions, run_bfgs
from src.ksd.langevin import RandomWalkMH, RandomWalkBarker
from src.ksd.ksd import KSD
from src.ksd.kernel import IMQ
from src.ksd.bootstrap import Bootstrap
from tqdm.notebook import tqdm, trange

import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow_probability as tfp
tfd = tfp.distributions
import matplotlib.pyplot as plt
import seaborn as sns

import kgof.density as density
import kgof.data as data

MCMCKernel = RandomWalkMH # RandomWalkBarker 

In [2]:
def rbm_samples_scores(
    seed,
    m,
    sigma,
    c_loc,
    dx=50,
    dh=40,
    burnin_number=2000,
):
    """
    Generate data for the Gaussian-Bernoulli Restricted Boltzmann Machine (RBM) experiment.
    The entries of the matrix B are perturbed.
    This experiment was first proposed by Liu et al., 2016 (Section 6)
    inputs: seed: non-negative integer
            m: number of samples
            sigma: standard deviation of Gaussian noise
            dx: dimension of observed output variable
            dh: dimension of binary latent variable
            burnin_number: number of burn-in iterations for Gibbs sampler
    outputs: 2-tuple consisting of
            (m,dx) array of samples generated using the perturbed RBM
            (m,dx) array of scores computed using the non-perturbed RBM (model)
    """
    # the perturbed model is fixed, randomness comes from sampling
    rs = np.random.RandomState(0)

    # Model p
    B = rs.randint(0, 2, (dx, dh)) * 6 - 3.0
    b = rs.randn(dx)
    c = rs.randn(dh) + c_loc
    p = density.GaussBernRBM(B, b, c)

    # Sample from q
    q = density.GaussBernRBM(B, b, c)
    ds = q.get_datasource()
    ds.burnin = burnin_number
    samples = ds.sample(m, seed=seed).data()

    # Compute score under p
    scores = p.grad_log(samples)

    return samples, scores, q

In [3]:
n = 1000
dim = 5 # 10
dh = 20 # 20
sigma = 0.
c_loc_on = 0.
c_loc_off = -2.
seed = 0

In [4]:
off_samples, _, off_target = rbm_samples_scores(seed, n, sigma, c_loc_off, dim, dh)
# off_target.log_prob = lambda x: tf.Variable(off_target.log_den(x), dtype=tf.float32)
off_target.log_prob = off_target.log_den
log_prob_off_fn = off_target.log_prob

on_samples, _, target = rbm_samples_scores(seed, n, sigma, c_loc_on, dim, dh)
# target.log_prob = lambda x: tf.Variable(target.log_den(x), dtype=tf.float32)
target.log_prob = target.log_den
log_prob_fn = target.log_prob

2022-03-02 17:58:09.027515: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-03-02 17:58:09.613902: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1510] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 9658 MB memory:  -> device: 0, name: GeForce RTX 2080 Ti, pci bus id: 0000:b2:00.0, compute capability: 7.5


In [6]:
log_prob_fn(tf.cast(on_samples, dtype=tf.float32))

tf.Tensor(
[[ -1.0964608   31.571644    -9.073103    -6.932337     9.135106  ]
 [ -8.753344    10.014806   -29.880587    14.243454    -3.4977088 ]
 [  1.6559467   32.368774   -10.167375    -7.0314856    9.484306  ]
 [ -6.997783    11.957458   -29.45609     14.584537    -5.455504  ]
 [ -2.1302385   22.645164    -0.29366672 -14.482177    19.084684  ]], shape=(5, 5), dtype=float32) [[-3.  3.  3. -3.  3.]
 [-3.  3.  3. -3. -3.]
 [-3.  3. -3.  3.  3.]
 [ 3. -3.  3. -3.  3.]
 [-3.  3. -3.  3.  3.]]


InvalidArgumentError: cannot compute Sub as input #1(zero-based) was expected to be a double tensor but is a float tensor [Op:Sub]

In [None]:
plt.scatter(on_samples[:, 0], on_samples[:, 1], label="on target", alpha=0.5)
plt.scatter(off_samples[:, 0], off_samples[:, 1], label="off target", alpha=0.5)
_ = plt.legend()

In [None]:
tf.experimental.numpy.random.randn(10, 5)

In [None]:
nrep = 50
num_boot = 800
alpha = 0.05

kernel = IMQ(med_heuristic=True)
ksd = KSD(target=target, kernel=kernel)
bootstrap = Bootstrap(ksd, x_t.shape[0])

In [None]:
multinom_samples = bootstrap.multinom.sample((nrep, num_boot))

multinom_one_sample = multinom_samples[0, :]

In [None]:
def repeat_experiment(multinom_samples, 
                      bootstrap, 
                      alpha, 
                      num_boot,
                      x_t):
    
    p_val_list = []
    for r in trange(multinom_samples.shape[0]):
        multinom_one_sample = multinom_samples[r, :, :]
        _, p_val = bootstrap.test_once(alpha=alpha, num_boot=num_boot, X=x_t, multinom_samples=multinom_one_sample)
        p_val_list.append(p_val)
        
    return p_val_list


In [None]:
p_val_list = repeat_experiment(
    multinom_samples, 
    bootstrap, 
    alpha, 
    num_boot,
    x_t)

In [None]:
p_val_df = pd.DataFrame({"p_val": p_val_list})
sns.ecdfplot(data=p_val_df)
plt.axis(xmin=-0.01, xmax=1., ymin=0, ymax=1.01)