In [1]:
import matplotlib.pyplot as plt
import matplotlib as mpl
import numpy as np
import os
import pickle
import scipy
import time 
from tqdm import tqdm

import sys 
sys.path.append('../../')
from model_comparison.utils import *
from model_comparison.mdns import *
from model_comparison.models import PoissonModel, NegativeBinomialModel

from delfi.distribution.mixture import MoG
%matplotlib inline

## Generate large data sets

In [2]:
# Global configuration: one seed shared by the NumPy generator below and by
# the simulator models constructed later in the notebook.
seed = 2
rng = np.random.RandomState(seed)

# Minute-resolution prefix and output folder for figure files.
figure_folder = '../figures/'
time_stamp = time.strftime('%Y%m%d%H%M_')

In [3]:
# Simulation sizes: `sample_size` is handed to both simulator models,
# `ntrain` / `ntest` are the total numbers of training / test data sets.
sample_size = 10
ntrain = 10 ** 6
ntest = 100

# Gamma (shape k, scale theta) hyper-parameters for the two priors of the
# negative-binomial model.
k2, theta2 = 2., 1.0
k3, theta3 = 2., 2.

# Gamma prior for the Poisson model: the scale theta1 is fixed and the shape
# k1 is derived so that k1 * theta1 == (k2 * theta2) * (k3 * theta3), i.e. the
# mean of this prior equals the product of the means of the two priors above.
theta1 = 2.0
k1 = (k2 * theta2 * k3 * theta3) / theta1
print(k1)


# Both simulators share the sample size and the global seed.
model_poisson = PoissonModel(sample_size=sample_size, seed=seed, n_workers=1)
model_nb = NegativeBinomialModel(sample_size=sample_size, seed=seed, n_workers=1)

4.0


In [4]:
# Frozen Gamma priors (first argument is the shape, `scale` the scale):
#   prior_lam            -> parameter of the Poisson model
#   prior_k, prior_theta -> the two parameters of the negative-binomial model
prior_lam = scipy.stats.gamma(k1, scale=theta1)
prior_k = scipy.stats.gamma(k2, scale=theta2)
prior_theta = scipy.stats.gamma(k3, scale=theta3)

In [5]:
# Draw the training parameters: half of the `ntrain` simulations go to each
# model. The seeded `rng` is passed as `random_state` so the prior draws are
# reproducible; without it scipy samples from NumPy's unseeded global state.
n = ntrain
params_poi = prior_lam.rvs(size=int(n / 2), random_state=rng)
# one row per draw, columns (k, theta)
params_nb = np.vstack((prior_k.rvs(size=int(n / 2), random_state=rng),
                       prior_theta.rvs(size=int(n / 2), random_state=rng))).T

In [6]:
# Simulate the training data from each model with the prior draws above.
# Presumably `gen` returns one row per parameter draw (the summary statistics
# are later taken along axis=1) — confirm against the model classes.
data_poi = model_poisson.gen(params_poi)
data_nb = model_nb.gen(params_nb)

## Calculate summary statistics

In [7]:
# Summary statistics of every training data set: column 0 is the mean and
# column 1 the (population, ddof=0) variance taken along axis 1.
stats_poi = np.stack((data_poi.mean(axis=1), data_poi.var(axis=1)), axis=1)
stats_nb = np.stack((data_nb.mean(axis=1), data_nb.var(axis=1)), axis=1)

## Test data set 

In [8]:
# Draw the test parameters: half of the `ntest` data sets go to each model.
# The seeded `rng` is passed as `random_state` so the test set is reproducible;
# without it scipy samples from NumPy's unseeded global state.
n = ntest
params_poi_test = prior_lam.rvs(size=int(n / 2), random_state=rng)
# one row per draw, columns (k, theta)
params_nb_test = np.vstack((prior_k.rvs(size=int(n / 2), random_state=rng),
                            prior_theta.rvs(size=int(n / 2), random_state=rng))).T

In [9]:
# Simulate the test data from each model with the test parameter draws.
data_poi_test = model_poisson.gen(params_poi_test)
data_nb_test = model_nb.gen(params_nb_test)

In [10]:
# Summary statistics of every test data set: column 0 is the mean and
# column 1 the (population, ddof=0) variance taken along axis 1.
stats_poi_test = np.stack((data_poi_test.mean(axis=1), data_poi_test.var(axis=1)), axis=1)
stats_nb_test = np.stack((data_nb_test.mean(axis=1), data_nb_test.var(axis=1)), axis=1)

## Calculate true posterior probabilities

In [11]:
# Stack the raw test data sets row-wise: Poisson rows first, then the
# negative-binomial rows (the order of the vstack arguments).
xtest = np.vstack((data_poi_test, data_nb_test))

In [12]:
# For every test data set, evaluate the log evidence under each model and
# turn the pair of evidences into a probability via the project helper.
# NOTE(review): `tqdm.tqdm` needs `tqdm` to be the module here — presumably
# rebound by one of the star imports; confirm.
ppoi_exact = []
for xi in tqdm.tqdm(xtest):
    nb_logevi = calculate_nb_evidence(xi, k2, theta2, k3, theta3, log=True)
    poi_logevi = poisson_evidence(xi, k=k1, theta=theta1, log=True)
    # the helper is given plain (not log) evidences, Poisson first
    poi_evidence, nb_evidence = np.exp(poi_logevi), np.exp(nb_logevi)
    ppoi_exact.append(calculate_pprob_from_evidences(poi_evidence, nb_evidence))

 14%|█▍        | 14/100 [00:04<00:24,  3.47it/s]


KeyboardInterrupt: 

## Do rejection sampling for every test sample

In [None]:
# Observed summary statistics of all test data sets (Poisson rows first, then
# negative-binomial rows) and the result array, one row per test data set.
test_set = np.vstack((stats_poi_test, stats_nb_test))
phat = np.zeros((ntest, 2))
eps = 10

# Run rejection ABC against the pre-computed training statistics for each
# observed statistics vector.
for ii in tqdm.tqdm(range(ntest)):
    sxo = test_set[ii]
    accepted_mi, accepted_params, differences = rejection_abc_from_stats(
        sxo,
        [stats_poi, stats_nb],
        [0.5, 0.5],
        niter=100000,
        verbose=False,
        eps=eps,
    )
    # column 1: mean of the accepted model indices; column 0: its complement
    phat[ii, 1] = np.mean(accepted_mi)
    phat[ii, 0] = 1 - phat[ii, 1]

## Do density estimation with same training set 

In [None]:
# Build the classifier's training and test inputs.
#
# Training set: all simulated training data, shuffled together with the model
# indices. Test set: the separately simulated test data, kept in the order
# "Poisson rows, then NB rows" — the same order used for the exact posterior
# and the rejection-ABC results, so the three can be compared index by index.
#
# The previous version sliced `x_all[ntrain:]` for the test set, which is
# empty because `x_all` has exactly `ntrain` rows, and it built shuffle
# indices from `n` (equal to `ntest` at this point) without ever applying them.
x_all = np.vstack((data_poi, data_nb))

# define model indices: 0 for the Poisson rows, 1 for the negative-binomial rows
m_labels = np.hstack((np.zeros(data_poi.shape[0]), np.ones(data_nb.shape[0]))).astype(int)
m_all = m_labels.tolist()

# shuffle data and labels with the same permutation, drawn from the seeded rng
shuffle_indices = rng.permutation(x_all.shape[0])
x = x_all[shuffle_indices, :]
m = m_labels[shuffle_indices].tolist()

# held-out test data and labels (not shuffled, see above)
xtest = np.vstack((data_poi_test, data_nb_test))
mtest = np.hstack((np.zeros(data_poi_test.shape[0]),
                   np.ones(data_nb_test.shape[0]))).astype(int).tolist()

# calculate summary stats
sx = calculate_stats_toy_examples(x)
sx_test = calculate_stats_toy_examples(xtest)
# use training norm to normalize test data
sx_zt, training_norm = normalize(sx)
sx_test_zt, training_norm = normalize(sx_test, training_norm)

In [None]:
# Set up the classifier (2 inputs, one hidden layer of 10 units), an Adam
# optimizer on its parameters and the project's Trainer in classification mode.
# NOTE(review): `torch`, `ClassificationMDN` and `Trainer` are not imported by
# name in this notebook — presumably they come from the star imports; confirm.
model = ClassificationMDN(n_input=2, n_hidden_units=10, n_hidden_layers=1)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
trainer = Trainer(model, optimizer, verbose=True, classification=True)

n_epochs = 10
# evaluates to ntrain / 100, truncated to an int
n_minibatch = int(ntrain / 100)

# train with training data
loss_trace = trainer.train(sx_zt, m, n_epochs=n_epochs, n_minibatch=n_minibatch)
# plot the loss trace returned by the trainer
plt.plot(loss_trace)
plt.ylabel('loss')
plt.xlabel('iterations');

In [None]:
# Keep column 0 of the prediction as the estimate for the Poisson model —
# presumably the class labelled 0; verify against ClassificationMDN.predict.
ppoi_hat = model.predict(sx_test_zt)[:, 0]

In [None]:
# Compare the three estimates per test data set: rejection ABC, the trained
# classifier ("density abc") and the exact value. Axis labels are set so the
# figure can be read on its own.
plt.figure(figsize=(18, 5))
plt.plot(phat[:, 0], '-o', label='rejection abc')
plt.plot(ppoi_hat, '-o', label='density abc')
plt.plot(ppoi_exact, '-o', label='exact')
plt.xlabel('test data set index')
plt.ylabel('posterior probability of the Poisson model')
plt.legend()
plt.tight_layout()