In [None]:
import matplotlib.pyplot as plt
import matplotlib as mpl
import numpy as np
import sys 

sys.path.append('../../')
from model_comparison.utils import *
from model_comparison.mdns import ClassificationSingleLayerMDN, Trainer, UnivariateMogMDN
from model_comparison.models import PoissonModel, NegativeBinomialModel
%matplotlib inline

In [None]:
# Notebook-wide matplotlib defaults: font sizes for legend, titles, axis
# labels and tick labels, plus the default figure size.
mpl_params = {
    'legend.fontsize': 15,
    'axes.titlesize': 20,
    'axes.labelsize': 17,
    'xtick.labelsize': 12,
    'ytick.labelsize': 12,
    'figure.figsize': (18, 5),
}

# Apply the defaults globally so every later figure picks them up.
mpl.rcParams.update(mpl_params)

In [None]:
# Single seed shared by the notebook-level generator below and by the models
# constructed later, so that a fresh "Run All" reproduces the same draws.
seed = 1

# Legacy RandomState generator used for all prior draws in this notebook.
rng = np.random.RandomState(seed)

In [None]:
# Simulation sizes: `sample_size` is handed to both models, `ntrain` is the
# total number of parameter draws (half of it is used per model further down).
sample_size, ntrain = 100, 1000

# Scale of the Gamma prior on the Poisson rate. The matching shape k1 is not
# fixed here; it is derived per sweep step so that the model means agree.
theta1 = 9.0

# Scale of the first Gamma prior used for the negative-binomial parameters
# (its shape k2 is the quantity that gets swept).
theta2 = 1

# Shape and scale of the second Gamma prior used for the negative-binomial
# parameters.
k3, theta3 = 2., 2.0

# Both models share the same construction arguments.
_model_kwargs = dict(sample_size=sample_size, seed=seed, n_workers=1)
model_poisson = PoissonModel(**_model_kwargs)
model_nb = NegativeBinomialModel(**_model_kwargs)

## Match the means and increase $k2$ to increase the variance in NB

In [None]:
def _plot_stat_histograms(ax, stat_nb, stat_poi, title, with_ylabel):
    """Overlay NB and Poisson histograms of one per-dataset statistic on `ax`.

    Parameters
    ----------
    ax : matplotlib Axes to draw on.
    stat_nb, stat_poi : 1-D arrays holding the statistic (one value per
        generated dataset) for the NB and the Poisson model respectively.
    title : str, panel title.
    with_ylabel : bool, whether to label the y-axis (only done for the first
        column to avoid clutter).
    """
    ax.hist(stat_nb, color='C0', label='nb', bins=50)
    ax.hist(stat_poi, color='C1', label='poi', bins=50)
    # Red vertical lines mark the average of the statistic over all datasets.
    ax.axvline(x=stat_nb.mean(), color='r')
    ax.axvline(x=stat_poi.mean(), color='r')
    ax.set_title(title)
    if with_ylabel:
        ax.set_ylabel('counts')
    ax.legend()


# Sweep the shape k2 of the first NB Gamma prior. For every k2 the Poisson
# prior shape k1 is chosen such that k1 * theta1 == k2 * theta2 * k3 * theta3,
# i.e. the prior means of the two models are matched.
k2s = np.linspace(1, 20, 5)
n_k2 = k2s.size
n_per_model = int(ntrain / 2)  # half of the parameter draws go to each model

k1s = np.zeros_like(k2s)
nb_means = np.zeros(n_k2)
poi_means = np.zeros(n_k2)
nb_vars = np.zeros(n_k2)
poi_vars = np.zeros(n_k2)

# One column per k2 value: top row shows sample means, bottom row sample
# variances. The grid width follows k2s instead of a hard-coded 5 so that
# changing the sweep does not misplace panels or raise.
fig, axes = plt.subplots(2, n_k2, figsize=(18, 10), squeeze=False)

for ii, k2 in enumerate(k2s):
    k1 = (k2 * theta2 * k3 * theta3) / theta1
    k1s[ii] = k1

    # NOTE: the order of the rng draws (Poisson prior first, then the two NB
    # priors) is kept fixed so the seeded generator yields the same samples.

    # from Gamma prior for Poisson
    params_poi = rng.gamma(shape=k1, scale=theta1, size=n_per_model)

    # from two Gamma priors for indirect Poisson-Gamma Sampling.
    params_nb = np.vstack((rng.gamma(shape=k2, scale=theta2, size=n_per_model),
                           rng.gamma(shape=k3, scale=theta3, size=n_per_model))).T

    data_poi = model_poisson.gen(params_poi)
    data_nb = model_nb.gen(params_nb)

    # Per-dataset summary statistics, computed once and reused below.
    # Assumes gen() returns one row per parameter set -- TODO confirm.
    sample_means_nb = data_nb.mean(axis=1)
    sample_means_poi = data_poi.mean(axis=1)
    sample_vars_nb = data_nb.var(axis=1)
    sample_vars_poi = data_poi.var(axis=1)

    first_column = ii == 0
    if first_column:
        title_means = 'Sample means, k2={}'.format(k2)
        title_vars = 'Sample variances, k2={}'.format(k2)
    else:
        title_means = title_vars = 'k2={}'.format(k2)

    _plot_stat_histograms(axes[0, ii], sample_means_nb, sample_means_poi,
                          title_means, with_ylabel=first_column)
    _plot_stat_histograms(axes[1, ii], sample_vars_nb, sample_vars_poi,
                          title_vars, with_ylabel=first_column)

    # Empirical averages (over datasets) of the sample means and variances.
    nb_means[ii] = sample_means_nb.mean()
    nb_vars[ii] = sample_vars_nb.mean()
    poi_means[ii] = sample_means_poi.mean()
    poi_vars[ii] = sample_vars_poi.mean()

In [None]:
# Closed-form expression used as the "expected NB variance" on the x-axis;
# it is linear in k2 because all other prior parameters are held fixed.
expvar = k2s * k3 * theta2 * theta3 * (1 + k3 * theta3 + theta3)

# Averaged sample means of both models across the k2 sweep.
plt.plot(expvar, nb_means, '-o', label='mean sample mean NB')
plt.plot(expvar, poi_means, '-o', label='mean sample mean Poisson')

# Averaged sample variances of both models across the k2 sweep.
plt.plot(expvar, poi_vars, '-o', label='mean sample variance Poi')
plt.plot(expvar, nb_vars, '-o', label='mean sample variance NB')

# Use an ASCII caret so mathtext renders the exponent as a superscript (the
# previous label contained the look-alike character U+02C6 instead of '^').
plt.xlabel(r'expected variance $\mathbf{E}[\sigma_{NB}^2] \sim k2$')
plt.legend()
plt.title(r'Expected variance scales with $k2$');

In [None]:
# Closed-form expression used as the "expected NB variance" on the x-axis.
expvar = k2s * k3 * theta2 * theta3 * (1 + k3 * theta3 + theta3)

# Plot against expvar so the data matches the x-axis label; previously expvar
# was computed but unused and the curves were drawn against the empirical
# nb_vars instead.
plt.plot(expvar, k1s * theta1, '-o', label=r'mean of Gamma prior on Poisson rate: $k_1 \theta_1$')
plt.plot(expvar, k2s, '-o', label=r'$k2$')
plt.legend()
plt.title('Increasing $k2$ is compensated by increased Poisson rate to match the means')
# ASCII caret so mathtext renders the exponent (was the look-alike U+02C6).
plt.xlabel(r'expected variance $\mathbf{E}[\sigma_{NB}^2] \sim k2$');