# Josephson Junction data example
From https://www.itl.nist.gov/div898/handbook/eda/section4/eda424.htm

One interesting thing about this data is that it is discrete (presumably because of rounding).

The strategy here is to treat this just like the Normal Random Data example, but then add a model that includes the rounding effect.

In [None]:
import numpy as np
import scipy.stats as st
import pymc3 as pm
import matplotlib.pyplot as plt

In [None]:
%matplotlib inline

In [None]:
# Ignore every warning of category UserWarning from this point on
# (intended to quiet some warnings in the pymc3 output).
import warnings
warnings.filterwarnings("ignore", category=UserWarning)

In [None]:
# Seed NumPy's global random number generator with a fixed value
# for reproducibility.
np.random.seed(412)

The data is downloaded from https://www.itl.nist.gov/div898/handbook/datasets/SOULEN.DAT

In [None]:
# Read the file as integers, skipping its first 25 lines (presumably the
# descriptive header of the NIST file -- confirm against the download), then
# collapse the resulting table into one 1-D series of observations.
filename = 'SOULEN.DAT'
raw_table = np.loadtxt(filename, skiprows=25, dtype='int')
data = raw_table.flatten()

In [None]:
# Plot each observation against its index, drawn as point markers.
plt.plot(data,'.')

In [None]:
# Unit-width bins with edges at half-integers, so every bar is centred on one
# integer value of the data.
plt.hist(data,bins=np.arange(data.min()-0.5,data.max()+1.5));

In [None]:
# Summary statistics of the data; ddof=1 selects the (n-1)-normalised
# sample standard deviation.
sample_mean = data.mean()
sample_std = data.std(ddof=1)
print('Sample mean: {}, Sample std: {}'.format(sample_mean, sample_std))

## MCMC

It seems that the flat priors cause the sampler to fail, so I'll use weakly informative priors.

In [None]:
# Baseline model: Normal likelihood with unknown mean and standard deviation.
# Both priors are centred on the corresponding sample statistic.
sample_mean = data.mean()
sample_std = data.std(ddof=1)
log_sample_std = np.log(sample_std)

with pm.Model() as model1:
    # Prior on log(s); pm.Flat('logs') is the flat alternative.
    # NOTE(review): the prior width 10*log(sample std) is positive only when
    # the sample std exceeds 1 -- confirm this is the intended scale.
    logs = pm.Normal('logs', mu=log_sample_std, sigma=10 * log_sample_std)
    # Standard deviation and variance, recorded in the trace as 's' and 's2'.
    s = pm.Deterministic('s', pm.math.exp(logs))
    s2 = pm.Deterministic('s2', s * s)
    # Prior on the mean; pm.Flat('mu') is the flat alternative.
    mu = pm.Normal('mu', mu=sample_mean, sigma=10 * sample_std)
    # Likelihood of the observed data.
    yobs = pm.Normal('yobs', mu=mu, sigma=s, observed=data)

In [None]:
# Give the baseline model a descriptive name (the models are passed to
# pm.compare further down).
model1.name='Unknown mean and var'

In [None]:
# Sample the posterior of the baseline model, requesting 1000 draws.
with model1:
    trace1 = pm.sample(draws=1000)

In [None]:
# Trace plots for the mean and the variance of the baseline model.
pm.traceplot(trace1,var_names=['mu','s2']);

In [None]:
# Posterior plots for the mean and the variance of the baseline model.
pm.plot_posterior(trace1,var_names=['mu','s2']);

In [None]:
import corner

In [None]:
# Stack the 'mu' and 's2' draws as two columns (one row per draw) and show
# their joint posterior as a corner plot.
posterior_samples = np.vstack([trace1['mu'], trace1['s2']]).T
corner.corner(posterior_samples, labels=['mu', 's2']);

In [None]:
# Posterior median of mu with the central 95% interval (2.5% and 97.5%
# quantiles); the upper quantile was previously mistyped as 0.9725.
np.quantile(trace1['mu'],[0.025,0.5,0.975])

In [None]:
# Posterior median of s2 with the central 95% interval (2.5% and 97.5%
# quantiles); the upper quantile was previously mistyped as 0.9725.
np.quantile(trace1['s2'],[0.025,0.5,0.975])

## MCMC with latent rounding model

In [None]:
# write log-likelihood for rounded normal
def RNlogp(mu, sigma, x):
    """Log-probability of x under a Normal(mu, sigma) rounded to the nearest integer.

    A rounded value x arises from a latent Normal value in
    [x - 0.5, x + 0.5), so its probability is CDF(x + 0.5) - CDF(x - 0.5).

    Parameters
    ----------
    mu, sigma : mean and standard deviation of the latent Normal.
    x : rounded observation(s) at which to evaluate the log-probability.

    Returns
    -------
    The symbolic expression log(CDF(x + 0.5) - CDF(x - 0.5)).
    """
    # Build the latent distribution once and reuse it for both CDF evaluations.
    latent = pm.Normal.dist(mu=mu, sigma=sigma)
    log_upper = latent.logcdf(x + 0.5)
    log_lower = latent.logcdf(x - 0.5)
    # Take the difference in log space, log(e^a - e^b) = a + log(1 - e^(b - a)).
    # Exponentiating each log-CDF separately underflows to log(0) when both
    # CDF values are tiny (x far below mu).
    return log_upper + pm.math.log(1 - pm.math.exp(log_lower - log_upper))


In [None]:
# write random model for RN
def RNrand(point, size):
    """Draw samples from the rounded Normal for the parameter values in `point`.

    The mean is read from point['mu'] and the standard deviation from
    point['s']. Normal variates are drawn with the requested `size` and
    rounded to the nearest integer as floor(value + 0.5).
    """
    latent = pm.Normal.dist(mu=point['mu'], sigma=point['s'])
    draws = latent.random(size=size)
    return np.floor(draws + 0.5)

In [None]:
# Rounded model: same priors as the plain Normal model, but the likelihood is
# the rounded-Normal density supplied by RNlogp (with RNrand as its sampler).
sample_mean = data.mean()
sample_std = data.std(ddof=1)
log_sample_std = np.log(sample_std)

with pm.Model() as model2:
    # Prior on log(s); pm.Flat('logs') is the flat alternative.
    # NOTE(review): the prior width 10*log(sample std) is positive only when
    # the sample std exceeds 1 -- confirm this is the intended scale.
    logs = pm.Normal('logs', mu=log_sample_std, sigma=10 * log_sample_std)
    # Standard deviation and variance, recorded in the trace as 's' and 's2'.
    s = pm.Deterministic('s', pm.math.exp(logs))
    s2 = pm.Deterministic('s2', s * s)
    # Prior on the mean; pm.Flat('mu') is the flat alternative.
    mu = pm.Normal('mu', mu=sample_mean, sigma=10 * sample_std)
    # Custom likelihood for the rounded observations. An alternative
    # formulation is pm.Potential('yobs', RNlogp(mu,s,data)).
    yobs = pm.DensityDist(
        'yobs',
        logp=lambda x: RNlogp(mu, s, x),
        random=RNrand,
        observed=data,
    )

In [None]:
# Give the rounded model a descriptive name (the models are passed to
# pm.compare further down).
model2.name='Rounded'

In [None]:
# Sample the posterior of the rounded model, requesting 1000 draws.
with model2:
    trace2 = pm.sample(draws=1000)

In [None]:
# Trace plots for the mean and the variance of the rounded model.
pm.traceplot(trace2,var_names=['mu','s2']);

In [None]:
# Posterior plots for the mean and the variance of the rounded model.
pm.plot_posterior(trace2,var_names=['mu','s2']);

In [None]:
# Posterior median of mu with the 2.5% and 97.5% quantiles
# (central 95% interval).
np.quantile(trace2['mu'],[0.025,0.5,0.975])

In [None]:
# Posterior median of s2 with the 2.5% and 97.5% quantiles
# (central 95% interval).
np.quantile(trace2['s2'],[0.025,0.5,0.975])

In [None]:
# Stack the 'mu' and 's2' draws as two columns (one row per draw) and show
# their joint posterior as a corner plot.
posterior_samples = np.vstack([trace2['mu'], trace2['s2']]).T
corner.corner(posterior_samples, labels=['mu', 's2']);

In [None]:
# Find the maximum a posteriori (MAP) point of the rounded model.
with model2:
    map2=pm.find_MAP()

In [None]:
# Display the MAP estimate.
map2

In [None]:
# Generate 500 posterior predictive samples from the rounded model.
with model2:
    post2 = pm.sample_posterior_predictive(trace2, samples=500)

In [None]:
# Overlay normalised histograms of the posterior predictive draws and the
# observed data, using shared unit-width bins centred on the integers.
# Flatten the predictive draws first: plt.hist treats each column of a 2-D
# array as a separate dataset, so a (samples, observations)-shaped array would
# otherwise produce one histogram per column. A 1-D array is left unchanged.
pred = np.ravel(post2['yobs'])
bmin = min(data.min(), pred.min()) - 0.5
bmax = max(data.max(), pred.max()) + 1.5
blist = np.arange(bmin, bmax)
plt.hist(pred, bins=blist, alpha=0.5, density=True, label='model')
plt.hist(data, bins=blist, alpha=0.5, density=True, label='data')
plt.title('Normalized histogram')
plt.legend();

The posterior predictive samples look like the data, so I think the model is believable.

Comparing the two models shows that the Rounded model is no better than the basic model.

In [None]:
# Pair each model with its trace and compare the two using WAIC.
models={model1: trace1, model2:trace2}
pm.compare(models,ic='WAIC')

In [None]:
# Repeat the comparison using LOO as the information criterion.
pm.compare(models,ic='LOO')