In [1]:
from kronecker import KroneckerSolver
import kernels as kern
from likelihoods import PoissonLike, GaussianLike
import data_utils as sim
import numpy as np
import itertools
from kernels import RBF
from grid_utils import fill_grid
import sys
import tensorflow as tf
import tensorflow.contrib.eager as tfe
tfe.enable_eager_execution()
import pandas as pd
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
import plotly.graph_objs as go
from IPython.display import display
init_notebook_mode(connected=True)
from numpy import genfromtxt
from plotly import tools

## Model Background

Given a set of points $\{x_i\}_{i=1}^N$, we're interested in the following type of model:

$$f \sim ~\mathcal{GP}(\mu(x), K(x, x))$$

$$ y(x_i) \sim l(f(x_i)) $$

where $\mathcal{GP}(\mu(x), K(x, x))$ denotes a Gaussian process with mean $\mu$ and covariance kernel $K$, and $l$ denotes some likelihood. We primarily work with grid-structured data (this will be relevant at the inference step).

In [2]:
# Simulate a 2-D equispaced input grid (D=2, N_dim=30) and show where its points lie.
X = sim.sim_X_equispaced(D=2, N_dim=30)

grid_points = go.Scatter(
    x=X[:, 0],
    y=X[:, 1],
    mode='markers',
    marker={'size': 3},
)
iplot([grid_points])

This is what a draw of $f$ looks like:

In [3]:
# Simulate latent function values f at the grid points X using an RBF kernel.
f = sim.sim_f(X, k=RBF(variance=1.0, length_scale=30.0))

# Kept as a named trace because the three-panel comparison figure reuses it.
trace_func = go.Scatter3d(
    x=X[:, 0],
    y=X[:, 1],
    z=f,
    mode='markers',
    marker={'size': 2},
)
iplot([trace_func])


covariance is not positive-semidefinite.



Given $f$, we draw $y$ from a Poisson distribution:
$$y_i \sim \mathrm{Poisson}(\exp(f(x_i) + \epsilon))$$

where $\epsilon \sim \mathcal{N}(0, 1)$

In [4]:
# Draw count observations y from the latent values f.
# NOTE(review): the text above states epsilon ~ N(0, 1), yet 0.5 is passed as the
# second argument here — confirm what that argument of poisson_draw controls.
y = sim.poisson_draw(f, 0.5)

# Kept as a named trace because the three-panel comparison figure reuses it.
trace_draws = go.Scatter3d(
    x=X[:, 0],
    y=X[:, 1],
    z=y,
    mode='markers',
    marker={'size': 2},
)
iplot([trace_draws])

## Dataset

We worked with a dataset from the Federal Election Commission. The data includes information on individual contributions to political campaigns.

In [5]:
# Load the grid coordinates, the observed counts, and obs_idx — integer row
# indices into X_grid, used below to select the rows that are plotted.
X_grid = genfromtxt('data/X_grid.csv', delimiter=',')
hillary_counts = genfromtxt('data/y_hillary.csv', delimiter=',')
obs_idx = genfromtxt('data/obs_idx.csv', delimiter=',', dtype=np.int32)

# Column 1 of the grid goes on the x-axis and column 0 on the y-axis.
trace_hillary_counts = go.Scatter3d(
    x=X_grid[obs_idx, 1],
    y=X_grid[obs_idx, 0],
    z=hillary_counts,
    mode='markers',
    marker={'size': 2},
)
iplot([trace_hillary_counts])

## Inference

We focused on developing inference for the above type of model. Our implemented inference takes in the following:

<br>

* any differentiable, log-concave likelihood function $l$ (below we use a Poisson)


* a kernel function $k$


* grid data $X$, and observations $y$.

<br>

The reason for requiring a grid structure is that the inference exploits Kronecker structure in the covariance matrix.



It outputs a Laplace approximation of the posterior of $f$. First, let's try to recover the function from the simulated data on a grid.

In [6]:
# Fit the solver to the simulated grid data with a Poisson likelihood.

# Constant mean vector, one entry per grid point, set to the average log-count.
# NOTE(review): np.log(y) is -inf wherever a simulated count is 0, which would make
# this mean -inf — confirm y is strictly positive for the draw being used.
mu = tf.ones([X.shape[0]], tf.float32) * np.mean(np.log(y))

# Bound to `rbf_kernel` rather than `kern`: the import cell already binds `kern`
# to the `kernels` module, and reusing the name would silently overwrite that alias.
# Same hyperparameters as the kernel used to simulate f.
rbf_kernel = RBF(variance=1.0, length_scale=30.)
likelihood = PoissonLike()
y_tf = tfe.Variable(y, dtype=tf.float32)

ks = KroneckerSolver(mu, rbf_kernel, likelihood, X, y_tf, verbose=True)
# Last expression of the cell, so the value returned by run() is displayed.
# Presumably 10 is an iteration cap — verify against KroneckerSolver.run.
ks.run(10)

Iteration:  <tf.Variable 'Variable:0' shape=() dtype=int32, numpy=0>
 psi:  tf.Tensor(-314512.0, shape=(), dtype=float32)
step 1.0

Iteration:  tf.Tensor(1, shape=(), dtype=int32)
 psi:  tf.Tensor(-320851.0, shape=(), dtype=float32)
step 2.0

Iteration:  tf.Tensor(2, shape=(), dtype=int32)
 psi:  tf.Tensor(-324019.0, shape=(), dtype=float32)
step 1.0

Iteration:  tf.Tensor(3, shape=(), dtype=int32)
 psi:  tf.Tensor(-325024.0, shape=(), dtype=float32)
step 1.0

Iteration:  tf.Tensor(4, shape=(), dtype=int32)
 psi:  tf.Tensor(-325048.0, shape=(), dtype=float32)
step 0.00390625

Iteration:  tf.Tensor(5, shape=(), dtype=int32)
 psi:  tf.Tensor(-325048.0, shape=(), dtype=float32)
step 0.25

Iteration:  tf.Tensor(6, shape=(), dtype=int32)
 psi:  tf.Tensor(-325048.0, shape=(), dtype=float32)
step 0.0



(10, <tf.Tensor: id=73142, shape=(), dtype=int32, numpy=7>, 0.0)

In [7]:
# Three 3-D panels side by side: the simulated f, the Poisson draws y, and the
# solver's ks.f evaluated on the same grid.
trace_inferred = go.Scatter3d(
    x=X[:, 0],
    y=X[:, 1],
    z=np.array(ks.f),
    mode='markers',
    marker={'size': 2},
)

fig = tools.make_subplots(
    rows=1,
    cols=3,
    specs=[[{'is_3d': True} for _ in range(3)]],
)
# Panels are filled left to right in this order.
for col, trace in enumerate((trace_func, trace_draws, trace_inferred), start=1):
    fig.append_trace(trace, 1, col)
iplot(fig)

This is the format of your plot grid:
[ (1,1) scene1 ]  [ (1,2) scene2 ]  [ (1,3) scene3 ]



In [8]:
# Fit a solver with a Poisson likelihood to the FEC counts.
# `mu` and `ks` are rebound here, so from this cell on they refer to this
# dataset rather than to the simulated one.
mu = tf.ones([X_grid.shape[0]], tf.float32) * np.mean(np.log(hillary_counts))
ks = KroneckerSolver(
    mu,
    RBF(variance=5.0, length_scale=3.0),
    PoissonLike(),
    X_grid,
    tf.constant(hillary_counts, tf.float32),
    obs_idx=obs_idx,
    verbose=True,
)
ks.run(5)

# Show ks.f_pred only at the rows selected by obs_idx, next to the raw counts.
hillary_func = go.Scatter3d(
    x=X_grid[obs_idx, 1],
    y=X_grid[obs_idx, 0],
    z=ks.f_pred.numpy()[obs_idx],
    mode='markers',
    marker={'size': 2},
)

fig = tools.make_subplots(
    rows=1,
    cols=2,
    specs=[[{'is_3d': True} for _ in range(2)]],
)
for col, trace in enumerate((trace_hillary_counts, hillary_func), start=1):
    fig.append_trace(trace, 1, col)
iplot(fig)

Iteration:  <tf.Variable 'Variable:0' shape=() dtype=int32, numpy=0>
 psi:  tf.Tensor(-1.51858e+07, shape=(), dtype=float32)
step 0.00390625

Iteration:  tf.Tensor(1, shape=(), dtype=int32)
 psi:  tf.Tensor(-1.91195e+07, shape=(), dtype=float32)
step 0.0625

Iteration:  tf.Tensor(2, shape=(), dtype=int32)
 psi:  tf.Tensor(-2.44463e+07, shape=(), dtype=float32)
step 0.5

Iteration:  tf.Tensor(3, shape=(), dtype=int32)
 psi:  tf.Tensor(-2.58093e+07, shape=(), dtype=float32)
step 2.0

Iteration:  tf.Tensor(4, shape=(), dtype=int32)
 psi:  tf.Tensor(-2.62468e+07, shape=(), dtype=float32)
step 1.0

This is the format of your plot grid:
[ (1,1) scene1 ]  [ (1,2) scene2 ]



## Model Selection and Criticism

In [10]:
# `ks` is whichever solver was bound most recently; when the notebook is run
# top to bottom that is the one fitted to the FEC counts.
# NOTE(review): presumably this returns the approximate (log) marginal likelihood
# used for model comparison — confirm against KroneckerSolver.marginal.
ks.marginal()

<tf.Tensor: id=554631, shape=(), dtype=float32, numpy=26246700.0>