In [None]:
%load_ext autoreload
%autoreload 2

import pickle
import sys

import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.optim as optim

sys.path.append("..")

### GPLVM
Here we run a Gaussian process latent variable model (GPLVM) on a biological dataset to perform dimensionality reduction. This section aims to reproduce the Pyro tutorial at https://pyro.ai/examples/gplvm.html.

In [None]:
import pandas as pd

from neuroprob.utils import tools
import neuroprob.models as mdl

In [None]:
# license: Copyright (c) 2014, the Open Data Science Initiative
# license: https://www.elsevier.com/legal/elsevier-website-terms-and-conditions
URL = "https://raw.githubusercontent.com/sods/ods/master/datasets/guo_qpcr.csv"

# index_col=0 turns the first CSV column into the row index (the cell names used below)
df = pd.read_csv(URL, index_col=0)

# Short textual summary of the table: shape, the distinct row labels, then a preview
for report in (
    "Data shape: {}\n{}\n".format(df.shape, "-" * 21),
    "Data labels: {}\n{}\n".format(df.index.unique().tolist(), "-" * 86),
    "Show a small subset of the data:",
):
    print(report)
df.head()

In [None]:
# Table values as a tensor; the transpose puts the output dimensions on axis 0
# and the data points on axis 1 (see y_dim / num_data below).
data = torch.tensor(df.values)
y = data.transpose(0, 1)

# The leading integer of every row label (text before the first space) is the capture time.
capture_time = y.new_tensor(
    [int(name.split(" ")[0]) for name in df.index.values]
)
# we scale the time into the interval [0, 1]
time = torch.log2(capture_time) / 6

y_dim, num_data = y.shape[0], y.shape[1]

# bin size handed to set_params() of the model components further down
sample_bin = 1.

In [None]:
# GPLVM
# Build a two-dimensional latent variable GP model for the data tensor `y`.
l = np.ones(1) # lengthscales
v = np.ones(1) # variances
kernels_tuples = [v, 
                  ('RBF', 'euclid', np.array([l, l]))]
prior_tuples = [('Normal', # type
                 'euclid', # topology
                 0.0, # mean
                 0.1, # std
                 False, # learnable mean
                 False), # learnable std
               ('Normal', 'euclid', 0.0, 0.1, False, False)]
variational_types = [('Normal', 'euclid'), 
                     ('Normal', 'euclid')]

# initial variational parameters
# one (num_data, 2) table per latent dimension: column 0 = mean, column 1 = std
ini_X = [np.empty((num_data, 2)), np.empty((num_data, 2))]
ini_X[0][:, 0] = np.array(time) # mean
ini_X[0][:, 1].fill(1.0) # std (before softplus)
ini_X[1][:, 0].fill(0.0) # mean
ini_X[1][:, 1].fill(1.0) # std (before softplus)

# inducing_points
num_induc = 32
# draw exactly num_induc row indices (with replacement) so the count has a single source of truth
ind = np.random.randint(num_data, size=num_induc)
# NOTE(review): ini_X[0] holds (mean, std) of the FIRST latent dimension, so the
# second inducing coordinate is the initial std value 1.0 rather than the mean of the
# second latent dimension (0.0) — confirm this is intended.
inducing_points = ini_X[0][None, ind, :]

gp_lvm = mdl.nonparametrics.Gaussian_process(y_dim, inducing_points, kernels_tuples, 
                                          prior_tuples, variational_types, inv_link='identity', 
                                          shared_kernel_params=True, full_cov_fit=False)
gp_lvm.set_params(sample_bin, jitter=1e-4)

# Gaussian observation model with one initial log-variance of -1 per output dimension
log_var = -1.0*np.ones(y_dim)
noise_dist = mdl.likelihoods.Gaussian(y_dim, 'identity', log_var)
noise_dist.set_params(sample_bin)

# NLL model
glm = mdl.inference.nll_optimized([gp_lvm], noise_dist)

In [None]:
# Use the first CUDA device when one is available; otherwise fall back to the CPU
# so the notebook also runs on machines without a GPU.
dev = 'cuda:0' if torch.cuda.is_available() else 'cpu'

# preprocessing
glm.preprocess(ini_X, num_data, np.array(y), batch_size=num_data)
glm.to(dev) # to device

# scheduler factory: multiplies the learning rate by 0.9 on every scheduler step
sch = lambda o: optim.lr_scheduler.MultiplicativeLR(o, lambda e: 0.9)
# NOTE(review): the middle entry (100) is presumably the scheduler step interval — confirm
opt_tuple = (optim.Adam, 100, sch)
opt_lr_dict = {'default': 1e-2}
glm.set_optimizers(opt_tuple, opt_lr_dict)

In [None]:
# fitting
losses = glm.fit(5000, margin=1e0, premature=100, ll_samples=1, cov_samples=1)

# loss curve returned by the fit
plt.figure()
plt.plot(losses)
plt.gca().set(xlabel='Epoch', ylabel='NLL')
plt.show()

In [None]:
# Latent means / stds after fitting; X_loc[0] and X_loc[1] are the two latent dimensions
X_loc, X_std = gp_lvm.eval_X()

colors = plt.get_cmap("tab10").colors[::-1]
labels = df.index.unique()

# one scatter group (and legend entry) per distinct row label
for i, label in enumerate(labels):
    members = df.index == label
    plt.scatter(X_loc[0][members], X_loc[1][members], c=[colors[i]], label=label)

plt.legend()
plt.show()

### Head direction Poisson bumps

### Grid cells

In [None]:
# Synthetic grid of place cells
# NOTE: unpickling can execute arbitrary code — only load files from a trusted source.
# The context manager guarantees the file handle is closed after loading.
with open('../data/synthetic/grid_IPP.p', 'rb') as f:
    (sample_bin, track_samples, x_t, y_t, s_t, hd_t, theta_t, dir_t,
     syn_t_spike, spike_samples, units,
     left_x, right_x, bottom_y, top_y) = pickle.load(f)

arena_width = right_x - left_x
arena_height = top_y - bottom_y

neurons = np.arange(units)
use_samples = 3000000
# keep only the spike entries whose value is below use_samples
used_t_spike = [syn_t_spike[n][syn_t_spike[n] < use_samples] for n in neurons]
units_used = len(neurons)
unit_used = np.arange(units_used)
show_neurons = [0, 1, 2, 3, 4]
behav_data = (x_t, y_t)

# NOTE(review): `neural_utils` is not imported in any visible cell — confirm its
# import (presumably from the neuroprob package) before running on a fresh kernel.
bin_size = 100
tbin, resamples, rc_t, (rx_t, ry_t) = neural_utils.BinTrain(bin_size, sample_bin, used_t_spike, 
                                                                    use_samples, behav_data, average_behav=False)

In [None]:
# Cross-validation split of the binned spike counts and binned positions.
# NOTE(review): presumably SpikeTrainCV returns one (fit, held-out) split per fold — confirm.
folds = 10
cv_set = neural_utils.SpikeTrainCV(folds, rc_t, resamples, [rx_t, ry_t])
# Only the split at index 2 is used: fit_* feeds the model fit below,
# dec_* feeds the later latent-decoding cells.
fit_rc_t, fit_behav, dec_rc_t, dec_behav = cv_set[2]

In [None]:
# Spatial trajectory of the fitting split, limited to its first T samples
T = 30000
fit_x, fit_y = fit_behav[0], fit_behav[1]
plt.plot(fit_x[:T], fit_y[:T])
plt.show()

In [None]:
# GPLVM
# Kernel hyperparameters: one lengthscale / variance entry per unit
units_ = units_used#1
l = np.full(units_, 100.)
v = np.ones(units_)

kernels_tuples = [v, ('RBF', 'euclid', np.array([l, l]))]

# One uniform prior per latent dimension.
# NOTE(review): the first bound is the literal 0.0 rather than left_x / bottom_y —
# confirm this matches the arena extent.
prior_tuple = [('Uniform', 'euclid', 0.0, upper, False, False)
               for upper in (right_x, top_y)]
# alternative prior that was tried:
#prior_tuple = [('RW', 'euclid', 0.0, 1.0, True, True), 
#               ('RW', 'euclid', 0.0, 1.0, True, True)]
variational_types = [('Normal', 'euclid')] * 2

# Inducing points: num_induc locations drawn uniformly over the arena (x first, then y),
# stacked to (num_induc, 2) and copied once per unit -> (units_used, num_induc, 2)
num_induc = 16
induc_x = left_x + arena_width*np.random.rand(num_induc)
induc_y = bottom_y + arena_height*np.random.rand(num_induc)
inducing_points = np.repeat(np.stack([induc_x, induc_y], axis=-1)[None, ...],
                            units_used, axis=0)

gp_lvm = mdl.nonparametrics.Gaussian_process(
    units_used, inducing_points, kernels_tuples,
    prior_tuple, variational_types,
    mean=np.zeros(units_used),
    shared_kernel_params=False,
    cov_type='factorized',
    whiten=False,
)
gp_lvm.set_params(tbin, jitter=1e-5)

# Poisson observation model with the 'exp' link
likelihood = mdl.likelihoods.Poisson(units_used, 'exp')
likelihood.set_params(tbin)

# NLL model
glm = mdl.inference.nll_optimized([gp_lvm], likelihood)

In [None]:
# fit model
# The positions of the fitting split are passed as the model input.
ini_X = fit_behav
fit_samples = fit_behav[0].shape[0]
cov_samples = 1

glm.preprocess(ini_X, fit_samples, fit_rc_t, batch_size=10000)
glm.to(dev)


def sch(o):
    """Build a scheduler that multiplies the learning rate of `o` by 0.9 per step."""
    return optim.lr_scheduler.MultiplicativeLR(o, lambda e: 0.9)


opt_tuple = (optim.Adam, 200, sch)
opt_lr_dict = {'default': 5*1e-2}
glm.set_optimizers(opt_tuple, opt_lr_dict)


# fitting
def annealing(x):
    """Constant annealing factor (a ramp min(1.0, 0.005*x) was tried as an alternative)."""
    return 1.0


losses = glm.fit(3000, margin=1e1, premature=100, anneal_func=annealing,
                 ll_samples=1, cov_samples=cov_samples)

# loss curve returned by the fit
plt.figure()
plt.plot(losses)
plt.gca().set(xlabel='Epoch', ylabel='NLL')
plt.show()

In [None]:
# show fits
# Evaluate the fitted rate of each selected neuron on a position mesh and draw one
# panel per neuron in a single row.
grid_size = (50, 40)
grid_shape = [[left_x, right_x],
              [bottom_y, top_y]]
show_neurons = np.arange(5)

# The panel layout follows the number of displayed neurons instead of a fixed count.
nrows = 1
ncols = len(show_neurons)
fig = plt.figure(figsize=(8, 2))
axes = [fig.add_subplot(nrows, ncols, k + 1) for k in range(nrows * ncols)]


def make_rate_func(neuron):
    """Return a function that evaluates the model rate of `neuron` on a position mesh.

    The returned function takes `pos`, indexed as pos[0] (x) and pos[1] (y), flattens
    both coordinates, evaluates gp_lvm.eval_rate for the single neuron and reshapes
    the result back to pos.shape[1:].
    """
    def func(pos):
        prevshape = pos.shape[1:]
        covariates = np.array([pos[0].flatten(), pos[1].flatten()])
        return gp_lvm.eval_rate(covariates, [neuron])[0].reshape(*prevshape)
    return func


last_panel = len(show_neurons) - 1
# `panel` is the position in the figure, `neuron` the unit index; keeping them apart
# lets show_neurons hold arbitrary unit ids.
for panel, neuron in enumerate(show_neurons):
    # only the last panel carries the colour-scale title
    ticktitle = 'Firing rate (Hz)' if panel == last_panel else ''

    _, field = tools.compute_mesh(grid_size, grid_shape, make_rate_func(neuron))
    _, ax = tools.visualize_field(field, grid_shape, ticktitle=ticktitle, figax=(fig, axes[panel]))

    # only the first panel carries the y-axis label
    ylabel = r'$y$' if panel == 0 else None
    tools.decorate_ax(ax, ylabel=ylabel, xlim=[left_x, right_x], ylim=[bottom_y, top_y],
                      spines=[False, False, False, False])


# shared x-axis label centred below the row of panels
fig.text(0.51, 0.0, r'$x$', ha='center', va='center', fontsize=12)

fig.subplots_adjust(hspace=0.3, wspace=0.3)

#plt.savefig('output/syn_pop.svg', bbox_inches='tight')
#plt.savefig('output/syn_pop.pdf', bbox_inches='tight')
plt.show()

In [None]:
# preprocessing
# Decode the latent trajectory of the held-out split.
dec_samples = dec_behav[0].shape[0]

# One (dec_samples, 2) table per latent dimension: column 0 starts as standard-normal
# noise, column 1 is overwritten with 1.0.
ini_X = [np.random.randn(dec_samples, 2) for _ in range(2)]
for latent_init in ini_X:
    latent_init[:, 1].fill(1.0) # std of variational distribution
cov_samples = 16

glm.preprocess(ini_X, dec_samples, dec_rc_t, batch_size=10000)
glm.to(dev)


def sch(o):
    """Build a scheduler that multiplies the learning rate of `o` by 0.9 per step."""
    return optim.lr_scheduler.MultiplicativeLR(o, lambda e: 0.9)


opt_tuple = (optim.Adam, 200, sch)
# Learning rate 0 by default; only the latent means and stds get a non-zero rate.
opt_lr_dict = {
    'default': 0.,
    'lv_mu_0': 1e-1,
    'lv_mu_1': 1e-1,
    'lv_std_0': 5*1e-3,
    'lv_std_1': 5*1e-3,
}
glm.set_optimizers(opt_tuple, opt_lr_dict)

In [None]:
# fitting
def annealing(x):
    """Constant annealing factor (a ramp min(1.0, 0.005*x) was tried as an alternative)."""
    return 1.0


losses = glm.fit(3000, margin=1e1, premature=100, anneal_func=annealing,
                 ll_samples=1, cov_samples=cov_samples)

# loss curve returned by the fit
plt.figure()
plt.plot(losses)
plt.gca().set(xlabel='Epoch', ylabel='NLL')
plt.show()

In [None]:
# Read back the latent means (X_loc) and standard deviations (X_std) after the
# decoding fit; index 0 / 1 select the two latent dimensions.
X_loc, X_std = gp_lvm.eval_X()

# decoded trajectory: first latent dimension against the second
plt.plot(X_loc[0], X_loc[1])

In [None]:
# Compare the decoded latent trajectory with the recorded one over time steps [start, T).
T = 100
start = 0
from matplotlib.patches import Ellipse

# model trajectory
# one axis-aligned ellipse per time step, centred on the latent mean; width and
# height are twice the latent std of the respective dimension
ells = [Ellipse(xy=[X_loc[0][t], X_loc[1][t]], 
                width=2*X_std[0][t], height=2*X_std[1][t], angle=0.)#rnd.rand()*360)
        for t in np.arange(start, T, 1)]

fig = plt.figure(0)
ax = fig.add_subplot(111, aspect='equal')
for e in ells:
    ax.add_artist(e)
    e.set_clip_box(ax.bbox)
    e.set_alpha(0.3)
    e.set_facecolor('tab:blue')

#ax.plot(X_loc[0][start:T], X_loc[1][start:T], 'r-')# marker='.', s=0)
# NOTE(review): rx_t / ry_t are the full binned trajectory, while X_loc was evaluated
# after preprocessing with the dec_* split — confirm that indices start:T refer to the
# same time steps (dec_behav may be the intended reference).
ax.plot(rx_t[start:T], ry_t[start:T], 'r-') # true trajectory

plt.show()


# first latent dimension over time, with its std as error bars
tt = tbin*np.arange(start, T)
plt.plot(tt, X_loc[0][start:T])
plt.errorbar(tt, X_loc[0][start:T], yerr=X_std[0][start:T], linestyle='', 
            elinewidth=1, color='tab:blue', alpha=0.5)
# NOTE(review): RX_mu, RX_std, iX_std and iX_mu are not defined in any visible cell;
# on a fresh kernel this line raises NameError unless they are defined elsewhere.
# Presumably they are the mean/std of the true and the inferred x — confirm and define.
plt.plot(tt, (rx_t[start:T] - RX_mu)/RX_std*iX_std + iX_mu, 'r--')
plt.show()