In [None]:
import torch
import torch.nn as nn
from torch.nn.parameter import Parameter
import torch.nn.functional as F
import torch.optim as optim

import matplotlib.pyplot as plt

import scipy.stats as scstats
import numpy as np

import sys
sys.path.append("..") # access to library

import neuroprob as mdl
from neuroprob import utils
from neuroprob import GP

import pickle

# GPU index handed to the library's device helper; get_full_model moves models onto `dev`
gpu_dev = 0
dev = utils.pytorch.get_device(gpu=gpu_dev)

# project-local modules: model construction/evaluation helpers and the HDC dataset loader
import models
import HDC

# figure styling is read from the local 'paper.mplstyle' file
plt.style.use(['paper.mplstyle'])

### Utils

In [None]:
def get_full_model(session_id, phase, cvdata, resamples, binsize, 
                   m, rcov, max_count, neurons, gpu):
    """
    Rebuild a model from its specification tuple and load its trained weights.

    The model is constructed with ``models.set_model``, moved to the notebook-level 
    device ``dev`` and populated from ``./checkpoint/<model_name>``.

    Note that the bin length passed to ``models.set_model`` is the notebook-level 
    ``tbin``, not an argument; it must match ``binsize`` when this is called.

    :param session_id: 'Mouse12-120806' / 'Mouse28-140313' (mapped to 0 / 1 for the 
                       checkpoint name); any other value is used as given
    :param phase: 'wake' / 'sleep' (mapped to 1 / 0 for the checkpoint name); any 
                  other value is used as given
    :param tuple cvdata: (kcv, ftrain, fcov, vtrain, vcov, batch_size)
    :param resamples: not used by this function
    :param binsize: bin size, only used to build the checkpoint name
    :param tuple m: (mtype, ll_mode, r_mode, num_induc, inv_link, C, z_dims, delays, 
                    folds, cv_switch, basis_mode)
    :param rcov: not used by this function
    :param max_count: forwarded to ``models.net`` and ``models.set_model``
    :param neurons: forwarded to ``models.net`` and ``models.set_model``
    :param gpu: CUDA device index used as ``map_location`` when loading the checkpoint
    :returns: the model with the checkpoint's 'full_model' state dict loaded
    """
    # translate string identifiers into the integer codes used in checkpoint names
    session_id = {'Mouse12-120806': 0, 'Mouse28-140313': 1}.get(session_id, session_id)
    phase = {'wake': 1, 'sleep': 0}.get(phase, phase)

    (mtype, ll_mode, r_mode, num_induc, inv_link, C, 
     z_dims, delays, folds, cv_switch, basis_mode) = m
    shared_W = False
    enc_layers, basis = models.hyper_params(basis_mode)
    kcv, ftrain, fcov, vtrain, vcov, batch_size = cvdata

    # only the 'U' likelihood takes a mapping network
    mapping_net = models.net(C, basis, max_count, neurons, shared_W) if ll_mode == 'U' else None

    full_model, _ = models.set_model(
        'HDC', max_count, mtype, r_mode, ll_mode, fcov, neurons, 
        tbin, ftrain, num_induc, batch_size=batch_size, inv_link=inv_link, 
        mapping_net=mapping_net, C=C, enc_layers=enc_layers
    )
    full_model.to(dev)

    # assemble the checkpoint file name piece by piece
    prefix = 'HDC{}'.format(binsize)
    if shared_W:
        prefix = prefix + 'S'
    if basis_mode != 'ew':
        prefix = prefix + basis_mode

    model_name = '{}{}{}_{}_{}_{}_C={}_{}'.format(prefix, session_id, phase, mtype, ll_mode, r_mode, C, kcv)
    if cv_switch:
        model_name = model_name + '_'

    state = torch.load('./checkpoint/' + model_name, map_location='cuda:{}'.format(gpu))
    full_model.load_state_dict(state['full_model'])
    return full_model


### Data

In [None]:
# recording session and behavioural phase passed to HDC.get_dataset and get_full_model below
session_id = 'Mouse28-140313'
phase = 'wake'

### Regression

In [None]:
# Model specification tuples; get_full_model unpacks each one as
# (mtype, ll_mode, r_mode, num_induc, inv_link, C, z_dims, delays, folds, cv_switch, basis_mode)
modes = [('GP', 'U', 'hd', 8, 'identity', 3, [], False, 10, False, 'ew'), 
         ('GP', 'U', 'hd_w_s_t', 48, 'identity', 3, [], False, 10, False, 'ew'), 
         ('GP', 'U', 'hd_w_s_pos_t', 64, 'identity', 3, [], False, 10, False, 'ew'), 
         ('GP', 'IP', 'hd_w_s_pos_t', 64, 'exp', 1, [], False, 10, False, 'ew'), 
         ('GP', 'hNB', 'hd_w_s_pos_t', 64, 'exp', 1, [], False, 10, False, 'ew'), 
         ('GP', 'U', 'hd_w_s_pos_t', 64, 'identity', 3, [], False, 10, False, 'qd')]

# load the dataset at bin size `bn`
bn = 40
rcov, neurons, tbin, resamples, rc_t, region_edge = HDC.get_dataset(session_id, phase, bn)

# range of covariates 3 and 4, used later as the bounds of the position grids
left_x = rcov[3].min()
right_x = rcov[3].max()
bottom_y = rcov[4].min()
top_y = rcov[4].max()

# analyse every neuron
pick_neuron = list(range(neurons))

In [None]:
### statistics over the behaviour ###
# One entry per bin size in `binnings`: predictive mean, variance and Fano factor of the counts
avg_models = []
var_models = []
ff_models = []

binnings = [20, 40, 100, 200, 500]

for bn in binnings:

    # reload the dataset at this bin size; this rebinds the notebook-level rcov, tbin, rc_t, ...
    rcov, neurons, tbin, resamples, rc_t, region_edge = HDC.get_dataset(session_id, phase, bn)
    max_count = int(rc_t.max())
    x_counts = torch.arange(max_count+1) # count values 0..max_count
    
    mode = modes[2]
    cvdata = models.get_cv_sets(mode, [2], 5000, rc_t, resamples, rcov)[0]
    full_model = get_full_model(session_id, phase, cvdata, resamples, bn, 
                                mode, rcov, max_count, neurons, gpu=gpu_dev)


    avg_model = []
    var_model = []
    ff_model = []

    for b in range(full_model.inputs.batches):
        P_mc = models.compute_pred_P(full_model, b, pick_neuron, None, cov_samples=10, ll_samples=1, tr=0).cpu()

        # first moment, variance and Fano factor of the count distribution on the last axis of P_mc
        avg = (x_counts[None, None, None, :]*P_mc).sum(-1)
        var = ((x_counts[None, None, None, :]**2*P_mc).sum(-1)-avg**2)
        ff = var/(avg+1e-12) # small constant avoids division by zero
        avg_model.append(avg)
        var_model.append(var)
        ff_model.append(ff)

    # join batches along the last axis, then average over the leading axis
    # (presumably the Monte Carlo sample axis — TODO confirm against models.compute_pred_P)
    avg_models.append(torch.cat(avg_model, dim=-1).mean(0).numpy())
    var_models.append(torch.cat(var_model, dim=-1).mean(0).numpy())
    ff_models.append(torch.cat(ff_model, dim=-1).mean(0).numpy())
    

In [None]:
# KS framework for regression models
CV = [2, 5, 8]

### KS test over binnings ###
# For every validation fold in CV and every bin size in `binnings`, collect the quantiles q, 
# their Z-scores, and the pairwise Pearson correlations between the Z-scores of neurons.
#
# NOTE(review): rc_t, resamples, rcov, max_count and tbin are not reloaded per bin size here; 
# they are whatever the kernel last loaded. Confirm whether HDC.get_dataset should be called 
# for each bn, as done in the statistics cell.
# NOTE(review): the `en == 0` split mirrors the likelihood comparison cell, but here `en` 
# indexes bin sizes while `mode` stays fixed — confirm which branch is intended for en > 0.
Qq_bn = []
Zz_bn = []
R_bn = []
Rp_bn = []
mode = modes[2]

N = len(pick_neuron)
for kcv in CV:
    for en, bn in enumerate(binnings):
        cvdata = models.get_cv_sets(mode, [kcv], 3000, rc_t, resamples, rcov)[0]
        kcv_str, ftrain, fcov, vtrain, vcov, batch_size = cvdata
        cv_set = (ftrain, fcov, vtrain, vcov)
        time_steps = ftrain.shape[-1]

        full_model = get_full_model(session_id, phase, cvdata, resamples, bn, 
                                    mode, rcov, max_count, neurons, gpu=gpu_dev)

        if en == 0:
            q_ = []
            Z_ = []
            for b in range(full_model.inputs.batches): # predictive posterior
                P_mc = models.compute_pred_P(full_model, b, pick_neuron, None, cov_samples=10, ll_samples=1, tr=0)
                P = P_mc.mean(0).cpu().numpy()

                for n in range(N):
                    spike_binned = full_model.likelihood.spikes[b][0, pick_neuron[n], :].numpy()
                    q, Z = models.get_q_Z(P[n, ...], spike_binned, deq_noise=None)
                    q_.append(q)
                    Z_.append(Z)

            # q_/Z_ are ordered batch-major, neuron-minor: stride N picks one neuron across batches
            q = []
            Z = []
            for n in range(N):
                q.append(np.concatenate(q_[n::N]))
                Z.append(np.concatenate(Z_[n::N]))

        elif en > 0:
            cov_used = models.cov_used(mode[2], fcov, 'HDC')
            q = models.compute_count_stats(full_model, mode[1], tbin, ftrain, cov_used, pick_neuron, \
                                           traj_len=1, start=0, T=time_steps, bs=5000)
            Z = [utils.stats.q_to_Z(q_) for q_ in q]    

        # Pearson correlation of Z-scores for every unordered neuron pair
        Pearson_s = []
        for n in range(len(pick_neuron)):
            for m in range(n+1, len(pick_neuron)):
                r, r_p = scstats.pearsonr(Z[n], Z[m]) # Pearson r correlation test
                Pearson_s.append((r, r_p))

        r = np.array([p[0] for p in Pearson_s])
        r_p = np.array([p[1] for p in Pearson_s])

        Qq_bn.append(q)
        Zz_bn.append(Z)
        R_bn.append(r)
        Rp_bn.append(r_p)
        
        
# per-neuron KS and dispersion statistics; results go into the *_bn lists of this cell 
# (the original appended to the *_ll lists, which are only created in a later cell)
q_DS_bn = []
T_DS_bn = []
T_KS_bn = []
for q in Qq_bn:
    for qq in q:
        T_DS, T_KS, sign_DS, sign_KS, p_DS, p_KS = utils.stats.KS_statistics(qq, alpha=0.05, alpha_s=0.05)
        T_DS_bn.append(T_DS)
        T_KS_bn.append(T_KS)
        
        # scale T_DS by sqrt(2/(T-1)), T being the number of quantiles, and map it to a quantile
        Z_DS = T_DS/np.sqrt(2/(qq.shape[0]-1))
        q_DS_bn.append(utils.stats.Z_to_q(Z_DS))

# arrange everything as (fold, bin size, ...)
Qq_bn = np.array(Qq_bn).reshape(len(CV), len(binnings), -1)
Zz_bn = np.array(Zz_bn).reshape(len(CV), len(binnings), -1)
R_bn = np.array(R_bn).reshape(len(CV), len(binnings), -1)
Rp_bn = np.array(Rp_bn).reshape(len(CV), len(binnings), -1)
        
q_DS_bn = np.array(q_DS_bn).reshape(len(CV), len(binnings), -1)
T_DS_bn = np.array(T_DS_bn).reshape(len(CV), len(binnings), -1)
T_KS_bn = np.array(T_KS_bn).reshape(len(CV), len(binnings), -1)

In [None]:
# reload the dataset at bin size 40 so that the notebook-level rcov, tbin, rc_t, max_count 
# and x_counts used by the following cells refer to this bin size
bn = 40
rcov, neurons, tbin, resamples, rc_t, region_edge = HDC.get_dataset(session_id, phase, bn)
max_count = int(rc_t.max())
x_counts = torch.arange(max_count+1)

HD_offset = -1.0 # global shift of head direction coordinates, makes plots better as the preferred head directions are not at axis lines

In [None]:
# cross validation
PLL_rg_ll = []
PLL_rg_cov = []
kcvs = [1, 2, 3, 5, 6, 8] # validation segments from splitting data into 10

# beta and batchsize are also read by the input-space cross-validation cell further down
beta = 0.0
batchsize = 5000

PLL_rg_ll = [] # repeats the initialisation at the top of this cell
Ms = modes[2:5] # same 'hd_w_s_pos_t' regressors with likelihoods 'U', 'IP' and 'hNB'
for mode in Ms: # likelihood
    
    for cvdata in models.get_cv_sets(mode, kcvs, batchsize, rc_t, resamples, rcov):
        kcv, ftrain, fcov, vtrain, vcov, batch_size = cvdata
        cv_set = (ftrain, fcov, vtrain, vcov)
    
        full_model = get_full_model(session_id, phase, cvdata, resamples, bn, mode, 
                                    rcov, max_count, neurons, gpu=gpu_dev)
        PLL_rg_ll.append(models.RG_pred_ll(full_model, mode[2], cv_set, bound='ELBO', 
                                           beta=beta, neuron_group=None, ll_mode='GH', ll_samples=100))
    
# one row per model, one column per validation fold
PLL_rg_ll = np.array(PLL_rg_ll).reshape(len(Ms), len(kcvs))

In [None]:
CV = [2, 5, 8] # validation segments from splitting data into 10

### KS test ###
# For every validation fold in CV and every model in Ms (set in the cross-validation cell above), 
# collect the quantiles q, their Z-scores, and the pairwise Pearson correlations between neurons.
Qq_ll = []
Zz_ll = []
R_ll = []
Rp_ll = []

N = len(pick_neuron)
for kcv in CV:
    for en, mode in enumerate(Ms):
        cvdata = models.get_cv_sets(mode, [kcv], 3000, rc_t, resamples, rcov)[0]
        kcv_str, ftrain, fcov, vtrain, vcov, batch_size = cvdata
        cv_set = (ftrain, fcov, vtrain, vcov)
        time_steps = ftrain.shape[-1]

        full_model = get_full_model(session_id, phase, cvdata, resamples, bn, 
                                    mode, rcov, max_count, neurons, gpu=gpu_dev)

        # first model in Ms (the 'U' likelihood when Ms = modes[2:5]): quantiles come from 
        # the predictive count distribution
        if en == 0:
            q_ = []
            Z_ = []
            for b in range(full_model.inputs.batches): # predictive posterior
                P_mc = models.compute_pred_P(full_model, b, pick_neuron, None, cov_samples=10, ll_samples=1, tr=0)
                P = P_mc.mean(0).cpu().numpy()

                for n in range(N):
                    spike_binned = full_model.likelihood.spikes[b][0, pick_neuron[n], :].numpy()
                    q, Z = models.get_q_Z(P[n, ...], spike_binned, deq_noise=None)
                    q_.append(q)
                    Z_.append(Z)

            # q_/Z_ are ordered batch-major, neuron-minor: stride N picks one neuron across batches
            q = []
            Z = []
            for n in range(N):
                q.append(np.concatenate(q_[n::N]))
                Z.append(np.concatenate(Z_[n::N]))

        # remaining models: quantiles come from models.compute_count_stats
        elif en > 0:
            cov_used = models.cov_used(mode[2], fcov, 'HDC')
            q = models.compute_count_stats(full_model, mode[1], tbin, ftrain, cov_used, pick_neuron, \
                                            traj_len=1, start=0, T=time_steps, bs=5000)
            Z = [utils.stats.q_to_Z(q_) for q_ in q]    

        # Pearson correlation of Z-scores for every unordered neuron pair
        Pearson_s = []
        for n in range(len(pick_neuron)):
            for m in range(n+1, len(pick_neuron)):
                r, r_p = scstats.pearsonr(Z[n], Z[m]) # Pearson r correlation test
                Pearson_s.append((r, r_p))

        r = np.array([p[0] for p in Pearson_s])
        r_p = np.array([p[1] for p in Pearson_s])

        Qq_ll.append(q)
        Zz_ll.append(Z)
        R_ll.append(r)
        Rp_ll.append(r_p)
        
        
# per-neuron KS and dispersion statistics
# NOTE(review): sign_DS, sign_KS, p_DS and p_KS are overwritten on every iteration, so after 
# the loop they hold only the values of the last call — confirm this is what the save cell expects
q_DS_ll = []
T_DS_ll = []
T_KS_ll = []
for q in Qq_ll:
    for qq in q:
        T_DS, T_KS, sign_DS, sign_KS, p_DS, p_KS = utils.stats.KS_statistics(qq, alpha=0.05, alpha_s=0.05)
        T_DS_ll.append(T_DS)
        T_KS_ll.append(T_KS)
        
        # scale T_DS by sqrt(2/(T-1)), T being the number of quantiles, and map it to a quantile
        Z_DS = T_DS/np.sqrt(2/(qq.shape[0]-1))
        q_DS_ll.append(utils.stats.Z_to_q(Z_DS))

# arrange everything as (fold, model, ...)
Qq_ll = np.array(Qq_ll).reshape(len(CV), len(Ms), -1)
Zz_ll = np.array(Zz_ll).reshape(len(CV), len(Ms), -1)
R_ll = np.array(R_ll).reshape(len(CV), len(Ms), -1)
Rp_ll = np.array(Rp_ll).reshape(len(CV), len(Ms), -1)
        
q_DS_ll = np.array(q_DS_ll).reshape(len(CV), len(Ms), -1)
T_DS_ll = np.array(T_DS_ll).reshape(len(CV), len(Ms), -1)
T_KS_ll = np.array(T_KS_ll).reshape(len(CV), len(Ms), -1)

In [None]:
# Cross-validated predictive log likelihood for models with different regressor sets.
# Relies on `beta` and `batchsize` defined in the likelihood cross-validation cell.
PLL_rg_cov = []
kcvs = [1, 2, 3, 5, 6, 8] # validation segments from splitting data into 10

Ms = modes[:3] # 'U' likelihood with regressors 'hd', 'hd_w_s_t' and 'hd_w_s_pos_t'
for mode in Ms: # input space
    
    for cvdata in models.get_cv_sets(mode, kcvs, batchsize, rc_t, resamples, rcov):
        kcv, ftrain, fcov, vtrain, vcov, batch_size = cvdata
        cv_set = (ftrain, fcov, vtrain, vcov)
    
        full_model = get_full_model(session_id, phase, cvdata, resamples, bn, mode, 
                                    rcov, max_count, neurons, gpu=gpu_dev)
        PLL_rg_cov.append(models.RG_pred_ll(full_model, mode[2], cv_set, bound='ELBO', 
                                            beta=beta, neuron_group=None, ll_mode='GH', ll_samples=100))
    
# one row per model, one column per validation fold
PLL_rg_cov = np.array(PLL_rg_cov).reshape(len(Ms), len(kcvs))

In [None]:
# load Universal regression model
mode = modes[2]
kcv = -1 # fit on the full dataset
cvdata = models.get_cv_sets(mode, [kcv], 3000, rc_t, resamples, rcov)[0]
full_model = get_full_model(session_id, phase, cvdata, resamples, bn, mode, rcov, 
                            max_count, neurons, gpu=gpu_dev)

# upper end of the time axis used for the time tuning curves below
# (presumably the total recording duration — TODO confirm units of tbin/resamples)
TT = tbin*resamples

In [None]:
# marginalized tuning curves
MC = 100
skip = 10


### hd ###
# count distribution as a function of head direction (covariate 0), evaluated on 
# `steps` points in [0, 2*pi]
steps = 100
P_tot = models.marginalized_P(full_model, [np.linspace(0, 2*np.pi, steps)], [0], rcov, 10000, 
                              pick_neuron, MC=MC, skip=skip)
# mean, variance and Fano factor of the count distribution on the last axis of P_tot
avg = (x_counts[None, None, None, :]*P_tot).sum(-1)
var = (x_counts[None, None, None, :]**2*P_tot).sum(-1)-avg**2
ff = var/avg

# 5%, 50% and 95% percentile curves; the CPU tensors are kept so that the tuning 
# indices below can use torch reductions, NumPy copies are made for saving
avgs = [cs_.cpu() for cs_ in utils.signal.percentiles_from_samples(
    avg, percentiles=[0.05, 0.5, 0.95], smooth_length=5, padding_mode='circular')]
mhd_lower, mhd_mean, mhd_upper = [cs_.numpy() for cs_ in avgs]

ffs = [cs_.cpu() for cs_ in utils.signal.percentiles_from_samples(
    ff, percentiles=[0.05, 0.5, 0.95], smooth_length=5, padding_mode='circular')]
mhd_fflower, mhd_ffmean, mhd_ffupper = [cs_.numpy() for cs_ in ffs]

# total variance decomposition
hd_mean_EV = avg.var(0).mean(-1)
hd_mean_VE = avg.mean(0).var(-1)
# the Fano factor decomposition uses ff (the original reused avg here)
hd_ff_EV = ff.var(0).mean(-1)
hd_ff_VE = ff.mean(0).var(-1)

# TI
# tuning index (max - min)/(max + min) of the median curves; computed on the torch 
# tensors because NumPy arrays do not accept the `dim` keyword
hd_mean_med = avgs[1]
hd_ff_med = ffs[1]
hd_mean_tf = (hd_mean_med.max(dim=-1)[0] - hd_mean_med.min(dim=-1)[0]) / (hd_mean_med.max(dim=-1)[0] + hd_mean_med.min(dim=-1)[0])
hd_ff_tf = (hd_ff_med.max(dim=-1)[0] - hd_ff_med.min(dim=-1)[0]) /(hd_ff_med.max(dim=-1)[0] + hd_ff_med.min(dim=-1)[0])


### omega ###
# count distribution as a function of covariate 1, evaluated on a symmetric range 
# whose half-width is half the observed range of rcov[1]
steps = 100
w_edge = (-rcov[1].min()+rcov[1].max())/2.
covariates_w = np.linspace(-w_edge, w_edge, steps)
P_tot = models.marginalized_P(full_model, [covariates_w], [1], rcov, 10000, 
                              pick_neuron, MC=MC, skip=skip)
# mean, variance and Fano factor of the count distribution on the last axis of P_tot
avg = (x_counts[None, None, None, :]*P_tot).sum(-1)
var = (x_counts[None, None, None, :]**2*P_tot).sum(-1)-avg**2
ff = var/avg

# average over the leading axis, then tuning index (max - min)/(max + min) over the last axis
mw_mean = avg.mean(0)
mw_ff = ff.mean(0)
w_mean_tf = (mw_mean.max(dim=-1)[0] - mw_mean.min(dim=-1)[0]) / (mw_mean.max(dim=-1)[0] + mw_mean.min(dim=-1)[0])
w_ff_tf = (mw_ff.max(dim=-1)[0] - mw_ff.min(dim=-1)[0]) /(mw_ff.max(dim=-1)[0] + mw_ff.min(dim=-1)[0])


### speed ###
# count distribution as a function of covariate 2, evaluated on `steps` points in [0, 30]
steps = 100
P_tot = models.marginalized_P(full_model, [np.linspace(0, 30., steps)], [2], rcov, 10000, 
                              pick_neuron, MC=MC, skip=skip)
# mean, variance and Fano factor of the count distribution on the last axis of P_tot
avg = (x_counts[None, None, None, :]*P_tot).sum(-1)
var = (x_counts[None, None, None, :]**2*P_tot).sum(-1)-avg**2
ff = var/avg

# average over the leading axis, then tuning index (max - min)/(max + min) over the last axis; 
# the mean-rate index is normalised by the mean curve itself (the original divided by the 
# Fano factor range)
ms_mean = avg.mean(0)
ms_ff = ff.mean(0)
s_mean_tf = (ms_mean.max(dim=-1)[0] - ms_mean.min(dim=-1)[0]) / (ms_mean.max(dim=-1)[0] + ms_mean.min(dim=-1)[0])
s_ff_tf = (ms_ff.max(dim=-1)[0] - ms_ff.min(dim=-1)[0]) /(ms_ff.max(dim=-1)[0] + ms_ff.min(dim=-1)[0])


### time ###
# count distribution as a function of covariate 5, evaluated on `steps` points in [0, TT]
steps = 100
P_tot = models.marginalized_P(full_model, [np.linspace(0, TT, steps)], [5], rcov, 10000, 
                              pick_neuron, MC=MC, skip=skip)
# mean, variance and Fano factor of the count distribution on the last axis of P_tot
avg = (x_counts[None, None, None, :]*P_tot).sum(-1)
var = (x_counts[None, None, None, :]**2*P_tot).sum(-1)-avg**2
ff = var/avg

# average over the leading axis, then tuning index (max - min)/(max + min) over the last axis; 
# the mean-rate index is normalised by the mean curve itself (the original divided by the 
# Fano factor range)
mt_mean = avg.mean(0)
mt_ff = ff.mean(0)
t_mean_tf = (mt_mean.max(dim=-1)[0] - mt_mean.min(dim=-1)[0]) / (mt_mean.max(dim=-1)[0] + mt_mean.min(dim=-1)[0])
t_ff_tf = (mt_ff.max(dim=-1)[0] - mt_ff.min(dim=-1)[0]) /(mt_ff.max(dim=-1)[0] + mt_ff.min(dim=-1)[0])



### position ###
# count distribution as a function of covariates 3 and 4 on an A x B grid spanning 
# the observed x/y range
grid_size_pos = (12, 10)
grid_shape_pos = [[left_x, right_x], [bottom_y, top_y]]

# np.prod replaces the np.product alias, which was removed in NumPy 2.0
steps = np.prod(grid_size_pos)
A, B = grid_size_pos

# flattened grid with x as the slow index and y as the fast index
cov_list = [np.linspace(left_x, right_x, A)[:, None].repeat(B, axis=1).flatten(), 
            np.linspace(bottom_y, top_y, B)[None, :].repeat(A, axis=0).flatten()]
                      
P_tot = models.marginalized_P(full_model, cov_list, [3, 4], rcov, 10000, 
                              pick_neuron, MC=MC, skip=skip)
# mean, variance and Fano factor of the count distribution on the last axis of P_tot
avg = (x_counts[None, None, None, :]*P_tot).sum(-1)
var = (x_counts[None, None, None, :]**2*P_tot).sum(-1)-avg**2
ff = var/avg

# average over the leading axis, tuning index (max - min)/(max + min) over the flattened 
# grid, then fold the grid axis back into (A, B)
mpos_mean = avg.mean(0)
mpos_ff = ff.mean(0)
pos_mean_tf = (mpos_mean.max(dim=-1)[0] - mpos_mean.min(dim=-1)[0]) / (mpos_mean.max(dim=-1)[0] + mpos_mean.min(dim=-1)[0])
pos_ff_tf = (mpos_ff.max(dim=-1)[0] - mpos_ff.min(dim=-1)[0]) / (mpos_ff.max(dim=-1)[0] + mpos_ff.min(dim=-1)[0])
mpos_mean = mpos_mean.reshape(-1, A, B)
mpos_ff = mpos_ff.reshape(-1, A, B)

In [None]:
# conditional tuning curves
# MC is used for the 1D curves and the hd_w fields, MC_ for the hd_t and position fields
MC = 300
MC_ = 100


### head direction tuning ###
# covariate order as used throughout this cell: 0 = head direction, 1 = omega, 2 = speed, 
# 3/4 = x/y position, 5 = time; all but head direction are held fixed here 
# (omega, speed and time at 0, position at the centre of the observed range)
steps = 100
covariates = [np.linspace(0, 2*np.pi, steps)-HD_offset, 
              0.*np.ones(steps), 0.*np.ones(steps), 
              (left_x+right_x)/2.*np.ones(steps), (bottom_y+top_y)/2.*np.ones(steps), 
              0.*np.ones(steps)]

P_mc = models.compute_P(full_model, covariates, pick_neuron, MC=MC).cpu()


# mean, variance and Fano factor of the count distribution on the last axis of P_mc
avg = (x_counts[None, None, None, :]*P_mc).sum(-1)
xcvar = ((x_counts[None, None, None, :]**2*P_mc).sum(-1)-avg**2)
ff = xcvar/avg

# 5%, 50% and 95% percentile curves, smoothed with circular padding
avgs = utils.signal.percentiles_from_samples(avg, percentiles=[0.05, 0.5, 0.95], 
                                             smooth_length=5, padding_mode='circular')
lower_hd, mean_hd, upper_hd = [cs_.cpu().numpy() for cs_ in avgs]

ffs = utils.signal.percentiles_from_samples(ff, percentiles=[0.05, 0.5, 0.95], 
                                            smooth_length=5, padding_mode='circular')
fflower_hd, ffmean_hd, ffupper_hd = [cs_.cpu().numpy() for cs_ in ffs]

# x-axis stored for plotting; unlike the evaluated covariate it does not include HD_offset
covariates_hd = np.linspace(0, 2*np.pi, steps)



### hd_w ###
# mean count over a head direction x omega grid, remaining covariates held fixed
grid_size_hdw = (51, 41)
grid_shape_hdw = [[0, 2*np.pi], [-10., 10.]]

# np.prod replaces the np.product alias, which was removed in NumPy 2.0
steps = np.prod(grid_size_hdw)
A, B = grid_size_hdw
# flattened grid with head direction as the slow index and omega as the fast index
covariates = [np.linspace(0, 2*np.pi, A)[:, None].repeat(B, axis=1).flatten(), 
              np.linspace(-10., 10., B)[None, :].repeat(A, axis=0).flatten(), 
              0.*np.ones(steps), 
              (left_x+right_x)/2.*np.ones(steps), (bottom_y+top_y)/2.*np.ones(steps), 
              0.*np.ones(steps)]

# average the count distribution over the leading axis, take its mean count and 
# fold the grid axis back into (A, B)
P_mean = models.compute_P(full_model, covariates, pick_neuron, MC=MC).mean(0).cpu()
field_hdw = (x_counts[None, None, :]*P_mean).sum(-1).reshape(-1, A, B).numpy()



# compute preferred HD
# mean count on a finer head direction grid (A points) for B omega values
grid = (101, 21)
grid_shape = [[0, 2*np.pi], [-10., 10.]]

# np.prod replaces the np.product alias, which was removed in NumPy 2.0
steps = np.prod(grid)
A, B = grid

w_arr = np.linspace(-10., 10., B)
covariates = [np.linspace(0, 2*np.pi, A)[:, None].repeat(B, axis=1).flatten(), 
              w_arr[None, :].repeat(A, axis=0).flatten(), 
              0.*np.ones(steps), 
              (left_x+right_x)/2.*np.ones(steps), (bottom_y+top_y)/2.*np.ones(steps), 
              0.*np.ones(steps)]

P_mean = models.compute_P(full_model, covariates, pick_neuron, MC=MC).mean(0).cpu()
field = (x_counts[None, None, :]*P_mean).sum(-1).reshape(-1, A, B).numpy()



# preferred direction = angle of the rate-weighted mean of exp(i*theta) over the 
# head direction axis, wrapped to [0, 2*pi)
Z = np.cos(covariates[0]) + np.sin(covariates[0])*1j # CoM angle
Z = Z[None, :].reshape(-1, A, B)
pref_hdw = (np.angle((Z*field).mean(1)) % (2*np.pi)) # neurons, w


# ATI
# regress each neuron's preferred direction on omega; the negated coefficient `a` and the 
# final loss are stored (see utils.signal.circ_lin_regression for their exact meaning)
ATI = []
res_var = []
for k in range(len(pick_neuron)): # pref_hdw has one row per entry of pick_neuron
    _, a, shift, losses = utils.signal.circ_lin_regression(pref_hdw[k, :], w_arr/(2*np.pi), dev='cpu', iters=1000, lr=1e-2)
    ATI.append(-a)
    res_var.append(losses[-1])
ATI = np.array(ATI)
res_var = np.array(res_var)




### omega tuning ###
# per-neuron percentile curves of the mean count and Fano factor as a function of omega
mean_w = []
lower_w = []
upper_w = []
ffmean_w = []
fflower_w = []
ffupper_w = []

steps = 100
w_edge = (-rcov[1].min()+rcov[1].max())/2.
covariates_w = np.linspace(-w_edge, w_edge, steps)
for en, n in enumerate(pick_neuron):
    # head direction is fixed at the neuron's preferred direction taken at the middle 
    # entry of w_arr; speed and time at 0, position at the centre of the observed range
    covariates = [pref_hdw[en, len(w_arr)//2]*np.ones(steps), 
                  covariates_w, 
                  0.*np.ones(steps), 
                  (left_x+right_x)/2.*np.ones(steps), (bottom_y+top_y)/2.*np.ones(steps), 
                  0.*np.ones(steps)]

    # evaluate a single neuron and drop its singleton neuron axis
    P_mc = models.compute_P(full_model, covariates, [n], MC=MC)[:, 0, ...].cpu()

    # mean, variance and Fano factor of the count distribution on the last axis of P_mc
    avg = (x_counts[None, None, :]*P_mc).sum(-1)
    xcvar = ((x_counts[None, None, :]**2*P_mc).sum(-1)-avg**2)
    ff = xcvar/avg

    # 5%, 50% and 95% percentile curves, smoothed with replicate padding
    avgs = utils.signal.percentiles_from_samples(avg, percentiles=[0.05, 0.5, 0.95], 
                                                 smooth_length=5, padding_mode='replicate')
    lower, mean, upper = [cs_.cpu().numpy() for cs_ in avgs]

    ffs = utils.signal.percentiles_from_samples(ff, percentiles=[0.05, 0.5, 0.95], 
                                                smooth_length=5, padding_mode='replicate')
    fflower, ffmean, ffupper = [cs_.cpu().numpy() for cs_ in ffs]
    
    lower_w.append(lower)
    mean_w.append(mean)
    upper_w.append(upper)
    
    fflower_w.append(fflower)
    ffmean_w.append(ffmean)
    ffupper_w.append(ffupper)




### hd_t ###
# mean count over a head direction x time grid, remaining covariates held fixed
grid_size_hdt = (51, 41)
grid_shape_hdt = [[0, 2*np.pi], [0., TT]]

# np.prod replaces the np.product alias, which was removed in NumPy 2.0
steps = np.prod(grid_size_hdt)
A, B = grid_size_hdt
# flattened grid with head direction as the slow index and time as the fast index
covariates = [np.linspace(0, 2*np.pi, A)[:, None].repeat(B, axis=1).flatten(), 
              0.*np.ones(steps), 0.*np.ones(steps), 
              (left_x+right_x)/2.*np.ones(steps), (bottom_y+top_y)/2.*np.ones(steps), 
              np.linspace(0., TT, B)[None, :].repeat(A, axis=0).flatten()]

# average the count distribution over the leading axis, take its mean count and 
# fold the grid axis back into (A, B)
P_mean = models.compute_P(full_model, covariates, pick_neuron, MC=MC_).mean(0).cpu()
field_hdt = (x_counts[None, None, :]*P_mean).sum(-1).reshape(-1, A, B).numpy()



# drift and similarity matrix
# mean count on a fine head direction grid (A points) at B time points
grid = (201, 16)
grid_shape = [[0, 2*np.pi], [0., TT]]

# np.prod replaces the np.product alias, which was removed in NumPy 2.0
steps = np.prod(grid)
A, B = grid

t_arr = np.linspace(0., TT, B)
dt_arr = t_arr[1]-t_arr[0]
covariates = [np.linspace(0, 2*np.pi, A)[:, None].repeat(B, axis=1).flatten(), 
              0.*np.ones(steps), 0.*np.ones(steps), 
              (left_x+right_x)/2.*np.ones(steps), (bottom_y+top_y)/2.*np.ones(steps), 
              t_arr[None, :].repeat(A, axis=0).flatten()]

P_mean = models.compute_P(full_model, covariates, pick_neuron, MC=MC_).mean(0).cpu()
field = (x_counts[None, None, :]*P_mean).sum(-1).reshape(-1, A, B).numpy()



# rate-weighted mean of exp(i*theta) over the head direction axis: its angle gives the 
# preferred direction, one minus its modulus is stored as the tuning width
Z = np.cos(covariates[0]) + np.sin(covariates[0])*1j # CoM angle
Z = Z[None, :].reshape(-1, A, B)
E_exp = (Z*field).sum(-2)/field.sum(-2)
pref_hdt = (np.angle(E_exp) % (2*np.pi)) # neurons, t

tun_width = 1.-np.abs(E_exp)
amp_t = field.mean(-2) # mean amplitude
ampm_t = field.max(-2)

# similarity of z-scored tuning curves between neurons, one matrix per time point with 
# neurons ordered by their preferred direction at that time; every time point is kept 
# (the original overwrote sim_mat on each iteration, leaving only the last one)
sim_mat = []
act = (field-field.mean(-2, keepdims=True))/field.std(-2, keepdims=True)
en = np.argsort(pref_hdt, axis=0)
for t in range(B):
    a = act[en[:, t], :, t]
    sim_mat.append((a[:, None, :]*a[None, ...]).mean(-1))
sim_mat = np.array(sim_mat) # time points, neurons, neurons



# regress each neuron's preferred direction on time; the time axis is scaled down by 1e2 
# for the fit and the coefficient is scaled down again by the same factor when stored
drift = []
res_var_drift = []
for k in range(len(pick_neuron)):
    _, a, shift, losses = utils.signal.circ_lin_regression(pref_hdt[k, :], t_arr/(2*np.pi)/1e2, 
                                                           dev='cpu', iters=1000, lr=1e-2)
    drift.append(a/1e2)
    res_var_drift.append(losses[-1])
drift = np.array(drift)
res_var_drift = np.array(res_var_drift)




### speed ###
# per-neuron percentile curves of the mean count and Fano factor as a function of speed
mean_s = []
lower_s = []
upper_s = []
ffmean_s = []
fflower_s = []
ffupper_s = []
    
steps = 100
covariates_s = np.linspace(0, 30., steps)
for en, n in enumerate(pick_neuron):
    # head direction is fixed at the neuron's preferred direction taken at the middle 
    # entry of w_arr; omega and time at 0, position at the centre of the observed range
    covariates = [pref_hdw[en, len(w_arr)//2]*np.ones(steps), 
                  0.*np.ones(steps), covariates_s, 
                  (left_x+right_x)/2.*np.ones(steps), (bottom_y+top_y)/2.*np.ones(steps), 
                  0.*np.ones(steps)]

    # evaluate a single neuron and drop its singleton neuron axis
    P_mc = models.compute_P(full_model, covariates, [n], MC=MC)[:, 0, ...].cpu()

    # mean, variance and Fano factor of the count distribution on the last axis of P_mc
    avg = (x_counts[None, None, :]*P_mc).sum(-1)
    xcvar = ((x_counts[None, None, :]**2*P_mc).sum(-1)-avg**2)
    ff = xcvar/avg

    # 5%, 50% and 95% percentile curves, smoothed with replicate padding
    avgs = utils.signal.percentiles_from_samples(avg, percentiles=[0.05, 0.5, 0.95], 
                                                 smooth_length=5, padding_mode='replicate')
    lower, mean, upper = [cs_.cpu().numpy() for cs_ in avgs]
    
    ffs = utils.signal.percentiles_from_samples(ff, percentiles=[0.05, 0.5, 0.95], 
                                                smooth_length=5, padding_mode='replicate')
    fflower, ffmean, ffupper = [cs_.cpu().numpy() for cs_ in ffs]

    lower_s.append(lower)
    mean_s.append(mean)
    upper_s.append(upper)
    
    fflower_s.append(fflower)
    ffmean_s.append(ffmean)
    ffupper_s.append(ffupper)
    
    
    
    
### time ###
# per-neuron percentile curves of the mean count and Fano factor as a function of time
mean_t = []
lower_t = []
upper_t = []
ffmean_t = []
fflower_t = []
ffupper_t = []
    
steps = 100
covariates_t = np.linspace(0, TT, steps)
for en, n in enumerate(pick_neuron):
    # head direction is fixed at the neuron's preferred direction taken at the middle 
    # entry of w_arr; omega and speed at 0, position at the centre of the observed range
    covariates = [pref_hdw[en, len(w_arr)//2]*np.ones(steps), 
                  0.*np.ones(steps), 0.*np.ones(steps), 
                  (left_x+right_x)/2.*np.ones(steps), (bottom_y+top_y)/2.*np.ones(steps), 
                  covariates_t]

    # evaluate a single neuron and drop its singleton neuron axis
    P_mc = models.compute_P(full_model, covariates, [n], MC=MC)[:, 0, ...].cpu()

    # mean, variance and Fano factor of the count distribution on the last axis of P_mc
    avg = (x_counts[None, None, :]*P_mc).sum(-1)
    xcvar = ((x_counts[None, None, :]**2*P_mc).sum(-1)-avg**2)
    ff = xcvar/avg

    # 5%, 50% and 95% percentile curves, smoothed with replicate padding
    avgs = utils.signal.percentiles_from_samples(avg, percentiles=[0.05, 0.5, 0.95], 
                                                 smooth_length=5, padding_mode='replicate')
    lower, mean, upper = [cs_.cpu().numpy() for cs_ in avgs]
    
    ffs = utils.signal.percentiles_from_samples(ff, percentiles=[0.05, 0.5, 0.95], 
                                                smooth_length=5, padding_mode='replicate')
    fflower, ffmean, ffupper = [cs_.cpu().numpy() for cs_ in ffs]

    lower_t.append(lower)
    mean_t.append(mean)
    upper_t.append(upper)
    
    fflower_t.append(fflower)
    ffmean_t.append(ffmean)
    ffupper_t.append(ffupper)
    
    
    
    
### pos ###
# per-neuron mean count and Fano factor over an x/y grid whose aspect ratio follows 
# the observed position range (41 points along y)
grid_shape_pos = [[left_x, right_x], [bottom_y, top_y]]
H = grid_shape_pos[1][1]-grid_shape_pos[1][0]
W = grid_shape_pos[0][1]-grid_shape_pos[0][0]
grid_size_pos = (int(41*W/H), 41)


# np.prod replaces the np.product alias, which was removed in NumPy 2.0
steps = np.prod(grid_size_pos)
A, B = grid_size_pos

field_pos = []
ff_pos = []
for en, n in enumerate(pick_neuron):
    # head direction is fixed at the neuron's preferred direction taken at the middle 
    # entry of w_arr; omega, speed and time at 0. The time covariate previously used `t`, 
    # a loop index left over from the similarity matrix loop, instead of the 0 used by 
    # every other conditional curve in this cell.
    covariates = [pref_hdw[en, len(w_arr)//2]*np.ones(steps), 
                  0.*np.ones(steps), 0.*np.ones(steps), 
                  np.linspace(left_x, right_x, A)[:, None].repeat(B, axis=1).flatten(), 
                  np.linspace(bottom_y, top_y, B)[None, :].repeat(A, axis=0).flatten(), 
                  0.*np.ones(steps)]

    # evaluate a single neuron and drop its singleton neuron axis
    P_mc = models.compute_P(full_model, covariates, [n], MC=MC_)[:, 0, ...].cpu()
    # first and second moments of the counts, folded back onto the (A, B) grid
    avg = (x_counts[None, None, :]*P_mc).sum(-1).reshape(-1, A, B).numpy()
    var = (x_counts[None, None, :]**2*P_mc).sum(-1).reshape(-1, A, B).numpy()
    xcvar = (var-avg**2)

    # average over the leading axis; the small constant avoids division by zero
    field_pos.append(avg.mean(0))
    ff_pos.append((xcvar/(avg+1e-12)).mean(0))


field_pos = np.stack(field_pos)
ff_pos = np.stack(ff_pos)

In [None]:
# Per-neuron Pearson correlation between the Fano factor and the mean count over time, 
# together with the ratio of their standard deviations, for entry `b` of avg_models/ff_models
b = 1
avg_b, ff_b = avg_models[b], ff_models[b]

Pearson_ff, ratio = [], []
for avg, ff in zip(avg_b, ff_b):
    r, r_p = scstats.pearsonr(ff, avg) # correlation coefficient and its p-value
    Pearson_ff += [(r, r_p)]
    ratio += [ff.std()/avg.std()]

In [None]:
# Bundle all regression results into one tuple and pickle it; the position of each 
# entry is the interface to whatever loads the file, so the order must not change.
# NOTE(review): sign_KS and sign_DS hold only the values from the last 
# utils.stats.KS_statistics call executed in the kernel — confirm that is intended.
data_run = (
    avg_models, var_models, ff_models, 
    Pearson_ff, ratio, 
    PLL_rg_ll, PLL_rg_cov, 
    Qq_ll, Zz_ll, R_ll, Rp_ll, q_DS_ll, T_DS_ll, T_KS_ll, 
    sign_KS, sign_DS, 
    # mhd_ffmean is the marginal Fano factor curve computed above (`mhd_ff` was never defined)
    mhd_mean, mhd_ffmean, hd_mean_tf, hd_ff_tf, 
    mw_mean, mw_ff, w_mean_tf, w_ff_tf, 
    ms_mean, ms_ff, s_mean_tf, s_ff_tf, 
    mt_mean, mt_ff, t_mean_tf, t_ff_tf, 
    mpos_mean, mpos_ff, pos_mean_tf, pos_ff_tf, 
    covariates_hd, lower_hd, mean_hd, upper_hd, 
    fflower_hd, ffmean_hd, ffupper_hd, 
    covariates_s, lower_s, mean_s, upper_s, 
    fflower_s, ffmean_s, ffupper_s, 
    covariates_t, lower_t, mean_t, upper_t, 
    fflower_t, ffmean_t, ffupper_t, 
    covariates_w, lower_w, mean_w, upper_w, 
    fflower_w, ffmean_w, ffupper_w, 
    grid_size_pos, grid_shape_pos, field_pos, ff_pos, 
    grid_size_hdw, grid_shape_hdw, field_hdw, 
    grid_size_hdt, grid_shape_hdt, field_hdt, 
    pref_hdw, ATI, res_var, 
    pref_hdt, drift, res_var_drift, 
    tun_width, amp_t, ampm_t, sim_mat, 
    pick_neuron, max_count, tbin, rcov, region_edge
)

# the context manager closes the file even if pickling fails
with open('./checkpoint/P_HDC_rg40.p', 'wb') as fh:
    pickle.dump(data_run, fh)

### Noise correlations

In [None]:
# reload the dataset at bin size 40 and recompute the quantities derived from it, 
# so this section does not rely on values left over from the regression section
bn = 40
rcov, neurons, tbin, resamples, rc_t, region_edge = HDC.get_dataset(session_id, phase, bn)

# range of covariates 3 and 4, used as the bounds of the position axes
left_x = rcov[3].min()
right_x = rcov[3].max()
bottom_y = rcov[4].min()
top_y = rcov[4].max()

# analyse every neuron
pick_neuron = list(range(neurons))

In [None]:
# Replaces the regression `modes` list. Same tuple layout as before:
# (mtype, ll_mode, r_mode, num_induc, inv_link, C, z_dims, delays, folds, cv_switch, basis_mode).
# The first entry has no latent input (z_dims is empty); the '_R1'..'_R4' entries set z_dims to [6].
modes = [('GP', 'U', 'hd_w_s_pos_t', 64, 'identity', 3, [], False, 10, False, 'ew'), 
         ('GP', 'U', 'hd_w_s_pos_t_R1', 72, 'identity', 3, [6], False, 10, False, 'ew'), 
         ('GP', 'U', 'hd_w_s_pos_t_R2', 80, 'identity', 3, [6], False, 10, False, 'ew'), 
         ('GP', 'U', 'hd_w_s_pos_t_R3', 88, 'identity', 3, [6], False, 10, False, 'ew'), 
         ('GP', 'U', 'hd_w_s_pos_t_R4', 96, 'identity', 3, [6], False, 10, False, 'ew')]

In [None]:
### statistics over the behaviour ###
# One entry per model in `modes`: predictive mean, variance and Fano factor of the counts
avg_models_z = []
var_models_z = []
ff_models_z = []

kcv = 2

bn = 40

# the dataset depends only on the bin size, so it is loaded once instead of once per model
rcov, neurons, tbin, resamples, rc_t, region_edge = HDC.get_dataset(session_id, phase, bn)
max_count = int(rc_t.max())
x_counts = torch.arange(max_count+1) # count values 0..max_count

for mode in modes:

    cvdata = models.get_cv_sets(mode, [kcv], 5000, rc_t, resamples, rcov)[0]
    full_model = get_full_model(session_id, phase, cvdata, resamples, bn, 
                                mode, rcov, max_count, neurons, gpu=gpu_dev)


    avg_model = []
    var_model = []
    ff_model = []

    for b in range(full_model.inputs.batches):
        P_mc = models.compute_pred_P(full_model, b, pick_neuron, None, cov_samples=10, ll_samples=1, tr=0).cpu()

        # first moment, variance and Fano factor of the count distribution on the last axis of P_mc
        avg = (x_counts[None, None, None, :]*P_mc).sum(-1)
        var = ((x_counts[None, None, None, :]**2*P_mc).sum(-1)-avg**2)
        ff = var/(avg+1e-12) # small constant avoids division by zero
        avg_model.append(avg)
        var_model.append(var)
        ff_model.append(ff)

    # join batches along the last axis, then average over the leading axis
    avg_models_z.append(torch.cat(avg_model, dim=-1).mean(0).numpy())
    var_models_z.append(torch.cat(var_model, dim=-1).mean(0).numpy())
    ff_models_z.append(torch.cat(ff_model, dim=-1).mean(0).numpy())
    

In [None]:
# For every model in avg_models_z/ff_models_z: per-neuron Pearson correlation between the 
# Fano factor and the mean count, and the ratio of their standard deviations
b = 1
Pearson_ffz, ratioz = [], []

for d, avg_d in enumerate(avg_models_z):
    ff_d = ff_models_z[d]
    Pearson_ffz_, ratioz_ = [], []
    for avg, ff in zip(avg_d, ff_d):
        r, r_p = scstats.pearsonr(ff, avg) # correlation coefficient and its p-value
        Pearson_ffz_ += [(r, r_p)]
        ratioz_ += [ff.std()/avg.std()]

    Pearson_ffz += [Pearson_ffz_]
    ratioz += [ratioz_]

In [None]:
# reload the dataset at bin size 40; `binning` is the bin size passed to get_full_model 
# in the cells below
binning = 40
rcov, neurons, tbin, resamples, rc_t, region_edge = HDC.get_dataset(session_id, phase, binning)
max_count = int(rc_t.max())
x_counts = torch.arange(max_count+1)

In [None]:
# ELBO for models of different dimensions
kcvs = [2, 5, 8] # get corresponding training sets
Ms = modes[:5]

# one value per (model, fold): the objective averaged over the model's batches
elbo = []
for em, mode in enumerate(Ms):
    for cvdata in models.get_cv_sets(mode, kcvs, 3000, rc_t, resamples, rcov):
        kcv, ftrain, fcov, vtrain, vcov, batch_size = cvdata
        cv_set = (ftrain, fcov, vtrain, vcov)
        
        full_model = get_full_model(session_id, phase, cvdata, resamples, binning, 
                                    mode, rcov, max_count, neurons, gpu=gpu_dev)
        
        # NOTE(review): the batch count is read from `likelihood` here but from `inputs` 
        # in other cells — presumably equal, confirm
        batches = full_model.likelihood.batches
        print(batches)
        elbo_ = []
        for b in range(batches):
            elbo_.append(full_model.objective(b, cov_samples=1, ll_mode='GH', bound='ELBO', neuron=None, 
                                              beta=1., ll_samples=100).data.cpu().numpy())
        elbo.append(np.array(elbo_).mean())
        
# one row per model, one column per fold
elbo = np.array(elbo).reshape(len(Ms), len(kcvs))

In [None]:
# cross validation for dimensionality
beta = 0.0
n_group = np.arange(5)
# held-out neuron groups: six groups of five consecutive neurons (0-29) and a last group 
# of three (30-32)
val_neuron = [n_group, n_group+5, n_group+10, n_group+15, n_group+20, n_group+25, np.arange(3)+30]
ncvx = 2 # number of repeated evaluations per held-out group for the latent models
kcvs = [1, 2, 3, 5, 6, 8] # validation segments from splitting data into 10
Ms = modes[:5]

cv_pll = []
for em, mode in enumerate(Ms):
    for cvdata in models.get_cv_sets(mode, kcvs, 5000, rc_t, resamples, rcov):
        kcv, ftrain, fcov, vtrain, vcov, batch_size = cvdata
        cv_set = (ftrain, fcov, vtrain, vcov)
        
        # models after the first one take a latent input (z_dims is non-empty)
        if em > 0:
            for v_neuron in val_neuron:
                # rescales the value of groups with fewer than len(n_group) neurons
                fac = len(n_group)/len(v_neuron)
                
                # keep the smallest value returned over the ncvx repeats; the model is 
                # reloaded from its checkpoint before every repeat
                prev_ll = np.inf
                for tr in range(ncvx):
                    full_model = get_full_model(session_id, phase, cvdata, resamples, binning, 
                                                mode, rcov, max_count, neurons, gpu=gpu_dev)
                    # f_neuron: all neurons that are not in the held-out group
                    mask = np.ones((neurons,), dtype=bool)
                    mask[v_neuron] = False
                    f_neuron = np.arange(neurons)[mask]
                    # mode[-5] is the z_dims field of the specification tuple
                    ll = models.LVM_pred_ll(full_model, mode[-5], mode[2], cv_set, f_neuron, v_neuron, 
                                            cov_MC=1, ll_MC=10, beta=beta, beta_z=0.0)[0]
                    if ll < prev_ll:
                        prev_ll = ll

                cv_pll.append(fac*prev_ll)
                
        else: # no latent
            for v_neuron in val_neuron:
                fac = len(n_group)/len(v_neuron)
                
                full_model = get_full_model(session_id, phase, cvdata, resamples, binning, 
                                            mode, rcov, max_count, neurons, gpu=gpu_dev)
                cv_pll.append(fac*models.RG_pred_ll(full_model, mode[2], cv_set, bound='ELBO', 
                                                    beta=beta, neuron_group=v_neuron, ll_mode='GH', ll_samples=100))

        
# arrange as (model, fold, held-out neuron group)
cv_pll = np.array(cv_pll).reshape(len(Ms), len(kcvs), len(val_neuron))

In [None]:
# get latent trajectories and drift timescale of neural tuning for 2D latent model
mode = modes[2]
cvdata = models.get_cv_sets(mode, [-1], 5000, rc_t, resamples, rcov)[0] # -1: same fold index as the full-data fit in the regression section
full_model = get_full_model(session_id, phase, cvdata, resamples, binning, mode, rcov, max_count, 
                            neurons, gpu=gpu_dev)

X_loc, X_std = full_model.inputs.eval_XZ()

# entry 6 is the input dimension listed in z_dims of the latent models
X_c = X_loc[6]
X_s = X_std[6]
# tbin divided by (1 - sigmoid(p_mu_6)); presumably the time constant of the latent 
# prior — TODO confirm against the library's prior parameterisation
z_tau = tbin/(1-torch.sigmoid(full_model.inputs.p_mu_6).data.cpu().numpy())

# lengthscales of kern1 at index -3 of the last axis
# NOTE(review): presumably the time dimension — confirm the kernel's dimension ordering
t_lengths = full_model.mapping.kernel.kern1.lengthscale[:, 0, 0, -3].data.cpu().numpy()

In [None]:
# load regression model with most input dimensions
# (last entry of `modes`, fold index -1; the bin size is given as the literal 40, which 
# equals `binning` as set above)
mode = modes[4]
cvdata = models.get_cv_sets(mode, [-1], 5000, rc_t, resamples, rcov)[0]
full_model = get_full_model(session_id, phase, cvdata, resamples, 40, mode, rcov, max_count, 
                            neurons, gpu=gpu_dev)

In [None]:
### head direction tuning ###
MC = 100

# head direction sweep with the other seven inputs held fixed: position at the centre of 
# the observed range, everything else at 0 (the last two entries are presumably the 
# latent dimensions — TODO confirm against the model's input layout)
steps = 100
covariates = [np.linspace(0, 2*np.pi, steps), 
              0.*np.ones(steps), 0.*np.ones(steps), 
              (left_x+right_x)/2.*np.ones(steps), (bottom_y+top_y)/2.*np.ones(steps), 
              0.*np.ones(steps), 
              0.*np.ones(steps), 0.*np.ones(steps)]

P_mc = models.compute_P(full_model, covariates, pick_neuron, MC=MC).cpu()


# mean count averaged over the leading axis; the preferred direction of each neuron is 
# the grid point where this curve peaks
avg = (x_counts[None, None, None, :]*P_mc).sum(-1).mean(0).numpy()
pref_hd = covariates[0][np.argmax(avg, axis=1)]

In [None]:
# marginalized tuning curves
# observed covariates extended by two columns of the latent means X_c
# NOTE(review): X_c was evaluated from the modes[2] model loaded two cells earlier, while 
# full_model is now the modes[4] model — confirm these are meant to be combined
rcovz = list(rcov) + [X_c[:, 0], X_c[:, 1]]
MC = 10
skip = 10



### z ###
# count distribution as a function of input 6, evaluated on `step` points in [-0.2, 0.2]
step = 100
P_tot = models.marginalized_P(full_model, [np.linspace(-.2, .2, step)], [6], rcovz, 10000, 
                              pick_neuron, MC=MC, skip=skip)
# mean, variance and Fano factor of the count distribution on the last axis of P_tot
avg = (x_counts[None, None, None, :]*P_tot).sum(-1)
var = (x_counts[None, None, None, :]**2*P_tot).sum(-1)-avg**2
ff = var/avg

# average over the leading axis, then tuning index (max - min)/(max + min) over the last axis
mz1_mean = avg.mean(0)
mz1_ff = ff.mean(0)
z1_mean_tf = (mz1_mean.max(dim=-1)[0] - mz1_mean.min(dim=-1)[0]) / (mz1_mean.max(dim=-1)[0] + mz1_mean.min(dim=-1)[0])
z1_ff_tf = (mz1_ff.max(dim=-1)[0] - mz1_ff.min(dim=-1)[0]) /(mz1_ff.max(dim=-1)[0] + mz1_ff.min(dim=-1)[0])



# same computation for input 7
step = 100
P_tot = models.marginalized_P(full_model, [np.linspace(-.2, .2, step)], [7], rcovz, 10000, 
                              pick_neuron, MC=MC, skip=skip)
avg = (x_counts[None, None, None, :]*P_tot).sum(-1)
var = (x_counts[None, None, None, :]**2*P_tot).sum(-1)-avg**2
ff = var/avg

mz2_mean = avg.mean(0)
mz2_ff = ff.mean(0)
z2_mean_tf = (mz2_mean.max(dim=-1)[0] - mz2_mean.min(dim=-1)[0]) / (mz2_mean.max(dim=-1)[0] + mz2_mean.min(dim=-1)[0])
z2_ff_tf = (mz2_ff.max(dim=-1)[0] - mz2_ff.min(dim=-1)[0]) /(mz2_ff.max(dim=-1)[0] + mz2_ff.min(dim=-1)[0])

In [None]:
# compute 2D latent model properties of tuning curves and TI to latent space
# z_d selects the branch: 1 sweeps a single latent dimension per neuron (with percentile
# bands), anything else evaluates mean count and Fano factor on a 2D latent grid.
z_d = 2

# NOTE(review): pref_hd is indexed with the neuron id `n` below, while it was computed
# from compute_P(..., pick_neuron, ...). That is only consistent if pick_neuron equals
# arange(len(pick_neuron)); otherwise the loop position `en` is presumably intended.

if z_d == 1: ### latent ###
    mean_z = []
    lower_z = []
    upper_z = []
    ffmean_z = []
    fflower_z = []
    ffupper_z = []

    steps = 100
    covariates_z = np.linspace(-.2, .2, steps)
    for en, n in enumerate(pick_neuron):
        # presumably ordered as: head direction, two clamped inputs, x, y, one clamped
        # input, latent — TODO confirm against the model's input dimensions
        covariates = [pref_hd[n]*np.ones(steps), 0.*np.ones(steps), np.ones(steps)*0., 
                      (left_x+right_x)/2.*np.ones(steps), (bottom_y+top_y)/2.*np.ones(steps), 
                      0.*np.ones(steps), covariates_z]

        P_mc = models.compute_P(full_model, covariates, [n], MC=1000).cpu()[:, 0, ...]

        avg = (x_counts[None, None, :]*P_mc).sum(-1)
        xcvar = ((x_counts[None, None, :]**2*P_mc).sum(-1)-avg**2)
        ff = xcvar/avg

        # 5%, 50% and 95% bands over the MC samples
        avgs = utils.signal.percentiles_from_samples(avg, percentiles=[0.05, 0.5, 0.95], 
                                                     smooth_length=5, padding_mode='replicate')
        lower, mean, upper = [cs_.cpu().numpy() for cs_ in avgs]

        ffs = utils.signal.percentiles_from_samples(ff, percentiles=[0.05, 0.5, 0.95], 
                                                smooth_length=5, padding_mode='replicate')
        fflower, ffmean, ffupper = [cs_.cpu().numpy() for cs_ in ffs]

        lower_z.append(lower)
        mean_z.append(mean)
        upper_z.append(upper)

        fflower_z.append(fflower)
        ffmean_z.append(ffmean)
        ffupper_z.append(ffupper)
    
else: ### 2d z ###
    grid_size_zz = (41, 41)
    grid_shape_zz = [[-.2, .2], [-.2, .2]]

    # np.product was deprecated and removed in NumPy 2.0; np.prod is the supported name
    steps = np.prod(grid_size_zz)
    A, B = grid_size_zz

    # grid axes are built from grid_shape_zz so the saved grid description cannot drift
    # away from the points that were actually evaluated
    z1_axis = np.linspace(grid_shape_zz[0][0], grid_shape_zz[0][1], A)
    z2_axis = np.linspace(grid_shape_zz[1][0], grid_shape_zz[1][1], B)

    field_zz = []
    ff_zz = []
    t = 0
    for en, n in enumerate(pick_neuron):
        covariates = [pref_hd[n]*np.ones(steps), 
                      0.*np.ones(steps), 0.*np.ones(steps), 
                      (left_x+right_x)/2.*np.ones(steps), (bottom_y+top_y)/2.*np.ones(steps), 
                      t*np.ones(steps), 
                      z1_axis[:, None].repeat(B, axis=1).flatten(), 
                      z2_axis[None, :].repeat(A, axis=0).flatten()]

        # average the count distribution over MC samples before taking moments
        P_mean = models.compute_P(full_model, covariates, [n], MC=100).mean(0).cpu()
        avg = (x_counts[None, :]*P_mean[0, ...]).sum(-1).reshape(A, B).numpy()
        var = (x_counts[None, :]**2*P_mean[0, ...]).sum(-1).reshape(A, B).numpy()
        xcvar = (var-avg**2)

        field_zz.append(avg)
        ff_zz.append(xcvar/avg)

    field_zz = np.stack(field_zz)
    ff_zz = np.stack(ff_zz)

In [None]:
# KS framework for latent models, including Fisher Z scores
# For every validation fold in CV and every entry of `modes`, the fitted model is reloaded,
# the (q, Z) pair returned by models.get_q_Z is collected per neuron, and pairwise Pearson
# correlations of the Z traces are computed. Afterwards the correlations are turned into
# Fisher z scores and the q samples into KS / dispersion statistics.
CV = [2, 5, 8]
bn = 40



### KS test ###
Qq = []
Zz = []
R = []
Rp = []

N = len(pick_neuron)
for kcv in CV:
    for en, mode in enumerate(modes):
        cvdata = models.get_cv_sets(mode, [kcv], 3000, rc_t, resamples, rcov)[0]
        kcv_str, ftrain, fcov, vtrain, vcov, batch_size = cvdata
        cv_set = (ftrain, fcov, vtrain, vcov)
        time_steps = ftrain.shape[-1]

        full_model = get_full_model(session_id, phase, cvdata, resamples, bn, 
                                    mode, rcov, max_count, neurons, gpu=gpu_dev)

        # q_/Z_ are filled batch-major, neuron-minor: N consecutive entries per batch
        q_ = []
        Z_ = []
        for b in range(full_model.inputs.batches): # predictive posterior
            P_mc = models.compute_pred_P(full_model, b, pick_neuron, None, cov_samples=10, ll_samples=1, tr=0)
            P = P_mc.mean(0).cpu().numpy() # average over the leading (sample) axis

            for n in range(N):
                spike_binned = full_model.likelihood.spikes[b][0, pick_neuron[n], :].numpy()
                q, Z = models.get_q_Z(P[n, ...], spike_binned, deq_noise=None)
                q_.append(q)
                Z_.append(Z)

        # stride N picks neuron n from every batch; concatenation restores its full trace
        q = []
        Z = []
        for n in range(N):
            q.append(np.concatenate(q_[n::N]))
            Z.append(np.concatenate(Z_[n::N]))


        # all unordered neuron pairs (n < m), i.e. N*(N-1)/2 correlations per model
        Pearson_s = []
        for n in range(len(pick_neuron)):
            for m in range(n+1, len(pick_neuron)):
                r, r_p = scstats.pearsonr(Z[n], Z[m]) # Pearson r correlation test
                Pearson_s.append((r, r_p))

        r = np.array([p[0] for p in Pearson_s])
        r_p = np.array([p[1] for p in Pearson_s])

        Qq.append(q)
        Zz.append(Z)
        R.append(r)
        Rp.append(r_p)


# Fisher transform: arctanh(r) scaled by sqrt(T - 3)
# NOTE(review): time_steps is whatever the last (kcv, mode) iteration left behind, and it
# is ftrain.shape[-1] rather than the length of the concatenated Z traces — confirm that
# this is the intended sample count for every entry of R.
fisher_z = []
fisher_q = []
for en, r in enumerate(R):
    fz = 0.5*np.log((1+r)/(1-r))*np.sqrt(time_steps-3)
    fisher_z.append(fz)
    fisher_q.append(utils.stats.Z_to_q(fz))

    
# per-neuron KS and dispersion statistics of the q samples (only T_DS and T_KS are kept)
q_DS_ = []
T_DS_ = []
T_KS_ = []
for q in Qq:
    for qq in q:
        T_DS, T_KS, sign_DS, sign_KS, p_DS, p_KS = utils.stats.KS_statistics(qq, alpha=0.05, alpha_s=0.05)
        T_DS_.append(T_DS)
        T_KS_.append(T_KS)
        
        # standardise the dispersion statistic with sqrt(2/(n-1)) before mapping to q
        Z_DS = T_DS/np.sqrt(2/(qq.shape[0]-1))
        q_DS_.append(utils.stats.Z_to_q(Z_DS))
        
        
# NOTE(review): the loops above iterate over `modes`, but the reshapes use len(Ms);
# this only works if len(Ms) == len(modes) — confirm Ms is the matching list.
fisher_z = np.array(fisher_z).reshape(len(CV), len(Ms), -1)
fisher_q = np.array(fisher_q).reshape(len(CV), len(Ms), -1)

Qq = np.array(Qq).reshape(len(CV), len(Ms), len(pick_neuron), -1)
Zz = np.array(Zz).reshape(len(CV), len(Ms), len(pick_neuron), -1)
# NOTE(review): each R/Rp entry holds N*(N-1)/2 pair values, so splitting it over a
# len(pick_neuron) axis needs (N-1)/2 to be an integer and the rows are not per-neuron.
R = np.array(R).reshape(len(CV), len(Ms), len(pick_neuron), -1)
Rp = np.array(Rp).reshape(len(CV), len(Ms), len(pick_neuron), -1)
        
q_DS_ = np.array(q_DS_).reshape(len(CV), len(Ms), len(pick_neuron), -1)
T_DS_ = np.array(T_DS_).reshape(len(CV), len(Ms), len(pick_neuron), -1)
T_KS_ = np.array(T_KS_).reshape(len(CV), len(Ms), len(pick_neuron), -1)

In [None]:
def _ks_stat_and_p(samples):
    """KS statistic and its p-value (entries 1 and 5 of utils.stats.KS_statistics)."""
    ks_out = utils.stats.KS_statistics(samples, alpha=0.05, alpha_s=0.05)
    return ks_out[1], ks_out[5]


# KS test on the Fisher q values, one test per (fold, model)
fishq_ks = [_ks_stat_and_p(qq) for q in fisher_q for qq in q]
T_KS_fishq = np.array([stat for stat, _ in fishq_ks]).reshape(len(CV), len(Ms))
p_KS_fishq = np.array([p_val for _, p_val in fishq_ks]).reshape(len(CV), len(Ms))


# KS test on the per-neuron q samples, one test per (fold, model, neuron)
neuron_ks = [_ks_stat_and_p(qqq) for q in Qq for qq in q for qqq in qq]
T_KS_ks = np.array([stat for stat, _ in neuron_ks]).reshape(len(CV), len(Ms), len(pick_neuron))
p_KS_ks = np.array([p_val for _, p_val in neuron_ks]).reshape(len(CV), len(Ms), len(pick_neuron))

In [None]:
# delayed noise or spatiotemporal correlations
NN = len(pick_neuron)
delays = np.arange(5)
corr_shape = (len(Ms), len(delays), NN, NN)
R_mat_spt = np.empty(corr_shape)   # Pearson r per (model, delay, neuron n, neuron m)
R_mat_sptp = np.empty(corr_shape)  # matching p-values

kcv_ind = 1
for d, Z_ in enumerate(Zz[kcv_ind]):
    # common window length so that every lagged slice stays inside the trace
    steps = len(Z_[0])-len(delays)
    
    for en, t in enumerate(delays):
        Pearson_s = []
        for n in range(NN):
            lagged = Z_[n][t:t+steps]  # neuron n shifted by t bins
            for m in range(NN):
                reference = Z_[m][:-len(delays)]  # neuron m, unshifted
                r, r_p = scstats.pearsonr(lagged, reference) # Pearson r correlation test
                R_mat_spt[d, en, n, m], R_mat_sptp[d, en, n, m] = r, r_p


In [None]:
# compute timescales for input dimensions from ACG
delays = 5000
Tsteps = rcov[0].shape[0]
L = Tsteps-delays+1


def _linear_corr(A, B):
    """Pearson correlation of two equally long traces."""
    return ((A-A.mean())*(B-B.mean())).mean()/A.std()/B.std()


def _autocorrelogram(series, corr, n_lags, window):
    """`corr` between `series` shifted by d bins and the unshifted series, for d < n_lags."""
    acg = np.empty(n_lags)
    for d in range(n_lags):
        acg[d] = corr(series[d:d+window], series[:window])
    return acg


def _decay_time(acg):
    """Lag (in units of tbin) of the first ACG sample below 1/e."""
    return np.where(acg < np.exp(-1))[0][0]*tbin


# first covariate is angular and uses the circular-circular correlation; the remaining
# covariates (all but the last entry of rcov) use the linear one
acg_rc = [_autocorrelogram(rc, utils.stats.corr_circ_circ, delays, L) for rc in rcov[:1]]
acg_rc += [_autocorrelogram(rc, _linear_corr, delays, L) for rc in rcov[1:-1]]

# one ACG per latent dimension
acg_z = [_autocorrelogram(rc, _linear_corr, delays, L) for rc in X_c.T]


timescales = [_decay_time(acg_rc[d]) for d in range(len(rcov)-1)]
timescales += [_decay_time(acg_z[d]) for d in range(X_c.shape[-1])]


In [None]:
# collect the results of the regression-model analysis cells into one tuple and pickle it
data_run = (
    avg_models_z, var_models_z, ff_models_z, 
    Pearson_ffz, ratioz, 
    X_c, X_s, cv_pll, elbo, z_tau, pref_hd, 
    grid_size_zz, grid_shape_zz, field_zz, ff_zz, 
    mz1_mean, mz1_ff, z1_mean_tf, z1_ff_tf, 
    mz2_mean, mz2_ff, z2_mean_tf, z2_ff_tf, 
    q_DS_, T_DS_, T_KS_, Qq, Zz, R, Rp, fisher_z, fisher_q, 
    T_KS_fishq, p_KS_fishq, T_KS_ks, p_KS_ks, 
    R_mat_spt, R_mat_sptp, 
    timescales, acg_rc, acg_z, t_lengths
)

# the context manager flushes and closes the file even if pickling raises; the bare
# open() used before left the handle to be closed by the garbage collector
with open('./checkpoint/P_HDC_nc40.p', 'wb') as pickle_file:
    pickle.dump(data_run, pickle_file)

### Latent variable modeling

In [None]:
# reload the dataset at the bin size used by the latent variable models; this rebinds
# neurons, tbin, resamples and rc_t for every cell below
binsize = 100
lvm_dataset = HDC.get_dataset(session_id, phase, binsize)
rcov_lvm, neurons, tbin, resamples, rc_t, _ = lvm_dataset

rhd_t = rcov_lvm[0]  # first covariate, used below as regression target for the latent
max_count = int(rc_t.max())

In [None]:
def _lvm_mode(ll_mode, inv_link, C):
    """
    Build one model specification tuple.

    Field order follows the unpacking in get_full_model:
    (mtype, ll_mode, r_mode, num_induc, inv_link, C, z_dims, delays, folds, cv_switch, basis_mode)
    """
    return ('GP', ll_mode, 'T1', 8, inv_link, C, [0], False, 10, False, 'ew')


# the three latent variable models differ only in likelihood, inverse link and C
modes = [
    _lvm_mode('U', 'identity', 3), 
    _lvm_mode('IP', 'exp', 1), 
    _lvm_mode('hNB', 'exp', 1), 
]

In [None]:
# likelihood CV over subgroups of neurons as well as validation runs
# For each validation segment and each latent variable model, the neurons are split into a
# held-out group (v_neuron) and its complement (f_neuron). models.LVM_pred_ll is evaluated
# on `ncvx` freshly loaded copies of the model and the smallest returned value is kept.
beta = 0.0
n_group = np.arange(5)
ncvx = 2
val_neuron = [n_group, n_group+5, n_group+10, n_group+15, n_group+20, n_group+25, np.arange(3)+30]
kcvs = [1, 2, 3, 5, 6, 8] # validation segments from splitting data into 10

LVM_cv_ll = []
for kcv in kcvs:
    for mode in modes:
        # rcov_lvm (not the regression section's rcov) belongs to the same dataset load
        # as rc_t/resamples and is what every other cell of this section passes here
        cvdata = models.get_cv_sets(mode, [kcv], 5000, rc_t, resamples, rcov_lvm)[0]
        _, ftrain, fcov, vtrain, vcov, batch_size = cvdata
        cv_set = (ftrain, fcov, vtrain, vcov)

        for v_neuron in val_neuron:
            # rescales groups that are smaller than n_group (the last one has 3 neurons)
            fac = len(n_group)/len(v_neuron)

            # complement of the held-out group; independent of the repeat index below
            mask = np.ones((neurons,), dtype=bool)
            mask[v_neuron] = False
            f_neuron = np.arange(neurons)[mask]

            # NOTE(review): the minimum over repeats is kept — confirm this matches the
            # sign convention of the value returned by LVM_pred_ll.
            prev_ll = np.inf
            for tr in range(ncvx):
                # reloaded on every repeat as in the original; presumably LVM_pred_ll
                # modifies the model it is given — TODO confirm
                full_model = get_full_model(session_id, phase, cvdata, resamples, 100, 
                                            mode, rcov_lvm, max_count, neurons, gpu=gpu_dev)
                # mode[-5] is the z_dims entry of the model tuple
                ll = models.LVM_pred_ll(full_model, mode[-5], mode[2], cv_set, f_neuron, v_neuron, 
                                        beta=beta, beta_z=0.0)[0]
                if ll < prev_ll:
                    prev_ll = ll

            LVM_cv_ll.append(fac*prev_ll)
        
LVM_cv_ll = np.array(LVM_cv_ll).reshape(len(kcvs), len(modes), len(val_neuron))

In [None]:
def circ_drift_regression(x, z, t, topology, dev='cpu', iters=1000, lr=1e-2, a_fac=1):
    """
    Align a latent trace to a target trace with a linear drift, an offset and a sign flip.

    For sign = +1 and sign = -1 separately, Adam minimises the mean squared
    utils.latent.metric distance between ``t*a_fac*a + shift + sign*z`` and ``x`` over the
    scalars ``a`` and ``shift`` (both initialised at zero). The sign whose last evaluated
    loss is lowest wins.

    :param x: target values, converted with torch.tensor
    :param z: latent values, converted with torch.tensor
    :param t: time points that multiply the drift, converted with torch.tensor
    :param topology: forwarded to utils.latent.metric
    :param dev: torch device used for all tensors and parameters
    :param iters: number of optimisation steps per sign
    :param lr: Adam learning rate
    :param a_fac: fixed rescaling applied to the drift parameter
    :returns: tuple (a_fac*a, sign, shift, losses) of the winning run, where losses is the
              list of per-step loss values
    """
    time_pts = torch.tensor(t, device=dev)
    target = torch.tensor(x, device=dev)
    latent = torch.tensor(z, device=dev)

    lowest_loss = np.inf
    for sign in (1, -1): # select sign automatically
        shift = Parameter(torch.zeros(1, device=dev))
        slope = Parameter(torch.zeros(1, device=dev))
        optimizer = optim.Adam([slope, shift], lr=lr)

        history = []
        for _ in range(iters):
            optimizer.zero_grad()
            aligned = time_pts*a_fac*slope + shift + sign*latent
            loss = (utils.latent.metric(aligned, target, topology)**2).mean()
            loss.backward()
            optimizer.step()
            history.append(loss.cpu().item())

        # loss of the last step (evaluated before that step's parameter update)
        final_loss = loss.cpu().item()
        if final_loss < lowest_loss:
            lowest_loss = final_loss
            best_fit = (a_fac*slope.cpu().item(), sign, shift.cpu().item(), history)

    return best_fit

In [None]:
# trajectory regression to align to data and compute drifts
# Each model's latent trace is split into cvK contiguous folds; the alignment is fitted on
# one fold and the mean squared metric distance is evaluated on the remaining bins.
topology = 'torus'
cvK = 3
CV = [0, 1, 2]

RMS_cv, drifts_lv = [], []
for mode in modes:
    cvdata = models.get_cv_sets(mode, [-1], 5000, rc_t, resamples, rcov_lvm)[0]
    kcv, ftrain, fcov, vtrain, vcov, batch_size = cvdata
    cv_set = (ftrain, fcov, vtrain, vcov)
        
    full_model = get_full_model(session_id, phase, cvdata, resamples, 100, 
                                mode, rcov_lvm, max_count, neurons, gpu=gpu_dev)

    X_loc, X_std = full_model.inputs.eval_XZ()
    lat = X_loc[0]
    cvT = lat.shape[0]
    tar_t = rhd_t[:cvT]
    all_bins = np.arange(cvT)
    
    for rn in CV:
        fit_range = np.arange(cvT//cvK) + rn*cvT//cvK

        fit_out = circ_drift_regression(tar_t[fit_range], lat[fit_range], fit_range*tbin, 
                                        topology, dev=dev, a_fac=1e-5)
        drift, sign, shift, losses = fit_out
        
        # evaluate only on the bins that were not used for fitting
        mask = np.ones((cvT,), dtype=bool)
        mask[fit_range] = False
        
        aligned = (all_bins*tbin*drift + shift + sign*lat) % (2*np.pi)
        lat_t = torch.tensor(aligned)
        D = utils.latent.metric(torch.tensor(tar_t)[mask], lat_t[mask], topology)**2
        RMS_cv.append(D.mean().item())  # mean squared distance (no square root is taken)
        drifts_lv.append(drift)


result_shape = (len(modes), len(CV))
RMS_cv = np.array(RMS_cv).reshape(result_shape)
drifts_lv = np.array(drifts_lv).reshape(result_shape)

In [None]:
# compute delays in latent trajectory w.r.t. data, see which one fits best in RMS
# The target trace is shifted by -D..D bins against a fixed latent window; for each shift
# the alignment is fitted on one of cvK folds and scored on the remaining bins.
topology = 'torus'
cvK = 3
CV = [0, 1, 2]

D = 5
delays = np.arange(-D, D+1)
delay_RMS = []
mode = modes[0]

# The model, and therefore the latent trace, does not depend on the delay, so the
# checkpoint is loaded once instead of once per delay.
cvdata = models.get_cv_sets(mode, [-1], 5000, rc_t, resamples, rcov_lvm)[0]
kcv, ftrain, fcov, vtrain, vcov, batch_size = cvdata
cv_set = (ftrain, fcov, vtrain, vcov)

full_model = get_full_model(session_id, phase, cvdata, resamples, 100, 
                            mode, rcov_lvm, max_count, neurons, gpu=gpu_dev)

X_loc, X_std = full_model.inputs.eval_XZ()

# trim D bins on both sides so that every shifted target window stays in range
cvT = X_loc[0].shape[0]-len(delays)+1
lat = X_loc[0][D:cvT+D]

for delay in delays:
    tar_t = rhd_t[D+delay:cvT+D+delay]
    
    for rn in CV:
        fit_range = np.arange(cvT//cvK) + rn*cvT//cvK

        drift, sign, shift, _ = circ_drift_regression(tar_t[fit_range], lat[fit_range], fit_range*tbin, 
                                                      topology, dev=dev, a_fac=1e-5)
        
        # score on the bins outside the fitting fold
        mask = np.ones((cvT,), dtype=bool)
        mask[fit_range] = False
        
        lat_ = torch.tensor((np.arange(cvT)*tbin*drift + shift + sign*lat) % (2*np.pi))
        Dd = (utils.latent.metric(torch.tensor(tar_t)[mask], lat_[mask], topology)**2)
        delay_RMS.append(Dd.mean().item())  # mean squared distance (no square root)


delay_RMS = np.array(delay_RMS).reshape(len(delays), len(CV))

In [None]:
# get the latent inferred trajectory
mode = modes[0]
topology = 'torus'


cvdata = models.get_cv_sets(mode, [-1], 5000, rc_t, resamples, rcov_lvm)[0]
kcv, ftrain, fcov, vtrain, vcov, batch_size = cvdata
cv_set = (ftrain, fcov, vtrain, vcov)

full_model = get_full_model(session_id, phase, cvdata, resamples, 100, 
                            mode, rcov_lvm, max_count, neurons, gpu=gpu_dev)

X_loc, X_std = full_model.inputs.eval_XZ()

tar_t = rhd_t
lat = X_loc[0]

# This cell used to pick up `fit_range` from the last loop iteration of the delay-scan
# cell, so it failed (or silently changed) when run on its own. The window is now
# spelled out here and reproduces those indices: the trace shortened by 10 bins
# (2*D with D = 5), split into cvK = 3 folds, last fold.
# NOTE(review): fitting on the whole trace may have been the intent — confirm.
cvK = 3
cvT = lat.shape[0] - 10
fit_range = np.arange(cvT//cvK) + (cvK-1)*cvT//cvK

drift, sign, shift, _ = circ_drift_regression(tar_t[fit_range], lat[fit_range], fit_range*tbin, 
                                              topology, dev=dev, a_fac=1e-5)

# apply the fitted drift, offset and sign to the full trace and wrap onto [0, 2*pi)
lat_t = ((np.arange(rhd_t.shape[0])*tbin*drift + shift + sign*lat) % (2*np.pi))
lat_t_std = X_std[0]

In [None]:
# collect the results of the latent variable model cells into one tuple and pickle it
data_run = (
    lat_t, lat_t_std, delay_RMS, RMS_cv, LVM_cv_ll, drifts_lv, rcov_lvm
)

# the context manager flushes and closes the file even if pickling raises; the bare
# open() used before left the handle to be closed by the garbage collector
with open('./checkpoint/P_HDC_lat.p', 'wb') as pickle_file:
    pickle.dump(data_run, pickle_file)