In [15]:
# This snippet of code properly adds the working source root path to python's path
# so you no longer have to install spykshrk through setuptools
import sys, os
root_depth = 2
notebook_dir = globals()['_dh'][0]
root_path = os.path.abspath(os.path.join(notebook_dir, '../'*root_depth))
# Add to python's path
try:
    while True:
        sys.path.remove(root_path)
except ValueError:
    # no more root paths
    pass
sys.path.append(root_path)
# Alternatively set root path as current working directory
#os.chdir(root_path)

In [16]:
# General utilities
import json
import functools
import math

# Modules for scientific computing
import numpy as np
import scipy as sp
import scipy.stats
import pandas as pd

# Modules for visualization
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import holoviews as hv

# Modules for distributed computing
import dask
import dask.dataframe as dd
import dask.array as da

# Spykshrk modules for data analysis
from spykshrk.franklab.data_containers import FlatLinearPosition, SpikeFeatures, \
        EncodeSettings, pos_col_format, SpikeObservation
from spykshrk.franklab.pp_decoder.util import normal_pdf_int_lookup, gaussian
from spykshrk.franklab.pp_decoder.pp_clusterless import OfflinePPEncoder, OfflinePPDecoder
from spykshrk.franklab.pp_decoder.visualization import DecodeVisualizer

# Visualization and display settings
hv.extension('matplotlib')
hv.extension('bokeh')
pd.set_option('display.precision', 4)
pd.set_option('display.max_rows', 10)
pd.set_option('display.max_columns', 15)

In [3]:
#%pdb

In [4]:
# Setup DASK workers using DASK distributed processes

try:
    cluster.close()
    client.close()
except:
    print("No cluster or client running")
    
from dask.distributed import Client, LocalCluster

#cluster = LocalCluster(n_workers=2, threads_per_worker=1, memory_limit=50e9, memory_target_fraction=0.01, 
#                       memory_pause_fraction=0.05)
cluster = LocalCluster(n_workers=4, threads_per_worker=1)
client = Client(cluster)

min_worker_memory = np.inf
for w in cluster.workers:
    min_worker_memory = min(min_worker_memory, w.memory_limit)

No cluster or client running


In [5]:
# Overrides DASK workers and forces the use of multiprocessing module

# dask.config.set(scheduler = 'processes')
# min_worker_memory = 1e9

## Place cell/unit simulator

In [6]:
class UnitGenerator:
    """
    Neural activity simulator for tetrodes.  For each unit found on a single tetrode, models the unit as
    a multivariate gaussians in 4-D amplitude (mark) space and firing rate in 1-D position space.  
    Simulates spike trains as a poisson process with input of time and position.
     Returns a table of spike times and their matching mark features.
    """
    def __init__(self, elec_grp_id, mark_mean, mark_cov, pos_mean, pos_var, peak_fr, sampling_rate):
        """
        Constructor to setup 4-D gaussian model of amplitude (mark) space and 1-D position.  The model is a 
        mark poisson distribution of when and what mark features spike events would have given an input of
        position.
        :param elec_grp_id: scalar ID for unique tetrodes
        :param mark_mean: list specifying the mean of each unit's mark gaussian distribution (4-D)
        :param mark_cov:  list specifying the covariance of each unit's mark gaussian distribution (4-D)
        :param pos_mean: scalar specifying the mean of each unit's position gaussian distribution (1-D)
        :param pos_var: scalar specifying the variance of each unit's position gaussian distribution (1-D)
        :param peak_fr: scalar specifying the peak firing rate of each unit.
        :param sampling_rate: uniform sampling rate to expect for simulation input
        """
        self.elec_grp_id = elec_grp_id
        self.mark_mean = mark_mean
        self.mark_cov = mark_cov
        self.pos_mean = pos_mean
        self.pos_var = pos_var
        self.rv_marks = sp.stats.multivariate_normal(mean=mark_mean, cov=np.diag(mark_cov))
        self.rv_pos = sp.stats.norm(loc=pos_mean, scale=pos_var)
        self.peak_fr = peak_fr
        self.sampling_rate = sampling_rate
        
    def simulate_spikes_over_pos(self, linpos_flat):
        """Simulate spikes given a list of uniformly sampled position data.
        
        :param linpos_flat: a Pandas Dataframe of uniformly sampled position data. Index should be time
                            and 'linpos_flat' should be the column name of 1-D positions
        :return: a SpikeFeatures dataframe of simulated spike times and corresponding amplitude (mark) 
        features
        """
        
        # Generate the probability of a spike occurring at each position depending on the 
        # firing rate - position map of the unit.
        prob_field = self.rv_pos.pdf(linpos_flat['linpos_flat'].values)/self.rv_pos.pdf(self.pos_mean)
        
        # Simulates spike train by treating each time point as a bernoulli trial
        spike_train = sp.stats.bernoulli(p=self.peak_fr/self.sampling_rate * prob_field).rvs()
        
        # Generate the mark features based on mark kernel.  Assumes mark probability is uniformly
        # distributed over position.
        marks = np.atleast_2d(self.rv_marks.rvs(sum(spike_train))).astype('i4')
        
        # list of spike indexes
        sample_num = np.nonzero(spike_train)[0]

        # reorganizing linpos data into a list of spike times
        time_ind = linpos_flat.index[sample_num]
        ind_levels = time_ind.levels.copy()
        ind_levels.append([self.elec_grp_id])
        ind_labels = time_ind.labels.copy()
        ind_labels.append([0]*len(time_ind))
        ind_names = time_ind.names.copy()
        ind_names.append('elec_grp_id')
        
        # Organizes returning DataFrame 
        new_ind = pd.MultiIndex(levels=ind_levels, labels=ind_labels, names=ind_names)
        new_ind = new_ind.reorder_levels(['day', 'epoch', 'elec_grp_id', 'timestamp', 'time'])
        #new_ind = new_ind.sortlevel(['day', 'epoch', 'elec_grp', 'timestamp', 'time'])

        # Packages Pandas data into a SpikeFeatures dataframe
        spk_amp = SpikeFeatures(marks, columns=['c00', 'c01', 'c02', 'c03'],
                                index=new_ind)
        mark_linpos = linpos_flat.iloc[sample_num]
        mark_linpos['elec_grp_id'] = self.elec_grp_id
        mark_linpos.set_index('elec_grp_id', append=True, inplace=True)
        mark_linpos = mark_linpos.reorder_levels(['day','epoch','elec_grp_id','timestamp','time'])
        
        return spk_amp, mark_linpos, prob_field
    

class AttrDict(dict):
    """
    A helper class that takes a dictionary and maps all of it's keys as class attributes.
    """
    def __init__(self, *args, **kwargs):
        super(AttrDict, self).__init__(*args, **kwargs)
        self.__dict__ = self

## Configuration settings for place cell simulator and encoding/decoding algorithms

In [7]:
# Encoding and decoding settings for both simulator and algorithm
encode_settings = AttrDict({'sampling_rate': 1000,
                            'pos_bins': np.arange(0,100,1),
                            'pos_bin_edges': np.arange(0,100.1,1),
                            'pos_bin_delta': 1,
                            'pos_kernel': sp.stats.norm.pdf(np.arange(0,100,1), 50, 1),
                            'pos_kernel_std': 1, 
                            'mark_kernel_std': int(20), 
                            'pos_num_bins': 100,
                            'pos_col_names': [pos_col_format(ii, 100) for ii in range(100)],
                            'arm_coordinates': [[0,100]],
                            'vel': 3,
                            'spk_amp': 60})

decode_settings = AttrDict({'trans_smooth_std': 5,
                            'trans_uniform_gain': 0.001,
                            'time_bin_size': 10})
                            

sampling_rate = 1000

# Simulate Synthetic Position Data
# Animal runs back and forth between two points, pausing for a few seconds in the middle.
pos_time = np.arange(0,100*1000,1)
pos_run = 50*np.cos(pos_time[0:17272]/(500*np.pi))+50
pos_run = np.append(pos_run, ([pos_run[-1]]*39478))
pos_run = np.append(pos_run, 50*np.cos(pos_time[56750:4000*1000]/(500*np.pi))+50)
pos_vel = np.concatenate([[0], np.diff(pos_run) * sampling_rate])

linpos_flat_obj = FlatLinearPosition.from_numpy_single_epoch(1, 1, pos_time, pos_run, pos_vel, sampling_rate, 
                                                             encode_settings.arm_coordinates)

# Multiple Synthetic Unit Parameters
mark_mean_range = [20,1000]
mark_cov_range = [200,500]
num_marks = 4
num_units = 50
firing_rate_range = [5,20]

pos_field_range = [0, 100]
pos_field_var_range = [5,10]

unit_mean = np.random.randint(*mark_mean_range, [num_units, num_marks])
unit_cov = np.random.randint(*mark_cov_range, [num_units, num_marks])

#unit_pos_mean = np.random.randint(*pos_field_range, [num_units])
#unit_pos_var = np.random.randint(*pos_field_var_range, [num_units])

unit_pos_mean = np.linspace(*pos_field_range, num_units)
unit_pos_var = np.array([pos_field_range[1]/num_units*2]*num_units)

unit_fr = np.random.randint(*firing_rate_range, [num_units])


# Loop that passes each unit's parameters to create a simulator per unit.  Assumes units
# are all on same tetrode.  At the same time generate synthetic data using synthetic position data.
units = {}
unit_spks = {}
spk_amps = pd.DataFrame()
for unit_ii in range(num_units):
    units[unit_ii] = UnitGenerator(elec_grp_id=1,
                                   mark_mean=unit_mean[unit_ii,:], mark_cov=unit_cov[unit_ii,:], 
                                   pos_mean=unit_pos_mean[unit_ii], pos_var=unit_pos_var[unit_ii], 
                                   peak_fr=unit_fr[unit_ii], sampling_rate=sampling_rate)

    unit_marks, mark_pos, field = units[unit_ii].simulate_spikes_over_pos(linpos_flat_obj)
    unit_spks[unit_ii] = unit_marks.merge(mark_pos, how='outer', left_index=True, right_index=True)
    
    spk_amps = spk_amps.append(unit_marks)
spk_amps.sort_index(level='timestamp', inplace=True)

# Merge all units, assuming all come from same tetrode.
spk_amps = spk_amps[~spk_amps.index.duplicated(keep='first')]
#spk_amps.drop_duplicates(inplace=True)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


## Visualizing the synthetic data and inputs into the encoding/decoding model

In [8]:
spk_amps

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,c00,c01,c02,c03
day,epoch,elec_grp_id,timestamp,time,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1,1,1,38,0.038,411,943,174,58
1,1,1,40,0.040,367,953,148,134
1,1,1,68,0.068,398,936,172,106
1,1,1,75,0.075,206,215,363,747
1,1,1,78,0.078,906,608,86,21
1,1,1,...,...,...,...,...,...
1,1,1,99953,99.953,250,933,557,479
1,1,1,99961,99.961,948,902,715,769
1,1,1,99971,99.971,238,891,551,474
1,1,1,99977,99.977,968,885,763,788


In [9]:
%%output size=200 backend='matplotlib'
%%opts Points [aspect=2] (marker='.')

# Plot simulated position
hv.Points(pos_run, kdims=[('samples', 'Samples'), ('pos', 'Position (cm)')], label='Synthetic position data for UnitGenerator and encoding algorithm')



In [10]:
hv.output(backend='matplotlib')

from holoviews.streams import Stream, param

# 3D interactive scatter plot of simulated tetrode
def mark_plots(elevation, azimuth):
    scatter = [hv.Scatter3D(mark_pos.loc[:,['linpos_flat','c01','c02']])
               for elec_id, mark_pos in unit_spks.items()]
    overlay = hv.Overlay(scatter, label="Plot of spikes and their features in linpos_flat and amplitude channels c01 and c02")
    overlay = overlay.opts({'Scatter3D':{'plot': {'fig_size':400, 'azimuth': int(azimuth), 
                                                  'elevation': int(elevation)},
                                         'norm': {'framewise':True}}})
    return overlay


#holo = hv.HoloMap({(e,a): mark_plots(e,a) for e in range(0, 181, 20)
#                   for a in range(-90,91,20)}, kdims=['e','a'])
#holo

dmap = hv.DynamicMap(callback=mark_plots, kdims=['elevation', 'azimuth'], cache_size=1)
dmap = dmap.redim.values(elevation=range(0,181,5),
                         azimuth=range(-90,91,5)).opts(norm=dict(framewise=True))
dmap

## Run encoding model and estimate position distribution for each encoding spike

In [11]:
%%time
#%%prun -r -s cumulative

# Setup encoding model and estimate the position distribution of each spike being encoded
encoder = OfflinePPEncoder(linflat=linpos_flat_obj, enc_spk_amp=spk_amps, dec_spk_amp=spk_amps, 
                           #decode_settings=decode_settings,
                           #encode_settings=encode_settings, dask_worker_memory=min_worker_memory)
                           encode_settings=encode_settings, decode_settings=decode_settings,
                           dask_chunksize=100)
observ_obj = encoder.run_encoder()

23:18:43 [INFO] spykshrk.franklab.pp_decoder.pp_clusterless: Manual Dask chunksize: 100
23:18:43 [INFO] spykshrk.franklab.pp_decoder.pp_clusterless: Expected worker peak memory usage: 8.31 MB
23:18:43 [INFO] spykshrk.franklab.pp_decoder.pp_clusterless: Worker total memory: UNKNOWN
Passing `normed=True` on non-uniform bins has always been broken, and computes neither the probability density function nor the probability mass function. The result is only correct if the bins are uniform, when density=True will produce the same result anyway. The argument will be removed in a future version of numpy.
INFO:root:Setting up encoder dask task.
INFO:root:Running compute tasks on dask workers.


CPU times: user 18.1 s, sys: 558 ms, total: 18.7 s
Wall time: 7.69 s


In [12]:
%%output backend='matplotlib' size=200
%%opts Points (s=400 marker='x')

# Setup plot to visualize estimated position distribution

sel_distrib = observ_obj.loc[:, pos_col_format(0,encode_settings.pos_num_bins):         
                                 pos_col_format(encode_settings.pos_num_bins-1,
                                                encode_settings.pos_num_bins)]
    
sel_pos = observ_obj.loc[:, 'position']

max = sel_distrib.max().max()
    
def plot_observ(ind):
        
    plot_list = []
    for ii in range(5):
        plot_list.append(hv.Curve(sel_distrib.iloc[ind+ii], extents=(0, 0, 100, max)))
        plot_list.append(hv.Points((sel_pos.iloc[ind+ii], [0.005])))
    return hv.Overlay(plot_list)
        
#Ind = Stream.define('stuff', ind=0)

dmap = hv.DynamicMap(plot_observ, kdims=['ind'])
dmap.redim.values(ind=list(range(0, len(observ_obj)-5, 5)))

## Run point process decoding algorithm

In [13]:
%%time
# Run PP decoding algorithm
time_bin_size = 10

decoder = OfflinePPDecoder(observ_obj=observ_obj, trans_mat=encoder.trans_mat['learned'], 
                           prob_no_spike=encoder.prob_no_spike,
                           encode_settings=encode_settings, decode_settings=decode_settings, 
                           time_bin_size=time_bin_size)

posteriors = decoder.run_decoder()

Beginning likelihood calculation
Beginning posterior calculation
CPU times: user 5.36 s, sys: 31.8 ms, total: 5.39 s
Wall time: 1.21 s


## Visualize decoded estimates of position over time

In [14]:
%%output backend='bokeh' size=400 holomap='scrubber'
%%opts RGB { +framewise} [height=100 width=150 colorbar=True]
%%opts Points {+framewise} [height=100 width=150] (marker='o' size=4 alpha=0.5)
%%opts Curve {+framewise} [height=100 width=100]

dec_viz = DecodeVisualizer(posteriors, linpos=linpos_flat_obj, enc_settings=encode_settings)

map = dec_viz.plot_all_dynamic(stream=hv.streams.RangeXY(), plt_range=100, slide=10)

tapstream = hv.streams.Tap(source=map, x=0.16, y=0)

def tap_print(x, y):
    # get observ bin
    observ_bin = math.floor((x - observ_obj.get_time_start())/(posteriors.dec_settings['time_bin_size'] / 
                                                               observ_obj.enc_settings['sampling_rate']))
    sel_observ = observ_obj.query('dec_bin == @observ_bin')
    sel_plots =  [hv.Curve(row, group=str(observ_bin)) 
                 for ii, (ind, row) in enumerate(sel_observ.get_distribution_view().iterrows())]
    if len(sel_plots) == 0:
        sel_plots = [hv.Curve([0,0],  group=str(observ_bin))]
    sel_overlay = hv.Overlay(sel_plots)
    return sel_overlay
    
map + hv.DynamicMap(tap_print, kdims=[], streams=[tapstream])

Unexpected plot option 'colorbar' for RGB in loaded backends 'bokeh' and 'matplotlib'.

Similar keywords in the currently active 'bokeh' renderer are: ['toolbar', 'bgcolor']

If you believe this keyword is correct, please make sure the backend has been imported or loaded with the hv.extension.

In [68]:
data = np.random.rand(100)

In [74]:
kernel = sp.stats.norm.pdf(np.arange(0,20,1), 10, .5)
conv = np.convolve(data, kernel)
hv.Curve(conv[10:110])

In [61]:
conv

array([0.43329071, 0.56627147, 0.69206588, 0.79854855, 0.87920546,
       0.93387548, 0.96703453, 0.98503152, 0.99377215, 0.99757081,
       0.99904809, 0.99904809, 0.99904809, 0.99904809, 0.99904809,
       0.99904809, 0.99904809, 0.99904809, 0.99904809, 0.99904809,
       0.99904809, 0.99904809, 0.99904809, 0.99904809, 0.99904809,
       0.99904809, 0.99904809, 0.99904809, 0.99904809, 0.99904809,
       0.99904809, 0.99904809, 0.99904809, 0.99904809, 0.99904809,
       0.99904809, 0.99904809, 0.99904809, 0.99904809, 0.99904809,
       0.99904809, 0.99904809, 0.99904809, 0.99904809, 0.99904809,
       0.99904809, 0.99904809, 0.99904809, 0.99904809, 0.99904809,
       0.99904809, 0.99904809, 0.99904809, 0.99904809, 0.99904809,
       0.99904809, 0.99904809, 0.99904809, 0.99904809, 0.99904809,
       0.99904809, 0.99904809, 0.99904809, 0.99904809, 0.99904809,
       0.99904809, 0.99904809, 0.99904809, 0.99904809, 0.99904809,
       0.99904809, 0.99904809, 0.99904809, 0.99904809, 0.99904

In [65]:
kernel = sp.stats.norm.pdf(np.arange(0,50,1), 25, 1)
data = np.ones(150)
conv = np.convolve(data, kernel, mode='same')
hv.Curve(conv[25:125])

In [19]:
help(np.convolve)

Help on function convolve in module numpy.core.numeric:

convolve(a, v, mode='full')
    Returns the discrete, linear convolution of two one-dimensional sequences.
    
    The convolution operator is often seen in signal processing, where it
    models the effect of a linear time-invariant system on a signal [1]_.  In
    probability theory, the sum of two independent random variables is
    distributed according to the convolution of their individual
    distributions.
    
    If `v` is longer than `a`, the arrays are swapped before computation.
    
    Parameters
    ----------
    a : (N,) array_like
        First one-dimensional input array.
    v : (M,) array_like
        Second one-dimensional input array.
    mode : {'full', 'valid', 'same'}, optional
        'full':
          By default, mode is 'full'.  This returns the convolution
          at each point of overlap, with an output shape of (N+M-1,). At
          the end-points of the convolution, the signals do not overlap