In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import json
import os

from spykshrk.realtime.simulator import nspike_data

from spykshrk.franklab.pp_decoder.pp_clusterless import OfflinePPDecoder, plot_decode_2d
from spykshrk.franklab.pp_decoder.data_containers import EncodeSettings, DecodeSettings, SpikeObservation, \
                                                         LinearPositionContainer
from spykshrk.franklab.pp_decoder.data_containers import LinearPositionContainer, SpikeObservation

        
%load_ext Cython

%matplotlib inline

# Notebook-wide pandas display configuration: 4 digits of precision and at most
# 10 rows shown per table.
# Disabled alternatives, kept for reference:
#   pd.set_option('float_format', '{:,.2f}'.format)
#   pd.set_option('display.width', 180)
pd.options.display.precision = 4
pd.options.display.max_rows = 10

# Short alias for pd.IndexSlice
idx = pd.IndexSlice


In [3]:
# Load config file and data

# NOTE(review): absolute, user-specific path; adjust for the machine running the notebook.
config_file = '/home/daliu/Src/spykshrk_realtime/config/bond_single.json'
# Parse the JSON config inside a context manager so the file handle is closed
# as soon as parsing finishes.
with open(config_file, 'r') as config_fh:
    config = json.load(config_fh)

# Main hdf5 data source file name
hdf_file = os.path.join(config['files']['output_dir'],
                        '{}.rec_merged.h5'.format(config['files']['prefix']))

# Extract just encode and decode settings from config
encode_settings = EncodeSettings(config)
decode_settings = DecodeSettings(config)

# Open data file read-only. The store is left open after this cell; call
# store.close() once no further tables need to be read from it.
store = pd.HDFStore(hdf_file, mode='r')

# Encapsulate Spike Observation panda table in container
observ_obj = SpikeObservation(store['rec_3'])

# Grab stimulation lockout times
stim_lockout = store['rec_11']

# Grab animal linearized real position
nspike_anim = nspike_data.AnimalInfo(**config['simulator']['nspike_animal_info'])
pos = nspike_data.PosMatDataStream(nspike_anim)
pos_data = pos.data

# Encapsulate linear position
lin_obj = LinearPositionContainer(pos_data, encode_settings)

In [4]:
# Display the linearized position table held by the container. It is an example of a
# MultiIndexed pandas table: rows are indexed by (day, epoch, timestamp) and the
# column labels are multi-level as well.
lin_obj.pos_data

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,lin_dist_well,lin_dist_well,lin_dist_well,lin_vel,lin_vel,lin_vel,seg_idx,time,time
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,well_center,well_left,well_right,well_center,well_left,well_right,seg_idx,time,timestamp
day,epoch,timestamp,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2
4,1,73830339.0,27.8,142.1,144.5,7.5,134.4,136.4,1.0,2461.0,73830339.0
4,1,73831341.0,26.9,143.0,145.4,7.5,134.4,136.4,1.0,2461.0,73831341.0
4,1,73832343.0,25.5,144.3,146.8,6.7,134.5,136.6,1.0,2461.1,73832343.0
4,1,73833342.0,24.6,145.2,147.7,5.8,134.1,136.1,1.0,2461.1,73833342.0
4,1,73834344.0,23.3,146.6,149.0,4.9,133.2,135.1,1.0,2461.1,73834344.0
4,1,...,...,...,...,...,...,...,...,...,...
4,1,102145374.0,7.0,162.8,165.3,-4.0,-128.6,-130.5,1.0,3404.8,102145374.0
4,1,102146376.0,7.1,162.8,165.2,-4.2,-131.4,-133.4,1.0,3404.9,102146376.0
4,1,102147378.0,7.5,162.3,164.8,-4.4,-133.7,-135.7,1.0,3404.9,102147378.0
4,1,102148377.0,7.5,162.3,164.8,-4.5,-135.4,-137.5,1.0,3404.9,102148377.0


In [5]:
# Upsample the position data to bins of 30 samples each. Missing values are backfilled:
# as the output shows, each new bin carries the values of the next available position
# sample. The result also gains a `bin` column numbering the bins.
lin_obj.get_resampled(30)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,lin_dist_well,lin_dist_well,lin_dist_well,lin_vel,lin_vel,lin_vel,seg_idx,time,time,bin
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,well_center,well_left,well_right,well_center,well_left,well_right,seg_idx,time,timestamp,Unnamed: 12_level_1
day,epoch,timestamp,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2
4,1,73830360.0,26.9,143.0,145.4,7.5,134.4,136.4,1.0,2461.0,73831341.0,0
4,1,73830390.0,26.9,143.0,145.4,7.5,134.4,136.4,1.0,2461.0,73831341.0,1
4,1,73830420.0,26.9,143.0,145.4,7.5,134.4,136.4,1.0,2461.0,73831341.0,2
4,1,73830450.0,26.9,143.0,145.4,7.5,134.4,136.4,1.0,2461.0,73831341.0,3
4,1,73830480.0,26.9,143.0,145.4,7.5,134.4,136.4,1.0,2461.0,73831341.0,4
4,1,...,...,...,...,...,...,...,...,...,...,...
4,1,102149250.0,7.6,162.3,164.7,-4.7,-136.5,-138.6,1.0,3405.0,102149379.0,943963
4,1,102149280.0,7.6,162.3,164.7,-4.7,-136.5,-138.6,1.0,3405.0,102149379.0,943964
4,1,102149310.0,7.6,162.3,164.7,-4.7,-136.5,-138.6,1.0,3405.0,102149379.0,943965
4,1,102149340.0,7.6,162.3,164.7,-4.7,-136.5,-138.6,1.0,3405.0,102149379.0,943966


In [6]:
# Downsample the position data to bins of 30000 samples each (one second apart according
# to the `time` column of the output); data points between the kept bins are dropped.
lin_obj.get_resampled(30000)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,lin_dist_well,lin_dist_well,lin_dist_well,lin_vel,lin_vel,lin_vel,seg_idx,time,time,bin
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,well_center,well_left,well_right,well_center,well_left,well_right,seg_idx,time,timestamp,Unnamed: 12_level_1
day,epoch,timestamp,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2
4,1,73860000.0,0.0,169.9,172.3,-8.8,32.0,32.4,1.0,2462.0,73860369.0,0
4,1,73890000.0,2.4,167.4,169.9,3.1,-3.1,-3.1,1.0,2463.0,73890399.0,1
4,1,73920000.0,5.0,164.8,167.3,1.5,-1.5,-1.5,1.0,2464.0,73920429.0,2
4,1,73950000.0,5.9,163.9,166.4,0.4,-0.4,-0.4,1.0,2465.0,73950459.0,3
4,1,73980000.0,5.9,163.9,166.4,-0.2,0.2,0.2,1.0,2466.0,73980489.0,4
4,1,...,...,...,...,...,...,...,...,...,...,...
4,1,102000000.0,0.0,169.9,172.3,0.0,0.0,0.0,1.0,3400.0,102000231.0,938
4,1,102030000.0,0.0,169.9,172.3,0.0,0.0,0.0,1.0,3401.0,102030261.0,939
4,1,102060000.0,0.0,169.9,172.3,0.0,0.0,0.0,1.0,3402.0,102060291.0,940
4,1,102090000.0,0.0,169.9,172.3,2.9,-2.9,-2.9,1.0,3403.0,102090321.0,941


In [10]:
# Observation distribution of each spike in a single epoch: one row per spike, with the
# distribution stored in columns x0..x449. This is calculated and cached from an encoding
# model in the realtime module. Currently this is only valid for a single epoch's data.

observ_obj.spike_dec

Unnamed: 0,rec_ind,timestamp,ntrode_id,position,x0,x1,x2,x3,x4,x5,...,x440,x441,x442,x443,x444,x445,x446,x447,x448,x449
0,1,73830048,29,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,1,73830066,13,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,2,73830144,14,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,6,73830192,14,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,5,73830204,13,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
303106,237333,102149649,11,7.6,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
303107,55281,102149697,12,7.6,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
303108,96729,102149817,17,7.6,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
303109,237337,102149925,11,7.6,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [11]:
# Assign a bin (300 samples == 10ms) to each spike based on its timestamp. The bin number is
# stored in the dec_bin column and the bin's starting timestamp in the dec_bin_start column.

observ_obj.get_observations_bin_assigned(300)

Unnamed: 0,rec_ind,timestamp,ntrode_id,position,x0,x1,x2,x3,x4,x5,...,x442,x443,x444,x445,x446,x447,x448,x449,dec_bin,dec_bin_start
0,1,73830048,29,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,73830000
1,1,73830066,13,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,73830000
2,2,73830144,14,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,73830000
3,6,73830192,14,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,73830000
4,5,73830204,13,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,73830000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
303106,237333,102149649,11,7.6,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,94398,102149400
303107,55281,102149697,12,7.6,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,94398,102149400
303108,96729,102149817,17,7.6,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,94399,102149700
303109,237337,102149925,11,7.6,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,94399,102149700


In [None]:
# For each time bin, compute the product of the per-spike distributions stored in
# columns x0:x449. This estimates the probability distribution of position at each
# time bin. Refer to
# spykshrk.franklab.pp_decoder.pp_clusterless.OfflinePPDecoder.calc_observation_intensity
# for analysis code that uses groupby.

# Spike table with a 300-sample bin assigned to every spike (dec_bin column)
spike_decode = observ_obj.get_observations_bin_assigned(300)
# One group of spikes per bin
groups = spike_decode.groupby('dec_bin')

def prod_dist(df, first_col='x0', last_col='x449'):
    """Multiply the distributions of all rows in ``df`` column by column.

    Parameters
    ----------
    df : pandas.DataFrame
        Rows to combine, e.g. one group handed over by ``groupby(...).apply``.
    first_col, last_col : label, optional
        Inclusive column-label bounds of the distribution columns. The defaults
        select ``'x0'`` through ``'x449'``.

    Returns
    -------
    pandas.Series
        Product over the rows of each selected column, indexed by column label.
    """
    # .loc label slices include both endpoints
    return df.loc[:, first_col:last_col].prod()

# Built-in grouped product over the distribution columns x0..x449. It yields the same
# table as groups.apply(prod_dist) (one row per dec_bin) without invoking a Python
# function once per bin.
groups[list(spike_decode.loc[:, 'x0':'x449'].columns)].prod()