In [465]:
import os
import sys
sys.path.insert(0, os.path.abspath(r'D:\Code Repos\prey_capture'))

import paths
import functions_bondjango as bd
import functions_loaders as fl
import functions_plotting as fp
import snakemake_scripts.classify_batch as class_fun
import yaml
import processing_parameters
import datetime

import numpy as np
import pandas as pd
from sklearn import preprocessing
import sklearn.metrics as smet
import sklearn.linear_model as lin
import sklearn.model_selection as mod
import h5py
import functions_misc as fm
import random
import scipy.stats as stat
import scipy.signal as signal
import importlib

import panel as pn
import holoviews as hv
from holoviews import opts, dim
hv.extension('bokeh')
from bokeh.resources import INLINE
from bokeh import palettes

In [242]:
# re-import the project modules (plotting first, then the parameters)
for module in (fp, processing_parameters):
    importlib.reload(module)
# apply the figure theme defined in the plotting module
fp.set_theme()
# label dictionary taken from the processing parameters
label_dict = processing_parameters.label_dictionary

In [243]:
# find the files to load

# selection criteria come from the processing parameters
animal = processing_parameters.animal
day = processing_parameters.day
rig = processing_parameters.rig
# database search string, restricted to the selected day
search_string = 'imaging:doric, slug:%s' % day

# fetch every matching entry of the analyzed data
data_all = bd.query_database('analyzed_data', search_string)

# keep the analysis path of the entries whose slug mentions both 'preproc' and the animal
input_path = []
for entry in data_all:
    if 'preproc' not in entry['slug']:
        continue
    if animal.lower() not in entry['slug']:
        continue
    input_path.append(entry['analysis_path'])
print(input_path)

['J:\\Drago Guggiana Nilo\\Prey_capture\\AnalyzedData\\04_02_2021_10_34_47_miniscope_DG_210202_a_succ_preproc.hdf5', 'J:\\Drago Guggiana Nilo\\Prey_capture\\AnalyzedData\\04_02_2021_10_30_38_miniscope_DG_210202_a_succ_preproc.hdf5', 'J:\\Drago Guggiana Nilo\\Prey_capture\\AnalyzedData\\04_02_2021_10_25_41_miniscope_DG_210202_a_succ_preproc.hdf5', 'J:\\Drago Guggiana Nilo\\Prey_capture\\AnalyzedData\\04_02_2021_10_21_22_miniscope_DG_210202_a_succ_preproc.hdf5', 'J:\\Drago Guggiana Nilo\\Prey_capture\\AnalyzedData\\04_02_2021_10_16_10_miniscope_DG_210202_a_succ_preproc.hdf5', 'J:\\Drago Guggiana Nilo\\Prey_capture\\AnalyzedData\\04_02_2021_10_10_35_miniscope_DG_210202_a_succ_preproc.hdf5', 'J:\\Drago Guggiana Nilo\\Prey_capture\\AnalyzedData\\04_02_2021_10_06_47_miniscope_DG_210202_a_succ_preproc.hdf5', 'J:\\Drago Guggiana Nilo\\Prey_capture\\AnalyzedData\\04_02_2021_10_03_36_miniscope_DG_210202_a_succ_preproc.hdf5']


In [244]:
# load the data

# map each analysis path back to its database entry: input_path is a filtered subset of
# data_all, so a position in input_path is not a valid index into data_all
entry_by_path = {entry['analysis_path']: entry for entry in data_all if 'analysis_path' in entry}

data_list = []
meta_list = []
frame_list = []
for el in input_path:
    # database entry this path was taken from
    entry = entry_by_path[el]
    # get the trial timestamp (for frame calculations), built from fields 3 to 5
    # of the underscore-separated file name
    time_stamp = int(''.join(os.path.basename(el).split('_')[3:6]))

    try:
        temp_data = pd.read_hdf(el, 'matched_calcium')
        # tag every row with the database id of its own entry
        temp_data['id'] = entry['id']

        meta_list.append([entry[el1] for el1 in processing_parameters.meta_fields])
        # try to load the motifs and latents
        try:
            latents = pd.read_hdf(el, 'latents')
            motifs = pd.read_hdf(el, 'motifs')
            egocentric_coords = pd.read_hdf(el, 'egocentric_coord')
            egocentric_coords = egocentric_coords.loc[:, ['cricket_0_x', 'cricket_0_y']]
            egocentric_coords = egocentric_coords.rename(columns={'cricket_0_x': 'ego_cricket_x',
                                                                  'cricket_0_y': 'ego_cricket_y'})
            # pad the latents
            [latents, motifs] = fl.pad_latents([latents, motifs], temp_data.shape[0])
            # concatenate with the main data
            temp_data = pd.concat([temp_data, egocentric_coords, latents, motifs], axis=1)
        except KeyError:
            # the trial is kept, just without the extra columns
            print(f'No latents in file {el}')
        data_list.append(temp_data)
        frame_list.append([time_stamp, 0, temp_data.shape[0]])
    except KeyError as err:
        # the trial is skipped; report it and record it in the frame list with zero frames
        print(f'Skipping file {el}: missing key {err}')
        frame_list.append([time_stamp, 0, 0])

In [295]:
# define the regression parameters and perform the regression
importlib.reload(class_fun)
# parameters
target_behavior = 'cricket_0_mouse_distance'
time_shift = 40
chunk = True
repeats = 5
shuffle = False
chunk_size = 0.05

# get the cell labels; a set union is used instead of stacking the column indexes into an
# array because trials loaded without latents have fewer columns than the others, and a
# ragged list of indexes cannot be turned into a regular array
labels = sorted(set().union(*(el.columns for el in data_list)))
cells = [el for el in labels if 'cell' in el]
# get the data (only from the trials that contain the target behavior)
sub_data = [el[[target_behavior]+cells] for el in data_list if (target_behavior in el.columns)]
sub_data = pd.concat(sub_data)
# get the parameter of interest
parameter_working = sub_data.loc[:, target_behavior].to_numpy().copy()
calcium_data_working = np.array(sub_data[cells].copy())

# allocate lists for the outputs
pred_list = []
coeff_list = []
score_list = []
# for all the repeats
for _ in range(repeats):
    # create a fresh regressor for every repeat
    linear = lin.TweedieRegressor(alpha=0.01, max_iter=5000, fit_intercept=False, power=0)

    # run the training function
    linear_pred, coefficients, cc_score = class_fun.train_test_regressor(parameter_working,
                                                                         calcium_data_working,
                                                                         preprocessing.StandardScaler,
                                                                         linear,
                                                                         stat.spearmanr,
                                                                         time_s=time_shift,
                                                                         shuffle=shuffle,
                                                                         empty=False,
                                                                         chunk=chunk,
                                                                         chunk_size=chunk_size,
                                                                         test_size=0.3)
    pred_list.append(linear_pred)
    coeff_list.append(coefficients)
    score_list.append(cc_score)
# average score across repeats
print(np.mean(score_list))


-0.0631145811539489


In [297]:
# plot the predictions

# shared x axis, one point per sample of the first prediction
x = np.arange(pred_list[0].shape[0])

plot_list = []
# one curve per repeat
for rep_idx in range(repeats):
    rep_curve = hv.Curve((x, pred_list[rep_idx]))
    rep_curve.opts(width=1000)
    plot_list.append(rep_curve)

# band showing the mean and standard deviation across repeats at each time point
mean = np.mean(pred_list, axis=0)
std = np.std(pred_list, axis=0)
plot_list.append(hv.Spread((x, mean, std)))

# original trace, trimmed at the start for non-negative shifts and at the end otherwise
y = (parameter_working[time_shift:] if time_shift >= 0 else parameter_working[:time_shift]).copy()

target_curve = hv.Curve((x, y))
target_curve.opts(line_dash='dotted', title=f'Mean cc: {np.mean(score_list):0.2f}')
plot_list.append(target_curve)

hv.Overlay(plot_list)

In [149]:
# plot the weights

# x axis, one position per regression coefficient
x = np.arange(coeff_list[0].shape[0])

plot_list = []
# one set of markers per repeat
for rep_idx in range(repeats):
    rep_scatter = hv.Scatter((x, coeff_list[rep_idx]))
    rep_scatter.opts(width=1000, size=10)
    plot_list.append(rep_scatter)

# error bars with the mean and standard deviation of each weight across repeats
mean = np.mean(coeff_list, axis=0)
std = np.std(coeff_list, axis=0)
weight_bars = hv.ErrorBars((x, mean, std))
weight_bars.opts(title=f'Average weight std: {np.mean(std):0.2f}')
plot_list.append(weight_bars)

hv.Overlay(plot_list)

In [474]:
# calculate regression over time

importlib.reload(class_fun)
# parameters
target_behavior = 'mouse_speed'
# NOTE: time_shift is a list of shifts here, whereas the single-shift regression cell
# defines it as one integer; cells expecting one form will not work with the other
time_shift = [-40, -30, -20, -10, 0, 10, 20, 30, 40]
chunk = True
repeats = 10
chunk_size = 0.1
chunk_size_shuffle = chunk_size

# get the cell labels; a set union is used instead of stacking the column indexes into an
# array because trials loaded without latents have fewer columns than the others, and a
# ragged list of indexes cannot be turned into a regular array
labels = sorted(set().union(*(el.columns for el in data_list)))
cells = [el for el in labels if 'cell' in el]
# get the data (only from the trials that contain the target behavior)
sub_data = [el[[target_behavior]+cells] for el in data_list if (target_behavior in el.columns)]
sub_data = pd.concat(sub_data)
# get the parameter of interest
parameter_working = sub_data.loc[:, target_behavior].to_numpy().copy()
calcium_data_working = np.array(sub_data[cells].copy())

# outputs, indexed as [real, shuffle][time shift][mean, std]
pred_shuffle = []
coeff_shuffle = []
score_shuffle = []
# first the real run, then the shuffled one
for shuffle_f in (False, True):
    # allocate lists for the outputs
    pred_time = []
    coeff_time = []
    score_time = []
    # for all the time shifts
    for time_s in time_shift:
        pred_list = []
        coeff_list = []
        score_list = []
        # for all the repeats
        for _ in range(repeats):
            # create a fresh regressor for every repeat
            linear = lin.TweedieRegressor(alpha=0.01, max_iter=5000, fit_intercept=False, power=0)

            # run the training function
            linear_pred, coefficients, cc_score = class_fun.train_test_regressor(parameter_working,
                                                                                 calcium_data_working,
                                                                                 preprocessing.StandardScaler,
                                                                                 linear,
                                                                                 stat.spearmanr,
                                                                                 time_s=time_s,
                                                                                 shuffle_f=shuffle_f,
                                                                                 empty=False,
                                                                                 chunk=chunk,
                                                                                 test_size=0.3,
                                                                                 shuffle=False,
                                                                                 chunk_size=chunk_size,
                                                                                 chunk_size_shuffle=chunk_size_shuffle)

            pred_list.append(linear_pred)
            coeff_list.append(coefficients)
            score_list.append(cc_score)
        # summarize the repeats of this time shift as [mean, std]
        pred_time.append([np.mean(pred_list, axis=0), np.std(pred_list, axis=0)])
        coeff_time.append([np.mean(coeff_list, axis=0), np.std(coeff_list, axis=0)])
        score_time.append([np.mean(score_list), np.std(score_list)])
    pred_shuffle.append(pred_time)
    coeff_shuffle.append(coeff_time)
    score_shuffle.append(score_time)

In [464]:
# plot the results over time

plot_list = []
# entry 0 of score_shuffle is drawn in red and entry 1 in black
for run_idx, run_color in enumerate(('red', 'black')):
    run_scores = score_shuffle[run_idx]
    # each element holds [mean, std] of the score for one time shift
    score_means = [el[0] for el in run_scores]
    score_stds = [el[1] for el in run_scores]
    mean = hv.Scatter((time_shift, score_means))
    std = hv.Spread((time_shift, score_means, score_stds))
    mean.opts(width=1000, color=run_color)
    std.opts(width=1000, color=run_color)
    plot_list.append(mean*std)
hv.Overlay(plot_list)


In [482]:
# compute the autocorrelation of a given variable

plot_list = []
autocorrelation = signal.correlate(parameter_working, parameter_working, 'same')
# integer lag axis with lag 0 on the centre sample (index n // 2) of the 'same'-mode output;
# dividing the length by 2 instead would shift every lag by half a sample for odd lengths
n_samples = autocorrelation.shape[0]
x = np.arange(n_samples) - n_samples // 2
plot = hv.Curve((x, autocorrelation))
plot.opts(width=1000, color='red')
plot_list.append(plot)
# autocorrelation of chunk-shuffled copies of the same variable, drawn in black
for _ in range(repeats):
    parameter_shuffle = class_fun.chunk_shuffle(parameter_working, chunk_size_shuffle=0.01)
    auto_shuffle = signal.correlate(parameter_shuffle, parameter_shuffle, 'same')
    plot = hv.Curve((x, auto_shuffle))
    plot.opts(width=1000, color='black')
    plot_list.append(plot)

hv.Overlay(plot_list)