# Use regression and classification to infer variables from neural data

In [None]:
# imports
import os
import sys
sys.path.insert(0, os.path.abspath(r'D:\Code Repos\prey_capture'))

import panel as pn
import holoviews as hv
from holoviews import opts, dim
hv.extension('bokeh')
from bokeh.resources import INLINE

import paths
import functions_bondjango as bd
import pandas as pd
import numpy as np
import sklearn.mixture as mix
import sklearn.decomposition as decomp
from sklearn.svm import SVR
from sklearn.model_selection import cross_validate
from sklearn import preprocessing
from sklearn.linear_model import LinearRegression
import statsmodels.api as sm
from statsmodels.tools.sm_exceptions import PerfectSeparationError
from scipy.stats import sem

import functions_plotting as fp
import functions_data_handling as fd
import umap
import random

In [None]:
# Resolve the path of the aggregated data set, then load it
try:
    # use the first input of the `snakemake` object when that name exists
    data_path = snakemake.input[0]
except NameError:
    # no `snakemake` in scope: query the database for a matching entry instead
    search_string = 'result:succ, lighting:normal, rig:miniscope, =analysis_type:aggFullCA'
    data_all = bd.query_database('analyzed_data', search_string)
    # only the first hit of the query is used
    first_match = data_all[0]
    data_path = first_match['analysis_path']
print(data_path)

# load the aggregate; the cells below index it as data[mouse][day]
data = fd.aggregate_loader(data_path)

In [None]:
# Fit one Gaussian GLM per (selection, mouse, day, target variable),
# with the calcium traces as regressors

# behavioral variables to regress
target_variables = ['mouse_x','mouse_y','cricket_x','cricket_y','mouse_cricket_distance',
                    'mouse_speed','cricket_speed','delta_heading','delta_head']

# target_variables = ['mouse_cricket_distance']

# labels of the delta_head row selections; a full set of models is fitted for each
bino_vector = ['sides', 'center', 'all']

# row masks on delta_head, one per label
# NOTE(review): the fallback used for 'all' keeps only 20 < delta_head < 40,
# it does not keep every row -- confirm this is intended.
# Alternatives the author left commented out:
#   center: delta_heading <= 20
#   sides:  20 < delta_heading < 121.4, or 30.7 < delta_head < 70.7
#   all:    every row; 20 < delta_head < 80; -121.4 < delta_heading < -20;
#           -70.7 < delta_head < -30.7
heading_masks = {
    'center': lambda angle: np.abs(angle) <= 20,
    'sides': lambda angle: (angle < -20) & (angle > -40),
}
fallback_mask = lambda angle: (angle > 20) & (angle < 40)

# fitted models, keyed by selection label
model_dict = {}
for bino in bino_vector:
    # models of this selection, keyed by (mouse, day)
    results = {}

    mice = data.keys()
    for mouse in mice:
        dates = data[mouse].keys()
        for day in dates:
            # table of this session
            sub_data = data[mouse][day]
            # split the columns by whether their name contains 'cell'
            labels = list(sub_data.columns)
            cells = [el for el in labels if 'cell' in el]
            not_cells = [el for el in labels if 'cell' not in el]
            calcium_data = np.array(sub_data[cells].copy())

            # rows of this session that belong to the current selection
            make_mask = heading_masks.get(bino, fallback_mask)
            selection_bool = make_mask(sub_data['delta_head'])

            # scale the calcium columns after the selection, so only the kept rows count
            calcium_data = preprocessing.scale(calcium_data[selection_bool, :])

            # models of this session, keyed by target variable
            var_results = {}
            for target_variable in target_variables:
                # target column, restricted to the same rows and scaled
                target_data = sub_data[target_variable].copy()
                target_data = preprocessing.scale(target_data[selection_bool])

                # exog is passed as is; no constant column is appended here
                exog, endog = calcium_data, target_data
                mod = sm.GLM(endog, exog, family=sm.families.Gaussian())
                try:
                    fit_outcome = mod.fit()
                except PerfectSeparationError:
                    # keep a NaN placeholder so every variable has an entry
                    fit_outcome = np.nan
                var_results[target_variable] = fit_outcome

            results[(mouse,day)] = var_results
    model_dict[bino] = results
print('Done')

In [None]:
# Plot example fits for GLM: real vs fitted trace, plus a real-vs-fitted scatter,
# for every (mouse, day) session
holo_dict = {}
holo_dict2 = {}
# define the variable to visualize
vis_variable = 'cricket_x'
# define which model to visualize
bino = 'center'
results = model_dict[bino]

# get the mice for this dataset
mice = data.keys()

# for all the mice
for mouse in mice:

    # get the dates for this mouse
    dates = data[mouse].keys()
    # for all the dates
    for day in dates:
        # get the table
        sub_data = data[mouse][day]
        # get the target data
        target_data = np.array(sub_data[vis_variable])

        # Rebuild the row mask with the same column and bounds the regression cell
        # used to fit the models. The fitted values only line up with the target
        # when both come from the same rows; a mask built from another column
        # generally selects a different number of rows.
        delta_head = sub_data['delta_head']
        if bino == 'center':
            selection_bool = np.abs(delta_head) <= 20
        elif bino == 'sides':
            selection_bool = (delta_head < -20) & (delta_head > -40)
        else:
            selection_bool = (delta_head > 20) & (delta_head < 40)
        # plain boolean array for indexing the numpy target vector
        selection_bool = np.array(selection_bool)

        target_data = preprocessing.scale(target_data[selection_bool])
        x_range = np.arange(target_data.shape[0])

        # get the fitted model; the regression cell stores np.nan when a fit failed
        model = results[(mouse, day)][vis_variable]
        if isinstance(model, float):
            # nothing to plot for this session
            continue
        fitted_data = model.mu

        # plot the results
        holo_dict[(mouse, day)] = hv.Curve((x_range, target_data), label='real_data').opts(shared_axes=False)  *\
        hv.Curve((x_range,fitted_data), label='fitted_data').opts(shared_axes=False)
        holo_dict2[(mouse, day)] = hv.Scatter((target_data, fitted_data)).opts(aspect='square', shared_axes=False)

holo_image = pn.panel(hv.HoloMap(holo_dict, kdims=['mouse', 'day'])+hv.HoloMap(holo_dict2, kdims=['mouse', 'day']), 
                      center=True, widget_location='top')
holo_image

In [None]:
# Histogram of the 'test_score' entries in `results`
# NOTE(review): this indexes `results` by position and reads 'test_score' from
# each element, which does not match the (mouse, day)-keyed dict of GLM fits
# built by the regression cell -- presumably left over from a cross-validation
# version of the analysis; confirm before running.
first_entry = results[0]
print(first_entry['test_score'])
test_scores = [el['test_score'] for el in results]
frequencies, edges = np.histogram(test_scores)
hv.Histogram((edges, frequencies))

In [None]:
# Plot example fits for SVR: real trace vs the mean prediction of the stored
# estimators, for every (mouse, day) session
# NOTE(review): assumes results[(mouse, day)][variable] is a dict with an
# 'estimator' list (e.g. sklearn cross_validate with return_estimator=True);
# the GLM regression cell stores statsmodels results instead -- confirm which
# `results` is in scope before running.

holo_dict = {}
# define the variable to visualize
vis_variable = 'mouse_cricket_distance'

# get the mice for this dataset
mice = data.keys()

# for all the mice
for mouse in mice:

    # get the dates for this mouse
    dates = data[mouse].keys()
    # for all the dates
    for day in dates:
        # get the table
        sub_data = data[mouse][day]
        # get the target data for the variable being visualized; reading the
        # leftover loop variable of the regression cell here would plot a
        # different column than the estimators below were selected for
        target_data = preprocessing.scale(sub_data[vis_variable])
        # get the x range
        x_range = np.arange(target_data.shape[0])
        # get the columns whose name contains 'cell'
        labels = list(sub_data.columns)
        cells = [el for el in labels if 'cell' in el]
        # get the cell data
        calcium_data = preprocessing.scale(sub_data[cells].copy())

        # get the estimators stored for this session and variable
        estimator = results[(mouse,day)][vis_variable]['estimator']
        # predict with every estimator and average the predictions
        fitted_data = np.mean(np.array([el.predict(calcium_data) for el in estimator]), axis=0)
        # plot the results
        holo_dict[(mouse, day)] = hv.Curve((x_range, target_data), label='real_data')  * hv.Curve((x_range,fitted_data), label='fitted_data')

holo_image = pn.panel(hv.HoloMap(holo_dict, kdims=['mouse', 'day']), center=True, widget_location='top')
holo_image

In [None]:
# Collect the 'test_score' entries of one variable over all sessions and print
# their overall mean
r2_list = []
# variable whose scores are collected
vis_variable = 'mouse_cricket_distance'

# running x coordinate; only the commented-out per-session scatter
# (hv.Scatter(([x_counter]*len(r2_vector), r2_vector)) + hv.Overlay(r2_list)) used it
x_counter = 1
for mouse in data.keys():
    for day in data[mouse].keys():
        # scores stored for this session and variable
        session_scores = results[(mouse,day)][vis_variable]['test_score']
        r2_list.append(session_scores)
        x_counter += 1

# mean over everything that was collected
overall_mean = np.mean(r2_list)
print(overall_mean)
        


In [None]:
# Build, for every selection in bino_vector, a sessions x variables matrix of
# goodness of fit and an image of it.
# The value shown is log(1/deviance) of each fitted GLM, not an r2.

# allocate memory to store the images
image_dict = {}
# also to store the matrices
matrix_dict = {}

# for all the models
for bino in bino_vector:
    # load the results
    results = model_dict[bino]
    # allocate memory to compile the results
    compiled_results = []
    mouse_data_names = []

    # get the mice for this dataset
    mice = data.keys()

    # for all the mice
    for mouse in mice:

        # get the dates for this mouse
        dates = data[mouse].keys()
        # for all the dates
        for day in dates:
            # get the results dict
            sub_results = results[(mouse,day)]
            # one row per session: log(1/deviance) per variable. Failed fits are
            # stored as np.nan (a float) and stay NaN. The builtin `float` is used
            # because the `np.float` alias no longer exists in current NumPy.
            compiled_results.append([np.nan if isinstance(sub_results[el], float)
                                     else np.log(1/sub_results[el].deviance)
                                     for el in sub_results.keys()])
            # save the mouse and date
            mouse_data_names.append('_'.join((mouse,day)))
    # tick labels, placed at the center (+0.5) of each image cell; the variable
    # names are taken from the last session visited
    variable_names = sub_results.keys()
    variable_names = [(idx+0.5, el) for idx, el in enumerate(variable_names)]
    mouse_data_names = [(idx+0.5, el) for idx, el in enumerate(mouse_data_names)]


    # turn into a matrix
    compiled_results = np.array(compiled_results)
#     compiled_results = np.mean(np.array(compiled_results), axis=2)
#     compiled_results[compiled_results<0] = np.nan


    # plot the matrix as an image, one unit per row/column
    r2_image = hv.Image(compiled_results, bounds=[0, 0, compiled_results.shape[1], compiled_results.shape[0]])
    r2_image.opts(width=600, invert_axes=False, invert_yaxis=False, 
                           invert_xaxis=False, cmap='Viridis', xticks=variable_names, yticks=mouse_data_names, 
                  xrotation=45, tools=['hover'], colorbar=True)

    image_dict[bino] = r2_image
    matrix_dict[bino] = compiled_results
# raw_image = hv.Image(sub_array, bounds=[0,0,len(y_labels),10], label=mouse+'_'+day)
#         raw_image.opts(width=800, height=600, invert_axes=True, invert_yaxis=True, 
#                        invert_xaxis=True, cmap='Viridis', yticks=y_labels, tools=['hover'], shared_axes=False)

In [None]:
# Show the goodness-of-fit images of the three selections side by side

center_image = image_dict['center']
sides_image = image_dict['sides']
all_image = image_dict['all']
center_image + sides_image + all_image

In [None]:
# Average the goodness-of-fit matrices over sessions (rows) and overlay one
# curve with an SEM band per selection

plots = []

# tick labels: position -> target variable name
x_labels = list(enumerate(target_variables))

# display options of the curves
curve_options = dict(
    width=400, height=400, shared_axes=False, xticks=x_labels, xrotation=45, padding=0.1,
    fontsize={'title': 16, 'labels': 14, 'xticks': 12, 'yticks': 12})

for bino in bino_vector:
    # sessions x variables matrix of this selection
    compiled_results = matrix_dict[bino]

    # column-wise mean and standard error, ignoring NaN entries
    average = np.nanmean(compiled_results, axis=0)
    errors = sem(compiled_results, axis=0, nan_policy='omit')

    # one x position per variable
    x_positions = list(np.arange(average.shape[0]))
    line = hv.Curve((x_positions, average), label=bino, vdims='Goodness of fit').opts(**curve_options)
    shadow = hv.Spread((x_positions, average, errors)).opts(shared_axes=False)

    plots.append(line*shadow)

hv.Overlay(plots)

In [None]:
# Difference between the 'center' and 'sides' goodness-of-fit matrices:
# image of the full difference next to its per-variable mean with an SEM band
compiled_results = matrix_dict['center'] - matrix_dict['sides']

# column-wise mean and standard error, ignoring NaN entries
average = np.nanmean(compiled_results, axis=0)
errors = sem(compiled_results, axis=0, nan_policy='omit')

# image of the difference; variable_names, mouse_data_names and x_labels are
# the tick labels left in scope by earlier cells
n_rows, n_columns = compiled_results.shape[0], compiled_results.shape[1]
delta_image = hv.Image(compiled_results, bounds=[0, 0, n_columns, n_rows])
delta_image.opts(width=600, invert_axes=False, invert_yaxis=False,
                 invert_xaxis=False, cmap='Viridis', xticks=variable_names, yticks=mouse_data_names,
                 xrotation=45, tools=['hover'], colorbar=True, shared_axes=False)

# mean curve and its error band, one x position per column
x_positions = list(np.arange(average.shape[0]))
line = hv.Curve((x_positions, average), vdims=['Goodness of fit'])
line = line.opts(shared_axes=False, xticks=x_labels, xrotation=45, padding=0.1)
shadow = hv.Spread((x_positions, average, errors)).opts(shared_axes=False)
delta_image+(line*shadow).opts(shared_axes=False)

In [None]:
# Gather the fitted GLM parameters of every session into one array per
# (mouse, day), indexed as [selection, target variable, parameter]

weights_dict = {}

for mouse in data.keys():
    for day in data[mouse].keys():
        # one list of parameter vectors per selection, in bino_vector order
        temp_matrix = []
        for bino in bino_vector:
            # models of this selection and session, keyed by target variable
            results = model_dict[bino][(mouse,day)]
            session_params = [fit.params for fit in results.values()]
            temp_matrix.append(session_params)

        weights_dict[(mouse,day)] = np.array(temp_matrix)

In [None]:
# Per session: normalized spread of the model weights for each target variable,
# one curve per selection, browsable by mouse and day

# overlays keyed by (mouse, day)
histogram_dict = {}

for mouse in data.keys():
    for day in data[mouse].keys():
        # weights of this session, indexed [selection, variable, parameter]
        weights = weights_dict[(mouse,day)]
        # one curve per selection
        weights_histograms = []
        for idx, bino in enumerate(bino_vector):
            # std over the last axis, i.e. one value per target variable,
            # then divided by the largest of those values
            weight_spread = np.std(weights[idx, :, :], axis=1)
            temp_std = weight_spread/np.amax(weight_spread)
            spread_curve = hv.Curve(temp_std).opts(shared_axes=False)
            weights_histograms.append(spread_curve)
        # overlay the curves of all selections for this session
        histogram_dict[(mouse,day)] = hv.Overlay(weights_histograms).opts(shared_axes=False)


session_map = hv.HoloMap(histogram_dict, kdims=['mouse', 'day']).opts(shared_axes=False)
holo_histogram = pn.panel(session_map, center=True, widget_location='top')
holo_histogram
# holo = hv.HoloMap(histogram_dict, kdims=['mouse', 'day'])
# holo.opts(opts.Overlay(opts.Histogram(shared_axes=False), shared_axes=False))
# holo

# holo_histogram
#         # allocate a matrix for the images
#         weights_image =[]
#         variable_names = [(idx+0.5, el) for idx, el in enumerate(target_variables)]
#         for idx, bino in enumerate(bino_vector):
#             target_matrix = weights[idx, :, :].T
#             delta_image = hv.Image(target_matrix, bounds=[0, 0, target_matrix.shape[1], target_matrix.shape[0]])
#             delta_image.opts(width=600, invert_axes=False, invert_yaxis=False, 
#                        invert_xaxis=False, cmap='Viridis', xticks=variable_names, 
#               xrotation=45, tools=['hover'], colorbar=True, shared_axes=False)
#             weights_image.append(delta_image)
# weights_image[0]+weights_image[1]+weights_image[2]

In [None]:
# Average the normalized weight spread over sessions and plot it per selection
# with error bars

# one [selection, variable] matrix per session
average_std = []

for mouse in data.keys():
    for day in data[mouse].keys():
        # weights of this session, indexed [selection, variable, parameter]
        weights = weights_dict[(mouse,day)]
        # std over the last axis, divided by the largest value of the session
        session_spread = np.std(weights, axis=2)
        temp_std = session_spread/np.amax(session_spread)
        average_std.append(temp_std)

# mean and standard error over sessions (std_plot holds the SEM)
average_plots = np.array(np.mean(average_std, axis=0))
std_plot = np.array(sem(average_std, axis=0))

# one curve with error bars per selection
std_list = []
# x positions, one per column of the averaged matrix
x_range = np.arange(average_plots.shape[1])
for idx, bino in enumerate(bino_vector):
    selection_mean = average_plots[idx, :]
    selection_error = std_plot[idx, :]
    lines = hv.Curve((x_range, selection_mean), vdims='Weight Variation')
    lines = lines.opts(padding=0.1, xticks=x_labels, xrotation=45)
    shadows = hv.ErrorBars((x_range, selection_mean, selection_error))
    std_list.append(lines*shadows)

hv.Overlay(std_list)