In [None]:
# imports
import logging
import os
import sys
sys.path.insert(0, os.path.abspath(r'D:\Code Repos\prey_capture'))


import panel as pn
import holoviews as hv
from holoviews import opts, dim
from holoviews.operation import histogram
hv.extension('bokeh')
from bokeh.resources import INLINE

import paths
import functions_bondjango as bd
import functions_misc as fm
import functions_plotting as fp
import pandas as pd
import numpy as np
import sklearn.mixture as mix
import sklearn.decomposition as decomp
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import cross_val_predict, train_test_split
from sklearn import svm, datasets
from sklearn import preprocessing
import sklearn.linear_model as lin
import sklearn.metrics as smet
import scipy.signal as ss
import scipy.stats as stat
import scipy.optimize as opt

import random
# import functions_data_handling as fd
# import functions_vame as fv
import importlib
import processing_parameters
# import PSID
# from PSID.evaluation import evalPrediction
# import sklearn.cross_decomposition as cros
# import umap


In [None]:
def maxmin(array_in):
    """Rescale an array to the 0-1 range using its NaN-ignoring extremes.

    Computes (array_in - min) / (max - min), with min and max taken by
    np.nanmin / np.nanmax. When max equals min the division is 0/0, so the
    output contains NaNs (the caller in basis_predictors zeroes them).
    """
    lowest = np.nanmin(array_in)
    span = np.nanmax(array_in) - lowest
    return (array_in - lowest) / span

def basis_predictors(variable, basis_number, kernel, kernel_spacing, total_length,label):
    """Expand one regressor into a set of time-shifted, kernel-smoothed predictors.

    For every basis index the kernel is embedded in a zero-padded vector
    (leading zeros grow by kernel_spacing per index), the variable is
    convolved with it in 'same' mode, and the result is rescaled to 0-1 with
    maxmin. NaNs produced by the rescaling are replaced by 0.

    Parameters
    ----------
    variable : 1D array with the regressor values.
    basis_number : number of shifted copies to generate.
    kernel : 1D array with the smoothing kernel.
    kernel_spacing : shift between consecutive basis functions, in samples.
    total_length : length of the padded kernel, in samples.
    label : prefix for the output column names.

    Returns
    -------
    pandas.DataFrame with one column per basis function, named
    '<label>_<index>'.
    """
    kernel_length = kernel.shape[0]
    last_basis = basis_number - 1
    # gather the columns first and build the frame in one go
    columns = {}
    for idx2 in np.arange(basis_number):
        # zeros placed before and after the kernel
        back = int(kernel_spacing*idx2)
        front = int(total_length - kernel_length - back)
        # assemble the padded kernel; the first and last cases pad one side only
        if back == 0:
            pieces = (kernel, np.zeros(front))
        elif idx2 == last_basis:
            pieces = (np.zeros(back), kernel)
        else:
            pieces = (np.zeros(back), kernel, np.zeros(front))
        current_kernel = np.concatenate(pieces)

        # smooth/shift the variable and bring it to the 0-1 range
        vector = maxmin(np.convolve(variable, current_kernel, 'same'))
        # a flat trace has no range, so maxmin returns NaNs: zero them
        vector[np.isnan(vector)] = 0

        columns[label+'_'+str(idx2)] = vector

    return pd.DataFrame(columns)

In [None]:
%%time
# Load the desired files
#
# Queries the database for every search string in processing_parameters,
# opens each preprocessed HDF5 file and, for the files that contain matched
# calcium data, stores (path, calcium+latents frame, cell matches) in
# pre_mod_data (path contains 'dark') or pre_normal_data (everything else).
importlib.reload(processing_parameters)

# define the threshold for matched cells
match_threshold = 10

# defaults for the summary printed at the end, so the report still works
# when no file contains matched calcium data
cell_idx = np.zeros(0, dtype=bool)
unique = np.zeros((1, 1), dtype=bool)
counts = np.zeros(1, dtype=int)

# get the data paths
try:
    data_path = snakemake.input[0]
except NameError:
    # get the search list
    search_list = processing_parameters.search_list
    # allocate memory for the data
    pre_normal_data = []
    pre_mod_data = []

    # allocate a list for all paths (need to preload to get the dates)
    all_paths = []
    # for all the search strings
    for search_string in search_list:

        # query the database for data to plot
        data_all = bd.query_database('analyzed_data', search_string)
        data_all = [el for el in data_all if 'preproc' in el['slug']]
        data_path = [el['analysis_path'] for el in data_all if '_preproc' in el['slug']]
        all_paths.append(data_path)
    # get the dates present (first 10 characters of each file name)
    data_dates = np.unique([os.path.basename(el)[:10] for el in np.concatenate(all_paths)])
    print(f'Dates present: {data_dates}')
    # now load the files
    for data_path in all_paths:
        # load the calcium data
        beh_data = []
        # for all the files
        for files in data_path:
            # open read-only: nothing is written here, and the default append
            # mode could modify or create files
            with pd.HDFStore(files, mode='r') as h:
                beh_data.append(h['full_traces'])
                if '/matched_calcium' in h.keys():
                    # get the cell matches
                    cell_matches = h['cell_matches']

                    # perform only if there are more files
                    if len(data_dates) > 1:
                        # get only the days present in the search
                        match_dates = [el for el in data_dates if el in cell_matches.columns]
                        cell_matches = cell_matches[match_dates]

                        # get the unique cell combinations
                        # unique contains the unique patterns followed by cells across days
                        # inverse indicates which pattern is followed by each cell
                        # count contains the number of times each pattern is found
                        unique, inverse, counts = np.unique(
                            ~np.isnan(cell_matches.to_numpy()), axis=0,
                            return_counts=True, return_inverse=True)
                        # remove the single day and no day cases
                        days_per_pattern = np.sum(unique, axis=1)
                        counts[days_per_pattern <= 1] = 0

                        # get an index vector with only the most popular pattern
                        # (regardless of how many cells share it); inverse is
                        # flattened defensively because its shape with axis=0
                        # has differed between NumPy releases
                        cell_idx = np.ravel(inverse) == np.argmax(counts)

                        cell_matches = cell_matches.iloc[cell_idx, :]
                    else:
                        counts = 1
                        # selecting with the date array gives an (n_cells, 1)
                        # array; collapse it to the 1D row mask .iloc needs
                        present = ~np.isnan(cell_matches[data_dates].to_numpy())
                        cell_idx = present.all(axis=1)
                        cell_matches = cell_matches.iloc[cell_idx, :]
                        unique = np.array([[1]])
                    # concatenate the latents
                    dataframe = pd.concat([h['matched_calcium'], h['latents']], axis=1)

                    # separate based on normal vs mod
                    if 'dark' in files:
                        pre_mod_data.append((files, dataframe, cell_matches))
                    else:
                        pre_normal_data.append((files, dataframe, cell_matches))

# NOTE: these numbers describe the last file that had matched calcium data
print(f'Number of matched cells: {np.sum(cell_idx)}')
print(f'Number of matched trials: {unique[np.argmax(counts)].sum()}')

In [None]:
# Leave only common cells across all datasets
#
# For every normal trial: keep the calcium columns selected by that day's
# entry in the cell-match table, rename them cell_0..cell_N, zero every
# sample below the cell's baseline and store the result together with the
# non-calcium columns in normal_data.

# percentile of the positive samples used as each cell's baseline
baseline_percentile = 8

# allocate memory for the cleaned up data
normal_data = []
mod_data = []

# for all the normal trials
for idx, el in enumerate(pre_normal_data):
    # get the date
    current_date = os.path.basename(el[0])[:10]
    # get the corresponding indexes
    current_idx = el[2][current_date].to_numpy()
    # skip the day if any matched cell has no entry for it
    if np.isnan(np.sum(current_idx)):
        continue
    # the match table is NaN-capable (float); positional indexing wants integers
    current_idx = current_idx.astype(int)
    # get the current df
    current_df = el[1]
    labels = list(current_df.columns)
    cells = [label for label in labels if 'cell' in label]
    not_cells = [label for label in labels if 'cell' not in label]
    # get the non-cell data
    non_cell_data = current_df[not_cells]
    # keep only the matched cells; copy so that the edits below act on an
    # independent frame and never on a slice of current_df
    cell_data = current_df[cells].iloc[:, current_idx].copy()
    # rename the cell fields
    cell_names = ['cell_' + str(number) for number in np.arange(cell_data.shape[1])]
    cell_data.columns = cell_names

    # calculate a baseline for all cells and clip the traces below it
    for name in cell_names:
        trace = cell_data[name]
        # skip if there are only zeros
        if np.sum(trace) == 0:
            continue
        # get the baseline from the positive samples
        baseline = np.percentile(trace[trace > 0], baseline_percentile)
        # clip the trace (mask returns a new Series, nothing is edited in place)
        cell_data[name] = trace.mask(trace < baseline, 0)

    # remove the nans
    cell_data = cell_data.fillna(0)
    # assemble a new data frame with only the matched cells and the rest of the data
    normal_data.append(pd.concat((non_cell_data, cell_data), axis=1))

print(normal_data[0].shape)
print(normal_data[0].columns)


In [None]:
# set up the feature and calcium matrices
#
# For every trial in normal_data that has all the requested features:
#   1. radial variables are split into cosine/sine components,
#   2. accelerations are derived from the speeds,
#   3. every regressor is expanded into basis_number smoothed, shifted copies,
#   4. a constant column is appended.
# The design matrices go to feature_trials, the untouched features to
# feature_raw_trials and the calcium traces to calcium_trials.

# list the radial features in the dataset
radial_features = ['cricket_0_delta_heading', 'cricket_0_visual_angle', 'mouse_heading',
                   'cricket_0_delta_head', 'cricket_0_heading', 'head_direction']
# define the design matrix
feature_list = ['mouse_speed', 'cricket_0_speed', 'mouse_x', 'mouse_y', 'cricket_0_x', 'cricket_0_y',
                'cricket_0_delta_heading', 'cricket_0_mouse_distance', 'cricket_0_visual_angle',
                'mouse_heading', 'cricket_0_delta_head', 'cricket_0_heading', 'head_direction',
                'latent_0', 'latent_1', 'latent_2', 'latent_3', 'latent_4',
                'latent_5', 'latent_6', 'latent_7', 'latent_8', 'latent_9']

# define the frame rate (fps)
frame_rate = 10
# define the width of the kernel (s), multiplied to convert to frames
sigma = 1*frame_rate
# calculate the kernel; window functions live in scipy.signal.windows (the
# top-level scipy.signal.gaussian alias is not available in recent SciPy)
kernel = ss.windows.gaussian(sigma*5, sigma)
# define the number of basis functions per regressor
basis_number = 9
# define the kernel spacing (in s)
kernel_spacing = 0.2*frame_rate
# get the total length of the kernel
total_length = kernel_spacing*(basis_number-1) + kernel.shape[0]
# width (in frames) of the median filter applied before differentiating
smoothing_window = 21

# allocate memory for the output
feature_trials = []
# allocate memory for a data frame without the encoding model features
feature_raw_trials = []
# allocate memory for the calcium
calcium_trials = []
# get the number of trials
trial_number = len(normal_data)
# get the features
for idx, el in enumerate(normal_data):
    # skip the trials that lack any of the requested features
    if any(feat not in el.columns for feat in feature_list):
        continue
    # get the features of interest (copy: columns are added/dropped below and
    # the trial frame in normal_data must stay untouched)
    target_features = el.loc[:, feature_list].copy()
    # save the original features for simpler calculations
    feature_raw_trials.append(target_features.copy())
    # get the original columns (columns added below are not revisited)
    original_columns = target_features.columns

    # turn the radial variables into linear ones
    # for all the columns
    for label in original_columns:
        # calculate head speed
        if label == 'head_direction':
            # get the head direction
            head = target_features[label].copy().to_numpy()
            # angular speed: frame-to-frame change of the smoothed direction
            # NOTE(review): the direction is circular and is not unwrapped
            # before differencing - confirm wrap-around jumps are acceptable
            speed = np.concatenate(([0], np.diff(ss.medfilt(head, smoothing_window))), axis=0)
            # angular acceleration: frame-to-frame change of the speed
            # (this used to differentiate the raw direction, which is an
            # unsmoothed speed rather than an acceleration)
            acceleration = np.concatenate(([0], np.diff(speed)), axis=0)
            # add to the features
            target_features['head_speed'] = speed
            target_features['head_acceleration'] = acceleration
        # check if the feature is radial
        if label in radial_features:
            # get the feature and convert to radians
            rad_feature = np.deg2rad(target_features[label].copy().to_numpy())
            # perform angular decomposition (assume unit circle)
            target_features[label+'_x'] = np.cos(rad_feature)
            target_features[label+'_y'] = np.sin(rad_feature)
            # drop the original column
            target_features = target_features.drop(columns=label)
        # check if the label is a speed and calculate acceleration
        if 'speed' in label:
            # get the speed
            speed = target_features[label].copy().to_numpy()
            # calculate the acceleration with the smoothed speed
            acceleration = np.concatenate(([0], np.diff(ss.medfilt(speed, smoothing_window))), axis=0)
            # add to the features
            target_features[label.replace('speed', 'acceleration')] = acceleration

    # Generate the gaussian convolved and displaced regressors
    # collect the per-regressor frames and join them once at the end
    basis_frames = []
    # for all the regressors
    for label in target_features:
        # get the variable
        variable = target_features[label].to_numpy().copy()
        # Remove nans
        variable[np.isnan(variable)] = 0

        # get the basis function-based predictors
        basis_frames.append(
            basis_predictors(variable, basis_number, kernel, kernel_spacing, total_length, label))
    new_dataframe = pd.concat(basis_frames, axis=1)

    # add a constant factor
    new_dataframe['constant'] = np.ones(new_dataframe.shape[0])

    # replace the old dataframe with the new one
    target_features = new_dataframe

    # store the columns
    resulting_columns = target_features.columns
    # turn the dataframe into an array and store it
    target_features = target_features.to_numpy()
    feature_trials.append(target_features)

    # get the calcium data
    cells = [cell for cell in el.columns if 'cell' in cell]
    cells = el.loc[:, cells].to_numpy()

    # store
    calcium_trials.append(cells)

# fail with a clear message instead of an IndexError in the prints below
if not feature_trials:
    raise ValueError('No trial contains all the features in feature_list')

print(f'Time by features: {feature_trials[0].shape}')
print(f'Time by ROIs: {calcium_trials[0].shape}')
print(resulting_columns)

In [None]:
# show the design matrix of the first trial (regressors along the vertical axis)
design_matrix_image = hv.Image(feature_trials[0].T)
design_matrix_image.opts(width=800, height=600, tools=['hover'])

In [None]:
# show the calcium traces of the first trial (cells along the vertical axis)
calcium_image = hv.Image(calcium_trials[0].T)
calcium_image.opts(width=800, height=600, tools=['hover'])

In [None]:
# plot a histogram of the calcium data

# pool the samples of every trial here: calcium_matrix is only created by the
# GLM cell further down, so relying on it breaks a top-to-bottom run
pooled_calcium = np.concatenate(calcium_trials, axis=0)
freq, bins = np.histogram(pooled_calcium.flatten(), bins=100)

hv.Bars((bins, freq)).opts(width=600).opts(xrotation=45, logy=True)

In [None]:
%%time
# run the GLM
#
# For every trial group the trials are concatenated in time and each cell's
# kernel-smoothed trace is fitted with a Tweedie (power=1) regressor on the
# first 70% of the samples; the remaining 30% are used for testing.
# Outputs (one entry per fitted cell): train_list / test_list (R2 scores),
# predictions (prediction over the whole group) and weights (coefficients).
# NOTE: coupled-cell predictors, other scalers (Standard/Robust/MinMax) and
# other models (ElasticNet, logistic regression, NNLS) were explored before.

# define the trials to run together
trial_groups = [list(np.arange(len(feature_trials)))]
# allocate memory for the performances
train_list = []
test_list = []
# allocate memory for the predictions and the weights
predictions = []
weights = []
# get the cell number (from the per-trial data: calcium_matrix is only
# assembled inside the loop below)
cell_number = calcium_trials[0].shape[1]

# for all the trial groups
for trial_idx in trial_groups:
    # concatenate the trials
    feature_matrix = np.concatenate([feature_trials[el] for el in trial_idx], axis=0)
    calcium_matrix = np.concatenate([calcium_trials[el] for el in trial_idx], axis=0)

    # The split is chronological (shuffle=False), so the design matrix is
    # divided and scaled in the same way for every cell: do it once per group.
    feature_train, feature_test = \
        train_test_split(feature_matrix, test_size=0.3, shuffle=False)
    # generate the scaler using only the train set
    scaler = preprocessing.MaxAbsScaler().fit(feature_train)
    # scale the features (transform returns copies, feature_matrix is untouched)
    feature_train = scaler.transform(feature_train)
    feature_test = scaler.transform(feature_test)
    feature_all = scaler.transform(feature_matrix)

    # for all the cells
    for idx, cell in enumerate(calcium_matrix.T):

        # convolve the cell with the kernel used above
        cell = np.convolve(cell, kernel, 'same')

        # split the trace at the same point as the features
        cell_train, cell_test = train_test_split(cell, test_size=0.3, shuffle=False)

        # initialize the regressor (the constant column acts as intercept)
        linear = lin.TweedieRegressor(alpha=.01, max_iter=5000, fit_intercept=False, power=1)

        # train the regressor
        linear.fit(feature_train, cell_train)
        # predict train and test
        linear_pred = linear.predict(feature_train)
        linear_pred_last = linear.predict(feature_test)
        # save the predictions
        predictions.append(linear.predict(feature_all))
        # save the weights
        weights.append(linear.coef_)

        # save the performances
        train_list.append(smet.r2_score(cell_train, linear_pred))
        test_list.append(smet.r2_score(cell_test, linear_pred_last))

# report the number of positive test fits
print(f'Positive cell fits: {np.sum([1 for el in test_list if el > 0])}')
# generate histograms with the performances
train_hist, train_edges = np.histogram(train_list)
test_hist, test_edges = np.histogram(test_list, bins=50)

train_h = hv.Bars((train_edges, train_hist)).opts(xrotation=45, width=800)
test_h = hv.Bars((test_edges, test_hist)).opts(xrotation=45, width=800)
# also a scatter with train and test for every cell
combined = hv.Scatter((train_list, test_list)).opts(xrotation=45, ylim=(-1, 1))

# kept as the last top-level expression so that the notebook renders it
# (inside the loop the layout was built but never displayed)
(train_h+test_h+combined).opts(width=800, shared_axes=False).cols(1)

    

In [None]:
# plot the fits

# pick the five cells with the highest test score
target_cells = np.argsort(test_list)[-5:]

# allocate memory for the plots
real_list = []
pred_list = []
# walk through the selected cells, best one first
for cell in reversed(target_cells):
    # smooth the measured trace with the same kernel used for the fit
    smoothed_trace = np.convolve(calcium_matrix[:, cell], kernel, 'same')

    # measured activity and model prediction, overlaid in one panel
    observed = hv.Curve(smoothed_trace, kdims=['Time'], vdims=['Activity'])
    observed = observed.opts(width=800, tools=['hover'])
    fitted = hv.Curve(predictions[cell]).opts(width=800)
    real_list.append(observed*fitted)

# one panel per cell, stacked in a single column
hv.Layout(real_list).opts(shared_axes=False).cols(1)

In [None]:
# Visualize weights

# stack the per-cell coefficient vectors into a cells x regressors array
weight_array = np.vstack(weights)
roi_number = weight_array.shape[0]
print(weight_array.shape)

# leave out the last column and fold the remaining ones into
# cells x regressors x basis functions
weight_array = weight_array[:, :-1].reshape((roi_number, -1, basis_number))
print(weight_array.shape)

# one image per cell
weight_list = [
    hv.Image(np.squeeze(cell_weights)).opts(width=250, tools=['hover'])
    for cell_weights in weight_array
]

hv.Layout(weight_list).opts(shared_axes=False)
# # condense the regressors from each parameter into one
# weight_array = np.array([weight_array[:, el:el+basis_number].sum(axis=1) 
#                          for el in np.arange(int(weight_array.shape[1]/basis_number))])

# print(' '.join(('Regressors by cells:', str(weight_array.shape))))
# # visualize
# hv.Image(weight_array).opts(width=800, height=800)

In [None]:
# Calculate the per cell rev correlation
#
# Builds a lagged design matrix (2*target_interval consecutive frames of
# every design-matrix column per time point) and computes, for each cell, the
# estimate inv(X'X) X'y. Estimates are stored in ols_list; when X'X cannot be
# inverted every entry is an empty list.

# define the interval for calculation in frames
target_interval = 10
# number of frames in each window
window_length = 2*target_interval

# get the behavioral data (the design matrix assembled by the GLM cell)
beh_columns = len(resulting_columns)
current_behavior = feature_matrix.copy()
# get the number of time points
time_number = calcium_matrix.shape[0]
# get the number of cells
roi_number = calcium_matrix.shape[1]

print(f'Number of features: {beh_columns}')
print(f'Number of ROIs: {roi_number}')
print(f'Number of time points: {time_number}')

# pad the behavior data with target_interval empty frames on each side
padded_behavior = np.vstack((np.zeros((target_interval, beh_columns)), current_behavior,
                             np.zeros((target_interval, beh_columns)))).T
# remove nans (heads up, there are a lot)
padded_behavior[np.isnan(padded_behavior)] = 0

# The lagged matrix, its covariance and the inverse depend only on the
# behavior, so they are computed once instead of once per cell.
time_matrix = np.zeros((time_number, window_length*beh_columns))
for frame in np.arange(time_number):
    # flatten() orders the row by regressor first, then by lag
    time_matrix[frame, :] = padded_behavior[:, frame:frame+window_length].flatten()
# calculate the covariance matrix
cov_matrix = time_matrix.T@time_matrix
try:
    cov_inverse = np.linalg.inv(cov_matrix)
except np.linalg.LinAlgError:
    # singular matrix: no estimate can be computed for any cell
    cov_inverse = None

# allocate memory to save the ols
ols_list = []
prediction_list = []
r2_list = []
# for all the cells
for cells in np.arange(roi_number):
    # get the calcium data (kept as a column vector)
    current_calcium = calcium_matrix[:, cells:cells+1]
    # calculate the STA
    sta = time_matrix.T@current_calcium
    # calculate the ols estimate
    ols = cov_inverse @ sta if cov_inverse is not None else []
    ols_list.append(ols)
    

In [None]:
# Visualize the STAs
#
# Each estimate is reshaped to (design-matrix columns) x (lags). The rows
# therefore follow resulting_columns (names '<regressor>_<basis index>' plus
# 'constant'), not feature_list. For the radial variables the row of the
# cosine ('_x') component is replaced by the angle formed with the matching
# sine ('_y') row of the same basis index.
# NOTE(review): the previous version matched rows against feature_list and
# looked for the sine component at idx+1, so the conversion never ran;
# confirm this pairing is the intended one.

# row labels of the reshaped estimates and their positions
row_labels = list(resulting_columns)
row_lookup = {name: position for position, name in enumerate(row_labels)}
# names of the cosine components of the radial variables
new_radials = [el+'_x' for el in radial_features]

# allocate memory for the plots
plot_ols = []
# labels of the plotted rows (stays empty when no estimate is available)
new_features = []

# for all the estimates
for ols in ols_list:
    # cells without an estimate are stored as empty lists
    if isinstance(ols, list):
        continue
    ols = ols.reshape(beh_columns, target_interval*2)

    # return the radial variables to angles
    new_ols = []
    new_features = []
    for idx, feature in enumerate(row_labels):
        # split '<regressor>_<basis index>' at the last underscore
        base, separator, basis = feature.rpartition('_')
        # sine row that belongs to this cosine row, if this is one
        partner = base[:-2] + '_y_' + basis
        if base in new_radials and partner in row_lookup:
            new_feature = np.rad2deg(np.arctan2(ols[row_lookup[partner], :], ols[idx, :]))
            new_ols.append(new_feature)
            new_features.append(base[:-2] + '_' + basis)
        else:
            new_ols.append(ols[idx, :])
            new_features.append(feature)
    ols = np.array(new_ols)

    # generate the image
    current_plot = hv.Image(ols).opts(tools=['hover'])
    # save on the list
    plot_ols.append(current_plot)
print(new_features)
# show the plot
hv.Layout(plot_ols).opts(shared_axes=True).cols(3)


In [None]:
# Generate 2D "tuning curves"
#
# Trials are split into n_groups consecutive groups. For each group the two
# behavioral features are binned on a 20x20 grid and, for every cell, the
# summed activity per bin is divided by the bin occupancy. hist_list keeps
# the per-cell maps of each group; the final layout interleaves the groups.
# The per-group data uses its own names so that calcium_matrix, roi_number
# and idx from the earlier cells are not overwritten.

# define trial groups (left-over trials are not used)
n_groups = 2
interval = len(calcium_trials)//n_groups
trial_groups = [np.arange(interval) + group*interval for group in np.arange(n_groups)]

# define the features to use
tc_features = ['cricket_0_mouse_distance', 'cricket_0_delta_heading']
# other pairs that have been used:
# tc_features = ['mouse_speed', 'latent_0']
# tc_features = ['latent_1', 'latent_0']
# tc_features = ['cricket_0_x', 'cricket_0_y']
# tc_features = ['mouse_speed', 'cricket_0_mouse_distance']
# tc_features = ['mouse_y', 'mouse_x']
# tc_features = ['cricket_0_visual_angle', 'cricket_0_delta_head']

print(feature_raw_trials[0].columns)
# allocate the meta plot list
meta_list = []
hist_list = []
# define the font size
fontsize = {
    'ticks': 11,
    'labels': 13
}

# define the plot labels
label_x = tc_features[1]
label_y = tc_features[0]
# styling shared by every map
image_options = dict(width=250, tools=['hover'], fontsize=fontsize, xrotation=45)
# width (in frames) of the median filter applied to the features
smoothing_window = 21
# number of bins per axis
bin_number = 20
# bins visited fewer times than this are blanked in the per-cell maps
min_occupancy = 3

# for all the trial groups
for trial_idx in trial_groups:
    # allocate the plot list
    tc_list = []
    hist_temp_list = []

    feature_raw = [feature_raw_trials[el] for el in trial_idx]
    group_calcium = np.concatenate([calcium_trials[el] for el in trial_idx], axis=0)
    # get the relevant features, without nans and median filtered
    feature0 = pd.concat([el[tc_features[0]] for el in feature_raw]).fillna(0)
    feature0 = ss.medfilt(feature0.to_numpy(), smoothing_window)
    feature1 = pd.concat([el[tc_features[1]] for el in feature_raw]).fillna(0)
    feature1 = ss.medfilt(feature1.to_numpy(), smoothing_window)

    print(group_calcium.shape)

    # occupancy: number of samples that fall in each bin
    st_base, x_edge, y_edge, bin_assignment = \
        stat.binned_statistic_2d(feature0, feature1, [], bins=bin_number, statistic='count')
    # empty bins are set to 1 so they can be used as divisor and in the log
    st_base[st_base==0] = 1
    plot_st_base = np.log10(st_base)
    plot_st_base[np.isinf(plot_st_base)] = 0
    im_plot = hv.Image((y_edge, x_edge, plot_st_base), kdims=[label_x, label_y])
    tc_list.append(im_plot.opts(**image_options))

    # store the matrices for averaging
    st_store = []
    # for all the cells
    for cells in np.arange(group_calcium.shape[1]):
        # get the calcium
        current_calcium = group_calcium[:, cells]

        # build 2d distribution (summed activity per bin)
        st, x_edge, y_edge, bin_assignment = \
            stat.binned_statistic_2d(feature0, feature1, current_calcium,
                                     bins=bin_number, statistic='sum')
        # blank the poorly sampled bins and normalize by occupancy
        st[st_base<min_occupancy] = 0
        st_plot = (st/st_base)
        st_plot[np.isnan(st_plot)] = 0
        im_plot = hv.Image((y_edge, x_edge, st_plot), kdims=[label_x, label_y])
        tc_list.append(im_plot.opts(cmap='viridis', **image_options))
        st_store.append(st)

        # store the actual map too
        hist_temp_list.append(st_plot)

    # average map across cells, normalized by occupancy
    plot_ave = np.mean(st_store, axis=0)/st_base
    im_plot = hv.Image((y_edge, x_edge, plot_ave), kdims=[label_x, label_y])
    tc_list.append(im_plot.opts(**image_options))

    meta_list.append(tc_list)
    hist_list.append(hist_temp_list)

# reorder the list so that the maps of the different groups are interleaved
meta_list = [val for tup in zip(*meta_list) for val in tup]
# create the layout
hv.Layout(meta_list).opts(shared_axes=False)


In [None]:
# calculate correlations between maps and plot
#
# Correlates each cell's map in the first trial group with its map in the
# second one and compares the distribution with a shuffle control in which
# the cell order of both groups is randomized.


def _map_correlation(map_a, map_b):
    """Return the Pearson correlation of two flattened maps (0 when undefined)."""
    corr_coef = np.corrcoef(map_a.flatten(), map_b.flatten())[1][0]
    return 0 if np.isnan(corr_coef) else corr_coef


# correlation of every cell with itself across groups
correlation_array = np.array([_map_correlation(one, two)
                              for one, two in zip(hist_list[0], hist_list[1])])

# define the number of shuffles
shuffle_number = 100
# seed of the shuffle control, fixed so the figure is reproducible
shuffle_seed = 0
shuffler = random.Random(shuffle_seed)
# allocate memory for the shuffle results
shuffle_array = np.zeros((correlation_array.shape[0], shuffle_number))
# for all the shuffles
for shuffle in np.arange(shuffle_number):
    # shuffle the lists
    list_0 = shuffler.sample(hist_list[0], len(hist_list[0]))
    list_1 = shuffler.sample(hist_list[1], len(hist_list[0]))
    # correlate the randomly paired cells
    shuffle_array[:, shuffle] = [_map_correlation(one, two)
                                 for one, two in zip(list_0, list_1)]

# cumulative distributions; a density has to be multiplied by the bin width
# before accumulating for the curve to be a probability
freq, bins = np.histogram(correlation_array, density=True, bins=20)
bin_widths = np.diff(bins)
bin_centers = bins[:-1] + bin_widths/2
hist_ori = hv.Curve((bin_centers, np.cumsum(freq*bin_widths)),
                    kdims=['Correlation'], vdims=['Probability'], label='Data')
hist_ori.opts(fontsize=fontsize)

# same bins for the shuffle so both curves are directly comparable
freq, bins = np.histogram(shuffle_array.flatten(), density=True, bins=bins)
hist_shuffle = hv.Curve((bin_centers, np.cumsum(freq*bin_widths)),
                        kdims=['Correlation'], vdims=['Probability'], label='Shuffle')


(hist_ori*hist_shuffle)

In [None]:
# ROC analysis?

