In [None]:
import os
import sys
sys.path.insert(0, os.path.abspath(r'D:\Code Repos\prey_capture'))

import panel as pn
import holoviews as hv
from holoviews import opts, dim
hv.extension('bokeh')
from bokeh.resources import INLINE

import paths
import functions_plotting as fp
import functions_loaders as fl
import functions_bondjango as bd
import processing_parameters

import importlib
import numpy as np
import pandas as pd
import h5py
import scipy.stats as stat
import sklearn.preprocessing as prep
from holoviews import Store
import datetime
import umap
from rastermap import Rastermap


In [None]:
# Figure configuration

# reload the project modules before reading anything from them
importlib.reload(fp)
importlib.reload(processing_parameters)

# saving target: folder for the figures of this notebook
save_path = os.path.join(paths.figures_path, 'Regression_vis')

# printing mode and the document the figures are formatted for
save_mode = True
target_document = 'paper'

# apply the figure theme
fp.set_theme()

# label dictionary and variable list from the processing parameters
label_dict = processing_parameters.label_dictionary
variable_list = processing_parameters.variable_list

In [None]:
def match_cells(files):
    """Load the cell-matching indexes for one combined-analysis file.

    The companion preprocessing file (same path with '_combinedanalysis' replaced by
    '_preproc') is opened and, if it holds a 'cell_matches' table, the column for the
    recording day encoded in the file name is read.

    Parameters
    ----------
    files : str
        Path to a '_combinedanalysis' file. The base name is split on '_': fields 0-2
        are the date ('MM_DD_YYYY') and fields 7-9 the animal name.

    Returns
    -------
    numpy.ndarray or None
        Float array with the argsort of the match column, restricted to the positions
        whose sorted match value is not NaN. None when the preprocessing file has no
        'cell_matches' table.
    """
    # assemble the preproc path
    files_preproc = files.replace('_combinedanalysis', '_preproc')

    # get the animal and day from the file name instead of relying on notebook globals
    name_parts = os.path.basename(files).split('_')
    animal = '_'.join(name_parts[7:10])
    day_s = '_'.join(name_parts[:3])
    # round trip through datetime to validate the date and normalize the zero padding
    day = datetime.datetime.strptime(day_s, '%m_%d_%Y')
    day_key = datetime.datetime.strftime(day, '%m_%d_%Y')

    # default output, returned when there are no matches stored in the file
    current_idx = None
    # open the file
    with pd.HDFStore(files_preproc, 'r') as preproc:
        if '/cell_matches' in preproc.keys():
            # get the matches
            cell_matches = preproc['cell_matches']

            print(animal, day_s, files_preproc)
            # get the idx for this file
            current_matches = cell_matches[day_key].to_numpy()
            current_idx = np.argsort(current_matches).astype(float)
            # remove the nan entries (NaN sorts last, so the mask trims the tail)
            current_idx = current_idx[~np.isnan(np.sort(current_matches))]
    return current_idx

In [None]:
# Query the database for the files to analyze

# reload the parameters before reading from them
importlib.reload(processing_parameters)

# constants from the regression calculation
time_shifts = processing_parameters.time_shifts
shift_number = len(time_shifts)
# map each time shift to its position in the list
shift_dict = dict(zip(time_shifts, range(shift_number)))
shuffles = processing_parameters.regression_repeats

# variable list
variable_list = processing_parameters.variable_list
# dataframe columns: one real and one shuffle-mean column per time shift, plus the metadata
reals = ['real_' + label for label in map(str, time_shifts)]
shuffle_means = ['smean_' + label for label in map(str, time_shifts)]
columns = [*reals, *shuffle_means, 'mouse', 'day']

# search strings to run against the database
search_list = processing_parameters.search_list

# paths and results are preloaded for every search string (needed to get the dates)
all_paths = []
all_results = []
for search_string in search_list:
    # query the database and keep only the combined analysis entries
    data_all = bd.query_database('analyzed_data', search_string)
    combined_entries = [el for el in data_all if '_combinedanalysis' in el['slug']]
    data_path = [el['analysis_path'] for el in combined_entries]
    data_result = [el['result'] for el in combined_entries]
    all_paths.append(data_path)
    all_results.append(data_result)

# the first 10 characters of each file name are taken as its date
date_prefixes = [os.path.basename(el)[:10] for el in np.concatenate(all_paths)]
data_dates = np.unique(date_prefixes)
print(f'Dates present: {data_dates}')

In [None]:
# main loop: collect the regression correlation coefficients of every file into a dataframe

# allocate the outputs
correlations = []
# get the regression types
regressors = processing_parameters.regressors

# bookkeeping of the animal/day combinations already seen; allocated here so this
# cell does not depend on lists that are only created by a later cell
day_list = []
animal_list = []
joint_list = []

# for all the list items
for idx0, data_path in enumerate(all_paths):

    # for all the files
    for idx1, files in enumerate(data_path):

        # if a habi trial, skip
        if 'habi' in files:
            continue

        # get the animal and date from the slug
        name_parts = os.path.basename(files).split('_')
        animal = '_'.join(name_parts[7:10])
        day_s = '_'.join(name_parts[:3])
        time_s = '_'.join(name_parts[3:6])
        day = datetime.datetime.strptime(day_s, '%m_%d_%Y')
        # flag the animal/day combinations that were already seen
        # (the flag is recorded but not used to skip anything in this cell)
        skip_flag = animal+'_'+day_s in joint_list
        if not skip_flag:
            animal_list.append(animal)
            day_list.append(day)
            joint_list.append(animal+'_'+day_s)

#         # get the cell matches (UNUSED FOR NOW)
#         current_idx = match_cells(files)

        # load the regression results of this file
        with h5py.File(files, 'r') as h:
            if 'regression' not in h.keys():
                continue
            # get the keys present
            key_list = h['regression'].keys()
            # NOTE(review): all the filters below select keys by substring, so for example
            # str(0) also matches a key for shift 10 and a feature name that is a prefix
            # of another one matches both; the assert catches the ambiguous cases
            # for all the variables
            for feature in variable_list:
                cc_feature_list = []
                # get the feature keys
                current_feature = [el for el in key_list if feature in el]
                # for all the regression types
                for reg in regressors:
                    # get the relevant keys
                    current_regressor = [el for el in current_feature if reg in el]
                    # for real vs shuffle
                    for rvs in ['real', 'shuffle']:
                        # get the real/shuffle keys
                        current_rvs = [el for el in current_regressor if rvs in el]
                        # for the time shifts
                        for shift in time_shifts:
                            # get the current time keys
                            current_shift = [el for el in current_rvs if str(shift) in el]

                            # process the correlations (exclude the std from reps)
                            current_correlation = [el for el in current_shift if ('cc' in el) and ('_std' not in el)]
                            # the check fails for missing keys as well as for ambiguous ones
                            assert len(current_correlation) == 1, \
                                f'expected exactly one cc key for {feature}/{reg}/{rvs}/shift {shift}, ' \
                                f'found {len(current_correlation)}'
                            cc_feature_list.append([feature, np.array(h['/regression/'+current_correlation[0]]), reg, rvs, str(shift), animal, day])

                            # TODO: process the weights for the linear decoder
                            # (the original branch for reg == 'linear' had no body)

                # save the entries of this feature
                correlations.extend(cc_feature_list)

# convert to dataframe
correlations = pd.DataFrame(correlations, columns=['feature', 'cc', 'regressor', 'rvs', 'shift', 'mouse', 'day'])
correlations['cc'] = correlations['cc'].astype(float)

print(f'Shape of the last feature dataframe: {correlations.shape}')

In [None]:
# Query the files and allocate the containers for their regression results

# reload the parameters before reading from them
importlib.reload(processing_parameters)

# constants from the regression calculation
time_shifts = processing_parameters.time_shifts
shift_number = len(time_shifts)
# map each time shift to its position in the list
shift_dict = dict(zip(time_shifts, range(shift_number)))
shuffles = processing_parameters.regression_repeats

# variable list
variable_list = processing_parameters.variable_list
# dataframe columns: one real and one shuffle-mean column per time shift, plus the metadata
reals = ['real_' + label for label in map(str, time_shifts)]
shuffle_means = ['smean_' + label for label in map(str, time_shifts)]
columns = [*reals, *shuffle_means, 'mouse', 'day']

# search strings to run against the database
search_list = processing_parameters.search_list

# paths and results are preloaded for every search string (needed to get the dates)
all_paths = []
all_results = []
for search_string in search_list:
    # query the database and keep only the combined analysis entries
    data_all = bd.query_database('analyzed_data', search_string)
    combined_entries = [el for el in data_all if '_combinedanalysis' in el['slug']]
    data_path = [el['analysis_path'] for el in combined_entries]
    data_result = [el['result'] for el in combined_entries]
    all_paths.append(data_path)
    all_results.append(data_result)

# the first 10 characters of each file name are taken as its date
date_prefixes = [os.path.basename(el)[:10] for el in np.concatenate(all_paths)]
data_dates = np.unique(date_prefixes)
print(f'Dates present: {data_dates}')

# containers for the results, keyed by target variable
data = {}
weights = {}
predictions = {}
predictions_meta = {}

# animals, days and animal/day combinations already processed
day_list = []
animal_list = []
joint_list = []
# for all the list items
for idx0, data_path in enumerate(all_paths):

    # for all the files
    for idx1, files in enumerate(data_path):

        # if a habi trial, skip
        if 'habi' in files:
            continue

        # get the animal and date from the slug
        name_parts = os.path.basename(files).split('_')
        animal = '_'.join(name_parts[7:10])
        day_s = '_'.join(name_parts[:3])
        time_s = '_'.join(name_parts[3:6])
        day = datetime.datetime.strptime(day_s, '%m_%d_%Y')
        # skip the CC and weights if the animal and day are already evaluated,
        # since the CC is the same for the whole day
        skip_flag = animal+'_'+day_s in joint_list
        if not skip_flag:
            animal_list.append(animal)
            day_list.append(day)
            joint_list.append(animal+'_'+day_s)

        # assemble the preproc path
        files_preproc = files.replace('_combinedanalysis', '_preproc')
        # reset the matches so a file without them cannot reuse the indexes of the previous file
        current_idx = None
        # open the file
        with pd.HDFStore(files_preproc, 'r') as preproc:
            if '/cell_matches' in preproc.keys():
                # get the matches
                cell_matches = preproc['cell_matches']

                print(animal, day_s, files_preproc)
                # get the idx for this file
                current_matches = cell_matches[datetime.datetime.strftime(day, '%m_%d_%Y')].to_numpy()
                current_idx = np.argsort(current_matches).astype(float)
                # remove the nan entries
                current_idx = current_idx[~np.isnan(np.sort(current_matches))]

        # load the data and the cell matches (wasteful, but cleaner I think)
        with h5py.File(files, 'r') as h:

            # for all the target variables
            for target_variable in variable_list:
                # create the empty lists only the first time this variable runs
                # NOTE(review): the lists are only created for variables whose name contains
                # 'linear'; any other variable raises a KeyError further down - confirm intent
                if (target_variable not in data.keys()) and ('linear' in target_variable):
                    data[target_variable] = []
                    weights[target_variable] = []
                    predictions[target_variable] = []

                # allocate memory for the real and shuffled regressions
                real_array = np.zeros((shift_number))
                shuffle_array = np.zeros((shift_number))
                real_weight = []
                shuffle_weight = []
                real_svr_array = np.zeros((shift_number))
                shuffle_svr_array = np.zeros((shift_number))
                # reset the predictions so a missing key cannot reuse the values
                # loaded for a previous variable or file
                real_prediction = None
                shuffle_prediction = None

                if 'regression' not in h.keys():
                    continue

                # for all the keys (will iterate through shifts and reps for shuffle)
                for key in h['/regression'].keys():
                    # skip if it's not the target variable or is one of the error terms
                    if (target_variable not in key) or ('_std' in key):
                        continue
                    # get the time shift from the key field that contains 'shift'
                    key_parts = key.split('_')
                    shift = int([el[5:] for el in key_parts if 'shift' in el][0])

                    # plain boolean logic here: ~ on a Python bool is a bitwise inversion
                    if ('cc' in key) and (not skip_flag):
                        # save the values at the position of this shift
                        if 'real' in key_parts:
                            real_array[shift_dict[shift]] = np.array(h['/regression/'+key])
                        else:
                            shuffle_array[shift_dict[shift]] = np.array(h['/regression/'+key])

                    elif ('coefficients' in key) and (shift == 0) and (not skip_flag):
                        if 'real' in key_parts:
                            real_weight = np.array(h['/regression/'+key])
                        else:
                            shuffle_weight = np.array(h['/regression/'+key])
                    elif ('prediction' in key) and (shift == 0):
                        if 'real' in key_parts:
                            real_prediction = np.array(h['/regression/'+key])
                        else:
                            shuffle_prediction = np.array(h['/regression/'+key])

                if not skip_flag:
                    # add the columns to the main list
                    data[target_variable].append(list(real_array) + list(shuffle_array) + [animal, day])

                    # no weights were found for this variable (real_weight is still the empty list)
                    # NOTE(review): this continue also skips storing the predictions below - confirm intent
                    if isinstance(real_weight, list):
                        continue
                    # assemble the weight dataframe
                    temp_df = pd.DataFrame(np.vstack((real_weight, shuffle_weight)).T, columns=['weight', 'shuffle_weight'])
                    # files without cell matches get NaN instead of stale indexes
                    temp_df['match_id'] = current_idx if current_idx is not None else np.nan
                    temp_df['animal'] = animal
                    temp_df['day'] = day
                    # store
                    weights[target_variable].append(temp_df)
                # format the datetime for the predictions
                date_time = str(datetime.datetime.strptime('_'.join(name_parts[:6]), '%m_%d_%Y_%H_%M_%S'))
                # store the predictions (None when the file has no prediction for this variable)
                predictions[target_variable].append([real_prediction, shuffle_prediction, animal, date_time])

# Convert the per-variable lists into their final containers
for target_variable in variable_list:
    # CC table, ordered by mouse and then by day
    feature_frame = pd.DataFrame(data[target_variable], columns=columns)
    data[target_variable] = feature_frame.sort_values(['mouse', 'day'], axis=0)
    # single dataframe with the weights of all files
    weights[target_variable] = pd.concat(weights[target_variable], axis=0)

    # each prediction entry is [real, shuffle, mouse, datetime]:
    # split it into the metadata and the actual predictions
    prediction_entries = predictions[target_variable]
    temp_meta = [[entry[2], entry[3]] for entry in prediction_entries]
    predictions[target_variable] = [[entry[0], entry[1]] for entry in prediction_entries]
    predictions_meta[target_variable] = pd.DataFrame(temp_meta, columns=['mouse', 'datetime'])

    print(f'Shape of the data dictionary: {data[target_variable].shape}')
    print(f'Shape of the weights dataframe: {weights[target_variable].shape}')
    print(f'Shape of the prediction list: {len(predictions[target_variable])}')

In [None]:
# plot the performance over days, one real/shuffle scatter pair per mouse

# define the target time point
target_tpoint = '0'
# allocate the plot list
plot_list = []

# for all the variables
for target_variable in variable_list:

    # take the real and shuffle data from the target time point
    collapsed_data = data[target_variable][['real_'+target_tpoint, 'smean_'+target_tpoint, 'mouse', 'day']]
    # allocate a list for this feature's plot
    mouse_list = []
    # for all the mice
    for mouse_name, mouse_data in collapsed_data.groupby('mouse'):

        # reformat day as a delta in days from the first row of this mouse
        day_data = mouse_data.loc[:, 'day'].to_numpy().copy()
        delta_days = ((day_data - day_data[0])/np.timedelta64(1, 'D')).astype(int)
        # build a new frame instead of writing into the groupby slice: this replaces the
        # datetime column with the integer deltas and avoids chained-assignment issues
        mouse_data = mouse_data.assign(day=delta_days)

        # keep at most the first 10 rows of each mouse
        mouse_data = mouse_data.iloc[:10, :]

        # plot
        real_mean = hv.Scatter(mouse_data, kdims='day', vdims='real_'+target_tpoint)
        real_mean.opts(width=400, color='r', title=target_variable)
        shuffle_mean = hv.Scatter(mouse_data, kdims='day', vdims='smean_'+target_tpoint)
        shuffle_mean.opts(color='k')

        mouse_list.append(real_mean*shuffle_mean)
    plot_list.append(hv.Overlay(mouse_list))
hv.Layout(plot_list)

In [None]:
# performance across days and mice: mean and SEM over mice for each day delta

# define the target time point
target_tpoint = '0'
# allocate the plot list
plot_list = []

# for all the variables
for target_variable in variable_list:
    # get the current feature
    current_feature = data[target_variable].copy()

    # replace the day column for a delta
    # allocate memory for the new dates
    new_days = []
    # for all the mice
    for mouse_name, mouse_data in current_feature.groupby('mouse')['day']:
        # reformat day as a delta in days from the first row of this mouse
        day_data = mouse_data.to_numpy()
        delta_days = ((day_data - day_data[0])/np.timedelta64(1, 'D')).astype(int)
        # keep the row labels so the deltas go back to the rows they came from
        new_days.append(pd.Series(delta_days, index=mouse_data.index))
    # replace the days; the assignment aligns on the index and swaps the datetime column
    # for the integer one instead of writing the integers into it in place
    current_feature['day'] = pd.concat(new_days)

    # group the real and shuffle data from the center time point over days
    means = current_feature.groupby(['day'], as_index=False)[['real_'+target_tpoint, 'smean_'+target_tpoint]].mean()
    sems = current_feature.groupby(['day'], as_index=False)[['real_'+target_tpoint, 'smean_'+target_tpoint]].sem().fillna(0).drop(['day'], axis=1)
    sems = sems.rename({el:el+'_sem' for el in sems.columns}, axis=1)
    # concatenate and keep at most the first 10 day deltas
    collapsed_data = pd.concat((means, sems), axis=1).iloc[:10, :]

    # plot
    real_mean = hv.Curve(collapsed_data, kdims='day', vdims='real_'+target_tpoint)
    real_mean.opts(width=400, color='r', title=target_variable)
    real_sem = hv.Spread(collapsed_data, kdims='day', vdims=['real_'+target_tpoint, 'real_'+target_tpoint+'_sem'])
    real_sem.opts(width=400, color='r', title=target_variable)
    shuffle_mean = hv.Curve(collapsed_data, kdims='day', vdims='smean_'+target_tpoint)
    shuffle_mean.opts(color='k')
    shuffle_sem = hv.Spread(collapsed_data, kdims='day', vdims=['smean_'+target_tpoint, 'smean_'+target_tpoint+'_sem'])
    shuffle_sem.opts(color='k')

    plot_list.append(real_mean*real_sem*shuffle_mean*shuffle_sem)
hv.Layout(plot_list)


In [None]:
# Average kernel per animal (real in red, shuffle in black), one overlay per variable

overlay_list = []
for target_variable in variable_list:
    # per-mouse statistics across days of the real and shuffle columns
    per_mouse = data[target_variable].groupby(['mouse'], as_index=False)[reals+shuffle_means]
    averages = per_mouse.mean()
    # the SEM is computed but currently not drawn
    sems = per_mouse.sem().fillna(0)

    # one real/shuffle scatter pair for every mouse
    plot_list = []
    for idx, (mouse, df) in enumerate(averages.groupby(['mouse'])):
        real_values = df[reals].to_numpy().ravel()
        shuffle_values = df[shuffle_means].to_numpy().ravel()

        real_plot = hv.Scatter((time_shifts, real_values))
        real_plot.opts(width=400, height=400, title=target_variable, color='red')
        shuffle_plot = hv.Scatter((time_shifts, shuffle_values))
        shuffle_plot.opts(color='black')

        plot_list.append(real_plot*shuffle_plot)

    overlay_list.append(hv.Overlay(plot_list))

hv.Layout(overlay_list).cols(3)


In [None]:
# Kernel averaged over all rows (mice and days), with the SEM as a spread

overlay_list = []
for target_variable in variable_list:
    # mean and SEM of every real and shuffle column
    shift_values = data[target_variable].loc[:, reals+shuffle_means]
    averages = shift_values.mean(axis=0)
    sems = shift_values.sem(axis=0)

    # split the statistics into the real and the shuffle part
    real_average = averages.loc[reals].to_numpy().flatten()
    real_spread = sems.loc[reals].to_numpy().flatten()
    shuffle_average = averages.loc[shuffle_means].to_numpy().flatten()
    shuffle_spread = sems.loc[shuffle_means].to_numpy().flatten()

    # curves for the averages, spreads for the errors
    real_plot = hv.Curve((time_shifts, real_average), kdims='Time shift', vdims='Performance (cc)')
    real_sem = hv.Spread((time_shifts, real_average, real_spread))
    real_plot.opts(width=400, height=400, title=target_variable)
    shuffle_plot = hv.Curve((time_shifts, shuffle_average))
    shuffle_error = hv.Spread((time_shifts, shuffle_average, shuffle_spread))

    overlay_list.append(real_plot*real_sem*shuffle_plot*shuffle_error)

hv.Layout(overlay_list).cols(3)
    

In [None]:
# debugging aid: render the first BoxWhisker element of the overlay and print the
# attribute dictionary of the glyph of its first renderer
# NOTE(review): `violin` is only assigned by the box plot cells further down, so this
# cell fails on a fresh kernel unless one of those cells has been run first
print(hv.render(violin.BoxWhisker.I).renderers[0]._property_values['glyph'].__dict__)
# hv.help(hv.BoxWhisker)

# Regression box plot 

In [None]:

# Box plot of the linear regression CC per feature, real (cyan) vs shuffle (gray)

# target time shift (stored as a string in the correlations table)
tshift = '0'

# rows of the linear regressor at the target shift
linear_at_shift = (correlations['regressor'] == 'linear') & (correlations['shift'] == tshift)

# real data, with the feature names swapped for their labels
sub_dataframe = correlations.loc[(correlations['rvs'] == 'real') & linear_at_shift].copy()
sub_dataframe['feature'] = sub_dataframe['feature'].map(lambda name: label_dict[name])
violin0 = hv.BoxWhisker(sub_dataframe, ['feature'], ['cc'])
violin0.opts(width=800, height=800, xrotation=45, ylabel='CC (a.u.)', ylim=(-0.2, 0.6), box_fill_color='#00ffff')

# shuffled data, formatted the same way
sub_dataframe = correlations.loc[(correlations['rvs'] == 'shuffle') & linear_at_shift].copy()
sub_dataframe['feature'] = sub_dataframe['feature'].map(lambda name: label_dict[name])
violin1 = hv.BoxWhisker(sub_dataframe, ['feature'], ['cc'])
violin1.opts(box_fill_color='#999999')

# overlay both and set the shared box options
violin = violin0 * violin1
violin.opts(opts.BoxWhisker(xlabel='', box_line_width=1, whisker_line_width=1, outlier_line_width=0))

# file name: target document followed by the figure name
save_name = os.path.join(save_path, target_document + '_Reg_CC_linear' + '.png')
# save the figure
violin = fp.save_figure(violin, save_name, fig_width=10, dpi=1200, fontsize=target_document, target='save')


In [None]:

# Box plot of the SVR regression CC per feature, real (magenta) vs shuffle (gray)

# target time shift (stored as a string in the correlations table)
tshift = '0'
# 4aa2d9 f05236

# rows of the SVR regressor at the target shift
svr_at_shift = (correlations['regressor'] == 'SVR') & (correlations['shift'] == tshift)

# real data, with the feature names swapped for their labels
sub_dataframe = correlations.loc[(correlations['rvs'] == 'real') & svr_at_shift].copy()
sub_dataframe['feature'] = sub_dataframe['feature'].map(lambda name: label_dict[name])
violin0 = hv.BoxWhisker(sub_dataframe, ['feature'], ['cc'])
violin0.opts(width=800, height=800, xrotation=45, ylabel='CC (a.u.)', ylim=(-0.2, 0.6), box_fill_color='#ff00ff')

# shuffled data, formatted the same way
sub_dataframe = correlations.loc[(correlations['rvs'] == 'shuffle') & svr_at_shift].copy()
sub_dataframe['feature'] = sub_dataframe['feature'].map(lambda name: label_dict[name])
violin1 = hv.BoxWhisker(sub_dataframe, ['feature'], ['cc'])
violin1.opts(box_fill_color='#999999')

# overlay both and set the shared box options
violin = violin0 * violin1
violin.opts(opts.BoxWhisker(xlabel='', box_line_width=1, whisker_line_width=1, outlier_line_width=0))

# file name: target document followed by the figure name
save_name = os.path.join(save_path, target_document + '_Reg_CC_SVR' + '.png')
# save the figure
violin = fp.save_figure(violin, save_name, fig_width=10, dpi=1200, fontsize=target_document, target='save')


In [None]:
# Box plot of the real CC per feature, SVR (magenta) vs linear (cyan)

# target time shift (stored as a string in the correlations table)
tshift = '0'

# rows of the real data at the target shift
real_at_shift = (correlations['rvs'] == 'real') & (correlations['shift'] == tshift)

# SVR results, with the feature names swapped for their labels
sub_dataframe = correlations.loc[real_at_shift & (correlations['regressor'] == 'SVR')].copy()
sub_dataframe['feature'] = sub_dataframe['feature'].map(lambda name: label_dict[name])
violin0 = hv.BoxWhisker(sub_dataframe, ['feature'], ['cc'])
violin0.opts(width=800, height=800, xrotation=45, ylabel='CC (a.u.)', ylim=(-0.2, 0.6), box_fill_color='#ff00ff', box_fill_alpha=1)

# linear results, formatted the same way
sub_dataframe = correlations.loc[real_at_shift & (correlations['regressor'] == 'linear')].copy()
sub_dataframe['feature'] = sub_dataframe['feature'].map(lambda name: label_dict[name])
violin1 = hv.BoxWhisker(sub_dataframe, ['feature'], ['cc'])
violin1.opts(box_fill_color='#00ffff', box_fill_alpha=1)

# overlay both and set the shared box options
violin = violin0 * violin1
violin.opts(opts.BoxWhisker(xlabel='', box_line_width=1, whisker_line_width=1, outlier_line_width=0))

# file name: target document followed by the figure name
save_name = os.path.join(save_path, target_document + '_Reg_CC_SVRvLinear' + '.png')
# save the figure
violin = fp.save_figure(violin, save_name, fig_width=10, dpi=1200, fontsize=target_document, target='save')

In [None]:
# Mann-Whitney U test per feature: real linear CC vs real SVR CC at the target shift
# (uses the tshift defined by the box plot cells)
for feature in variable_list:
    # rows of the real data of this feature at the target shift
    feature_rows = (correlations['feature']==feature)&(correlations['rvs']=='real')&(correlations['shift']==tshift)
    # remove the NaN entries before testing so they do not propagate into the statistic
    x = correlations.loc[feature_rows&(correlations['regressor']=='linear'), 'cc'].dropna()
    y = correlations.loc[feature_rows&(correlations['regressor']=='SVR'), 'cc'].dropna()

    test = stat.mannwhitneyu(x, y)
    # Bonferroni correction for the number of features, capped at 1 since it is a probability
    print(feature, min(test[1]*len(variable_list), 1.0))

In [None]:
# Mann-Whitney U test per feature: real vs shuffle CC of the linear regressor at the target shift
# (uses the tshift defined by the box plot cells)
for feature in variable_list:
    # rows of the linear regressor of this feature at the target shift
    feature_rows = (correlations['feature']==feature)&(correlations['regressor']=='linear')&(correlations['shift']==tshift)
    # remove the NaN entries before testing so they do not propagate into the statistic
    x = correlations.loc[feature_rows&(correlations['rvs']=='real'), 'cc'].dropna()
    y = correlations.loc[feature_rows&(correlations['rvs']=='shuffle'), 'cc'].dropna()

    test = stat.mannwhitneyu(x, y)
    # Bonferroni correction for the number of features, capped at 1 since it is a probability
    print(feature, min(test[1]*len(variable_list), 1.0))

In [None]:
# Mann-Whitney U test per feature: real vs shuffle CC of the SVR regressor at the target shift
# (uses the tshift defined by the box plot cells)
for feature in variable_list:
    # rows of the SVR regressor of this feature at the target shift
    feature_rows = (correlations['feature']==feature)&(correlations['regressor']=='SVR')&(correlations['shift']==tshift)
    # remove the NaN entries before testing so they do not propagate into the statistic
    x = correlations.loc[feature_rows&(correlations['rvs']=='real'), 'cc'].dropna()
    y = correlations.loc[feature_rows&(correlations['rvs']=='shuffle'), 'cc'].dropna()

    test = stat.mannwhitneyu(x, y)
    # Bonferroni correction for the number of features, capped at 1 since it is a probability
    print(feature, min(test[1]*len(variable_list), 1.0))

In [None]:
# Scatter of the real CC, linear against SVR, one panel per feature
plot_list = []
for feature in variable_list:
    # rows of the real data of this feature at the target shift
    feature_rows = (correlations['feature'] == feature) & (correlations['rvs'] == 'real') & (correlations['shift'] == tshift)
    x = correlations.loc[feature_rows & (correlations['regressor'] == 'linear')]
    y = correlations.loc[feature_rows & (correlations['regressor'] == 'SVR')]

    # the title carries the Spearman correlation between both and the mean linear CC
    corr = stat.spearmanr(x['cc'], y['cc'], nan_policy='omit')[0]
    mean_cc = x['cc'].mean()
    plot = hv.Scatter((x['cc'], y['cc']))
    plot.opts(width=400, height=400, tools=['hover'], xlabel='Linear', ylabel='SVR', title=f'{feature} {corr:.2f} {mean_cc:0.2f}', xlim=(-0.2, 0.6), ylim=(-0.2, 0.6))
    plot_list.append(plot)

# arrange the panels in 4 columns with shared axes
layout = hv.Layout(plot_list).cols(4).opts(shared_axes=True)
layout


In [None]:
# Distribution of the CC per variable (real vs shuffle) with a Mann-Whitney U test each

# reload the project modules and refresh the variables and labels
importlib.reload(fp)
importlib.reload(processing_parameters)
variable_list = processing_parameters.variable_list
label_dict = processing_parameters.label_dictionary

# target time point and the columns that hold it
tpoint = 0
real_column = 'real_' + str(tpoint)
shuffle_column = 'smean_' + str(tpoint)

real_list = []
shuffle_list = []

# variables to include
include_variables = variable_list
for target_variable in include_variables:
    # value at the target time point for each row, tagged with the feature label
    real_data = data[target_variable].loc[:, [real_column]].assign(Feature=label_dict[target_variable])
    real_list.append(real_data)

    shuffle_data = data[target_variable].loc[:, [shuffle_column]].assign(Feature=label_dict[target_variable])
    shuffle_list.append(shuffle_data)

    # real vs shuffle test on the non-NaN values, Bonferroni corrected
    x_mwu = real_data[real_column].to_numpy()
    x_mwu = x_mwu[np.logical_not(np.isnan(x_mwu))]
    y_mwu = shuffle_data[shuffle_column].to_numpy()
    y_mwu = y_mwu[np.logical_not(np.isnan(y_mwu))]
    test = stat.mannwhitneyu(x_mwu, y_mwu)
    print(target_variable, test[1]*len(include_variables))

# number of plots and the half index (kept from a two-panel layout, not used below)
number_plots = len(real_list)
half_index = int(np.ceil(number_plots/2)) + 1

# box plots of the real and the shuffled values over all variables
violin0 = hv.BoxWhisker(pd.concat(real_list, axis=0), ['Feature'], [real_column])
violin0.opts(width=800, height=800, xrotation=45, ylabel='CC (a.u.)')

violin1 = hv.BoxWhisker(pd.concat(shuffle_list, axis=0), ['Feature'], [shuffle_column])

# overlay both and set the shared box options
violin = violin0 * violin1
violin.opts(opts.BoxWhisker(xlabel='', box_line_width=1, whisker_line_width=1, outlier_line_width=0))

# file name: target document followed by the figure name
save_name = os.path.join(save_path, target_document + '_Reg_CC' + '.png')
# pass the figure through the saving function with the screen target
violin = fp.save_figure(violin, save_name, fig_width=10, dpi=1200, fontsize=target_document, target='screen')

In [None]:
# CC over time, one real/shuffle curve pair per mouse

# target timepoint and the columns that hold it
tpoint = 10
# list for the plots
time_plot = []

for target_variable in variable_list:
    temp_list = []
    # for all the mice
    for mouse, df_ori in data[target_variable].groupby(['mouse']):
        # sorting returns a new frame, so the original dataframe is left untouched
        df = df_ori.sort_values(['day'], axis=0).reset_index(drop=True)
        # days elapsed since the first day of this mouse
        delta_time = (df['day'] - df['day'].iloc[0]).dt.days.tolist()

        real_plot = hv.Curve((delta_time, df.loc[:, 'real_'+str(tpoint)]))
        real_plot.opts(title=target_variable, width=400, height=400, xlabel='Time (days)', ylabel='CC (a.u.)')
        shuffle_plot = hv.Curve((delta_time, df.loc[:, 'smean_'+str(tpoint)]))

        temp_list.append(real_plot*shuffle_plot)
    time_plot.append(hv.Overlay(temp_list))

hv.Layout(time_plot).cols(3)
    
    
    

In [None]:
# assemble a single table with the animal, the day delta and the weights of every variable
output_df = []

# for all the variables
for idx, target_feature in enumerate(weights.keys()):
    # get the dataframe for this feature
    current_feature = weights[target_feature]
    # if it's the first one, also get the mouse and day
    if idx == 0:
        # explicit copy so the day column can be replaced without touching the weights
        mouse_day = current_feature.loc[:, ['animal', 'day']].copy()
        animals = mouse_day['animal'].to_numpy()
        days = mouse_day['day'].to_numpy()
        # the deltas are written at the row positions of each mouse, so they stay aligned
        # with the rows even when the rows are not ordered by animal
        delta_days = np.zeros(days.shape[0], dtype=int)
        # for all the mice
        for mouse_name in pd.unique(animals):
            mouse_rows = animals == mouse_name
            mouse_days = days[mouse_rows]
            # reformat day as a delta in days from the first row of this mouse
            delta_days[mouse_rows] = ((mouse_days - mouse_days[0])/np.timedelta64(1, 'D')).astype(int)
        mouse_day['day'] = delta_days
        output_df.append(mouse_day.to_numpy())

    # get only the weights and store in the output list
    output_df.append(current_feature.loc[:, ['weight']].to_numpy())

# convert to dataframe (assumes every variable has the same number of weight rows)
output_df = pd.DataFrame(np.hstack(output_df), columns=['animal', 'day']+list(weights.keys()))
print(output_df.shape)



In [None]:
# calculate the correlation matrix for the variables

# restrict the table to the columns named in variable_list
feature_matrix = output_df[variable_list]
# Spearman correlation between the columns, together with the matching p-values
spearman_result = stat.spearmanr(feature_matrix)
correlation_matrix, pvalue_matrix = spearman_result

print(correlation_matrix.shape)


# Correlation plot 

In [None]:
# one tick per variable, centred on its cell and labelled with the readable name
ticks = [(idx+0.5, label_dict[el]) for idx, el in enumerate(variable_list)]

# show only the lower triangle (diagonal included): blank the strictly-upper entries by position,
# so a genuine correlation of exactly 0 below the diagonal is still drawn instead of being masked
plot_matrix = correlation_matrix.copy()
plot_matrix[np.triu_indices_from(plot_matrix, k=1)] = np.nan
raster = hv.Raster(plot_matrix)
# format the plot
raster = fp.format_figure(raster, width=950, height=800, yticks=ticks, xticks=ticks, colorbar=True, cmap='RdBu', clim=(-1, 1), xrotation=45)

# assemble the file name
save_name = os.path.join(save_path, '_'.join((target_document, 'Reg_correlation')) + '.png')
# save the figure
fig = fp.save_figure(raster, save_name, fig_width=15, dpi=1200, fontsize=target_document, target='save')

In [None]:
%%time
# perform a umap decomposition of the weights across variables

# format the data: drop the identifier columns and force a numeric dtype
# (output_df is built from a mixed array, so its columns are stored as objects)
umap_process = output_df.drop(['animal', 'day'], axis=1).to_numpy().astype(np.float64)

# normalize the umap data per column: weight magnitudes divided by the column maximum
umap_process = np.abs(umap_process)/np.abs(umap_process).max(axis=0)

# fixed seed, so the embedding (and every figure derived from it) is the same on every run
umap_seed = 0
# run the decomposition
reducer = umap.UMAP(min_dist=0.5, n_neighbors=30, random_state=umap_seed)
embedded_data = reducer.fit_transform(umap_process)

# UMAP regression plot

In [None]:
# plot the decomposition

# define the interval between points (1 keeps every point)
interv = 1
# percentile used to clip the weight magnitudes on both tails
perc = 95

importlib.reload(fp)

umap_list = []
# for all the variables, plus the two identifier columns
for target_key in variable_list + ['animal', 'day']:
    if target_key in ['animal', 'day']:
        # identifier column: color by the integer code of each unique value, rescaled to [0, 1]
        unique_values, raw_labels = np.unique(output_df.loc[:, target_key].to_numpy(), return_inverse=True)
        raw_labels = raw_labels.astype(np.float64)
        label_range = raw_labels.max() - raw_labels.min()
        if label_range > 0:
            raw_labels = (raw_labels - raw_labels.min())/label_range
        else:
            # a single unique value would give 0/0 (NaN colors); use a constant color instead
            raw_labels = np.zeros_like(raw_labels)
        title = target_key
    else:
        # weight column: color by magnitude, clipped to the [100-perc, perc] percentile range
        raw_labels = np.abs(output_df.loc[:, target_key].to_numpy().astype(np.float64))
        upper_limit = np.percentile(raw_labels, perc)
        raw_labels[raw_labels > upper_limit] = upper_limit
        lower_limit = np.percentile(raw_labels, 100-perc)
        raw_labels[raw_labels < lower_limit] = lower_limit
        title = label_dict[target_key]

    # append the labels as a third column next to the two embedding dimensions
    compiled_labels = np.expand_dims(raw_labels, axis=1)
    umap_data = np.concatenate((embedded_data, compiled_labels), axis=1)

    # subsample the points
    compiled_labels = compiled_labels[::interv]
    umap_data = umap_data[::interv, :]

    umap_plot = hv.Scatter(umap_data, vdims=['Dim 2', target_key], kdims=['Dim 1'])
    umap_plot.opts(color=target_key, cmap='Spectral', size=1, tools=['hover'])
    umap_plot.opts(height=600, width=800, xaxis=None, yaxis=None, colorbar=False, title=title)

    save_name = os.path.join(save_path, '_'.join((target_document, 'Reg_UMAP', target_key)) + '.png')
    # save the figure
    fig = fp.save_figure(umap_plot, save_name, fig_width=7.7, dpi=1200, fontsize=target_document, target='save', display_factor=0.3)
    umap_list.append(umap_plot)

# Colorbar generation 

In [None]:
# generate a colorbar

# define the target color map
target_cmap = 'Spectral_r'

# two identical rows holding the levels 0..254; transposed and flipped (1 - x) for display
cbar_data = np.tile(np.arange(255), (2, 1))
cbar = hv.Raster(1-cbar_data.T)
cbar.opts(tools=['hover'], cmap=target_cmap, xaxis=None, yaxis=None, width=100, height=400)

# assemble the file name and save the figure
save_name = os.path.join(save_path, f'{target_document}_Colorbar_{target_cmap}.png')
cbar = fp.save_figure(cbar, save_name, fig_width=1, dpi=1200, fontsize=target_document, target='save', display_factor=0.3)

In [None]:
def gini(array):
    """Calculate the Gini coefficient of a numpy array. From https://neuroplausible.com/gini

    Parameters
    ----------
    array : array-like of numbers, any shape
        All values are treated equally (the input is flattened). Integer input is accepted.
        The input itself is never modified.

    Returns
    -------
    float
        The Gini coefficient of the values, or NaN when the input is empty.
    """
    # work on a flattened float copy: the in-place shifts below would fail on an
    # integer array, and the caller's data must stay untouched
    array = np.array(array, dtype=np.float64).flatten()
    # nothing to measure on an empty input
    if array.size == 0:
        return np.nan
    lowest = np.amin(array)
    if lowest < 0:
        # Values cannot be negative:
        array -= lowest
    # Values cannot be 0:
    array += 0.0000001
    # Values must be sorted:
    array = np.sort(array)
    # Number of array elements:
    n = array.shape[0]
    # Index per array element:
    index = np.arange(1, n+1)
    # Gini coefficient:
    return ((np.sum((2 * index - n - 1) * array)) / (n * np.sum(array)))

In [None]:
def gini2(array, bins=30):
    """Histogram-based Gini coefficient, according to de Oliveira and Kim et al.

    The absolute values of `array` are binned into `bins` bins, and the coefficient
    is computed from the bin fractions and the bin centers.

    Parameters
    ----------
    array : 1D array of numbers
    bins : number of bins, forwarded to scipy.stats.binned_statistic

    Returns
    -------
    The Gini coefficient as a float.
    """
    # count how many samples fall in each magnitude bin
    histogram = stat.binned_statistic(np.abs(array), array, bins=bins, statistic='count')
    bin_counts = histogram[0]
    edges = histogram[1]

    # fraction of the samples in every bin, and the magnitude at the bin center
    bin_fraction = bin_counts/bin_counts.sum()
    bin_center = (edges[:-1] + edges[1:])/2
    # running total of fraction-weighted magnitudes, and the same total one bin earlier
    cumulative = np.cumsum(bin_fraction * bin_center)
    previous = np.concatenate(([0], cumulative[:-1]), axis=0)

    # calculate the coefficient
    return 1 - np.sum(bin_fraction*(previous + cumulative))/cumulative[-1]
    

In [None]:
# Calculate and plot Gini coefficient

# one (Feature, Gini) record per feature and per animal/day group
gini_records = []
# for all the animal/day combinations
for animal_date, current_day in output_df.groupby(['animal', 'day'], as_index=False):
    # for all the features
    for feature in variable_list:
        # get the feature (stored as objects in output_df, hence the cast)
        current_feat = current_day[feature].to_numpy().astype(np.float64)
        # calculate the gini coefficient and store it under the readable feature label
        gini_records.append((label_dict[feature], gini2(current_feat, bins=20)))

# build the dataframe in a single step: avoids one single-row frame plus a concat per value,
# and gives the table a regular 0..n-1 index instead of an index made only of zeros
gini_array = pd.DataFrame(gini_records, columns=['Feature', 'Gini'])
print(gini_array)

In [None]:
%%time
# calculate the Gini coefficient based on resampled weights

# define the number of shuffles
number_shuffles = 100
# seeded generator, so the surrogate distribution is identical on every run
shuffle_seed = 0
shuffle_rng = np.random.default_rng(shuffle_seed)
# the grouping does not change between shuffles, so build it only once
day_groups = output_df.groupby(['animal', 'day'], as_index=False)
# one (Feature, Gini) record per shuffle, animal/day group and feature
shuffle_records = []
# for all the shuffles
for shuff in np.arange(number_shuffles):
    # for all the animal/day combinations
    for animal_date, current_day in day_groups:
        # for all the features
        for feature in variable_list:
            # get the feature
            current_feat = current_day[feature].to_numpy().astype(np.float64)
            # replace the weights by uniform random draws spanning their observed [min, max] range
            a = np.min(current_feat)
            b = np.max(current_feat)
            current_feat = (b - a) * shuffle_rng.random(current_feat.shape[0]) + a

            # calculate the gini coefficient and store
            shuffle_records.append((label_dict[feature], gini2(current_feat, bins=20)))

# build the dataframe in a single step instead of concatenating single-row frames
shuffle_gini = pd.DataFrame(shuffle_records, columns=['Feature', 'Gini'])

# Gini coefficient plot 

In [None]:
# Plot the gini coefficients

importlib.reload(fp)

# one box per feature: measured coefficients first, resampled ones second
whisker0 = hv.BoxWhisker(gini_array, ['Feature'], ['Gini'])
whisker1 = hv.BoxWhisker(shuffle_gini, ['Feature'], ['Gini'])
# axis formatting is set on the first element of the overlay
whisker0.opts(width=800, height=800, xrotation=45, ylabel='Sparsity', xlabel='')

# overlay both distributions and thin all the box lines
whisker = hv.Overlay([whisker0, whisker1])
whisker.opts(opts.BoxWhisker(box_line_width=1, whisker_line_width=1, outlier_line_width=1))

# assemble the file name and save the figure
save_name = os.path.join(save_path, f'{target_document}_Reg_gini.png')
fig = fp.save_figure(whisker, save_name, fig_width=10, dpi=1200, fontsize=target_document, target='save')

In [None]:
# load the behavior

importlib.reload(processing_parameters)
importlib.reload(fl)

# get the paths from the database using search_list
all_paths, all_queries = fl.query_search_list()

# load the data of every search; only the first element returned by the loader is kept
behavior_list = [fl.load_preprocessing(path, queries)[0] for path, queries in zip(all_paths, all_queries)]

variable_list = processing_parameters.variable_list
# columns to keep from every trial: the behavioral variables of interest plus the identifiers
target_columns = variable_list+['mouse', 'datetime']

data_behavior = []
for current_paths, search_trials in zip(all_paths, behavior_list):
    for trial_number, trial_data in enumerate(search_trials):
        # skip the trials whose path contains 'habi'
        if 'habi' in current_paths[trial_number]:
            continue
        # trials missing any of the target columns are skipped as well
        try:
            trial_subset = trial_data[target_columns]
        except KeyError:
            continue
        data_behavior.append(trial_subset)

# the full tables are not needed anymore
del behavior_list

In [None]:
# report how many trials were kept in data_behavior
print(len(data_behavior))

In [None]:
%%time
# calculate prediction accuracy under defined conditions

# define the parameter of interest
target_parameter = 'cricket_0_mouse_distance'
# get its ranges
parameter_ranges = processing_parameters.tc_params[target_parameter]
# define the number of splits
number_bins = 3
# minimum number of samples a bin needs for its correlation to be calculated
min_samples = 10
# determine the bin edges based on the range and number of splits
bin_edges = np.linspace(parameter_ranges[0], parameter_ranges[1], number_bins+1)
bin_centers = (bin_edges[1:] + bin_edges[:-1])/2

# get the data meta
data_meta = [[el.loc[0, 'mouse'], el.loc[0, 'datetime']] for el in data_behavior]
data_meta = pd.DataFrame(data_meta, columns=['mouse', 'datetime'])
# index the behavior trials by (mouse, datetime) once, instead of scanning all of them for
# every prediction trial; setdefault keeps the first trial when a key is repeated
behavior_lookup = {}
for trial_idx, trial_key in enumerate(zip(data_meta['mouse'], data_meta['datetime'])):
    behavior_lookup.setdefault(trial_key, data_behavior[trial_idx])

# allocate memory for the output
binned_prediction = []
# for all the variables
for feature in variable_list:
    # get the meta
    meta = predictions_meta[feature]

    # for all the trials
    for idx, trial in enumerate(predictions[feature]):
        # get the current meta
        current_mouse = meta.loc[idx, 'mouse']
        current_datetime = meta.loc[idx, 'datetime']

        # get the behavior of the same mouse and datetime, skipping trials without a match
        current_data = behavior_lookup.get((current_mouse, current_datetime))
        if current_data is None:
            continue

        # get the target and feature
        current_target = current_data[target_parameter]
        current_data = current_data[feature]
        # get the prediction
        current_pred = trial[0]
        # get the shuffle
        current_shuffle = trial[1]
        # split the data; samples below the first edge get 0 and samples at or above the last
        # edge get number_bins+1, so neither is picked up by the loop below
        bin_vector = np.digitize(current_target, bin_edges)

        # for all the ranges
        for bin_ in np.arange(1, number_bins+1):
            # get the selection vector
            selection_vector = bin_vector == bin_
            # skip calculation if too few samples
            if np.sum(selection_vector) >= min_samples:
                # get the relevant portions of the data
                bin_data = current_data[selection_vector]
                bin_pred = current_pred[selection_vector]
                bin_shuffle = current_shuffle[selection_vector]
                # calculate the accuracy of the prediction in this range
                pred_accuracy = stat.spearmanr(bin_data, bin_pred, nan_policy='omit')[0]
                # calculate the accuracy of the shuffle in this range
                shuffle_accuracy = stat.spearmanr(bin_data, bin_shuffle, nan_policy='omit')[0]
            else:
                pred_accuracy = np.nan
                shuffle_accuracy = np.nan
            # assemble the element to save
            save_element = [pred_accuracy, shuffle_accuracy, bin_, current_mouse, current_datetime, feature]
            # store
            binned_prediction.append(save_element)
# turn the output into a dataframe
binned_prediction = pd.DataFrame(binned_prediction, columns=['prediction', 'shuffle', 'bin', 'mouse', 'datetime', 'feature'])

# Average binned decoding matrix 

In [None]:
# plot the decoding accuracies across variables and bins

# average accuracy across trials for every (bin, feature) pair; the value column is selected
# explicitly, so the string columns (mouse, datetime) never reach the mean
average_real = binned_prediction.groupby(['bin', 'feature'], as_index=False)['prediction'].mean()
average_shuffle = binned_prediction.groupby(['bin', 'feature'], as_index=False)['shuffle'].mean()

# arrange the averages as a (bin x feature) matrix following the order of variable_list;
# a (bin, feature) pair without data is left as NaN
bin_labels = np.arange(1, number_bins+1)
real_matrix = average_real.pivot(index='bin', columns='feature', values='prediction')
real_matrix = real_matrix.reindex(index=bin_labels, columns=variable_list).to_numpy()
shuffle_matrix = average_shuffle.pivot(index='bin', columns='feature', values='shuffle')
shuffle_matrix = shuffle_matrix.reindex(index=bin_labels, columns=variable_list).to_numpy()

# ticks centred on the cells: readable feature names on x, bin centers on y
xticks = [(idx+0.5, label_dict[el]) for idx, el in enumerate(variable_list)]
yticks = [(idx+0.5, f'{el:0.2f}') for idx, el in enumerate(bin_centers)]
# both rasters share the formatting and the color limits, so they can be compared directly
raster_options = dict(width=1000, height=500, tools=['hover'], cmap='Magma',
                      xticks=xticks, yticks=yticks, xrotation=45, ylabel=label_dict[target_parameter],
                      xlabel='', colorbar=True, clim=(-0.05, 0.4))

raster0 = hv.Raster(real_matrix)
raster0.opts(**raster_options)
raster1 = hv.Raster(shuffle_matrix)
raster1.opts(**raster_options)

# assemble the file name
save_name = os.path.join(save_path, '_'.join((target_document, 'Reg_binned', target_parameter, 'real')) + '.png')
# save the figure
fig0 = fp.save_figure(raster0, save_name, fig_width=15, dpi=1200, fontsize=target_document, target='both')

# assemble the file name
save_name = os.path.join(save_path, '_'.join((target_document, 'Reg_binned', target_parameter, 'shuffle')) + '.png')
# save the figure
fig1 = fp.save_figure(raster1, save_name, fig_width=15, dpi=1200, fontsize=target_document, target='both')

In [None]:
def layout_function(input_data, target_field, color_dict, marker):
    """Build the scatter of one feature/bin group against its 'datetime' column.

    NOTE: besides its arguments, this function reads the notebook-level names
    `bin_name`, `feature_name` and `label_dict`. They must hold the bin and feature of
    `input_data` at call time (the plotting loop sets them through its loop variables).

    Parameters
    ----------
    input_data : dataframe with a numeric 'datetime' column and a `target_field` column
    target_field : name of the column plotted on the y axis
    color_dict : mapping from bin number to the color used for that bin
    marker : marker style passed to the scatter options

    Returns
    -------
    The formatted hv.Scatter.
    """
    # reset index
    current_data = input_data.reset_index(drop=True)
    # keep only the entries whose 'datetime' value is below 6
    selection_vector = current_data['datetime'].to_numpy() < 6
    # copy the selection, so the shift below works on an independent frame
    # (avoids pandas' SettingWithCopyWarning and any write-through ambiguity)
    current_data = current_data.iloc[selection_vector, :].copy()
    # slightly shift the datetime to see all points based on bin
    current_data['datetime'] += 0.3*(bin_name-1)

    # create the plot
    curve = hv.Scatter(current_data, kdims='datetime', vdims=target_field, label='bin' + str(bin_name))
    curve.opts(width=400, height=300, color=color_dict[bin_name], title=label_dict[feature_name], marker=marker, size=5)

    return curve

In [None]:
# plot lines to show error

# define the colors for the bins
bin_colors = {
    1: '#0000FF',
    2: '#00FF00',
    3: '#FF0000',
}

bin_colors_shuffle = {
    1: '#880088',
    2: '#008800',
    3: '#880000',
}
# columns that define one averaged point
group_keys = ['datetime', 'bin', 'feature']
# allocate the plot dict (feature -> list of scatter plots)
plot_dict = {}
# for all the mice
for mouse_name, mouse_data in binned_prediction.groupby(['mouse'], as_index=False):
    # copy the mouse data
    current_mouse = mouse_data.copy()
    # get the datetime info and 0 it: whole days elapsed since the earliest entry of this mouse
    date_time = pd.to_datetime(current_mouse['datetime'], format='%Y-%m-%d %H:%M:%S')
    # replace the original time
    current_mouse['datetime'] = (date_time - date_time.min()).dt.days
    # average across dates; the value column is selected explicitly, so the string
    # column (mouse) never reaches the mean
    average_real_mouse = current_mouse.groupby(group_keys, as_index=False)['prediction'].mean()
    average_shuffle_mouse = current_mouse.groupby(group_keys, as_index=False)['shuffle'].mean()

    # real accuracies first, then the shuffles, each with its own colors and marker
    plot_settings = (
        (average_real_mouse, 'prediction', bin_colors, 'o'),
        (average_shuffle_mouse, 'shuffle', bin_colors_shuffle, '*'),
    )
    for averaged_data, target_field, color_dict, marker in plot_settings:
        # for all the features and bins; layout_function reads feature_name and bin_name
        # as notebook-level names, so these loop variables must keep exactly these names
        for (feature_name, bin_name), group_data in averaged_data.groupby(['feature', 'bin']):
            # create a new key if not there, then plot and save
            plot_dict.setdefault(feature_name, []).append(
                layout_function(group_data, target_field, color_dict, marker))

In [None]:
# combine the overlays and plot

# one overlay per feature, following the order of variable_list
layout_list = [hv.Overlay(plot_dict[feature]) for feature in variable_list]

# arrange the overlays in 4 columns, each panel with its own axes
layout = hv.Layout(layout_list).cols(4).opts(shared_axes=False)
layout

In [None]:
# report the size (rows, columns) of the binned prediction table
print(binned_prediction.shape)