In [1]:
import os
import sys
sys.path.insert(0, os.path.abspath(r'D:\Code Repos\prey_capture'))


import panel as pn
import holoviews as hv
from holoviews import opts, dim
hv.extension('bokeh')
from bokeh.resources import INLINE
from bokeh import palettes

import paths
import importlib
import functions_plotting as fp
import functions_bondjango as bd
import processing_parameters
import numpy as np
import pandas as pd
import h5py
import scipy.stats as stat
import datetime
import umap
from pprint import pprint
from sklearn import metrics

In [2]:
importlib.reload(fp)
importlib.reload(processing_parameters)
# set up the figure theme
fp.set_theme()
label_dict = processing_parameters.label_dictionary

In [3]:
# load the desired files and their associated regressions

# # define the target variable
# target_variable = 'cricket_0_mouse_distance'

# load the latents for each file with their attributes
# %%time
# Load the desired files
importlib.reload(processing_parameters)

# load the constants from the regression calculation
time_shifts = processing_parameters.time_shifts
shift_dict = {el: idx for idx, el in enumerate(time_shifts)}
shift_number = len(time_shifts)
shuffles = processing_parameters.regression_shuffles

# load the variable list
variable_list = processing_parameters.variable_list
# assemble the dataframe columns
reals = ['real_'+str(el) for el in time_shifts]
shuffle_means = ['smean_'+str(el) for el in time_shifts]
shuffle_sems = ['ssem_'+str(el) for el in time_shifts]
columns = reals + shuffle_means + shuffle_sems + ['mouse', 'day']

# get the search list
search_list = processing_parameters.search_list

# allocate a list for all paths (need to preload to get the dates)
all_paths = []
all_results = []
# for all the search strings
for search_string in search_list:

    # query the database for data to plot
    data_all = bd.query_database('analyzed_data', search_string)
#         data_all = [el for el in data_all if 'preproc' in el['slug']]
    data_path = [el['analysis_path'] for el in data_all if '_combinedanalysis' in el['slug']]
    data_result = [el['result'] for el in data_all if '_combinedanalysis' in el['slug']]
    all_paths.append(data_path)
    all_results.append(data_result)
# get the dates present
data_dates = np.unique([os.path.basename(el)[:10] for el in np.concatenate(all_paths)])
print(f'Dates present: {data_dates}')

# allocate memory for the resulting dataframe
data = {}
weights = {}
day_list = []
animal_list = []
joint_list = []
# for all the list items
for idx0, data_path in enumerate(all_paths):

    # for all the files
    for idx1, files in enumerate(data_path):
        
        # if a habi trial, skip
        if 'habi' in files:
            continue
        
        # get the animal and date from the slug
        name_parts = os.path.basename(files).split('_')
        animal = '_'.join(name_parts[7:10])
        day_s = '_'.join(name_parts[:3])
        day = datetime.datetime.strptime(day_s, '%m_%d_%Y')
        # skip if the animal and day are already evaluated, 
        # since the CC is the same for the whole day
        if animal+'_'+day_s in joint_list:
            continue
        else:
            animal_list.append(animal)
            day_list.append(day)
            joint_list.append(animal+'_'+day_s)
        # assemble the preproc path
        files_preproc = files.replace('_combinedanalysis', '_preproc')
        # open the file
        with pd.HDFStore(files_preproc, 'r') as preproc:
            if '/cell_matches' in preproc.keys():
                # get the matches
                cell_matches = preproc['cell_matches']
                
#                 print(animal, day_s, files_preproc)
                # get the idx for this file
                current_matches = cell_matches[datetime.datetime.strftime(day, '%m_%d_%Y')].to_numpy()
                current_idx = np.argsort(current_matches).astype(float)
                # remove the nan entries
                current_idx = current_idx[~np.isnan(np.sort(current_matches))]
                # get the roi info
                roi_info = preproc['roi_info']
        
        # load the data and the cell matches (wasteful, but cleaner I think)
        with h5py.File(files, 'r') as h:
            
            # for all the target variables
            for target_variable in variable_list:
                # create an empty list only if it's the same time this variable runs
                if target_variable not in data.keys():
                    data[target_variable] = []
                    weights[target_variable] = []
                # allocate memory for the real and shuffled regressions
                real_array = np.zeros((shift_number, 1))
                shuffle_array = np.zeros((shift_number, shuffles))
                real_weight = []
                shuffle_weight = []
#                 print(files)
                if 'regression' not in h.keys():
                    continue

                # for all the keys (will iterate through shifts and reps for shuffle)
                for key in h['/regression'].keys():

                    # skip if it's not a cc key or is not the target variable
                    if (target_variable not in key):
                        continue
                    # get the time shift and shuffle
                    key_parts = key.split('_')
                    shift = int([el[5:] for el in key_parts if 'shift' in el][0])
                    if 'cc' in key:

                        if 'real' in key_parts:
                             # save the values
                            real_array[shift_dict[shift]] = np.array(h['/regression/'+key])
                        else:
                            shuffle = int([el[7:] for el in key_parts if 'shuffle' in el][0])
                            shuffle_array[shift_dict[shift], shuffle-1] = np.array(h['/regression/'+key])
                    elif ('coefficients' in key and shift == 0):
                        if 'real' in key_parts:
                            real_weight = np.array(h['/regression/'+key])
                        else:
                            shuffle_weight.append(np.array(h['/regression/'+key]))    
                    else:
                        continue
                # average the shuffles and get the sem
                shuffle_mean = np.mean(shuffle_array, axis=1)
                shuffle_sem = stat.sem(shuffle_array, axis=1)
                # add the columns to the main list
                data[target_variable].append(list(real_array[:, 0]) + list(shuffle_mean) + list(shuffle_sem) + [animal, day])
                # take only the non time shift (need to check)
                if isinstance(real_weight, list):
                    continue
                shuffle_weight_mean = np.mean(shuffle_weight, axis=0)
                shuffle_weight_sem = stat.sem(shuffle_weight, axis=0)

                # store the weights                
                temp_df = pd.DataFrame(np.vstack((real_weight, shuffle_weight_mean, shuffle_weight_sem)).T, columns=['weight', 'shuffle_mean', 'shuffle_sem'])
                temp_df['match_id'] = current_idx 
                temp_df['animal'] = animal
                temp_df['day'] = day
                temp_df = pd.concat((temp_df, roi_info), axis=1)
                
                # add a cell id field
                cell_id = np.arange(temp_df.shape[0])
                temp_df['cell_id'] = cell_id

                weights[target_variable].append(temp_df)

            
# for all the variables once more
for target_variable in variable_list:
    # turn the overall list into a dataframe
    data[target_variable] = pd.DataFrame(data[target_variable], columns=columns)
    # turn the weights into a dictionary
    weights[target_variable] = pd.concat(weights[target_variable], axis=0)

#     print(f'Shape of the data dictionary: {data[target_variable].shape}')
#     print(f'Shape of the weights dataframe: {weights[target_variable].shape}')



Dates present: ['03_02_2020' '03_04_2020' '03_05_2020' '03_06_2020' '03_10_2020'
 '03_11_2020' '03_12_2020' '03_13_2020' '03_19_2021' '03_22_2021'
 '03_23_2021' '03_24_2021' '03_25_2021' '03_26_2021' '03_29_2021'
 '03_30_2021' '03_31_2021' '04_01_2021' '04_02_2021' '04_05_2021'
 '04_06_2021' '04_07_2021' '04_08_2021' '04_09_2021' '04_12_2021'
 '04_13_2021' '04_14_2021' '04_15_2021' '04_16_2021' '04_21_2021'
 '04_22_2021' '04_23_2021' '04_26_2021' '04_27_2021' '04_28_2021'
 '04_29_2021' '04_30_2021' '05_03_2021' '05_04_2021' '05_05_2021'
 '05_06_2021' '05_07_2021' '08_03_2020' '08_04_2020' '08_05_2020'
 '08_06_2020' '08_07_2020' '08_08_2020' '08_09_2020' '08_10_2020'
 '08_11_2020' '08_12_2020' '08_13_2020' '08_14_2020' '08_15_2020'
 '08_16_2020' '08_17_2020' '08_18_2020' '08_19_2020' '08_20_2020'
 '08_21_2020' '08_29_2020' '08_30_2020' '08_31_2020' '09_01_2020'
 '09_02_2020' '09_03_2020' '09_04_2020' '09_05_2020' '09_06_2020'
 '09_07_2020' '09_08_2020' '12_07_2019' '12_09_2019' '12_10_2

In [4]:
# plot the weight distributions
# put all the cells in a single dataframe with variables vs cells

# need to iterate through the variables, and concatenate the weights in order per day

# allocate memory for the output dataframe
regression_df = []

# for all the variables
for idx, target_feature in enumerate(weights.keys()):
    # get the data
    current_feature = weights[target_feature]

    if idx == 0:

        df = current_feature.loc[:, ['animal', 'day', 'match_id', 'cell_id', 'weight', 'centroid_x', 'centroid_y',
                                               'bbox_left', 'bbox_top', 'bbox_width', 'bbox_height', 'area']]
        df = df.rename(columns={'weight': target_feature})

    else:
        df = current_feature.loc[:, 'weight']
        df = df.rename(target_feature)

    regression_df.append(df)
# concatenate into a single dataframe
regression_df = pd.concat(regression_df, axis=1)


# eliminate rows with nans
regression_df = regression_df.iloc[~np.any(np.isnan(regression_df.drop(['animal', 'day'], axis=1).to_numpy()), axis=1), :]
print(regression_df.columns)
print(regression_df.shape)
    

Index(['animal', 'day', 'match_id', 'cell_id', 'mouse_speed', 'centroid_x',
       'centroid_y', 'bbox_left', 'bbox_top', 'bbox_width', 'bbox_height',
       'area', 'mouse_x', 'mouse_angular_speed', 'cricket_0_mouse_distance',
       'cricket_0_delta_heading', 'cricket_0_x', 'ego_cricket_x',
       'cricket_0_visual_angle', 'hunt_trace', 'cricket_0_direction',
       'cricket_0_loom', 'cricket_0_delta_visual', 'motifs', 'latent_0',
       'latent_1', 'latent_2'],
      dtype='object')
(15177, 27)


In [5]:
# Load the tc_consolidate file
importlib.reload(processing_parameters)

# get the search query
search_consolidate = processing_parameters.search_consolidate

# query the database for data to plot
data_path = bd.query_database('analyzed_data', search_consolidate)
# data_path = [el['analysis_path'] for el in data_path if 'test' not in el['analysis_path']][0]
data_path = [el['analysis_path'] for el in data_path if 'test' not in el['analysis_path']]
pprint(data_path)

['J:\\Drago Guggiana '
 'Nilo\\Prey_capture\\AnalyzedData\\preprocessing_ALL_miniscope_ALL_ALL_ALL_ALL_ALL_ALL_ALL_DG_200617_b_tcconsolidate.hdf5',
 'J:\\Drago Guggiana '
 'Nilo\\Prey_capture\\AnalyzedData\\preprocessing_ALL_miniscope_ALL_ALL_ALL_ALL_ALL_ALL_ALL_DG_190810_a_tcconsolidate.hdf5',
 'J:\\Drago Guggiana '
 'Nilo\\Prey_capture\\AnalyzedData\\preprocessing_ALL_miniscope_ALL_ALL_ALL_ALL_ALL_ALL_ALL_DG_190806_a_tcconsolidate.hdf5',
 'J:\\Drago Guggiana '
 'Nilo\\Prey_capture\\AnalyzedData\\preprocessing_ALL_miniscope_ALL_ALL_ALL_ALL_ALL_ALL_ALL_MM_191106_a_tcconsolidate.hdf5',
 'J:\\Drago Guggiana '
 'Nilo\\Prey_capture\\AnalyzedData\\preprocessing_ALL_miniscope_ALL_ALL_ALL_ALL_ALL_ALL_ALL_MM_191107_a_tcconsolidate.hdf5',
 'J:\\Drago Guggiana '
 'Nilo\\Prey_capture\\AnalyzedData\\preprocessing_ALL_miniscope_ALL_ALL_ALL_ALL_ALL_ALL_ALL_MM_191105_a_tcconsolidate.hdf5',
 'J:\\Drago Guggiana '
 'Nilo\\Prey_capture\\AnalyzedData\\preprocessing_ALL_miniscope_ALL_ALL_ALL_ALL_ALL_ALL_A

In [6]:
# Use the TCs for each cell and feature to embed with UMAP

tc_whole = []
target_features = processing_parameters.variable_list

# for all the targets
for idx, target_feature in enumerate(target_features):
    # load the data
    data = []
#     meta_data = []
    for file in data_path:
        
        try:
            data.append(pd.read_hdf(file, target_feature))
            meta_data = pd.read_hdf(file, 'meta_data')
            
            # filter out habi trials
            for idx1, el in enumerate(meta_data.to_numpy()):

                if ('habi' in el):
                    # get a vector with the rows to keep
                    keep_vector = ~(data[-1].loc[:, 'id'] == meta_data.loc[idx1, 'id']).to_numpy()
                    data[-1] = data[-1].iloc[keep_vector, :]

        except KeyError:
            continue
    data = pd.concat(data, axis=0)
    
    # load the relevant columns
    if idx == 0:
        target_columns = ['day', 'animal', 'cell_id', 'Resp_index', 'Cons_index'] + [el for el in data.columns if ('bin' in el) & ('half' not in el)]
    else:
        target_columns = ['Resp_index', 'Cons_index'] + [el for el in data.columns if ('bin' in el) & ('half' not in el)]
    
    data = data.loc[:, target_columns]
    
    if idx == 0:
        # convert the day from string to date format
        data.loc[:, 'day'] = np.array([datetime.datetime.strptime(el, '%m_%d_%Y') for el in data.loc[:, 'day']])
    
    # change the column names
    new_names = {el: target_feature+'_'+el if ('bin' in el) | ('index' in el) else el for el in target_columns}
    data = data.rename(columns=new_names)
    # save in the list
    tc_whole.append(data)

# concatenate    
tc_whole = pd.concat(tc_whole, axis=1)
print(tc_whole.shape)
# exclude all rows with nans
cleanup_columns = [el for el in tc_whole.columns if 'Resp' in el]
cleanup_data = tc_whole.loc[:, cleanup_columns].to_numpy()
cleanup_data[np.isnan(cleanup_data)] = 0
cleanup_data[np.isinf(cleanup_data)] = 0
tc_whole.loc[:, cleanup_columns] = cleanup_data

print(tc_whole.shape)


(15177, 355)
(15177, 355)


In [7]:
# sort the TCs to match the regressions
print(tc_whole.columns)

tc_whole = tc_whole.sort_values(['animal', 'day', 'cell_id'], axis=0).reset_index(drop=True)
regression_df = regression_df.sort_values(['animal', 'day', 'cell_id'], axis=0).reset_index(drop=True)

# add the match_id to the TC
tc_whole['match_id'] = regression_df['match_id']

# save copies of the original for subselection
tc_whole_ori = tc_whole.copy()
regression_df_ori = regression_df.copy()

Index(['day', 'animal', 'cell_id', 'mouse_speed_Resp_index',
       'mouse_speed_Cons_index', 'mouse_speed_bin_0', 'mouse_speed_bin_1',
       'mouse_speed_bin_2', 'mouse_speed_bin_3', 'mouse_speed_bin_4',
       ...
       'latent_2_bin_10', 'latent_2_bin_11', 'latent_2_bin_12',
       'latent_2_bin_13', 'latent_2_bin_14', 'latent_2_bin_15',
       'latent_2_bin_16', 'latent_2_bin_17', 'latent_2_bin_18',
       'latent_2_bin_19'],
      dtype='object', length=355)


In [8]:
# explore the roi info

# plot are distributions
freq, edges = np.histogram(np.log10(regression_df['area'].to_numpy()), bins=40)
areas = hv.Histogram((edges, freq), kdims='ROI area (log(px))')
areas.opts(xrotation=45, width=400)

# calculate the distance between matches
# annotate a list to accumulate distances
distance_list = []
# run through all the mice
for mouse_name, mouse_data in regression_df.groupby(['animal']):
    for cell_name, cell_data in mouse_data.groupby(['match_id']):
        # skip the singles
        if cell_data.shape[0] == 1:
            continue
        # get the features
        features = cell_data[['centroid_x', 'centroid_y']]

        # get the distance matrix
        distance_matrix = metrics.pairwise_distances(features.to_numpy())
#         distance_list.append(np.diagonal(distance_matrix, offset=1))
        distance_list.append(np.mean(distance_matrix, axis=0))
# concatenate
distance_list = np.hstack(distance_list)
# calculate and plot the histogram
freq_dist, edges_dist = np.histogram(distance_list, bins=40)
distances = hv.Histogram((edges_dist, freq_dist), kdims='Cortical distance (px)')
distances.opts(xrotation=45, width=400)

(areas+distances).opts(shared_axes=False)

In [9]:
# plot the roi centroid for matching pairs

# initialize the plot list
plot_list = []
# define the number of matches to take per mouse
target_matches = 10
# generate a palette accordingly
colors = palettes.brewer['Spectral'][target_matches]

# run through all the mice
for mouse_name, mouse_data in regression_df.groupby(['animal']):
    
    # get the number of matched cells per id
    match_numbers = mouse_data.groupby(['match_id'], as_index=False)['match_id'].count()

    # exclude the non-matched
    match_numbers = match_numbers.iloc[match_numbers['match_id'].to_numpy()>1, :]

    # if there aren't enough matches, just use all
    target_ids = np.array(match_numbers.index)
    if match_numbers.shape[0] > target_matches:
        target_ids = np.random.choice(target_ids, size=target_matches)
    # initialize a list for this roi's plot
    cell_plot = []
    # for all the ids    
    for idx, ids in enumerate(target_ids):

        # get the data and plot
        current_idx = mouse_data['match_id'].to_numpy() == ids
#         current_data = mouse_data.loc[current_idx, ['centroid_x', 'centroid_y']]
#         plot = hv.Scatter(current_data)
#         cell_plot.append(plot)
        current_data = mouse_data.loc[current_idx, ['bbox_left', 'bbox_top', 'bbox_width', 'bbox_height']]
        # for all the cells
        for index, cells in current_data.iterrows():
            # assemble the bounding box for plotting
            bbox = np.array([[cells['bbox_left'], cells['bbox_top']], [cells['bbox_left']+cells['bbox_width'], cells['bbox_top']],
                            [cells['bbox_left']+cells['bbox_width'], cells['bbox_top']+cells['bbox_height']], [cells['bbox_left'], cells['bbox_top']+cells['bbox_height']], 
                            [cells['bbox_left'], cells['bbox_top']]])
            plot = hv.Curve(bbox)
            plot.opts(color=colors[idx], width=400, height=400, title=mouse_name)
            cell_plot.append(plot)
    # overlay and store
    plot_list.append(hv.Overlay(cell_plot))
# plot
hv.Layout(plot_list).cols(3)

In [10]:
# sub-select cells

# define a target feature to sort by
target_feature = 'mouse_x'
# define the percentage of cells to select
target_percentage = 100

# exclude cells that don't pass an area criterion

# define the area criterion (based on the area histogram across the dataset)
area_min = 30
area_max = 300
distance = 10

keep_vector = (regression_df_ori['area'].to_numpy() > area_min) & (regression_df_ori['area'].to_numpy() < area_max)

# get the distances for the distance criterion
# annotate a list to accumulate distances
distance_list = []
# run through all the mice
for mouse_name, mouse_data in regression_df_ori.groupby(['animal']):
    for cell_name, cell_data in mouse_data.groupby(['match_id']):
        # skip the singles
        if cell_data.shape[0] == 1:
            average_distance = np.nan
        else:
            # get the features
            features = cell_data[['centroid_x', 'centroid_y']]

            # get the distance matrix
            distance_matrix = metrics.pairwise_distances(features.to_numpy())
    #         distance_list.append(np.diagonal(distance_matrix, offset=1))
            average_distance = np.mean(distance_matrix, axis=0)
        # assemble a dataframe
        distance_df = cell_data[['animal', 'day', 'cell_id']].copy()
        distance_df.loc[:, 'distance'] = average_distance
        
        distance_list.append(distance_df)
# concatenate
distance_list = pd.concat(distance_list, axis=0)
distance_list = distance_list.sort_values(['animal', 'day', 'cell_id'], axis=0).reset_index(drop=True)

# threshold
keep_vector = (keep_vector) & (distance_list['distance'].to_numpy() < 10)
# replace in the data
regression_df = regression_df_ori.iloc[keep_vector, :]
tc_whole = tc_whole_ori.iloc[keep_vector, :]

# leave only cells with matches

# allocate the list for accumulation
accumulation_list = []
# also allocate a list for the cell coordinates to save
idx_list = []
# run through all the mice
for mouse_name, mouse_data in regression_df.groupby(['animal']):
    
    for cell_name, cell_data in mouse_data.groupby(['match_id']):
        
        # skip the singles
        if cell_data.shape[0] == 1:
            continue
        # accumulate the cells
        accumulation_list.append(cell_data)
        # accumulate the idx
        idx_list.append(cell_data.index.to_numpy())
# concatenate them
regression_df = pd.concat(accumulation_list)
# also sub-select from the TCs
tc_whole = tc_whole.loc[np.hstack(idx_list), :]

# sort by a target feature

# get the actual target number
target_number = int(np.round(tc_whole.shape[0]*target_percentage/100))

# get the feature sorting
feature = tc_whole.loc[:, target_feature+'_Cons_index'].to_numpy()
# feature = regression_df_ori.loc[:, target_feature].to_numpy()
feature_idx = np.argsort(feature)
sorted_feature = feature[feature_idx]
# exclude the nans
feature_idx = feature_idx[~np.isnan(sorted_feature)]
feature_idx = feature_idx[-target_number:]

# get the cells from both 
tc_whole = tc_whole.iloc[feature_idx, :]
regression_df = regression_df.iloc[feature_idx, :]
# resort
tc_whole = tc_whole.sort_values(['animal', 'day', 'cell_id'], axis=0).reset_index(drop=True)
regression_df = regression_df.sort_values(['animal', 'day', 'cell_id'], axis=0).reset_index(drop=True)
print(feature[feature_idx])

[-0.59102124 -0.5785829  -0.55970407 ...  1.          1.
  1.        ]


In [11]:
# calculate the correlation matrix
# get the columns for both methods
resp_columns = [el for el in tc_whole.columns if 'Resp_index' in el]
cons_columns = [el for el in tc_whole.columns if 'Cons_index' in el]
# get the TCs
tc_resp = tc_whole.loc[:, resp_columns].to_numpy()
tc_cons = tc_whole.loc[:, cons_columns].to_numpy()
tc_indexes = tc_resp * tc_cons

# get the regressions
regression_matrix = regression_df.loc[:, variable_list].to_numpy()

# take the abs
tc_indexes = np.abs(tc_indexes)
regression_matrix = np.abs(regression_matrix)

# # artificially sort both
# tc_indexes.sort(axis=0)
# regression_matrix.sort(axis=0)

# calculate correlation
correlation_matrix, pvalue_matrix = stat.spearmanr(tc_indexes, regression_matrix, nan_policy='omit')

print(tc_indexes.shape)
print(regression_matrix.shape)
print(correlation_matrix.shape)

(4964, 16)
(4964, 16)
(32, 32)


In [12]:
# plot the correlation matrix
ticks = [(idx+0.5, el[:-11]) for idx, el in enumerate(resp_columns+resp_columns)]

raster = hv.Raster(correlation_matrix)
raster.opts(width=1000, height=800, yticks=ticks, xticks=ticks, xrotation=45, colorbar=True, cmap='RdBu', clim=(-1, 1), tools=['hover'])
raster
# print(ticks)

In [13]:
# plot the top and worst cells for each feature

# allocate memory for the plot list
plot_list = []
# define the percentages to plot
percentage_target = 10
number_target = int(np.round(tc_whole.shape[0]*(percentage_target/100)))
# for all the features
for feature in variable_list:
    # get the TC and regression data
#     print(tc_whole.columns)
    tc_resp = np.abs(tc_whole.loc[:, feature+'_Resp_index'].to_numpy().copy())
    tc_cons = np.abs(tc_whole.loc[:, feature+'_Cons_index'].to_numpy().copy())
    tc_data = tc_resp * tc_cons
    tc_data[tc_data > 100] = 0
    regression_data = np.abs(regression_df.loc[:, feature].to_numpy()).copy()
    
    # exclude nan values
    keep_vector = ~np.isnan(tc_data)
    tc_data = tc_data[keep_vector]
    regression_data = regression_data[keep_vector]
    # get the indexes of the top and bottom n percent
    idx = np.argsort(tc_data)
    
    # get the top and bottom
    tc_top = tc_data[idx[-number_target:]]
    regression_top = regression_data[idx[-number_target:]]
    
    tc_bottom = tc_data[idx[:number_target]]
    regression_bottom = regression_data[idx[:number_target]]
    
#     # artificially sort them
#     tc_top.sort()
#     regression_top.sort()
    
    correlation = stat.spearmanr(tc_top, regression_top)[0]
    
    # plot
    top_scatter = hv.Scatter((tc_top, regression_top), kdims=['TC index'], vdims='Regression weight')
    top_scatter.opts(xrotation=45, title=feature+str(correlation), width=300, height=300)
    bottom_scatter = hv.Scatter((tc_bottom, regression_bottom))
    
    plot_list.append(top_scatter*bottom_scatter)
    
hv.Layout(plot_list).cols(4).opts(shared_axes=False)




In [14]:
# normalize the weights between 0-1

regression_normalized = regression_df.copy()

# regression_normalized.loc[:, variable_list] = (regression_normalized.loc[:, variable_list] - regression_normalized.loc[:, variable_list].min(axis=0)) / \
#     (regression_normalized.loc[:, variable_list].max(axis=0) - regression_normalized.loc[:, variable_list].min(axis=0))


In [15]:
# plot the cell matching based info (can use match_id for both methods since they are already sorted)

print(regression_normalized.columns)
# allocate memory for the correlations
correlation_list = []
# run through all the mice
for mouse_name, mouse_data in regression_normalized.groupby(['animal']):
    # if the mouse is empty, skip
#     print(mouse_data.shape)
#     if mouse_data.shape[0] == 0:
#         continue
    #     print(mouse)
    mouse_list = []
    
    for cell_name, cell_data in mouse_data.groupby(['match_id']):
        
        # skip the singles
        if cell_data.shape[0] == 1:
            continue
        # get the features
        features = cell_data[variable_list].to_numpy()
        # calculate the correlation matrix
        correlation = stat.spearmanr(features.T, nan_policy='omit')[0]
        # get the average of the off diagonal elements
#         print(correlation)
        
        if isinstance(correlation, np.ndarray):
            triu = np.triu(correlation, k=1)
            average = np.mean(triu[triu != 0])
        else:
            average = correlation
#         print(type(average))
#         average = np.float(average)
        # store
        mouse_list.append([mouse_name, cell_name, average])
#         print(type(mouse_list[-1][-1]))
#         print(np.vstack(mouse_list))
#         raise ValueError
    # if no matches were found, skip
    if len(mouse_list) == 0:
        continue
    correlation_list.append(pd.DataFrame(np.vstack(mouse_list), columns=['animal', 'match_id', 'correlation']))
#     print(correlation_list[0])
#     raise ValueError
    
correlation_list = pd.concat(correlation_list)
correlation_list['correlation'] = correlation_list['correlation'].to_numpy().astype(float)
print(correlation_list)

Index(['animal', 'day', 'match_id', 'cell_id', 'mouse_speed', 'centroid_x',
       'centroid_y', 'bbox_left', 'bbox_top', 'bbox_width', 'bbox_height',
       'area', 'mouse_x', 'mouse_angular_speed', 'cricket_0_mouse_distance',
       'cricket_0_delta_heading', 'cricket_0_x', 'ego_cricket_x',
       'cricket_0_visual_angle', 'hunt_trace', 'cricket_0_direction',
       'cricket_0_loom', 'cricket_0_delta_visual', 'motifs', 'latent_0',
       'latent_1', 'latent_2'],
      dtype='object')


  c /= stddev[:, None]
  c /= stddev[None, :]


         animal match_id  correlation
0   DG_200617_b      0.0     0.288235
1   DG_200617_b      3.0     0.464706
2   DG_200617_b      4.0     0.139263
3   DG_200617_b      5.0    -0.008627
4   DG_200617_b      7.0     0.026471
..          ...      ...          ...
28  MM_200129_b     46.0    -0.067647
29  MM_200129_b     53.0    -0.239216
30  MM_200129_b     59.0     0.002941
31  MM_200129_b     66.0    -0.253922
32  MM_200129_b     68.0    -0.102941

[1222 rows x 3 columns]


In [16]:
# plot the distribution of correlations

plot_list = []
# # for all the mice
# for mouse_name, mouse_data in correlation_list.groupby(['animal']):
#     plot = hv.Scatter(mouse_data['correlation'])
# plot_array = correlation_list[['animal', 'correlation']].copy().reset_index(drop=True)
plot_array = correlation_list
# plot_array[plot_array.isna()] = 0
# print(type(plot_array.iloc[1, 1]))
# print(type(float(plot_array.iloc[1, 1])))
# plot_array[np.isinf(plot_array)] = 0
box = hv.BoxWhisker(plot_array, ['animal'], ['correlation'])
box.opts(width=800, height=600, xrotation=45)
box

In [17]:
# calculate averages per mouse

# allocate memory for the correlations
correlation_interval = []
# run through all the mice
for mouse_name, mouse_data in regression_normalized.groupby(['animal']):
    
    mouse_list = []    
    random_list = []
    for cell_name, cell_data in mouse_data.groupby(['match_id']):
        # skip the singles
        if cell_data.shape[0] == 1:
            continue
        # get the features
        features = cell_data[variable_list]
        
        # calculate the correlation matrix
        correlation = stat.spearmanr(features.T, nan_policy='omit')[0]
        
        # get a set of random cells of this size
        random_idx = np.random.randint(0, mouse_data.shape[0]-1, cell_data.shape[0])
        random_cells = mouse_data.iloc[random_idx, :]
        
        random_correlation = stat.spearmanr(random_cells[variable_list].T, nan_policy='omit')[0]
        
        if isinstance(random_correlation, float) & isinstance(correlation, np.ndarray):
            random_correlation = np.zeros_like(correlation)*np.nan
        elif isinstance(correlation, float) & isinstance(random_correlation, np.ndarray):
            random_correlation = np.nan
        # get the delta days
        day_data = cell_data.loc[:, 'day'].to_numpy()
        delta_days = [(el-day_data[0]) for el in day_data]
        delta_days = (delta_days/np.timedelta64(1, 'D')).astype(int)

        # get the average of the off diagonal elements
        if isinstance(correlation, np.ndarray):
            # for all the off diagonals
            for el in np.arange(1, correlation.shape[0]):
                diag = np.diagonal(correlation, offset=el)
                average = np.mean(diag)
                
                random_diag = np.diagonal(random_correlation, offset=el)
                random_average = np.mean(random_diag)
                # store
                mouse_list.append([mouse_name, cell_name, delta_days[el], average])   
                random_list.append([mouse_name+'_random', cell_name, delta_days[el], random_average])   
        else:
            average = correlation
            random_average = random_correlation
            # store
            mouse_list.append([mouse_name, cell_name, delta_days[1], average])
            random_list.append([mouse_name+'_random', cell_name, delta_days[1], random_average])
    # if no matches were found, skip
    if len(mouse_list) == 0:
        continue
    correlation_interval.append(pd.DataFrame(np.vstack(mouse_list), columns=['animal', 'match_id', 'interval', 'correlation']))
    correlation_interval.append(pd.DataFrame(np.vstack(random_list), columns=['animal', 'match_id', 'interval', 'correlation']))

    
correlation_interval = pd.concat(correlation_interval)
correlation_interval['correlation'] = correlation_interval['correlation'].to_numpy().astype(float)

print(correlation_interval.shape)

  c /= stddev[:, None]
  c /= stddev[None, :]


(6680, 4)


In [18]:
# plot the correlations per animal

# allocate a list for the plots
plot_list = []
# calculate the mean and sem
grouped_data = correlation_interval.groupby(['animal'], as_index=False)[['interval', 'correlation']]

# for all the animals
for mouse_name, mouse_data in grouped_data:
    
    mouse_mean = mouse_data.groupby(['interval'], as_index=False)['correlation'].mean()
    mouse_mean.loc[:, 'interval'] = mouse_mean.loc[:, 'interval'].astype(float)
    mouse_mean = mouse_mean.sort_values(['interval'])
    
    mouse_sem = mouse_data.groupby(['interval'], as_index=False)['correlation'].sem()
    mouse_sem.loc[:, 'interval'] = mouse_sem.loc[:, 'interval'].astype(float)
    mouse_sem = mouse_sem.sort_values(['interval'])
    
    mean_plot = hv.Curve((mouse_mean['interval'], mouse_mean['correlation']), kdims=['Interval'], vdims='Correlation', label=mouse_name)
    sem_plot = hv.Spread((mouse_mean['interval'], mouse_mean['correlation'], mouse_sem['correlation']))
    mean_plot.opts(height=600, width=800, xrotation=45)
    
    plot_list.append(mean_plot*sem_plot)
hv.Overlay(plot_list)

In [19]:
# calculate averages per feature

# allocate memory for the correlations
feature_diffs = {}

# for all the features
for feature in variable_list:
    feature_df = []
    # run through all the mice
    for mouse_name, mouse_data in regression_normalized.groupby(['animal']):
        mouse_list = []
        random_list = []
        for cell_name, cell_data in mouse_data.groupby(['match_id']):
            # skip the singles
            if cell_data.shape[0] == 1:
                continue
            # get the features
            features = cell_data[feature]

            # get the distance matrix
            distances = metrics.pairwise_distances(features.to_numpy().reshape(-1, 1))
#             distances[distances == 0] = np.nan
#             distances = 1 - distances
    
            # get a set of random cells of this size
            random_idx = np.random.randint(0, mouse_data.shape[0]-1, cell_data.shape[0])
            random_cells = mouse_data.iloc[random_idx, :]
            random_distances = metrics.pairwise_distances(random_cells[feature].to_numpy().reshape(-1, 1))
#             random_distances = 1 - random_distances
            
            # get the delta days
            day_data = cell_data.loc[:, 'day'].to_numpy()
            delta_days = [(el-day_data[0]) for el in day_data]
            delta_days = (delta_days/np.timedelta64(1, 'D')).astype(int)
            # get the average of the off diagonal elements
            if isinstance(distances, np.ndarray):
                # for all the off diagonals
                for el in np.arange(1, distances.shape[0]):
                    diag = np.diagonal(distances, offset=el)
                    average = np.mean(diag)
                    mouse_list.append([mouse_name, cell_name, delta_days[el], average])
                    
                    random_diag = np.diagonal(random_distances, offset=el)
                    random_average = np.mean(random_diag)
                    random_list.append([mouse_name+'_random', 'random', delta_days[el], random_average])

            else:
                average = distances
                random_average = random_distances

                # store
                mouse_list.append([mouse_name, cell_name, delta_days[1], average])
                random_list.append([mouse_name+'_random', 'random', delta_days[1], random_average])
        # if not matches were found, skip
        if len(mouse_list) == 0:
            continue
        feature_df.append(pd.DataFrame(np.vstack(mouse_list), columns=['animal', 'match_id', 'interval', 'similarity']))
        feature_df.append(pd.DataFrame(np.vstack(random_list), columns=['animal', 'match_id', 'interval', 'similarity']))

    feature_df = pd.concat(feature_df)
    feature_df['similarity'] = feature_df['similarity'].to_numpy().astype(float)
    # store for the particular feature
    feature_diffs[feature] = feature_df

print(feature_diffs[feature].shape)

(7000, 4)


In [20]:
# plot the per-animal and per-feature distances
main_list = []

# for all the features
for feature in variable_list:
    plot_list = []
    # get the dataframe
    current_df = feature_diffs[feature]
    
    # calculate the mean and sem
    grouped_data = current_df.groupby(['animal'], as_index=False)[['interval', 'similarity']]

    # for all the animals
    for mouse_name, mouse_data in grouped_data:

        mouse_mean = mouse_data.groupby(['interval'], as_index=False)['similarity'].mean()
        mouse_mean.loc[:, 'interval'] = mouse_mean.loc[:, 'interval'].astype(float)
        mouse_mean = mouse_mean.sort_values(['interval'])

        mouse_sem = mouse_data.groupby(['interval'], as_index=False)['similarity'].sem()
        mouse_sem.loc[:, 'interval'] = mouse_sem.loc[:, 'interval'].astype(float)
        mouse_sem = mouse_sem.sort_values(['interval'])

        mean_plot = hv.Curve((mouse_mean['interval'], mouse_mean['similarity']), kdims=['Interval'], vdims='Similarity')
        sem_plot = hv.Spread((mouse_mean['interval'], mouse_mean['similarity'], mouse_sem['similarity']))
        mean_plot.opts(height=400, width=400, xrotation=45, title=feature)

        plot_list.append(mean_plot*sem_plot)
    main_list.append(hv.Overlay(plot_list))

hv.Layout(main_list).cols(4).opts(shared_axes=False)

In [30]:
# plot across intervals

# plot the per-animal and per-feature distances
main_list = []
# define the target interval
target_interval = '1'
# for all the features
for feature in variable_list:
    # get the dataframe
#     current_df = feature_diffs[feature]
#     current_df['feature'] = feature
    # get the dataframe
    current_df = feature_diffs[feature]
    current_real = current_df[(current_df['match_id'] != 'random') & (current_df['interval'] == target_interval)]
    current_random = current_df[(current_df['match_id'] == 'random') & (current_df['interval'] == target_interval)]
    current_real['feature'] = feature
    current_random['feature'] = feature+'_random'
    
    current_real['similarity'] = current_real['similarity'].fillna(0)
    current_random['similarity'] = current_random['similarity'].fillna(0)
#     print((current_df['interval'] ))
    print(feature, stat.mannwhitneyu(current_real['similarity'], current_random['similarity'])[1]*len(variable_list))

    # store
    main_list.append(current_real)
    main_list.append(current_random)

    # store
    main_list.append(current_df)

# turn into a dataframe
main_list = pd.concat(main_list)

box = hv.BoxWhisker(main_list, ['feature'], ['similarity'])
box.opts(width=1200, height=600, xrotation=45, ylabel='distance')
box
#         mean_plot = hv.Curve((mouse_mean['interval'], mouse_mean['correlation']))
#         sem_plot = hv.Spread((mouse_mean['interval'], mouse_mean['correlation'], mouse_sem['correlation']))
#         mean_plot.opts(height=400, width=400, xrotation=45, title=feature)

#         plot_list.append(mean_plot*sem_plot)
#     main_list.append(hv.Overlay(plot_list))

# hv.Layout(main_list).cols(4).opts(shared_axes=False)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  current_real['feature'] = feature
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  current_random['feature'] = feature+'_random'
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  current_real['similarity'] = current_real['similarity'].fillna(0)
A value is trying to be set on a copy of a slice from a Dat

mouse_speed 7.636377446326415
mouse_x 11.00965521981702
mouse_angular_speed 13.759652069677847
cricket_0_mouse_distance 0.350051133817404
cricket_0_delta_heading 7.368957652176318
cricket_0_x 12.430168110652591
ego_cricket_x 4.137104600317878
cricket_0_visual_angle 7.241558233313176
hunt_trace 0.9292972197013102
cricket_0_direction 14.148045429775507
cricket_0_loom 15.005860261161152
cricket_0_delta_visual 7.030127585673693
motifs 9.37792498276286
latent_0 12.735803229502814
latent_1 10.347955000846133
latent_2 14.899256851507273


In [51]:
# get the tc similarities by correlating the actual TCs

# print(tc_whole.columns[:50])
# allocate the output
tc_interval = {}
# for all the features
for feature in variable_list:
    # get the relevant columns
    current_columns = [el for el in tc_whole.columns if ('bin' in el) & (feature in el)]
#     print(current_columns)
#     raise ValueError
    current_df = tc_whole[['day', 'animal', 'match_id'] + current_columns]
    
    feature_df = []
    # run through all the mice
    for mouse_name, mouse_data in current_df.groupby(['animal']):
        mouse_list = []
        random_list = []
        index_list = []
        for match_id, cell_data in mouse_data.groupby(['match_id']):
            # skip the singles
            if cell_data.shape[0] < 4:
                continue
            # calculate the correlation
#             correlation = stat.spearmanr(cell_data[current_columns].T, nan_policy='omit')[0]
            current_cells = cell_data[current_columns].to_numpy()
            current_cells[np.isnan(current_cells)] = 0    
            correlation = metrics.pairwise_distances(current_cells)

            
            # get a set of random cells of this size
            random_idx = np.random.randint(0, mouse_data.shape[0]-1, cell_data.shape[0])
            random_cells = mouse_data.iloc[random_idx, :]
            random_cells = random_cells[current_columns].to_numpy()
            random_cells[np.isnan(random_cells)] = 0    
#             random_correlation = stat.spearmanr(random_cells[current_columns].T, nan_policy='omit')[0]
            random_correlation = metrics.pairwise_distances(random_cells)
            
            if isinstance(random_correlation, float) & isinstance(correlation, np.ndarray):
                random_correlation = np.zeros_like(correlation)*np.nan
            elif isinstance(correlation, float) & isinstance(random_correlation, np.ndarray):
                random_correlation = np.nan
            
            # get the delta days
            day_data = cell_data.loc[:, 'day'].to_numpy()
            delta_days = [(el-day_data[0]) for el in day_data]
            delta_days = (delta_days/np.timedelta64(1, 'D')).astype(int)
            # get the average of the off diagonal elements
            if isinstance(correlation, np.ndarray):
                # for all the off diagonals
                for el in np.arange(1, correlation.shape[0]):
                    diag = np.diagonal(correlation, offset=el)
                    average = np.mean(diag)
                    
                    random_diag = np.diagonal(random_correlation, offset=el)
                    random_average = np.mean(random_diag)
                    # store
                    mouse_list.append([mouse_name, match_id, delta_days[el], average])      
                    random_list.append([mouse_name+'_random', 'random', delta_days[el], random_average])      
            else:
                average = correlation
                random_average = random_correlation
                # store
                mouse_list.append([mouse_name, match_id, delta_days[1], average])
                random_list.append([mouse_name+'_random', 'random', delta_days[1], random_average])

        if len(mouse_list) == 0:
            continue
        feature_df.append(pd.DataFrame(np.vstack(mouse_list), columns=['animal', 'match_id', 'interval', 'correlation']))
        feature_df.append(pd.DataFrame(np.vstack(random_list), columns=['animal', 'match_id', 'interval', 'correlation']))
    
    
    feature_df = pd.concat(feature_df)
    feature_df['correlation'] = feature_df['correlation'].to_numpy().astype(float)
    
    # store for the particular feature
    tc_interval[feature] = feature_df

print(tc_interval[feature].shape)
    

(5066, 4)


In [23]:
# plot the tc correlations as a function of feature
main_list = []

# for all the features
for feature in variable_list:
    plot_list = []
    # get the dataframe
    current_df = tc_interval[feature]
    
    # calculate the mean and sem
    grouped_data = current_df.groupby(['animal'], as_index=False)[['interval', 'correlation']]

    # for all the animals
    for mouse_name, mouse_data in grouped_data:

        mouse_mean = mouse_data.groupby(['interval'], as_index=False)['correlation'].mean()
        mouse_mean.loc[:, 'interval'] = mouse_mean.loc[:, 'interval'].astype(float)
        mouse_mean = mouse_mean.sort_values(['interval'])

        mouse_sem = mouse_data.groupby(['interval'], as_index=False)['correlation'].sem()
        mouse_sem.loc[:, 'interval'] = mouse_sem.loc[:, 'interval'].astype(float)
        mouse_sem = mouse_sem.sort_values(['interval'])
        
        mean_plot = hv.Curve((mouse_mean['interval'], mouse_mean['correlation']), kdims=['Interval'], vdims='Correlation')
        sem_plot = hv.Spread((mouse_mean['interval'], mouse_mean['correlation'], mouse_sem['correlation']))
        if 'random' in mouse_name:
            color = 'k'
        else:
            color = 'r'    
        mean_plot.opts(height=400, width=400, xrotation=45, title=feature, color=color)
        sem_plot.opts(height=400, width=400, xrotation=45, title=feature, color=color)

        plot_list.append(mean_plot*sem_plot)
    main_list.append(hv.Overlay(plot_list))

hv.Layout(main_list).cols(4).opts(shared_axes=False)

In [52]:
# plot the per-animal and per-feature distances
main_list = []

# define the target interval
target_interval = '1'
# for all the features
for feature in variable_list:
    # get the dataframe
    current_df = tc_interval[feature]
    current_real = current_df[(current_df['match_id'] != 'random') & (current_df['interval'] == target_interval)]
    current_random = current_df[(current_df['match_id'] == 'random') & (current_df['interval'] == target_interval)]
    current_real['feature'] = feature
    current_random['feature'] = feature+'_random'
    
    current_real['correlation'] = current_real['correlation'].fillna(0)
    current_random['correlation'] = current_random['correlation'].fillna(0)
    
    print(feature, stat.mannwhitneyu(current_real['correlation'], current_random['correlation'])[1]*len(variable_list), current_real.shape[0])
#     # calculate the mean and sem
#     data_mean = current_df.loc[:, 'correlation'].mean().to_numpy()
#     data_sem = current_df.loc[:, 'correlation'].sem().to_numpy()
    # store
    main_list.append(current_real)
    main_list.append(current_random)

# turn into a dataframe
main_list = pd.concat(main_list)

box = hv.BoxWhisker(main_list, ['feature'], ['correlation'])
box.opts(width=1000, height=600, xrotation=45, ylabel='Distance')
box

mouse_speed 1.2459653673201363 212
mouse_x 11.746115069953502 212
mouse_angular_speed 3.411307044159635 212
cricket_0_mouse_distance 14.78256015869353 212
cricket_0_delta_heading 10.418173883661382 212
cricket_0_x 7.242449507615429 212
ego_cricket_x 13.501692294371047 212
cricket_0_visual_angle 5.416444355585588 212
hunt_trace 0.8244601820711199 212
cricket_0_direction 3.543674656237709 212
cricket_0_loom 7.250083971337405 212
cricket_0_delta_visual 2.8311603198169504 212
motifs 10.436450907883234 212
latent_0 1.785150799291412 212
latent_1 5.195402863563508 212
latent_2 3.23742708202796 212


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  current_real['feature'] = feature
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  current_random['feature'] = feature+'_random'
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  current_real['correlation'] = current_real['correlation'].fillna(0)
A value is trying to be set on a copy of a slice from a D

In [33]:
# visualize single cell TCs based on matching

# define the target feature
target_feature = 'cricket_0_mouse_distance'

# define the number of groups to take
target_number = 20

# get only the non-random data
# print(tc_interval[target_feature]['match_id'] != 'random')
tc_nonrandom = tc_mean[target_feature][tc_mean[target_feature]['match_id'] != 'random']
# get the top N groups for the target feature
top_idx = np.argsort(tc_nonrandom['distance'].to_numpy())[:target_number]
# print(np.sort(tc_nonrandom['correlation'].to_numpy())[-target_number:])
# raise ValueError
top_groups = tc_nonrandom.iloc[top_idx, :]

# initialize the plot list
plot_list = []
# plot the corresponding TCs
for idx, group in top_groups.iterrows():
    # get the match id and mouse
    target_mouse = group['animal']
    target_id = float(group['match_id'])
    # get the individual tuning curves
    target_cells = tc_whole[(tc_whole['animal']==target_mouse) & (tc_whole['match_id']==target_id)]
    print(target_mouse, target_id, target_cells.shape, group['distance'])
    # get the relevant columns
    target_columns = [el for el in tc_whole.columns if ('bin' in el) & (target_feature in el)]
    target_cells = target_cells[target_columns]
    x = np.arange(target_cells.shape[1])
    # plot
    # accumulate the tuning curves
    tc_list = []
    # for all the cells
    for cell_idx, cell in target_cells.iterrows():
        plot = hv.Curve((x, cell.to_numpy()), kdims=['Bin'], vdims='Tuning')
        plot.opts(height=300, width=300)
        tc_list.append(plot)
    # store
    plot_list.append(hv.Overlay(tc_list))
# plot the layout
layout = hv.Layout(plot_list).opts(shared_axes=False)
layout

DG_210202_a 178.0 (2, 356) 0.0025179996
DG_210202_a 1267.0 (2, 356) 0.0036405765
DG_200701_a 65.0 (2, 356) 0.0051371516
DG_210202_a 333.0 (2, 356) 0.006212522
DG_200701_a 475.0 (2, 356) 0.006610353
DG_200617_b 4.0 (2, 356) 0.006615699
DG_210202_a 306.0 (3, 356) 0.0067342073
DG_210202_a 1110.0 (2, 356) 0.0072023114
DG_210202_a 1074.0 (2, 356) 0.00747444
DG_210202_a 723.0 (2, 356) 0.0078112367
MM_191108_a 38.0 (2, 356) 0.0081030335
DG_210202_a 1102.0 (2, 356) 0.008134334
DG_210202_a 699.0 (2, 356) 0.008396027
DG_210202_a 747.0 (3, 356) 0.008415939
DG_210202_a 612.0 (4, 356) 0.008683578
DG_210202_a 714.0 (2, 356) 0.009343282
DG_200701_a 583.0 (2, 356) 0.0093541825
DG_200701_a 191.0 (2, 356) 0.009824036
DG_200701_a 435.0 (2, 356) 0.010888291
DG_210202_a 843.0 (2, 356) 0.011167878
