# Visualize comparisons between real and virtual prey capture attempts

In [2]:
import os
import sys
sys.path.insert(0, r'..\..')
import paths

import panel as pn
import holoviews as hv
from holoviews import opts, dim
hv.extension('bokeh')
from bokeh.resources import INLINE

import functions_bondjango as bd
import functions_kinematic as fk
import functions_plotting as fp
import functions_misc as fm
import functions_data_handling as fd
import pandas as pd
import numpy as np
import h5py

from scipy.stats import sem
import sklearn.decomposition as decomp
import umap
import sklearn.mixture as mix
from scipy.stats import sem


# Line width setting. NOTE(review): no cell shown here reads this variable;
# the plots below pass literal line_width values instead - confirm it is still needed.
line_width = 5

In [3]:
# Define a data loading function

def load_dataset(search_string, exclusion=None):
    """Load one aggregated data set and assemble a short label for it.

    The file path is taken from ``snakemake.input[0]`` when a ``snakemake``
    object exists; otherwise the 'analyzed_data' table is queried with
    ``search_string`` and the first usable entry is taken.

    Args:
        search_string: query passed to ``bd.query_database`` and to
            ``fd.parse_search_string``.
        exclusion: optional substring; query results whose 'analysis_path'
            contains it are skipped.

    Returns:
        Tuple ``(data, label)``: ``data`` is the result of
        ``fd.aggregate_loader`` for the selected path, ``label`` joins the
        rig, lighting, result and notes fields of the parsed search string
        with underscores.

    Raises:
        IndexError: the query returned no usable entry (nothing matched, or
            every match contains ``exclusion``).
    """
    try:
        data_path = snakemake.input[0]
    except NameError:
        # no snakemake object available: query the database for data to plot
        data_all = bd.query_database('analyzed_data', search_string)

        # lazily walk the results and stop at the first one that is not excluded
        usable_paths = (
            ds['analysis_path'] for ds in data_all
            if exclusion is None or exclusion not in ds['analysis_path']
        )
        data_path = next(usable_paths, None)
        if data_path is None:
            # previously this left data_path unbound and failed at the print below
            raise IndexError(
                'no analyzed_data entry for %r (exclusion=%r)' % (search_string, exclusion)
            ) from None
    print(data_path)

    # assemble a label for this data set
    d = fd.parse_search_string(search_string)
    label = '_'.join([d['rig'], d['lighting'], d['result'], d['notes']])
    print('data label: ' + label + '\n')

    # load the data
    return fd.aggregate_loader(data_path), label

In [4]:
# prefix of every figure file name saved by the cells below
# (it is joined with '_' to a per-figure suffix before saving)
save_name = 'VPrey'

## Encounter analysis

### Load the encounter data

In [140]:
# container for the data sets, keyed by the label load_dataset assembles
data_dict = {}

# one (query, exclusion) pair per condition, loaded in this order:
#   real prey capture in the light (paths containing 'obstacle' are skipped)
#   real prey capture in the dark
#   VR prey capture with black cricket
encounter_queries = [
    ('result:succ, lighting:normal, rig:VR, analysis_type:aggEnc', 'obstacle'),
    ('result:succ, lighting:dark, rig:VR, analysis_type:aggEnc', None),
    ('result:test, lighting:normal, rig:VPrey, analysis_type:aggEnc, notes:blackCr_crickets_0_vrcrickets_1', None),
]

for search_string, excluded in encounter_queries:
    ds, label = load_dataset(search_string, exclusion=excluded)
    data_dict[label] = ds

# drop the extra reference to the last data set; it is still held by data_dict
del ds


J:\Drago Guggiana Nilo\Prey_capture\AnalyzedData\preprocessing_succ_VR_normal_ALL_crickets_1_vrcrickets_0_ALL_ALL_2020-06-19T00-00-00_ALL_aggEnc.hdf5
data label: VR_normal_succ_ALL

J:\Drago Guggiana Nilo\Prey_capture\AnalyzedData\preprocessing_succ_VR_dark_ALL_crickets_1_vrcrickets_0_ALL_ALL_2020-06-19T00-00-00_ALL_aggEnc.hdf5
data label: VR_dark_succ_ALL

J:\Drago Guggiana Nilo\Prey_capture\AnalyzedData\preprocessing_test_VPrey_normal_ALL_blackCr_crickets_0_vrcrickets_1_ALL_ALL_ALL_ALL_aggEnc.hdf5
data label: VPrey_normal_test_blackCr_crickets_0_vrcrickets_1



### Encounter averages

In [14]:
# Average every encounter variable per index value and plot mean +/- error
# for each data set.
plot_dict = {}

# variables to summarize; the heading variables go through the circular
# statistics helpers in fk, the others through plain mean / sem
encounter_angle_variables = ['mouse_heading', 'cricket_0_heading', 'cricket_0_delta_heading']
encounter_nonangle_variables = ['cricket_0_mouse_distance', 'mouse_speed', 'mouse_acceleration', 'cricket_0_speed', 'cricket_0_acceleration']
# define the variables to plot from
encounter_variables = encounter_angle_variables + encounter_nonangle_variables

for name in data_dict.keys():

    data = data_dict[name]

    plot_container = {}

    # assign() returns a new frame, so the grouping column is never written
    # into a column-subset of `data` (which triggered chained-assignment warnings)
    angled_params = data[encounter_angle_variables].assign(event_index=data.index)
    nonangled_params = data[encounter_nonangle_variables].assign(event_index=data.index)

    # rows sharing the same frame-index value are averaged together
    angled_average = fk.wrap(angled_params.groupby('event_index').agg(lambda x: 180 + fk.circmean_deg(x)))
    angled_std = pd.DataFrame(fk.unwrap(angled_params.groupby('event_index').agg(lambda x: fk.circstd_deg(x)/np.sqrt(x.shape[0]))), columns=encounter_angle_variables)

    nonangled_average = nonangled_params.groupby('event_index').mean()
    nonangled_std = pd.DataFrame(nonangled_params.groupby('event_index').sem(), columns=encounter_nonangle_variables)

    encounter_average = pd.concat((angled_average, nonangled_average), axis=1)
    encounter_sem = pd.concat((angled_std, nonangled_std), axis=1)

    # get the trials
    trial_list = data['trial_id'].unique()
    # get the time vector of the first event of the first trial
    # (not used by the plots below, which use the sample index as x)
    time_vector = data.loc[(data['event_id'] == 0) & (data['trial_id'] == trial_list[0]), 'time_vector'].to_numpy()

    # for each of the variables
    for variable in encounter_variables:
        x = np.arange(encounter_average[variable].size)
        y = encounter_average[variable].to_numpy()
        yerr = encounter_sem[variable].to_numpy()

        # shaded error band with the mean drawn on top in black
        plot_container[variable] = hv.Spread((list(x), list(y), list(yerr)), label=name).opts(title=variable) * hv.Curve((list(x), list(y))).opts(color='black')


    plot_dict[('variables', name)] = hv.GridSpace(plot_container, kdims=['variable'])

encounters = hv.GridSpace(plot_dict, kdims=['variables', 'dataset']).opts(plot_size=300)
full_panel = pn.panel(encounters, center=True, widget_location='top')
full_panel


### Number of encounters per trial

In [141]:
# Bar plot of the encounter count of every trial, one panel per condition

# panels are collected here, in data_dict order
plot_list = []

for name, data in data_dict.items():

    # event ids of every row, collected into one list per trial
    parameter = data[['event_id', 'trial_id']].copy()
    grouped_parameter = parameter.groupby(['trial_id']).agg(list)

    # the last event id of a trial plus one is taken as its encounter count
    # (presumes event ids count up from 0 within a trial - TODO confirm)
    last_event_ids = [event_ids[-1] for event_ids in grouped_parameter['event_id']]
    encounters = np.array(last_event_ids) + 1

    # bars per trial with the mean drawn as a red horizontal line
    trial_axis = np.arange(encounters.shape[0])
    bars = hv.Bars((trial_axis, encounters)).opts(title=name, xlabel='trial', ylabel='# encounters')
    mean_line = hv.HLine(encounters.mean()).opts(color='red', line_width=1)
    enc_plot = bars * mean_line
    plot_list.append(enc_plot)


encounters_panel = hv.Layout(plot_list)
save_path = os.path.join(paths.figures_path, '_'.join([save_name, 'encounters']))
hv.save(encounters_panel, save_path, fmt='png')

# display the image
encounters_panel


### PCA of encounter types

In [6]:
# parameter whose per-encounter traces are decomposed with PCA
target_parameter = 'cricket_0_mouse_distance'

# per-data-set results, appended in data_dict order
plot_list = []        # explained-variance curves
pca_transforms = []   # PCA scores of every trace
target = []           # trace matrices that were fed to the PCA

# HACK REMOVE: only traces with exactly this many samples are kept
required_trace_length = 594

for name, data in data_dict.items():

    # one list of samples per (trial, encounter) pair
    trace_table = data[[target_parameter, 'event_id', 'trial_id']]
    target_data = trace_table.groupby(['trial_id', 'event_id']).agg(list).to_numpy()

    # flatten to a 2D array, dropping traces of any other length
    target_data = np.array([trace for row in target_data for trace in row
                            if len(trace) == required_trace_length])
    target.append(target_data)

    # PCA the data before clustering
    pca = decomp.PCA()
    transformed_data = pca.fit_transform(target_data)
    pca_transforms.append(transformed_data)

    # explained variance ratio of every component
    exp_var = hv.Curve(pca.explained_variance_ratio_).opts(xlabel='PCs', ylabel='explained variance', title=name)
    plot_list.append(exp_var)

hv.Layout(plot_list)

### Gaussian mixture model of clusters

In [7]:
# Cluster the PCA scores of every data set with Gaussian mixture models,
# choosing the number of components by the Bayesian information criterion
plot_list = []
clusters = []

# candidate numbers of mixture components
component_vector = [2, 3, 4, 5, 10, 15]

for transformed_data, name in zip(pca_transforms, data_dict.keys()):

    # only the first 7 PCs are clustered
    pcs = transformed_data[:, :7]

    # fit one model per candidate and remember both the model and its BIC
    fitted_models = []
    gmms = []   # BIC value of every candidate
    for comp in component_vector:
        gmm = mix.GaussianMixture(n_components=comp, covariance_type='diag', n_init=50)
        gmm.fit(pcs)
        fitted_models.append(gmm)
        gmms.append(gmm.bic(pcs))

    # select the minimum bic number of components
    best = int(np.argmin(gmms))
    n_components = component_vector[best]

    # Reuse the winning fit instead of fitting again: a second fit starts from
    # new random initializations, costs another 50 runs, and can produce a
    # clustering that no longer matches the BIC plotted below.
    gmm = fitted_models[best]
    # float dtype so discarded clusters can be marked with NaN
    cluster_idx = gmm.predict(pcs).astype(float)

    # discard clusters with fewer than 5 traces
    clu_unique, clu_sizes = np.unique(cluster_idx, return_counts=True)
    cluster_idx[np.isin(cluster_idx, clu_unique[clu_sizes < 5])] = np.nan
    clusters.append(cluster_idx)

    # plot the BIC
    BIC = hv.Curve((component_vector, gmms)).opts(title=name, xlabel='cluster', ylabel='BIC')
    plot_list.append(BIC)

hv.Layout(plot_list)


In [18]:
# plot the mean trace (with standard error band) of every cluster
plot_list = []

# label of the value axis, shared by all curves
value_label = target_parameter.replace('_', ' ') + ' (px)'

for target_data, cluster_idx, name in zip(target, clusters, data_dict.keys()):

    # Cluster ids present in THIS data set. The global n_components only holds
    # the value of the last data set clustered, and clusters that were
    # discarded (set to NaN) would give empty-slice means, so the ids are read
    # from cluster_idx itself.
    cluster_ids = np.unique(cluster_idx[~np.isnan(cluster_idx)]).astype(int)

    # mean trace and standard error of the mean per cluster
    cluster_data = np.array([np.mean(target_data[cluster_idx == el, :], axis=0) for el in cluster_ids])
    cluster_std = np.array([np.std(target_data[cluster_idx == el, :], axis=0)/np.sqrt(np.sum(cluster_idx == el))
                            for el in cluster_ids])

    # one curve and one error band per cluster; curves are labelled by cluster id
    curves = [hv.Curve(trace, label=str(clu), kdims=['Time (s)'], vdims=[value_label])
              for clu, trace in zip(cluster_ids, cluster_data)]
    spreads = [hv.Spread((np.arange(trace.shape[0]), trace, err))
               for trace, err in zip(cluster_data, cluster_std)]
    cluster_plot = hv.Overlay(curves + spreads)

    # relabel() returns a new object, so keep the result; otherwise the palette
    # options end up on a throwaway clone
    cluster_plot = cluster_plot.relabel('Clusters').opts({'Curve': dict(color=hv.Palette('Category20')),
                                                          'Spread': dict(color=hv.Palette('Category20'))})

    cluster_plot.opts(title=name)

    # For publication-ready image
    # (for a quick interactive look, keep only xticks/yticks in opts.Curve)
    cluster_plot.opts(
        opts.Curve(
            width=fp.pix(10.7), height=fp.pix(5),
            toolbar=None, hooks=[fp.margin],
            fontsize=fp.font_sizes['small'],
            line_width=12, xticks=3, yticks=3
        ),
        opts.Overlay(legend_position='right', text_font='Arial')
    )

    plot_list.append(cluster_plot)

cluster_trace_panel = hv.Layout(plot_list)
# assemble the save path (the original misspelled target_parameter here, raising NameError)
save_path = os.path.join(paths.figures_path, '_'.join([save_name, target_parameter, 'cluster']))
hv.save(cluster_trace_panel, save_path, fmt='png')

# display the image
cluster_trace_panel



In [30]:
# Collect, per data set, an image of the traces assigned to cluster 5
# (the cluster id is hard-coded; nothing is displayed or saved by this cell)
plot_list = []

for target_data, cluster_idx, name in zip(target, clusters, data_dict):
    in_cluster = cluster_idx == 5
    image = hv.Image(target_data[in_cluster, :], kdims=['Time', 'Encounters'])
    image = image.opts(title=name)
    plot_list.append(image)




In [19]:
# plot all encounter traces of each data set as an image, with the rows in the
# order returned by fp.sort_traces
plot_list = []

# HACK: only traces with exactly this many samples are kept
trace_length = 594

for name in data_dict.keys():
    data = data_dict[name]

    # group the single traces: one list of samples per (trial, encounter)
    grouped_parameter = data.loc[:, [target_parameter, 'event_id', 'trial_id']].groupby(['trial_id', 'event_id']).agg(list).to_numpy()
    grouped_parameter = np.array([el for sublist in grouped_parameter for el in sublist if len(el) == trace_length])

    # only the sorted traces are needed here
    [sorted_traces, _, _] = fp.sort_traces(grouped_parameter)

    # The bounds come from this data set's own array. The original read them
    # from `target_data`, a leftover loop variable of an earlier cell, so the
    # axes followed whichever data set that cell processed last.
    image = hv.Image(sorted_traces, ['Time', 'Trial #'],
                     [target_parameter.replace('_', ' ')],
                     bounds=[0, 0, grouped_parameter.shape[1], grouped_parameter.shape[0]]
                     ).opts(title=name)

    # For a publication-ready image additionally set
    #   width=fp.pix(5.8), height=fp.pix(5.8),
    #   fontsize=fp.font_sizes['small'], xticks=3, yticks=3
    image.opts(
        toolbar=None,
        hooks=[fp.margin],
        colorbar=True, cmap='viridis',
        colorbar_opts={'major_label_text_align': 'left'}
    )

    plot_list.append(image)


sorted_cluster_heatmap_panel = hv.Layout(plot_list)

# assemble the save path
save_path = os.path.join(paths.figures_path, '_'.join([save_name, target_parameter]))
hv.save(sorted_cluster_heatmap_panel, save_path, fmt='png')

# display the image
sorted_cluster_heatmap_panel

### UMAP Embedding

In [62]:
# UMAP embedding of the PCA scores, one scatter per data set, colored by cluster
plot_list = []

for transformed_data, cluster_idx, name in zip(pca_transforms, clusters, data_dict.keys()):
    # data set belonging to these scores
    data = data_dict[name]

    # embed the data via UMAP
    reducer = umap.UMAP(min_dist=0.5, n_neighbors=10)
    embedded_data = reducer.fit_transform(transformed_data)

    #--- Plot the embedding ---#

    # append the cluster index as an extra column next to the embedding columns
    cluster_column = np.expand_dims(cluster_idx, axis=1)
    umap_data = np.concatenate((embedded_data, cluster_column), axis=1)

    # Alternative coloring by trial instead of cluster (disabled):
    # grouped_parameter = data.loc[:, ['event_id', 'trial_id']].groupby(
    #     ['trial_id']).agg(list)
    # temp_parameter = []
    # for idx, el in enumerate(grouped_parameter['event_id']):
    #     event_ids = np.unique(el)
    #     temp_parameter.append(idx*np.ones(event_ids.shape[0]))
    # grouped_parameter = np.concatenate(temp_parameter, axis=0)
    # umap_data = np.concatenate((embedded_data,np.expand_dims(grouped_parameter, axis=1)),axis=1)

    # flag the last encounter of every trial: per trial, a vector of zeros with
    # one entry per unique event id and a 1 in the last position
    # (only consumed by the disabled winner overlay further down)
    winner_list = []
    grouped_parameter = data.loc[:, ['event_id', 'trial_id']].groupby(['trial_id']).agg(list)
    for el in grouped_parameter['event_id']:
        encounter_list = np.zeros(np.unique(el).shape[0])
        encounter_list[-1] = 1
        winner_list.append(encounter_list)
    grouped_parameter = np.concatenate(winner_list, axis=0)

    # first embedding column on x, second on y, cluster index as color
    umap_plot = hv.Scatter(umap_data, kdims=['Dim 1'], vdims=['Dim 2', 'cluster'])
    umap_plot.opts(title=name, color='cluster', colorbar=True, cmap='Category10', size=5)

    # For a publication-ready image use size=20 above and additionally set
    #   width=fp.pix(5.7), height=fp.pix(7.8), hooks=[fp.margin]
    umap_plot.opts(
        opts.Scatter(
            toolbar=None,
            fontsize=fp.font_sizes['small'],
            xticks=3,
            yticks=3
        )
    )

    # Winner overlay (disabled):
    # winner_data = embedded_data[grouped_parameter==1]
    # winner_plot = hv.Scatter(winner_data, vdims=['Dim 2'], kdims=['Dim 1'])
    # winner_plot.opts(width=fp.pix(5.7), height=fp.pix(7.8), toolbar=None,
    #                         hooks=[fp.margin], fontsize=fp.font_sizes['small'], xticks=3, yticks=3, color='black', size=20)
    # umap_overlay = umap_plot*winner_plot

    plot_list.append(umap_plot)

umap_panel = hv.Layout(plot_list)

# assemble the save path
save_path = os.path.join(paths.figures_path, '_'.join([save_name, 'umap']))
hv.save(umap_panel, save_path, fmt='png')

# display the image
umap_panel


NameError: name 'pca_transforms' is not defined

## Binned time analysis

In [5]:
### Load the binned (aggBin) data sets
# container for the data sets, keyed by the short labels listed below
data_dict = {}

# (query, label) per condition; the label replaces the one assembled by load_dataset
binned_queries = [
    ('result:test, rig:VPrey, analysis_type:aggBin, notes:blackCr_rewarded_crickets_0_vrcrickets_1',
     'VPrey_blackCr_CR0_VRCR_1'),
    ('result:test, rig:VPrey, analysis_type:aggBin, notes:whiteCr_blackBG_rewarded_crickets_0_vrcrickets_1',
     'VPrey_whiteCr_CR0_VRCR_1'),
    ('result:succ, rig:VPrey, analysis_type:aggBin, notes:blackCr_crickets_1_vrcrickets_1',
     'VPrey_blackCr_CR1_VRCR_1'),
    ('result:succ, rig:VPrey, analysis_type:aggBin, notes:whiteCr_blackBG_crickets_1_vrcrickets_1',
     'VPrey_whiteCr_CR1_VRCR_1'),
]

for search_string, short_label in binned_queries:
    ds, label = load_dataset(search_string)
    label = short_label
    data_dict[label] = ds

# drop the extra reference to the last data set; it is still held by data_dict
del ds

J:\Drago Guggiana Nilo\Prey_capture\AnalyzedData\preprocessing_test_VPrey_ALL_ALL_blackCr_rewarded_crickets_0_vrcrickets_1_ALL_ALL_2020-08-23T00-00-00_ALL_aggBin.hdf5
data label: VPrey_ALL_test_blackCr_rewarded_crickets_0_vrcrickets_1

J:\Drago Guggiana Nilo\Prey_capture\AnalyzedData\preprocessing_test_VPrey_ALL_ALL_whiteCr_blackBG_rewarded_crickets_0_vrcrickets_1_ALL_ALL_2020-08-23T00-00-00_ALL_aggBin.hdf5
data label: VPrey_ALL_test_whiteCr_blackBG_rewarded_crickets_0_vrcrickets_1

J:\Drago Guggiana Nilo\Prey_capture\AnalyzedData\preprocessing_succ_VPrey_ALL_ALL_blackCr_crickets_1_vrcrickets_1_ALL_ALL_2020-08-23T00-00-00_ALL_aggBin.hdf5
data label: VPrey_ALL_succ_blackCr_crickets_1_vrcrickets_1

J:\Drago Guggiana Nilo\Prey_capture\AnalyzedData\preprocessing_succ_VPrey_ALL_ALL_whiteCr_blackBG_crickets_1_vrcrickets_1_ALL_ALL_2020-08-23T00-00-00_ALL_aggBin.hdf5
data label: VPrey_ALL_succ_whiteCr_blackBG_crickets_1_vrcrickets_1



### Heatmaps of encounter

In [35]:
# Heatmap of every trial's binned trace, one row of panels per data set and
# one column per parameter; rows are ordered by fp.sort_traces on the first
# parameter available for that data set

# define the target parameters
target_parameters = ['mouse_speed', 'vrcricket_0_mouse_distance', 'vrcricket_0_speed', 'cricket_0_mouse_distance', 'cricket_0_speed']

# allocate a list for the plots
plot_list = []

keys = list(data_dict.keys())
for name in keys:

    data = data_dict[name]
    # row order for this data set; set by the first parameter that is plotted
    cluster_idx = None

    # for all the parameters
    for target_param in target_parameters:

        # load the parameter
        try:
            parameter = data[[target_param, 'trial_id']].copy()
        except KeyError:
            if ('cricket' in target_param) and ('vr' not in target_param):
                # a missing non-VR cricket column is tolerated: leave the grid cell empty
                hmap = hv.Empty()
                plot_list.append(hmap)
                continue
            # any other missing column is unexpected; do not carry on with the
            # `parameter` left over from the previous iteration
            raise

        # group the single traces: one row per trial
        grouped_parameter = parameter.groupby(['trial_id']).agg(list)
        grouped_parameter = np.array([el for el in grouped_parameter[target_param]])
        # positive infinities are replaced by 0
        if np.argwhere(np.isinf(grouped_parameter)).size != 0:
            grouped_parameter[grouped_parameter == np.inf] = 0

        # get the ordering for the first parameter, and preserve that sorting for all other
        # target parameters tested; the third output is not needed here and is discarded so
        # the notebook-level `clusters` list of the GMM cell is not overwritten
        if cluster_idx is None:
            [sorted_traces, cluster_idx, _] = fp.sort_traces(grouped_parameter)
        else:
            sorted_traces = grouped_parameter[cluster_idx, :]

        # plot all traces
        hmap = hv.Image(sorted_traces, ['Binned Time', 'Trial #'],
                        [target_param.replace('_', ' ')],
                        bounds=[0, 0, grouped_parameter.shape[1], grouped_parameter.shape[0]],
                        group=name,
                        label=target_param)

        # For publication-ready image
        # (for a compact overview use width/height fp.pix(1.5) and drop hooks/fontsize)
        hmap.opts(
            width=fp.pix(5.8),
            height=fp.pix(5.8),
            toolbar=None,
            hooks=[fp.margin],
            fontsize=fp.font_sizes['small'],
            xticks=3, yticks=3,
            colorbar=True, cmap='viridis',
            colorbar_opts={'major_label_text_align': 'left'}
        )

        plot_list.append(hmap)

heatmaps = hv.Layout(plot_list).cols(len(target_parameters))
save_path = os.path.join(paths.figures_path, '_'.join([save_name, 'binned_kinematics']))
hv.save(heatmaps, save_path, fmt='png')
heatmaps

In [34]:
# Histogram of the binned values of every cluster, one overlay per
# (data set, parameter) pair, on logarithmically spaced bins

# define the target parameters
target_parameters = ['mouse_speed', 'vrcricket_0_mouse_distance', 'vrcricket_0_speed', 'cricket_0_mouse_distance', 'cricket_0_speed']
# target_parameters = ['vrcricket_0_speed']

# allocate a list for the plots
plot_list = []

keys = list(data_dict.keys())
for name in keys:

    data = data_dict[name]
    # row order / clustering for this data set; set by the first parameter that is plotted
    cluster_idx = None

    # for all the parameters
    for target_param in target_parameters:

        # load the parameter
        try:
            parameter = data[[target_param, 'trial_id']].copy()
        except KeyError:
            if ('cricket' in target_param) and ('vr' not in target_param):
                # a missing non-VR cricket column is tolerated: leave the grid cell empty
                hmap = hv.Empty()
                plot_list.append(hmap)
                continue
            # any other missing column is unexpected; do not carry on with the
            # `parameter` left over from the previous iteration
            raise

        # group the single traces: one row per trial
        grouped_parameter = parameter.groupby(['trial_id']).agg(list)
        grouped_parameter = np.array([el for el in grouped_parameter[target_param]])
        # positive infinities are replaced by 0
        if np.argwhere(np.isinf(grouped_parameter)).size != 0:
            grouped_parameter[grouped_parameter == np.inf] = 0

        # get the clustering for first parameter, and preserve that sorting for all other
        # target parameters tested in this group
        if cluster_idx is None:
            [sorted_traces, cluster_idx, clusters] = fp.sort_traces(grouped_parameter)
        else:
            sorted_traces = grouped_parameter[cluster_idx, :]

        # histograms of this (data set, parameter) pair
        overlay_list = []

        # get the statistics of the traces so we set the same bins for all clusters of the same variable
        # HACK: this gets rid of zero values, need to find a way to show them in the speed plots
        lower = np.log10(np.min(sorted_traces[np.nonzero(sorted_traces)]))
        upper = np.ceil(np.log10(np.max(sorted_traces)))
        bin_edges = np.logspace(lower, upper, 50)

        # the reference line is attached to the histogram of the highest cluster id
        last_cluster = np.max(clusters)

        # Go through each cluster and plot on a histogram
        for clu in np.unique(clusters):
            idxs = cluster_idx[clusters == clu]
            # NOTE(review): idxs are entries of cluster_idx, which is also what
            # reorders grouped_parameter into sorted_traces above; indexing the
            # already reordered sorted_traces with them may pick other rows than
            # intended - confirm against fp.sort_traces
            cluster_traces = sorted_traces[idxs, :]

            freq, edges = np.histogram(cluster_traces, bin_edges)

            hist = hv.Histogram((edges, freq),
                                group=': '.join((name, target_param)),
                                label=str(clu+1)
                                )
            hist.opts(logx=True, alpha=0.3)

            # If we are the last cluster, add a reference line
            if clu == last_cluster:
                vline = hv.VLine(1e-2).opts(color='red', line_width=1, line_dash='dashed')
                hist = (hist * vline)

            overlay_list.append(hist)

        # Create an overlay of all the cluster histograms
        full_overlay = hv.Overlay(overlay_list)
        full_overlay.opts(legend_position='right', width=fp.pix(1.5), height=fp.pix(1.5))

        plot_list.append(full_overlay)


param_hists = hv.Layout(plot_list).opts(shared_axes=True).cols(len(target_parameters))
save_path = os.path.join(paths.figures_path, '_'.join([save_name, 'histogram_kinematics']))
hv.save(param_hists, save_path, fmt='png')
param_hists

In [30]:
# Plot the mean trial duration of every cluster returned by fp.sort_traces,
# one panel per data set

target_param = 'cricket_0_mouse_distance'

# allocate a list for the plots
plot_list = []

for name in data_dict.keys():

    data = data_dict[name]

    # load the parameter
    parameter = data[[target_param, 'trial_id']].copy()
    # group the single traces: one row per trial
    grouped_parameter = parameter.groupby(['trial_id']).agg(list)
    grouped_parameter = np.array([el for el in grouped_parameter[target_param]])

    # only the third output of the sorting is used; it is indexed below as one
    # cluster label per trial
    [_, _, cluster_idx] = fp.sort_traces(grouped_parameter)

    times = data[['time_vector', 'trial_id']].copy()
    times = times.groupby(['trial_id']).agg(list)

    # the last time stamp of every trial is used as its duration
    duration_list = np.array([trial[-1] for trial in times['time_vector']])

    # one row per cluster label: column 0 holds the mean, column 1 the sem
    n_clusters = np.max(cluster_idx)+1
    print('Num. Clusters:', n_clusters)
    duration_averages = np.zeros((n_clusters, 2))

    # visit every cluster once; the original looped over the label of every
    # trial and recomputed the same statistics for each repeated label
    for clu in np.unique(cluster_idx):
        in_cluster = cluster_idx == clu
        duration_averages[clu, 0] = np.mean(duration_list[in_cluster])
        duration_averages[clu, 1] = sem(duration_list[in_cluster])

    # plot the results
    clu_vector = np.arange(n_clusters)
    errorbar = hv.ErrorBars((clu_vector, duration_averages[:, 0], duration_averages[:, 1])) * \
        hv.Bars((clu_vector, duration_averages[:, 0]))
    errorbar.opts(title=name, xlabel='group', ylabel='seconds')

    plot_list.append(errorbar)

hv.Layout(plot_list)

Num. Clusters: 10
Num. Clusters: 10


In [15]:

# Cumulative explained variance of a PCA on the per-trial traces of two data sets.
# NOTE(review): Lprey_light_data and Lprey_dark_data are not defined in any cell
# shown here, and the recorded output of this cell is a NameError - confirm
# whether the cell should be removed or pointed at data_dict.

# allocate a list for the plots
plot_list = []

for data in [Lprey_light_data, Lprey_dark_data]:
    # define the target parameter and PCA
    target_parameter = 'cricket_0_mouse_distance'

    # load the parameter
    parameter = data[[target_parameter,'trial_id']].copy()
    # group the single traces: one list of samples per trial, flattened into one array
    target_data = parameter.groupby(['trial_id']).agg(list).to_numpy()
    target_data = np.array([el for sublist in target_data for el in sublist])

    # PCA the data before clustering
    pca = decomp.PCA()
    transformed_data = pca.fit_transform(target_data)
    # fp.plot_2d([[pca.explained_variance_ratio_]])

    # running sum of the explained variance ratios, divided by their total
    curve = hv.Curve(np.cumsum(pca.explained_variance_ratio_)/np.sum(pca.explained_variance_ratio_))
    curve.opts(tools=['hover'])
    # define the number of PCs to use (not read anywhere in this cell)
    pc_number = 7

    plot_list.append(curve)

hv.Layout(plot_list)

NameError: name 'Lprey_light_data' is not defined

### Full aggregate analysis

In [5]:
### Load the full-trial (aggFull) data sets
# container for the data sets, keyed by the label load_dataset assembles
data_dict = {}

# one (query, exclusion) pair per condition, loaded in this order:
#   real prey capture in the light (paths containing 'obstacle' are skipped)
#   real prey capture in the dark
full_queries = [
    ('result:succ, lighting:normal, rig:VR, analysis_type:aggFull', 'obstacle'),
    ('result:succ, lighting:dark, rig:VR, analysis_type:aggFull', None),
]

for search_string, excluded in full_queries:
    ds, label = load_dataset(search_string, exclusion=excluded)
    data_dict[label] = ds

# drop the extra reference to the last data set; it is still held by data_dict
del ds

J:\Drago Guggiana Nilo\Prey_capture\AnalyzedData\preprocessing_succ_VR_normal_ALL_crickets_1_vrcrickets_0_ALL_ALL_2020-06-19T00-00-00_ALL_aggFull.hdf5
data label: VR_normal_succ

J:\Drago Guggiana Nilo\Prey_capture\AnalyzedData\preprocessing_succ_VR_dark_ALL_crickets_1_vrcrickets_0_ALL_ALL_2020-06-19T00-00-00_ALL_aggFull.hdf5
data label: VR_dark_succ



In [6]:
# Plot the duration of every trial as bars, one panel per condition

# panels are collected here, in data_dict order
plot_list = []

for name, data in data_dict.items():

    # time stamps of every row, collected into one list per trial
    times = data[['time_vector', 'trial_id']].copy()
    times = times.groupby(['trial_id']).agg(list)

    # the last time stamp of a trial is used as its duration
    last_stamps = [trial[-1] for trial in times['time_vector']]
    duration_list = np.array(last_stamps)

    # plot the results
    duration_histogram = hv.Bars(duration_list).opts(title=name, xlabel='trial', ylabel='duration')
    plot_list.append(duration_histogram)

hv.Layout(plot_list)