In [1]:
import pandas as pd
import numpy as np

import os.path as op
import sys

sys.path.append(op.abspath('../code'))

import plotting_tools

import importlib

import data_loaders as dload
import clusterize as cluster

import ipywidgets

In [2]:
# Default location of the data folder; it can be edited through the widget below.
# NOTE(review): this is a machine-specific absolute path — other users must
# replace it in the widget before loading.
default_path = '/Users/hugofluhr/Library/CloudStorage/GoogleDrive-hugo.fluhr@gmail.com/My Drive/Piriform_neurons/data/data_xls_v2'

# Text input pre-filled with the default data path.
path_to_data_widg = ipywidgets.Textarea(
    value=default_path,
    placeholder='Insert your path here',
    description='Path to data:',
    disabled=False,
    layout=ipywidgets.Layout(width='850px', height='30px'),
)

### Setting path
Here you may set the path and name of your data folder.

In [3]:
# Render the data-path input so the path can be edited before the data is loaded.
display(path_to_data_widg)

Textarea(value='/Users/hugofluhr/Library/CloudStorage/GoogleDrive-hugo.fluhr@gmail.com/My Drive/Piriform_neuro…

In [4]:
# Pick up any edits made to the data_loaders module since it was first imported.
importlib.reload(dload)

# A Textarea keeps whatever whitespace or newlines were typed or pasted along
# with the path, so trim them before handing the path to the loader.
path_to_data = path_to_data_widg.value.strip()
neuron_df = dload.get_neuron_matrix(path_to_data, verb=False)

# Run cluster.normalize through .apply (column by column if neuron_df is a
# DataFrame, which the later .columns/.loc usage suggests).
cluster_df = neuron_df.apply(cluster.normalize)

IntProgress(value=1, description='Loading data:', max=103, min=1, style=ProgressStyle(bar_color='#AAAAAA'))

In [5]:
# Multi-select list offering every column of cluster_df, all selected by
# default. `rows` is one more than the number of columns.
#
# The former `description_width=40` keyword was dropped: it is not a
# SelectMultiple trait, so it had no effect and only triggered a traitlets
# "unrecognized arguments" DeprecationWarning. To change the label width, pass
# `style={'description_width': ...}` instead.
feature_selection = ipywidgets.SelectMultiple(
    options=cluster_df.columns.tolist(),
    value=cluster_df.columns.tolist(),
    rows=len(cluster_df.columns) + 1,
    description='Features',
    disabled=False,
)

#feature_selection_tag = ipywidgets.TagsInput(value=cluster_df.columns.tolist(),
#                                            allowed_tags=cluster_df.columns.tolist(),
#                                            allow_duplicates=False)

### Feature selection
Here you may select the features you wish to keep for the clustering. To do so, hold `CMD` (or `CTRL` for Windows users) and click the desired features. You can select all features by holding `SHIFT` and dragging from the top to the bottom of the list.

In [6]:
display(feature_selection)

SelectMultiple(description='Features', index=(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18…

In [None]:
# Show the currently selected features (the cell's last expression is displayed).
feature_selection.value

In [None]:
# Keep only the selected feature columns, then drop every row that still has a
# missing value; .copy() detaches the result from cluster_df.
selected_features = feature_selection.value
cluster_filtered_df = cluster_df.loc[:, selected_features].dropna(axis=0).copy()

# Summarise what goes into the clustering.
n_rows_kept, n_features_kept = cluster_filtered_df.shape
feature_listing = '\n- '.join(cluster_filtered_df.columns.tolist())
kept_index = cluster_filtered_df.index.tolist()

print(f' {n_features_kept} columns selected for the clustering :\n-', feature_listing)
print(f'\n{n_rows_kept} neurons selected for the clustering (after clearing missing values) :\n', kept_index)

In [None]:
# Default output folder; it can be edited through the widget below.
# NOTE(review): this is a machine-specific absolute path — other users must
# replace it in the widget before saving.
default_save_path = '/mnt/c/Users/Cionkito/Documents/Brainhack2022/Piriform_neurons/results'

# Text input pre-filled with the default output folder.
save_path_widg = ipywidgets.Textarea(
    value=default_save_path,
    placeholder='Insert your path here',
    description='',
    disabled=False,
    layout=ipywidgets.Layout(width='750px', height='30px'),
)

# On/off switches read by the clustering and saving cells further down.
# The former `indent=False` keyword was dropped from all three buttons: it is a
# Checkbox option, not a ToggleButton trait, so it had no effect and only
# triggered a traitlets "unrecognized arguments" DeprecationWarning.
show_figures = ipywidgets.ToggleButton(
    value=True,
    description='Show figures',
    disabled=False,
)

save_figures = ipywidgets.ToggleButton(
    value=False,
    description='Save figures',
    disabled=False,
)

save_labels = ipywidgets.ToggleButton(
    value=False,
    description='Save clusters',
    disabled=False,
)

### Saving results parameters
Here you can specify whether you wish to show and save the clustering figures (and labels) by pressing the respective buttons.

In [None]:
# Stack a caption, the save-path input and the row of three toggles vertically.
save_toggle_row = ipywidgets.HBox([show_figures, save_figures, save_labels])
ipywidgets.VBox([
    ipywidgets.Label('Where to save :'),
    save_path_widg,
    save_toggle_row,
])

### Clustering parameters
This hierarchical agglomerative clustering requires exactly one of the following two parameters:
- The number of clusters: the desired number of clusters (usually based on a hypothesis)
- The distance threshold: the distance at which to "cut" the hierarchical tree structure

Below you can choose which parameter you wish to specify:

In [None]:
# Selector for which clustering parameter is used; the option values (1 / 2)
# are what the clustering cell tests against. Defaults to the distance threshold.
cluster_choice = ipywidgets.Dropdown(
    options=[('Number of clusters', 1), ('Distance threshold', 2)],
    value=2,
)

# Input for the number of clusters (integer, bounded to 2..10).
n_cluster_dropdown = ipywidgets.BoundedIntText(value=3, min=2, max=10)

# Input for the distance threshold (0..20 in steps of 0.1).
d_thresh_slider = ipywidgets.FloatSlider(value=11.9, min=0, max=20, step=0.1)

# Show the three controls stacked vertically.
ipywidgets.VBox([
    cluster_choice,
    n_cluster_dropdown,
    d_thresh_slider,
])

### Interpretation of clustering results (below)
- The clustering stability (in distance) shows the distance before two clusters are merged into one. In other words, the larger the distance, the more stable the clustering is for a specified number of clusters _k_.
- The number of clusters is only given if the clustering is parametrized through a specified distance.
- _Groups_ shows the number of samples (neurons) in each cluster.

In [None]:
# Pick up any edits made to the clusterize module since it was first imported.
importlib.reload(cluster)

# Exactly one of the two clustering parameters is passed on; the other is None.
# Dropdown value 2 is "Distance threshold"; otherwise the number of clusters is used.
if cluster_choice.value == 2:
    param_n_clusters = None
    param_d_thresh = d_thresh_slider.value
else:
    param_n_clusters = n_cluster_dropdown.value
    param_d_thresh = None

# A Textarea keeps whatever whitespace or newlines were typed or pasted along
# with the path, so trim it once and reuse it for both figure files.
save_dir = save_path_widg.value.strip()

cluster_model, label_2_color = cluster.hierarchical_cstm(
    cluster_filtered_df,
    n_clusters=param_n_clusters,
    d_thresh=param_d_thresh,
    figsize=(30, 10),
    plot=show_figures.value,
)

labels = cluster_model.labels_
# Assumes labels are 0-based and contiguous, so max + 1 is the cluster count.
n_clusters = labels.max() + 1

# Each save call directly follows the step that plots the figure it saves
# (presumably save_figure writes the current figure — confirm in plotting_tools).
if save_figures.value:
    plotting_tools.save_figure(
        op.join(save_dir, f'hierarchical-tree-{n_clusters:02d}clusters.png')
    )

centroids = cluster.get_centroids(
    cluster_model,
    cluster_filtered_df,
    col=label_2_color,
    plot=show_figures.value,
)

if save_figures.value:
    plotting_tools.save_figure(
        op.join(save_dir, f'hierarchical-centroids-{n_clusters:02d}clusters.png')
    )

dist_to_centroid = cluster.get_centroid_distance(cluster_model, cluster_filtered_df, centroids)

In [None]:
# One row per entry of cluster_filtered_df (same index), with a single column
# holding the cluster label (0 to n_clusters - 1).
label_save_df = pd.DataFrame(labels, index=cluster_filtered_df.index, columns=[f'labels-{n_clusters}k'])

# The labels are written to a .csv file only when the "Save clusters" toggle is
# on. The save path comes from a Textarea, which keeps stray whitespace or
# newlines, so it is trimmed before being joined with the file name.
if save_labels.value:
    label_save_df.to_csv(op.join(save_path_widg.value.strip(), f'neuron_clustering-{n_clusters:d}k.csv'))

### List of neurons in each cluster :

In [None]:
# Print, for each cluster (numbered from 1), the index entries assigned to it.
for cluster_id in range(n_clusters):
    member_ids = cluster_filtered_df.index[labels == cluster_id].tolist()
    print(f'cluster #{cluster_id + 1} :', member_ids)

### Features with the least in-cluster variability  (most in-cluster similarity) :

In [None]:
# Print the first four entries of each cluster's feat_by_radius list
# (presumably ordered from least to most in-cluster variability — confirm in clusterize).
for cluster_number, ranked_features in enumerate(dist_to_centroid.feat_by_radius, start=1):
    print(f'cluster #{cluster_number:d}', ranked_features[:4])