In [None]:
%load_ext autoreload
%autoreload 2
import os
import sys
import numpy as np
import pandas as pd
import xarray as xr
import umap
from scipy.ndimage import gaussian_filter
import plotly.express as px
from os.path import join as pjoin
from tqdm.notebook import tqdm
import plotly.graph_objects as go
from scipy.stats import pearsonr, spearmanr, zscore

## NOTE(review): absolute, user-specific path. The five imports below presumably resolve
## from this directory, so the notebook only runs on a machine with this mount --
## consider a configurable or relative location.
sys.path.append('/home/austinbaggetta/csstorage3/CircleTrack/CircleTrackAnalysis')
import circletrack_behavior as ctb
import circletrack_neural as ctn
import plotting_functions as pf
import place_cells as pc
import pca_ica as pca

## Global xarray option for this kernel session: keep attrs on the results of operations.
xr.set_options(keep_attrs=True)

In [None]:
## Settings: locations of the processed data and figures, plus the session labels.
project_dir = 'MultiCon_Imaging'
experiment_dir = 'MultiCon_Imaging2'
## Both paths are relative to the notebook's working directory.
dpath = f'../../../{project_dir}/{experiment_dir}/output/'
fig_path = f'../../../{project_dir}/{experiment_dir}/intermediate_figures/'
mouse_list = ['mc42', 'mc43']
## Session labels in order: A1..A5, B1..B5, C1..C5, D1..D5 (20 in total).
session_list = [f'{letter}{x}' for letter in 'ABCD' for x in np.arange(1, 6)]

Analysis inspired by Yang et al. (2024), Selection of experience for memory by hippocampal sharp wave ripples, Science. DOI: 10.1126/science.adk8261

From their methods section: 
Neural data were first preprocessed before dimensionality reduction. Neural spiking data
(spike count) during maze learning was binned into 100-ms bin. The data was then smoothed using
a 500-ms wide Gaussian kernel and z-scored. The UMAP dimensionality reduction algorithm was then applied to this matrix. Each point in the low-dimensional manifold corresponds
to the population activity at a single time bin in the session.

UMAP hyperparameters used were: n_neighbors = 20, metric = 'cosine', output_metric =
'euclidean', learning_rate = 1.0, init = 'spectral', min_dist = 0.1, spread = 1.0, repulsion_strength =
1.0, negative_sample_rate = 5, target_metric = 'categorical', dens_lambda = 2.0, dens_frac = 0.3,
dens_var_shift=0.1.

With supervised methods, data from the same trial were given the same label. Data points
that share the same labels were leveraged so that similar points were embedded closer together.
For example, visualizations in Fig. 1G, H were produced by supervised UMAP dimensionality
reduction, where we exploited this option by using trial block labels in the behavioral data as
supervised information. This was implemented by passing the trial block number as target when
calling the fit_transform function...

### Example from umap documentation.

In [None]:
from sklearn.datasets import load_digits

## Sanity check: embed the scikit-learn digits data with the same neighbourhood
## settings used for the neural data below.
digits = load_digits()

digits_reducer = umap.UMAP(n_neighbors=20, min_dist=0.1, metric='cosine')
embedding = digits_reducer.fit_transform(digits.data)

In [None]:
## Scatter the two embedding columns against each other (column 0 on x, column 1 on y).
fig = pf.custom_graph_template(x_title='', y_title='')
digits_scatter = go.Scatter(
    x=embedding[:, 0],
    y=embedding[:, 1],
    mode='markers',
    marker=dict(color='darkgrey', size=3),
)
fig.add_trace(digits_scatter)
fig.show()

### Single mouse example.

In [None]:
## Single-session example: load one session, give every frame a trial-block label,
## then bin the binarized activity for UMAP.
mouse = 'mc26'
data_type = 'S'
day = '20'
number_of_binned_trials = 4  ## consecutive trials that share one block label
bin_size_seconds = 0.1
fps = 30
samples_per_bin = bin_size_seconds * fps
## 0.1 * 30 is 3.0000000000000004 in floating point; round before casting so that
## other bin sizes (whose product may land just below an integer) are not truncated.
frames_per_bin = int(round(samples_per_bin))

spike_data = xr.open_dataset(pjoin(dpath, f'aligned_minian/{mouse}/{data_type}/{mouse}_{data_type}_{day}.nc'))[data_type]
binarized_data = (spike_data > 0.1).astype(int)

## Build the block labels in a plain numpy array and attach them once, instead of
## writing through a view of the xarray coordinate.
trials_one_based = binarized_data['trials'].values + 1  ## plus 1 so the first trial (0) is not a multiple of the block size
trial_block = np.zeros(binarized_data['frame'].shape[0])
block_num = 1
for trial in np.unique(trials_one_based):
    trial_block[trials_one_based == trial] = block_num
    ## A block closes on every trial number divisible by the block size.
    if trial % number_of_binned_trials == 0:
        block_num += 1
## Frames whose trial value matches no label (e.g. NaN) keep block 0.
binarized_data = binarized_data.assign_coords(trial_block = ('frame', trial_block))

## Use the variables defined above rather than repeating the literals 0.1 and 30.
binned_data = pca.bin_transients(binarized_data, bin_size_in_seconds=bin_size_seconds, fps=fps, analysis_type='num_spikes')
## One label per bin: the label of the first frame in each bin.
binned_trial_blocks = trial_block[::frames_per_bin]
## NOTE(review): if bin_transients drops a trailing partial bin, the strided labels are
## one element longer than the data; trimming keeps them aligned. Confirm against
## pca_ica.bin_transients. Assumes binned_data is (units, bins), as its use with
## dims=['unit_id', 'frame'] below implies.
binned_trial_blocks = binned_trial_blocks[:binned_data.shape[1]]

## NOTE(review): the cited methods smooth with a Gaussian kernel and z-score before UMAP.
## Here the smoothing is disabled and zdata is not passed to any embedding cell below,
## which use binarized_data / binned_data directly.
# smoothed_data = gaussian_filter(binned_data, sigma=2)
zdata = zscore(binned_data, axis=1)

#### Testing with binarized data only.

In [None]:
## Supervised UMAP on the un-binned binarized data: one sample per frame, with the
## per-frame trial block as the target.
## random_state is fixed (same seed as the multi-session cells further down) so the
## embedding is reproducible across kernel restarts.
## Plain numpy arrays are passed so the estimator does not have to coerce xarray objects.
embedding = umap.UMAP(n_neighbors=20, random_state=42,
                      min_dist=0.1, learning_rate=1.0,
                      metric='cosine').fit_transform(binarized_data.T.values, y=binarized_data['trial_block'].values)

In [None]:
## One trace per trial block.
## Rows of `embedding` are selected with a boolean mask on the block labels. The earlier
## running-offset slicing was only correct when each block's frames were contiguous and
## the blocks appeared in ascending order; the mask is correct in either case.
fig = pf.custom_graph_template(x_title='', y_title='')
block_labels = binarized_data['trial_block'].values
for trial in np.unique(block_labels):
    plot_data = embedding[block_labels == trial, :]

    fig.add_trace(go.Scattergl(x=plot_data[:, 0], y=plot_data[:, 1], mode='markers', 
                               marker_size=3, name=f'Block {trial}', showlegend=False))
fig.show()

#### Testing with binned data.

In [None]:
## Supervised: one sample per time bin, with the bin's trial block as the target.
## random_state is fixed (same seed as the multi-session cells further down) so this
## embedding is reproducible and comparable between runs.
embedding = umap.UMAP(n_neighbors=20, random_state=42,
                      min_dist=0.1, learning_rate=1.0,
                      metric='cosine').fit_transform(binned_data.T, y=binned_trial_blocks)

In [None]:
## Plot the binned embedding, one trace per trial block, coloured along Viridis.
color_list = px.colors.sequential.Viridis
## `da` pairs the binned matrix with its per-bin block label; it is also inspected in
## the next cell.
da = xr.DataArray(binned_data, dims=['unit_id', 'frame'])
da = da.assign_coords(trial_block = ('frame', binned_trial_blocks))

fig = pf.custom_graph_template(x_title='', y_title='')
block_labels = da['trial_block'].values
for idx, trial in enumerate(np.unique(block_labels)):
    ## Mask-based selection: does not require each block's bins to be contiguous or in
    ## ascending order, unlike slicing by a running offset.
    plot_data = embedding[block_labels == trial, :]

    ## Wrap the colour index so sessions with more blocks than palette entries
    ## do not raise IndexError.
    fig.add_trace(go.Scattergl(x=plot_data[:, 0], y=plot_data[:, 1], mode='markers',
                               marker_color=color_list[idx % len(color_list)],
                               marker_size=3, name=f'Block {trial}', showlegend=False,
                               )
                            )
fig.show()

In [None]:
## Display the distinct trial-block labels attached to the binned data.
np.unique(da['trial_block'])

In [None]:
## Unsupervised: same data and hyperparameters as the supervised fit, but no target.
## random_state is fixed (same seed as the multi-session cells further down) so that
## differences from the supervised embedding are not due to random initialisation.
embedding = umap.UMAP(n_neighbors=20, random_state=42,
                      min_dist=0.1, learning_rate=1.0,
                      metric='cosine').fit_transform(binned_data.T)

In [None]:
## Plot the unsupervised embedding, still coloured by trial block for comparison.
color_list = px.colors.sequential.Viridis
da = xr.DataArray(binned_data, dims=['unit_id', 'frame'])
da = da.assign_coords(trial_block = ('frame', binned_trial_blocks))

fig = pf.custom_graph_template(x_title='', y_title='')
block_labels = da['trial_block'].values
for idx, trial in enumerate(np.unique(block_labels)):
    ## Mask-based selection: does not require each block's bins to be contiguous or in
    ## ascending order, unlike slicing by a running offset.
    plot_data = embedding[block_labels == trial, :]

    ## Wrap the colour index so sessions with more blocks than palette entries
    ## do not raise IndexError.
    fig.add_trace(go.Scattergl(x=plot_data[:, 0], y=plot_data[:, 1], mode='markers',
                               marker_color=color_list[idx % len(color_list)],
                               marker_size=3, name=f'Block {trial}', showlegend=False, opacity=0.7))
fig.show()

### First five days in A for a mouse.

In [None]:
## Supervised UMAP for the sessions in day_list of one mouse, one subplot per day.
mouse = 'mc26'
data_type = 'S'
day_list = ['1', '2', '3', '4', '5']
number_of_binned_trials = 4  ## consecutive trials that share one block label
bin_size_seconds = 0.1
fps = 30
samples_per_bin = bin_size_seconds * fps
## Round before casting: the float product (3.0000000000000004 here) must not be
## truncated to one frame too few for other bin sizes.
frames_per_bin = int(round(samples_per_bin))
color_list = px.colors.n_colors(lowcolor='rgb(175,238,238)', highcolor='rgb(50,100,0)', n_colors=20, colortype='rgb')

## The column count follows day_list so the grid and the titles cannot disagree.
fig = pf.custom_graph_template(x_title='', y_title='', rows=1, columns=len(day_list), titles=[f'A{x}' for x in np.arange(1, len(day_list) + 1)],
                               shared_x=True, shared_y=True, width=1200)

for day_idx, day in enumerate(day_list):
    session_path = pjoin(dpath, f'aligned_minian/{mouse}/{data_type}/{mouse}_{data_type}_{day}.nc')
    ## Load into memory and close the file so handles do not accumulate across iterations.
    with xr.open_dataset(session_path) as session_ds:
        spike_data = session_ds[data_type].load()
    binarized_data = (spike_data > 0.1).astype(int)

    ## Label each frame with its trial block; built in numpy and attached once.
    trials_one_based = binarized_data['trials'].values + 1  ## plus 1 so the first trial (0) is not a multiple of the block size
    trial_block = np.zeros(binarized_data['frame'].shape[0])
    block_num = 1
    for trial in np.unique(trials_one_based):
        trial_block[trials_one_based == trial] = block_num
        ## A block closes on every trial number divisible by the block size.
        if trial % number_of_binned_trials == 0:
            block_num += 1
    binarized_data = binarized_data.assign_coords(trial_block = ('frame', trial_block))

    ## Use the settings above rather than repeating the literals 0.1 and 30.
    binned_data = pca.bin_transients(binarized_data, bin_size_in_seconds=bin_size_seconds, fps=fps, analysis_type='num_spikes')
    ## One label per bin (label of the bin's first frame).
    ## NOTE(review): trimmed to the number of bins in case bin_transients drops a
    ## trailing partial bin -- confirm against pca_ica.bin_transients.
    binned_trial_blocks = trial_block[::frames_per_bin][:binned_data.shape[1]]

    ## Supervised
    embedding = umap.UMAP(n_neighbors=20, random_state=42, spread=1.0,
                          min_dist=0.1, learning_rate=1.0, init='spectral',
                          metric='cosine').fit_transform(binned_data.T, y=binned_trial_blocks)
    
    da = xr.DataArray(binned_data, dims=['unit_id', 'frame'])
    da = da.assign_coords(trial_block = ('frame', binned_trial_blocks))

    block_labels = da['trial_block'].values
    for idx, trial in enumerate(np.unique(block_labels)):
        ## Mask-based selection: correct even if a block's bins are not contiguous.
        plot_data = embedding[block_labels == trial, :]

        ## Wrap the colour index so more than len(color_list) blocks cannot raise IndexError.
        fig.add_trace(go.Scattergl(x=plot_data[:, 0], y=plot_data[:, 1], mode='markers',
                                   marker_color=color_list[idx % len(color_list)],
                                   marker_size=3, name=f'Block {trial}', showlegend=False, legendgroup=str(trial)), row=1, col=day_idx+1)
fig.update_yaxes(title='UMAP2', col=1)
fig.update_xaxes(title='UMAP1', row=1)
fig.show()

In [None]:
## Supervised UMAP for every session file of one mouse, drawn on a 4 x 5 grid whose
## panels are titled by session_list, then written to fig_path as a PNG.
mouse = 'mc26'
data_type = 'S'
number_of_binned_trials = 4  ## consecutive trials that share one block label
bin_size_seconds = 0.1
fps = 30
samples_per_bin = bin_size_seconds * fps
## Round before casting: the float product (3.0000000000000004 here) must not be
## truncated to one frame too few for other bin sizes.
frames_per_bin = int(round(samples_per_bin))
panels_per_row = 5
color_list = px.colors.n_colors(lowcolor='rgb(175,238,238)', highcolor='rgb(50,100,0)', n_colors=20, colortype='rgb')

fig = pf.custom_graph_template(x_title='', y_title='', rows=4, columns=5, titles=session_list,
                               shared_x=True, shared_y=True, width=1200, height=1200)

mpath = pjoin(dpath, f'aligned_minian/{mouse}/{data_type}')


def _session_sort_key(filename):
    """Sort key that orders '<mouse>_<type>_<day>.nc' files by their numeric day suffix.

    Files whose last underscore-separated token is not an integer sort after the
    numbered ones, alphabetically.
    """
    day_token = os.path.splitext(filename)[0].rsplit('_', 1)[-1]
    if day_token.isdigit():
        return (0, int(day_token), filename)
    return (1, 0, filename)


## os.listdir returns entries in arbitrary order, and a plain string sort would put
## day 10 before day 2. The panel position is derived from the loop index, so the files
## are ordered numerically by day first. Non-.nc entries are skipped.
session_files = sorted((name for name in os.listdir(mpath) if name.endswith('.nc')), key=_session_sort_key)

## tqdm wraps the list (not the enumerate object) so the bar knows the total.
for index, session in enumerate(tqdm(session_files)):
    ## Hard-coded panel offsets per mouse.
    ## NOTE(review): presumably these skip panels for sessions with no recording --
    ## verify. Deriving the panel from the day number in the filename would remove
    ## the need for them.
    if mouse == 'mc26' and 10 < index < 17:
        index += 1
    elif mouse == 'mc26' and index == 17:
        index += 2
    elif mouse == 'mc23' and index > 14:
        index += 1

    ## Zero-based panel index -> one-based (row, col) on the grid.
    grid_row, grid_col = divmod(index, panels_per_row)
    row, col = grid_row + 1, grid_col + 1

    ## Select the matrix named by data_type (matches the directory chosen in mpath);
    ## load it and close the file so handles do not accumulate across iterations.
    with xr.open_dataset(pjoin(mpath, session)) as session_ds:
        spike_data = session_ds[data_type].load()
    binarized_data = (spike_data > 0.1).astype(int)

    ## Label each frame with its trial block; built in numpy and attached once.
    trials_one_based = binarized_data['trials'].values + 1  ## plus 1 so the first trial (0) is not a multiple of the block size
    trial_block = np.zeros(binarized_data['frame'].shape[0])
    block_num = 1
    for trial in np.unique(trials_one_based):
        trial_block[trials_one_based == trial] = block_num
        ## A block closes on every trial number divisible by the block size.
        if trial % number_of_binned_trials == 0:
            block_num += 1
    binarized_data = binarized_data.assign_coords(trial_block = ('frame', trial_block))

    binned_data = pca.bin_transients(binarized_data, bin_size_in_seconds=bin_size_seconds, fps=fps, analysis_type='num_spikes')
    ## One label per bin (label of the bin's first frame).
    ## NOTE(review): trimmed to the number of bins in case bin_transients drops a
    ## trailing partial bin -- confirm against pca_ica.bin_transients.
    binned_trial_blocks = trial_block[::frames_per_bin][:binned_data.shape[1]]

    ## Supervised
    embedding = umap.UMAP(n_neighbors=20, random_state=42, spread=1.0,
                          min_dist=0.1, learning_rate=1.0, init='spectral',
                          metric='cosine').fit_transform(binned_data.T, y=binned_trial_blocks)
    
    da = xr.DataArray(binned_data, dims=['unit_id', 'frame'])
    da = da.assign_coords(trial_block = ('frame', binned_trial_blocks))

    block_labels = da['trial_block'].values
    for idx, trial in enumerate(np.unique(block_labels)):
        ## Mask-based selection: correct even if a block's bins are not contiguous.
        plot_data = embedding[block_labels == trial, :]

        ## Wrap the colour index so more than len(color_list) blocks cannot raise IndexError.
        fig.add_trace(go.Scattergl(x=plot_data[:, 0], y=plot_data[:, 1], mode='markers',
                                   marker_color=color_list[idx % len(color_list)],
                                   marker_size=3, name=f'Block {trial}', showlegend=False, legendgroup=str(trial)), row=row, col=col)
fig.update_yaxes(title='UMAP2', col=1)
fig.update_xaxes(title='UMAP1', row=4)
fig.show()
## Make sure the output directory exists before exporting.
os.makedirs(fig_path, exist_ok=True)
fig.write_image(pjoin(fig_path, f'trial_blocks_UMAP_{mouse}_{number_of_binned_trials}.png'))