# Trial clustering

In [1]:
""" 
IMPORTS
"""
import os
import autograd.numpy as np
import pickle
import seaborn as sns
from collections import defaultdict
import pandas as pd
from scipy.ndimage import gaussian_filter1d
from scipy import stats

# --Machine learning and statistics+
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler, MinMaxScaler, Normalizer

from one.api import ONE
import matplotlib.pyplot as plt

# Get my functions
functions_path =  '/home/ines/repositories/representation_learning_variability/Functions/'
# functions_path = '/Users/ineslaranjeira/Documents/Repositories/representation_learning_variability/Functions/'
os.chdir(functions_path)
from plotting_functions import bins_per_trial_phase, broader_label
from one_functions_generic import prepro
from design_functions import wheel_velocity
from data_processing import time_intervals, process_quiescence
from wheel_functions import find_periods_below_threshold, create_movement_array, stack_trial_events, wheel_trial_epoch
one = ONE(base_url='https://alyx.internationalbrainlab.org')


## Parameters

In [2]:
bin_size = 0.1

data_path_v1 =  '/home/ines/repositories/representation_learning_variability/DATA/Sub-trial/Design matrix/' + 'v1/' + str(bin_size) + '/'
data_path_v2 =  '/home/ines/repositories/representation_learning_variability/DATA/Sub-trial/Design matrix/' + 'v2_3Nov2023/' + str(bin_size) + '/'
# data_path_v2 =  '/Users/ineslaranjeira/Documents/Repositories/representation_learning_variability/DATA/Sub-trial/Design matrix/' + 'v2_3Nov2023/' + str(bin_size) + '/'

data_path = data_path_v2

os.chdir(data_path)
design_matrices = os.listdir(data_path)

if data_path == data_path_v1:
       var_names = ['avg_wheel_vel', 'pupil_diameter',
              'whisker_me', 'nose_speed_X', 'nose_speed_Y', 
              'l_paw_speed_X', 'l_paw_speed_Y', 'pupil_speed_X',
              'pupil_speed_Y', 'Gaussian_licks']
elif data_path == data_path_v2:
       var_names = ['avg_wheel_vel', 'pupil_diameter',
              'whisker_me', 'nose_speed', 'l_paw_speed', 'left_X', 'left_Y',
              'pupil_speed', 'pupil_X', 'pupil_Y', 'Lick count']

# Plotting params
multiplier = 1/bin_size

event_type_list = ['goCueTrigger_times']  # , 'feedback_times', 'firstMovement_times'
event_type_name = ['Go cue']  # , 'Feedback time', 'First movement onset'

## Load data

List design matrices' eids

In [3]:
# Save data of all sessions for latter
idxs = []
mouse_names = []
for m, mat in enumerate(design_matrices):
    
    mouse_name = design_matrices[m][51:-4]
    eid = design_matrices[m][14:50]
    idx = str(eid + '_' + mouse_name)
    if m == 0:
        idxs = idx
        mouse_names = mouse_name
    else:
        idxs = np.hstack((idxs, idx))
        mouse_names = np.hstack((mouse_names, mouse_name))

In [4]:
# Save data of all sessions for latter
matrix_all = defaultdict(list)
matrix_all_unnorm = defaultdict(list)
session_all = defaultdict(list)

for m, mouse_name in enumerate(mouse_names):
    # Save results per mouse
    matrix_all[mouse_name] = {}
    session_all[mouse_name] = {}
    matrix_all_unnorm[mouse_name] = {}

for m, mat in enumerate(idxs):
    if len(mat) > 35: 
            
        # Trials data
        session = mat[0:36]
        mouse_name = mat[37:]

        session_trials = one.load_object(session, obj='trials', namespace='ibl')
        session_trials = session_trials.to_df()
        session_end = list(session_trials['stimOff_times'][-1:])[0]  # TODO: this might not work if stimOff times are missing
        session_start = list(session_trials['stimOn_times'])[0]

        # Get time of last unbiased trial
        unbiased = session_trials.loc[session_trials['probabilityLeft']==0.5]
        time_trial_90 = list(unbiased['stimOff_times'])[-1]
        
        filename = str('design_matrix_' + mat + '_'  + str(bin_size))  # + mouse_name + '_'
                
        big_design_matrix = pickle.load(open(filename, "rb"))
        design_matrix = big_design_matrix.groupby('Bin')[var_names].mean()  # 
        design_matrix = design_matrix.reset_index(level = [0])  # , 'Onset times'
        design_matrix = design_matrix.dropna()
    
        # Keep only first 90 trials
        # design_matrix = design_matrix.loc[(design_matrix['Bin'] < time_trial_90 * 10) & (design_matrix['Bin'] > session_start * 10)]
        # unbiased_trials = session_trials.loc[session_trials['stimOff_times'] < time_trial_90]

        design_matrix = design_matrix.loc[(design_matrix['Bin'] < session_end * 10) & (design_matrix['Bin'] > session_start * 10)]
        unbiased_trials = session_trials.loc[session_trials['stimOff_times'] < session_end]
        
        training_set = np.array(design_matrix[var_names]).copy() 
        
        if len(training_set) > 0:
            # Standardization using StandardScaler
            scaler = StandardScaler()
            standardized = scaler.fit_transform(training_set)
            # Normalize between 0 and 1
            normalizer = Normalizer().fit(standardized)
            normalized = normalizer.transform(standardized)
            
            matrix_all[mouse_name][session] = normalized
            session_all[mouse_name][session] = unbiased_trials    
            matrix_all_unnorm[mouse_name][session] = design_matrix
            
        else:
            print(session)
    else:
        print(mat)

local md5 mismatch on dataset: churchlandlab/Subjects/CSHL049/2020-01-11/001/alf/_ibl_trials.stimOff_times.npy
/home/ines/Downloads/ONE/alyx.internationalbrainlab.org/churchlandlab/Subjects/CSHL049/2020-01-11/001/alf/_ibl_trials.stimOff_times.npy: 100%|██████████| 4.60k/4.60k [00:00<00:00, 11.9kB/s]
local md5 mismatch on dataset: cortexlab/Subjects/KS023/2019-12-10/001/alf/_ibl_trials.goCueTrigger_times.npy
/home/ines/Downloads/ONE/alyx.internationalbrainlab.org/cortexlab/Subjects/KS023/2019-12-10/001/alf/_ibl_trials.goCueTrigger_times.npy: 100%|██████████| 5.26k/5.26k [00:00<00:00, 12.8kB/s]


_
poch_DY_013.png_
portances.svg_


local md5 mismatch on dataset: cortexlab/Subjects/KS023/2019-12-06/001/alf/_ibl_trials.goCueTrigger_times.npy
/home/ines/Downloads/ONE/alyx.internationalbrainlab.org/cortexlab/Subjects/KS023/2019-12-06/001/alf/_ibl_trials.goCueTrigger_times.npy: 100%|██████████| 5.35k/5.35k [00:00<00:00, 14.9kB/s]
local md5 mismatch on dataset: cortexlab/Subjects/KS014/2019-12-03/001/alf/_ibl_trials.goCueTrigger_times.npy
/home/ines/Downloads/ONE/alyx.internationalbrainlab.org/cortexlab/Subjects/KS014/2019-12-03/001/alf/_ibl_trials.goCueTrigger_times.npy: 100%|██████████| 4.38k/4.38k [00:00<00:00, 12.8kB/s]


lls_
_
poch_CSHL060.png_
_matrix_


local md5 mismatch on dataset: cortexlab/Subjects/KS014/2019-12-07/001/alf/_ibl_trials.goCueTrigger_times.npy
/home/ines/Downloads/ONE/alyx.internationalbrainlab.org/cortexlab/Subjects/KS014/2019-12-07/001/alf/_ibl_trials.goCueTrigger_times.npy: 100%|██████████| 5.38k/5.38k [00:00<00:00, 15.2kB/s]


1b61b7f2-a599-4e40-abd6-3e758d2c9e25
_


## Concatenate sessions of the same animal

In [5]:
collapsed_matrices = defaultdict(list)
collapsed_unnorm = defaultdict(list)
collapsed_trials = defaultdict(list)

# Collapse multiple sessions per mouse
for mouse in np.unique(mouse_names):
    if len(np.where(mouse_names==mouse)[0]) > 1 and len(mouse) > 0:
        mouse_sessions = list(matrix_all[mouse].keys())
        for s, session in enumerate(mouse_sessions):
            collapsed_matrices[mouse] = np.vstack(matrix_all[mouse][session])
            collapsed_unnorm[mouse] = np.vstack(matrix_all_unnorm[mouse][session])
            collapsed_trials[mouse] = pd.concat(session_all[mouse], ignore_index=True)

## Loop through animals

In [6]:
broader = True
var = 'broader_label'

all_animals_df = pd.DataFrame(columns=np.concatenate(([var], var_names)))

for m, mat in enumerate(idxs[4:5]):
    if len(mat) > 35: 
        
        session_df = pd.DataFrame(columns=np.concatenate(([var], var_names)))
        # Trials data
        session = mat[0:36]
        # Mouse name
        mouse_name = mat[37:]
        
        print('Fitting mouse ' + mouse_name)

        # Get mouse data
        session_trials = session_all[mouse_name][session]
        design_matrix = matrix_all[mouse_name][session]
        unnorm_mat = matrix_all_unnorm[mouse_name][session]
        
        if len(np.shape(design_matrix)) > 2:
            design_matrix = design_matrix[0]
            session_trials = session_trials[0]
            unnorm_mat = matrix_all_unnorm[mouse_name][0]

        " Prepare design matrix "
        design_matrix_heading = pd.DataFrame(columns=var_names)
        design_matrix_heading[var_names] = design_matrix
        bins = unnorm_mat['Bin']
        design_matrix_heading['Bin'] = bins
        
        # Absolute wheel velocity
        # design_matrix_heading['avg_wheel_vel'] = np.abs(design_matrix_heading['avg_wheel_vel'])
        
        # unnorm_mat['avg_wheel_vel'] = np.abs(unnorm_mat['avg_wheel_vel'])

        # # Get trial phase
        # bins_df = bins_per_trial_phase(design_matrix_heading, session_trials)
        # bins_df = bins_df.rename(columns={0: 'Bin'})
        # new_df = design_matrix_heading.merge(bins_df, on='Bin')

        # Wheel data
        wheel = wheel_trial_epoch(one, session_trials, session, bin_size)
        wheel_df = process_quiescence(wheel)
        wheel_df = wheel_df.rename(columns={"trial": "Trial"})
        wheel_df = wheel_df.groupby(['Trial', 'feedback', 'next_feedback'])['quiesc_length', 'time_to_quiesc', 
                                                                            'pre_quiesc_move_duration', 'pre_quiesc_move_count'].mean().reset_index()

        # Get trial phase
        bins_df = bins_per_trial_phase(unnorm_mat, session_trials)
        bins_df = bins_df.rename(columns={0: 'Bin'})
        new_df = unnorm_mat.merge(bins_df, on='Bin')

        # Get broader labels
        new_df = broader_label(new_df)
        # Average per trial epoch and append side by side
        appended_df = pd.DataFrame()
        for i, v in enumerate(new_df[var].unique()):
            session_df = new_df.groupby(['Trial', 'choice', var])[var_names].mean().reset_index()
            if i == 0:
                appended_df = session_df.loc[session_df[var]=='Choice'].reset_index()
            else:
                appended_df = pd.concat([appended_df, session_df.loc[session_df[var]=='Choice'].reset_index()[var_names]], axis=1)
        
        # Merge the two dataframes
        final_df = appended_df.merge(wheel_df, on='Trial')
        final_df = final_df.drop(columns=['index', var])
        
        # Save results
        if m == 0:
            all_animals_df = final_df.copy()
        else:
            all_animals_df = all_animals_df.append(final_df)
        

Fitting mouse NR_0019


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_df['movement_duration'][1:] = np.diff(new_df['movement_onset'])
  wheel_df = wheel_df.groupby(['Trial', 'feedback', 'next_feedback'])['quiesc_length', 'time_to_quiesc',
  all_df = quiescence_df.append(left_stim_df)
  all_df = all_df.append(right_stim_df)
  all_df = all_df.append(left_df)
  all_df = all_df.append(right_df)
  all_df = all_df.append(correct_df)
  all_df = all_df.append(incorrect_df)
  all_df = all_df.append(iti_df_correct)
  all_df = all_df.append(iti_df_incorrect)


AttributeError: module 'pandas' has no attribute 'DafaFrame'

In [7]:


        # Average per trial epoch and append side by side
        appended_df = pd.DataFrame()
        for i, v in enumerate(new_df[var].unique()):
            session_df = new_df.groupby(['Trial', 'choice', var])[var_names].mean().reset_index()
            if i == 0:
                appended_df = session_df.loc[session_df[var]=='Choice'].reset_index()
            else:
                appended_df = pd.concat([appended_df, session_df.loc[session_df[var]=='Choice'].reset_index()[var_names]], axis=1)
        
        # Merge the two dataframes
        final_df = appended_df.merge(wheel_df, on='Trial')
        final_df = final_df.drop(columns=['index', var])

In [8]:
final_df

Unnamed: 0,Trial,choice,avg_wheel_vel,pupil_diameter,whisker_me,nose_speed,l_paw_speed,left_X,left_Y,pupil_speed,...,pupil_speed.1,pupil_X,pupil_Y,Lick count,feedback,next_feedback,quiesc_length,time_to_quiesc,pre_quiesc_move_duration,pre_quiesc_move_count
0,0.0,-1.0,0.550476,-2.197134,9.400145,11.302629,111.807603,618.150509,407.683583,13.176123,...,13.176123,504.577444,97.706131,0.0,1.0,1.0,0.065,2.233,1.390,3.0
1,1.0,-1.0,0.435981,1.096427,9.580277,11.994240,124.029133,599.544570,470.212411,12.153387,...,12.153387,513.424259,99.301131,0.0,1.0,1.0,0.017,3.816,2.368,3.0
2,2.0,-1.0,0.588160,-3.594531,9.956267,6.861942,108.638840,610.782895,452.398097,8.910444,...,8.910444,508.253536,92.355560,0.0,1.0,1.0,0.224,2.208,2.172,3.0
3,3.0,1.0,-1.363633,-4.787380,11.214179,11.584343,287.458587,606.520329,391.693284,11.321547,...,11.321547,505.102661,94.759828,0.0,1.0,1.0,0.261,4.718,1.135,2.0
4,4.0,1.0,-0.257969,0.878783,12.115533,11.551937,107.407481,625.769227,354.049753,9.196149,...,9.196149,516.214431,97.432091,0.0,1.0,-1.0,0.407,3.988,0.765,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
765,892.0,1.0,-0.047131,0.453868,4.651405,9.504309,37.708188,619.234363,394.531071,9.402752,...,9.402752,505.764815,89.618333,0.0,1.0,1.0,0.075,2.435,2.196,2.0
766,893.0,1.0,-1.276366,-1.099013,11.955361,20.561902,171.686951,621.057652,378.585576,10.889383,...,10.889383,508.731877,95.693131,0.0,1.0,1.0,0.050,2.239,1.000,3.0
767,894.0,1.0,-2.403758,-3.103179,11.481006,20.221175,89.650991,640.722307,310.786899,8.813670,...,8.813670,510.081844,95.526449,0.0,1.0,1.0,0.043,2.246,1.026,4.0
768,895.0,1.0,-0.014238,-0.793435,2.959137,7.601116,20.004474,637.013176,359.064003,6.027601,...,6.027601,503.173422,88.419866,0.0,1.0,1.0,0.074,2.316,0.327,1.0


In [57]:
        appended_df = pd.DataFrame()
        for i, v in enumerate(new_df[var].unique()):
            session_df = new_df.groupby(['Trial', 'choice', var])[var_names].mean().reset_index()
            if i == 0:
                appended_df = session_df.loc[session_df[var]=='Choice'].reset_index()
            else:
                appended_df = pd.concat([appended_df, session_df.loc[session_df[var]=='Choice'].reset_index()[var_names]], axis=1)
        

In [58]:
appended_df

Unnamed: 0,index,Trial,choice,broader_label,avg_wheel_vel,pupil_diameter,whisker_me,nose_speed,l_paw_speed,left_X,...,pupil_diameter.1,whisker_me.1,nose_speed.1,l_paw_speed.1,left_X.1,left_Y,pupil_speed,pupil_X,pupil_Y,Lick count
0,0,0.0,-1.0,Choice,0.826132,-2.197134,9.400145,11.302629,111.807603,618.150509,...,-2.197134,9.400145,11.302629,111.807603,618.150509,407.683583,13.176123,504.577444,97.706131,0.0
1,3,1.0,-1.0,Choice,0.453758,1.096427,9.580277,11.994240,124.029133,599.544570,...,1.096427,9.580277,11.994240,124.029133,599.544570,470.212411,12.153387,513.424259,99.301131,0.0
2,7,2.0,-1.0,Choice,0.588160,-3.594531,9.956267,6.861942,108.638840,610.782895,...,-3.594531,9.956267,6.861942,108.638840,610.782895,452.398097,8.910444,508.253536,92.355560,0.0
3,11,3.0,1.0,Choice,1.363633,-4.787380,11.214179,11.584343,287.458587,606.520329,...,-4.787380,11.214179,11.584343,287.458587,606.520329,391.693284,11.321547,505.102661,94.759828,0.0
4,15,4.0,1.0,Choice,0.261414,0.878783,12.115533,11.551937,107.407481,625.769227,...,0.878783,12.115533,11.551937,107.407481,625.769227,354.049753,9.196149,516.214431,97.432091,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
76,285,84.0,-1.0,Choice,0.759658,0.362664,6.627517,11.782875,233.849971,616.972488,...,0.362664,6.627517,11.782875,233.849971,616.972488,424.760166,8.848255,508.235276,95.598744,0.0
77,288,85.0,-1.0,Choice,2.814653,2.071334,8.777136,6.378155,53.724408,618.879350,...,2.071334,8.777136,6.378155,53.724408,618.879350,389.700158,12.750349,511.749172,95.738024,0.0
78,291,86.0,-1.0,Choice,0.175428,4.589742,2.955158,4.577713,45.033452,608.830078,...,4.589742,2.955158,4.577713,45.033452,608.830078,449.972849,9.303341,511.282364,91.514290,0.0
79,294,87.0,-1.0,Choice,1.129319,10.440143,9.645418,8.626710,109.670491,602.685095,...,10.440143,9.645418,8.626710,109.670491,602.685095,455.412610,19.597637,507.858833,96.854754,0.0


In [51]:
session_df.loc[session_df[var]=='Choice'].reset_index()[var_names]

Unnamed: 0,avg_wheel_vel,pupil_diameter,whisker_me,nose_speed,l_paw_speed,left_X,left_Y,pupil_speed,pupil_X,pupil_Y,Lick count
0,0.826132,-2.197134,9.400145,11.302629,111.807603,618.150509,407.683583,13.176123,504.577444,97.706131,0.0
1,0.453758,1.096427,9.580277,11.994240,124.029133,599.544570,470.212411,12.153387,513.424259,99.301131,0.0
2,0.588160,-3.594531,9.956267,6.861942,108.638840,610.782895,452.398097,8.910444,508.253536,92.355560,0.0
3,1.363633,-4.787380,11.214179,11.584343,287.458587,606.520329,391.693284,11.321547,505.102661,94.759828,0.0
4,0.261414,0.878783,12.115533,11.551937,107.407481,625.769227,354.049753,9.196149,516.214431,97.432091,0.0
...,...,...,...,...,...,...,...,...,...,...,...
76,0.759658,0.362664,6.627517,11.782875,233.849971,616.972488,424.760166,8.848255,508.235276,95.598744,0.0
77,2.814653,2.071334,8.777136,6.378155,53.724408,618.879350,389.700158,12.750349,511.749172,95.738024,0.0
78,0.175428,4.589742,2.955158,4.577713,45.033452,608.830078,449.972849,9.303341,511.282364,91.514290,0.0
79,1.129319,10.440143,9.645418,8.626710,109.670491,602.685095,455.412610,19.597637,507.858833,96.854754,0.0


In [45]:
new_df.keys()

Index(['Bin', 'avg_wheel_vel', 'pupil_diameter', 'whisker_me', 'nose_speed',
       'l_paw_speed', 'left_X', 'left_Y', 'pupil_speed', 'pupil_X', 'pupil_Y',
       'Lick count', 'label', 'Trial', 'correct', 'signed_contrast', 'choice',
       'broader_label'],
      dtype='object')

In [40]:
for i, v in enumerate(final_df[var].unique()):
    
    

0 Choice
1 ITI
2 Stimulus
3 Quiescence


In [34]:
final_df

Unnamed: 0,Trial,choice,broader_label,avg_wheel_vel,pupil_diameter,whisker_me,nose_speed,l_paw_speed,left_X,left_Y,pupil_speed,pupil_X,pupil_Y,Lick count,feedback,next_feedback,quiesc_length,time_to_quiesc,pre_quiesc_move_duration,pre_quiesc_move_count
0,0.0,-1.0,Choice,0.826132,-2.197134,9.400145,11.302629,111.807603,618.150509,407.683583,13.176123,504.577444,97.706131,0.0,1.0,1.0,0.065,2.233,1.390,3.0
1,0.0,-1.0,ITI,0.641060,-0.516327,9.549812,11.835329,90.800288,591.300431,482.126907,10.393859,504.187281,95.631850,0.0,1.0,1.0,0.065,2.233,1.390,3.0
2,0.0,-1.0,Stimulus,0.011034,-2.699264,4.162473,4.261357,23.148943,591.373647,496.123943,10.857258,506.605897,94.375591,0.0,1.0,1.0,0.065,2.233,1.390,3.0
3,1.0,-1.0,Choice,0.453758,1.096427,9.580277,11.994240,124.029133,599.544570,470.212411,12.153387,513.424259,99.301131,0.0,1.0,1.0,0.017,3.816,2.368,3.0
4,1.0,-1.0,ITI,0.316926,2.776509,10.568571,4.654246,95.747787,635.820553,387.812358,9.157098,508.171518,94.478773,0.0,1.0,1.0,0.017,3.816,2.368,3.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
289,85.0,-1.0,ITI,0.551836,6.996808,3.453124,5.746657,47.645274,601.883085,453.428486,6.301864,508.322079,91.224649,0.0,-1.0,-1.0,0.053,3.260,0.860,2.0
290,85.0,-1.0,Quiescence,0.041989,-20.151287,7.642675,4.813738,35.100661,606.640593,437.974860,7.970615,511.702143,91.775361,0.0,-1.0,-1.0,0.053,3.260,0.860,2.0
291,86.0,-1.0,Choice,0.175428,4.589742,2.955158,4.577713,45.033452,608.830078,449.972849,9.303341,511.282364,91.514290,0.0,-1.0,1.0,0.065,4.431,3.289,2.0
292,86.0,-1.0,ITI,0.282220,5.709799,3.229477,3.856431,50.099243,603.063407,449.569281,8.373342,508.883254,90.899535,0.0,-1.0,1.0,0.065,4.431,3.289,2.0
