## PAW / wheel wavelet decomposition: subsampling sessions into a supersession

In [1]:
# Root directory prepended to every data path built in the cells below.
# Paths are formed by plain string concatenation, so the value must end in '/'.
# NOTE(review): this is a hard-coded absolute path to one user's home directory;
# anyone else running the notebook has to edit it by hand.
prefix = '/home/ines/repositories/'
# Alternative root (currently disabled):
# prefix = '/Users/ineslaranjeira/Documents/Repositories/'

In [2]:
""" 
IMPORTS
"""
import os
import numpy as np
import pandas as pd
from one.api import ONE
from sklearn.manifold import TSNE
from scipy import stats


# Get my functions
from functions import idxs_from_files, plot_kde, GMM_neg_log_likelihood

# Instantiate the ONE API client with mode='remote'.
# NOTE(review): `one` is not referenced by any cell visible in this notebook —
# confirm it is still needed before keeping this call.
one = ONE(mode='remote')

In [3]:
""" 
LOAD DATA AND PARAMETERS
"""
# --- Parameters ---
# Which behavioural variable to process: 'paw' or 'wheel'.
var = 'wheel'
# Read later, on the 'paw' branch only, when the wavelet file prefix is chosen.
velocity = True

# --- Design matrices ---
# Keep files whose name contains 'design_matrix' but not 'standardized'.
data_path = prefix + 'representation_learning_variability/paper-individuality/data/design_matrices/kcenia/'
all_files = os.listdir(data_path)
design_matrices = [
    name
    for name in all_files
    if 'design_matrix' in name and 'standardized' not in name
]
idxs, mouse_names = idxs_from_files(design_matrices)

# --- Wavelet directory ---
# One directory per supported variable; any other value of `var` leaves
# `wavelet_path` undefined, so the next cell fails on first use.
wavelet_dirs = {
    'paw': prefix + 'representation_learning_variability/paper-individuality/data/paw_wavelets/1_camera_setup/',
    'wheel': prefix + 'representation_learning_variability/paper-individuality/data/wheel_wavelets/1_camera_setup/',
}
if var in wavelet_dirs:
    wavelet_path = wavelet_dirs[var]

In [4]:
# Collect the (mouse_name, session) pairs for which a wavelet file exists.
files = os.listdir(wavelet_path)
# Set view of the directory listing so each membership test is O(1).
available_files = set(files)

# File-name prefix of the wavelet files for the selected variable.
# It is chosen once, before the loop: the subsampling cell further down reads
# `subname` as well, and assigning it inside the loop left it undefined
# whenever `idxs` was empty.
if var == 'paw':
    subname = "paw_vel_wavelets_" if velocity else "paw_pos_wavelets_"
elif var == 'wheel':
    subname = 'wheel_vel_wavelets_'
else:
    raise ValueError(f"Unsupported var {var!r}; expected 'paw' or 'wheel'")

sessions_to_process = []
for mat in idxs:
    # Characters 0-35 are the session id, character 36 is a separator that is
    # skipped, and the remainder is the mouse name.
    session = mat[:36]
    mouse_name = mat[37:]

    filename = subname + str(session) + '_' + mouse_name
    if filename in available_files:
        sessions_to_process.append((mouse_name, session))

print(len(sessions_to_process))

37


# Subsample

In [5]:
# Numeric suffixes that distinguish the wavelet columns of one signal
# (presumably wavelet frequencies — TODO confirm the unit).
freq_tags = ['0.5', '1.0', '2.0', '4.0', '8.0', '16.0', '32.0']

# Columns fed into the subsampling step, in a fixed order.
if var == 'wheel':
    var_interest = ['avg_wheel_vel' + tag for tag in freq_tags]
elif var == 'paw':
    # The four raw paw coordinates come first, followed by every
    # coordinate/suffix combination, grouped by coordinate.
    paw_coords = ['l_paw_x', 'l_paw_y', 'r_paw_x', 'r_paw_y']
    var_interest = paw_coords + [
        coord + tag for coord in paw_coords for tag in freq_tags
    ]


In [6]:
# Build the concatenated subsample across sessions. For every session:
#   1. draw N_SAMPLES rows at random (without replacement),
#   2. embed them in 2-D with t-SNE,
#   3. estimate the embedding density with a Gaussian KDE,
#   4. keep N_RESAMPLED rows drawn with probability proportional to that density.
# `wavelet_path`, `subname`, `var_interest` and `sessions_to_process` come from
# the cells above.
N_SAMPLES = 10000        # rows drawn per session before the embedding
N_RESAMPLED = 2000       # rows kept per session after KDE-weighted resampling
PERPLEXITY = 32          # t-SNE perplexity
SEED = 0                 # fixed so Restart & Run All reproduces the saved array
EXPECTED_SESSIONS = 37   # sanity check against the session-discovery cell

# Local generator: reproducible draws without touching NumPy's global state.
rng = np.random.default_rng(SEED)

assert len(sessions_to_process) == EXPECTED_SESSIONS, (
    f"expected {EXPECTED_SESSIONS} sessions, found {len(sessions_to_process)}"
)

# Per-session arrays are collected here and stacked once at the end; stacking
# inside the loop re-copies everything accumulated so far on every iteration.
session_chunks = []

for mat in sessions_to_process:
    mouse_name, session = mat
    filename = wavelet_path + subname + str(session) + '_' + mouse_name
    design_matrix = pd.read_parquet(filename)

    # Prepare data: columns of interest only, rows with missing values removed.
    data = design_matrix[var_interest].dropna().to_numpy()

    # Sessions without more than N_SAMPLES usable rows are left out; say so
    # instead of dropping them silently.
    if data.shape[0] <= N_SAMPLES:
        print(f"Skipping {mat}: {data.shape[0]} rows, need more than {N_SAMPLES}")
        continue

    # Randomly subsample
    sampled_indices = rng.choice(data.shape[0], N_SAMPLES, replace=False)
    sampled_data = data[sampled_indices, :]

    # Perform t-SNE on the raw (not z-scored) subsample
    X_embedded = TSNE(n_components=2, learning_rate='auto', init='random',
                      perplexity=PERPLEXITY,
                      random_state=SEED).fit_transform(sampled_data)

    # Perform Gaussian kde; gaussian_kde expects shape (n_dims, n_points)
    values = X_embedded.T
    kernel = stats.gaussian_kde(values)
    # Optional diagnostic: plot_kde(X_embedded, kernel)

    # Resample weighted by kde estimate
    sample_prob = kernel(values)
    norm_sample_prob = sample_prob / np.sum(sample_prob)
    resampled_indices = rng.choice(sampled_data.shape[0],
                                   size=N_RESAMPLED,
                                   p=norm_sample_prob,
                                   replace=False)
    session_chunks.append(sampled_data[resampled_indices, :])

    print(mat)

# Same result as before when nothing was collected: an empty 1-D array.
concatenated_subsampled = np.vstack(session_chunks) if session_chunks else np.array([])

('ZFM-06305', '1f959c77-862c-4da1-a454-74a6e99ff33e')
('ZFM-08751', '89a9b132-7cda-4d97-8d28-448119c0910e')
('ZFM-08751', 'f9b1dafa-1ec0-46e2-a72c-987535b72bb6')
('ZFM-04019', '89b8ef70-e620-49c2-a0f7-09890ba9fc0e')
('ZFM-08751', 'b27c0221-781d-4519-bd14-9a03938edc12')
('ZFM-04534', '404d1b7e-f6f2-4122-bd2d-9caa6a0e7777')
('ZFM-08652', '3bef683f-46a4-4b98-8b53-8eb0d5bdf82b')
('ZFM-04019', '04482c20-f984-4d90-97d6-6791bae1e6f7')
('ZFM-08751', 'a3657685-f782-41e6-bc94-2fb1061b1fec')
('ZFM-08652', 'f0cfd8b5-4568-4944-96c9-23d75aed659c')
('ZFM-08871', '8c2639e9-ad67-4c07-bdc0-4a5ba994375c')
('ZFM-08871', 'f2343157-4824-4fa2-84c1-a8bfebe21a8f')
('ZFM-05236', 'e69221c7-b533-42dc-8631-000279a45a70')
('ZFM-05236', '72a5e765-efaf-49a5-831b-3cc35ec76517')
('ZFM-05236', 'd7bf2611-00bc-42d3-95c6-ac120753cd94')
('ZFM-03059', 'dd549116-68a1-462d-90ef-f4e0ec5c69fc')
('ZFM-08828', 'f5369fa4-4bc2-4c65-9217-bea27df0f9ca')
('ZFM-08776', 'fa9efd38-29f9-41a6-a005-21c9fc4be5dc')
('ZFM-06305', '12f1d635-8a2a

In [7]:
# Save the concatenated subsample in NumPy's .npy format.
path = prefix + 'representation_learning_variability/paper-individuality/0_pre-processing/1_camera_setup/'
# The variable label comes from `var`, so a 'paw' run is no longer written to a
# file labelled 'wheel'; with var == 'wheel' the name is unchanged.
# NOTE(review): the date stamp is still hard-coded, and paw position vs.
# velocity runs would share one file name.
out_file = path + 'supersession_wavelets_kcenia_' + var + '02-12-2026'
# np.save is given an open file object, so no '.npy' extension is appended;
# the context manager guarantees the handle is flushed and closed.
with open(out_file, 'wb') as f:
    np.save(f, concatenated_subsampled)