## Data preprocessing steps

The following steps were performed to preprocess the data. They were applied to each of the 15 sessions.

The preprocessed and trial-concatenated data was stored. All analyses were done using the saved data.

In [1]:
'''
Dependencies
'''
# General
from sklearn.decomposition import PCA
from tqdm.auto import tqdm
import scipy.io as sio
import pandas as pd
import numpy as np
import scipy
import random
import sys
import warnings 
from scipy import stats
warnings.simplefilter('ignore')
import statsmodels.api as sm
from tqdm import tqdm as pbar

# Utils and specific functions
from pyaldata import * 
sys.path.insert(0, '..') # rutils is in main folder directory
from rutils import *

In [10]:
# Sessions. Note that VR sessions typically have more neurons than CF sessions.

# Directory that holds the session .mat files and the electrode map (.cmp) files.
# Kept as a single constant so the notebook can be pointed at another machine's
# data folder by editing one line. The trailing slash is required because paths
# are built by plain string concatenation.
DATA_DIR = '/Users/Daphne/Data/'

sessions_vr = [
    'Chewie_CO_VR_2016-09-09.mat',  # File Matt used to generate initial figures
    'Chewie_CO_VR_2016-09-12.mat',
    'Chewie_CO_VR_2016-09-14.mat',
    'Chewie_CO_VR_2016-10-06.mat',
    'Mihili_CO_VR_2014-03-03.mat',
    'Mihili_CO_VR_2014-03-04.mat',
    'Mihili_CO_VR_2014-03-06.mat',
]

sessions_cf = [
    'Chewie_CO_FF_2016-09-15.mat',
    'Chewie_CO_FF_2016-09-21.mat',
    'Chewie_CO_FF_2016-10-05.mat',
    'Chewie_CO_FF_2016-10-07.mat',
    'Mihili_CO_FF_2014-02-03.mat',
    'Mihili_CO_FF_2014-02-17.mat',
    'Mihili_CO_FF_2014-02-18.mat',
    'Mihili_CO_FF_2014-03-07.mat',
]

# Load electrode maps for electrodes 1..96 of each array.
# NOTE(review): both .cmp files are Chewie's arrays, while the session lists
# also contain Mihili recordings — confirm these maps are meant for both monkeys.
m1_emap = localize_elecs(
    read_cmp(file_path=DATA_DIR + 'Chewie Left M1 SN 6250-001474.cmp'),
    elecs=range(1, 97),
)
pmd_emap = localize_elecs(
    read_cmp(file_path=DATA_DIR + 'Chewie Left PMd SN 6251-001469.cmp'),
    elecs=range(1, 97),
)

# Fixed seed so the shuffled maps below are identical on every run.
np.random.seed(2021)

# Control number 3: randomize the electrode positions.
# The M1 map is permuted before the PMd map; keep this order, since both draws
# come from the same seeded global RNG stream.
# reshape(10, 10) assumes each map holds 100 entries — TODO confirm against localize_elecs.
m1_emap_rand = np.random.permutation(m1_emap.flat).reshape(10, 10)
pmd_emap_rand = np.random.permutation(pmd_emap.flat).reshape(10, 10)

In [13]:
# Pick a session and load it in as a pandas dataframe.
# NOTE(review): the saved outputs further down report
# 'Mihili_CO_FF_2014-03-07.mat', so they were produced with a different
# selection than the one made here — re-run the notebook before relying on them.
session = sessions_cf[0]
session_path = '/Users/Daphne/Data/' + session
df = mat2dataframe(session_path, shift_idx_fields=True)

# First character of the file name ('C' or 'M' for the sessions listed above).
s_monkey = session[:1]

# Display the M1 unit guide stored under index label 0.
df['M1_unit_guide'][0]

(76, 2)

In [None]:
import preprocess

# TODO: unfinished cell. The original line here was a bare `td =` with no
# right-hand side, which is a SyntaxError and stops Restart & Run All.
# `td` is built step by step in the preprocessing cell of section 1.1 below.

## 1 Preprocess

### 1.1 Preprocess spiking data 

First, we preprocess the trial data. Note that changing the parameters here will influence the results. 

In [5]:
warnings.filterwarnings('ignore')

# The pipeline always starts from the untouched `df`, so this cell can be
# re-run safely and `df` stays available for the before/after comparison.

# Combine bins
td = combine_time_bins(df, n_bins=3)

# Remove low firing neurons (same threshold for both arrays)
td = remove_low_firing_neurons(td, signal='M1_spikes',  threshold=1)
td = remove_low_firing_neurons(td, signal='PMd_spikes', threshold=1)

# Sqrt transform neurons
td = transform_signal(td, signals='M1_spikes',  transformations='sqrt')
td = transform_signal(td, signals='PMd_spikes', transformations='sqrt')

# Merge signals
td = merge_signals(td, ['M1_spikes', 'PMd_spikes'], 'both_spikes')

# Calculate firing rates from spikes, works on '_spikes' fields and automatically divides by bin_size
td = add_firing_rates(td, 'smooth', std=0.05)

# Select only baseline (BL) trials
td = td.loc[td['epoch'] == 'BL']

# Neuron counts are read from the first row *by position* (.iloc[0]).
# The epoch filter above keeps the original index labels, so a label lookup
# such as td.M1_spikes[0] raises KeyError whenever trial 0 is not a BL trial.
n_before = df.M1_spikes.iloc[0].shape[1] + df.PMd_spikes.iloc[0].shape[1]
n_m1 = td.M1_spikes.iloc[0].shape[1]
n_pmd = td.PMd_spikes.iloc[0].shape[1]

print(f'Number of neurons before processing: {n_before} \
      => After processing: {n_m1 + n_pmd} (M1: {n_m1}, PMd: {n_pmd})')

Number of neurons before processing: 92       => After processing: 91 (M1: 26, PMd: 65)


In [6]:
# Report which session file the cells above were run on.
selected_session_msg = f'\n Selected session: {session} \n'
print(selected_session_msg)


 Selected session: Mihili_CO_FF_2014-03-07.mat 



### 1.2 Trial-concatenate data 

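Concatenate all trials to get an $N \times (T \times P)$ matrix $\mathbf{X}$, where $N$ is the number of neurons and $T \times P$ is the total number of time points across all concatenated trials.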

In [8]:
# Join the per-trial 'both_rates' arrays along their first axis and transpose.
# The result is the trial-concatenated data on which we will perform
# dimensionality reduction.
rates_by_trial = td['both_rates'].values
X_concat = np.concatenate(rates_by_trial, axis=0).T

X_concat.shape # (neurons x timepoints)

(91, 33005)

This matrix was saved and used for further analyses.