Calculate ACFs:
1. Pearsonr trial average
2. STTC trial average
3. STTC trial concat
4. Pearsonr per trial
5. ACF proper per trial
6. iSTTC per trial

In [1]:
import contextlib
import csv
import sys

import numpy as np
import pandas as pd
from statsmodels.tsa.stattools import acf

# import from scripts
# The project helpers are imported by temporarily switching the working directory to the
# scripts folder (a relative, Windows-style path) and switching back afterwards.
# NOTE(review): presumably this relies on the current directory being on sys.path in the kernel — confirm.
# If either import raises, the final os.chdir is never reached and the working directory stays changed.
import os
current_wd = os.getcwd()
os.chdir(os.path.abspath("..\\..\\..\\isttc\\scripts"))
from calculate_acf import acf_pearsonr_trial_avg, acf_sttc_trial_avg, acf_sttc_trial_concat, acf_pearsonr, acf_sttc
from cfg_global import project_folder_path
os.chdir(current_wd)

### Get and prep the data

In [2]:
# Input CSVs are read from data_folder; pickles, .npy files and log files are written under results_folder.
# Both are built by plain string concatenation with Windows-style separators, so
# project_folder_path is presumably expected to end with a path separator — TODO confirm.
data_folder = project_folder_path + 'results\\monkey\\'
results_folder = project_folder_path + 'results\\monkey\\fixation_period_1000ms\\'

In [3]:
# Area tag; it is inserted into the input CSV file names and into the output sub-folder paths.
area = 'pfp'

In [4]:
# binned data
# binned data: every CSV row is one spike train (five metadata fields followed by the bin values)
csv_data_file = data_folder + 'data_' + area + '_fixon_1000ms_with_empty_fixation_binned_50ms.csv'
with open(csv_data_file, newline='') as csv_file:
    sua_binned_list = list(csv.reader(csv_file))

n_binned_spike_trains = len(sua_binned_list)
print('N spike_trains in {}: {}'.format(area, n_binned_spike_trains))

# rows -> 2D array of strings -> dataframe
sua_binned_array = np.array(sua_binned_list)

meta_cols = ['unit_id', 'trial_id','condition_id','spike_count','fr_hz']
n_bin_cols = sua_binned_array.shape[1] - len(meta_cols)
bin_cols = ['bin_' + str(bin_idx) for bin_idx in range(n_bin_cols)]

# everything was read as text: fr_hz is cast to float, every other column to int
col_dtypes = {col_name: (float if col_name == 'fr_hz' else int) for col_name in meta_cols + bin_cols}
sua_binned_df = pd.DataFrame(sua_binned_array, columns=meta_cols + bin_cols).astype(col_dtypes)

n_binned_units = len(sua_binned_df['unit_id'].unique())
print('n units {}'.format(n_binned_units))

sua_binned_df.head(2)

N spike_trains in pfp: 43677
n units 543


Unnamed: 0,unit_id,trial_id,condition_id,spike_count,fr_hz,bin_0,bin_1,bin_2,bin_3,bin_4,...,bin_10,bin_11,bin_12,bin_13,bin_14,bin_15,bin_16,bin_17,bin_18,bin_19
0,0,0,0,1,1.0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
1,0,1,0,2,2.0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0


In [5]:
# non-binned data
# non-binned data: every CSV row is one spike train (five metadata fields followed by the spike values)
csv_data_file = data_folder + 'data_' + area + '_fixon_1000ms_with_empty_fixation.csv'
with open(csv_data_file, newline='') as csv_file:
    sua_non_binned_list = list(csv.reader(csv_file))

n_non_binned_spike_trains = len(sua_non_binned_list)
print('N spike_trains in {}: {}'.format(area, n_non_binned_spike_trains))

# group the spike trains by unit: key is unit_id, value is a list with one int array per trial
units_dict = {}
for csv_row in sua_non_binned_list:
    trial_spikes = np.asarray(csv_row[5:]).astype(int)
    units_dict.setdefault(int(csv_row[0]), []).append(trial_spikes)

n_non_binned_units = len(units_dict)
print('n units {}'.format(n_non_binned_units))

N spike_trains in pfp: 43677
n units 543


### Calculate autocorrelation function

In [7]:
# one output column per lag: acf_0 ... acf_{n_lags - 1}
n_lags = 20
acf_cols = ['acf_' + str(lag_idx) for lag_idx in range(n_lags)]
print('acf_cols {}'.format(acf_cols))

# params for sttc
# bin_size is also passed as the lag shift to the STTC-based functions;
# presumably in ms, matching the "50ms" in the data file names — TODO confirm units of sttc_dt
bin_size, sttc_dt = 50, 49
trial_len = bin_size * n_lags

acf_cols ['acf_0', 'acf_1', 'acf_2', 'acf_3', 'acf_4', 'acf_5', 'acf_6', 'acf_7', 'acf_8', 'acf_9', 'acf_10', 'acf_11', 'acf_12', 'acf_13', 'acf_14', 'acf_15', 'acf_16', 'acf_17', 'acf_18', 'acf_19']


In [8]:
# Switches selecting which ACF variants are (re)computed and saved.
# Each calculation cell below is wrapped in `if <flag>:` and does nothing while its flag is False.
calc_pearsonr_trial_avg = False  # Pearson, trial average (binned data)
calc_sttc_trial_avg = False  # STTC, trial average (non-binned data)
calc_sttc_trial_concat = False  # STTC, trials concatenated (non-binned data)
calc_pearsonr_per_trial = False  # Pearson, per trial (binned data)
calc_acf_proper_per_trial = False  # statsmodels acf, per trial (binned data)
calc_isttc_per_trial = False  # iSTTC, per trial (non-binned data)

#### Using Pearson trial-average (as in papers)

In [None]:
# Trial-average Pearson ACF: one ACF (plus the full lag matrix) per unit, computed from the binned trials.
# Units with a single trial are skipped. Results are saved as a pickle (averages) and a .npy (matrices).
if calc_pearsonr_trial_avg:
    acf_pearsonr_trial_avg_l = []
    acf_matrix_pearsonr_trial_avg_l = []

    unit_id_l = sua_binned_df['unit_id'].unique()
    unit_id_calc_l = []

    # group once instead of re-filtering the whole frame for every unit
    trials_by_unit = sua_binned_df.groupby('unit_id', sort=False)

    for unit in unit_id_l:
        print('Processing unit {}'.format(unit))
        sua_binned_unit_df = trials_by_unit.get_group(unit)
        print('N trials {}'.format(len(sua_binned_unit_df)))

        if len(sua_binned_unit_df) <= 1:
            print('ONLY 1 TRIAL: can not calculate, skipping...')
            continue

        acf_matrix, acf_average = acf_pearsonr_trial_avg(sua_binned_unit_df[bin_cols].values, n_lags, verbose_=False)
        acf_pearsonr_trial_avg_l.append(acf_average)
        acf_matrix_pearsonr_trial_avg_l.append(acf_matrix)
        unit_id_calc_l.append(unit)

    acf_pearsonr_trial_avg_df = pd.DataFrame(np.array(acf_pearsonr_trial_avg_l), columns=acf_cols)
    acf_pearsonr_trial_avg_df.insert(0, 'unit_id', unit_id_calc_l)

    print('NaNs in acf {}'.format(acf_pearsonr_trial_avg_df.isnull().any().any()))
    # a bare `.head(3)` inside an if-block is never displayed by the notebook, so print the preview
    print(acf_pearsonr_trial_avg_df.head(3))

    acf_pearsonr_trial_avg_df.to_pickle(results_folder + 'binned\\' + area + '\\acf\\acf_pearsonr_trial_avg_1000ms_with_empty_50ms_20lags_df.pkl')
    np.save(results_folder + 'binned\\' + area + '\\acf\\acf_matrix_pearsonr_trial_avg_1000ms_with_empty_50ms_20lags_df.npy', acf_matrix_pearsonr_trial_avg_l)

#### Using STTC trial-average

In [None]:
# Trial-average STTC ACF: one ACF (plus the full lag matrix) per unit, computed from the non-binned trials.
# Units with a single trial are skipped. Results are saved as a pickle (averages) and a .npy (matrices).
if calc_sttc_trial_avg:
    acf_sttc_trial_avg_l = []
    acf_matrix_sttc_trial_avg_l = []
    unit_id_calc_l = []

    for unit_id, unit_trials in units_dict.items():
        print('Processing unit {}, n trials {}'.format(unit_id, len(unit_trials)))

        if len(unit_trials) <= 1:
            print('ONLY 1 TRIAL: can not calculate, skipping...')
            continue

        # NOTE(review): zero_padding_len_=150 is a hard-coded value whose meaning is defined in calculate_acf — confirm
        acf_matrix, acf_average = acf_sttc_trial_avg(unit_trials, n_lags_=n_lags, lag_shift_=bin_size, sttc_dt_=sttc_dt, zero_padding_len_=150, verbose_=False)
        acf_sttc_trial_avg_l.append(acf_average)
        acf_matrix_sttc_trial_avg_l.append(acf_matrix)
        unit_id_calc_l.append(unit_id)

    acf_sttc_trial_avg_df = pd.DataFrame(np.array(acf_sttc_trial_avg_l), columns=acf_cols)
    acf_sttc_trial_avg_df.insert(0, 'unit_id', unit_id_calc_l)

    print('NaNs in acf {}'.format(acf_sttc_trial_avg_df.isnull().any().any()))
    # a bare `.head(3)` inside an if-block is never displayed by the notebook, so print the preview
    print(acf_sttc_trial_avg_df.head(3))

    acf_sttc_trial_avg_df.to_pickle(results_folder + 'non_binned\\' + area + '\\acf\\acf_sttc_trial_avg_1000ms_with_empty_50ms_20lags_df.pkl')
    np.save(results_folder + 'non_binned\\' + area + '\\acf\\acf_matrix_sttc_trial_avg_1000ms_with_empty_50ms_20lags_df.npy', acf_matrix_sttc_trial_avg_l)

#### Using STTC trial-concat

In [None]:
# Trial-concatenated STTC ACF: one ACF per unit, computed from the non-binned trials.
# Units with a single trial are skipped. The result is saved as a pickle.
if calc_sttc_trial_concat:
    acf_sttc_trial_concat_l = []
    acf_matrix_sttc_trial_concat_l = []  # kept for parity with the trial-average cells; not filled here
    unit_id_calc_l = []

    for unit_id, unit_trials in units_dict.items():
        print('Processing unit {}, n trials {}'.format(unit_id, len(unit_trials)))

        if len(unit_trials) <= 1:
            print('ONLY 1 TRIAL: can not calculate, skipping...')
            continue

        # NOTE(review): zero_padding_len_=2000 is a hard-coded value whose meaning is defined in calculate_acf — confirm
        acf_concat = acf_sttc_trial_concat(unit_trials, n_lags_=n_lags, lag_shift_=bin_size, sttc_dt_=sttc_dt, trial_len_=trial_len,
                                           zero_padding_len_=2000, verbose_=False)
        acf_sttc_trial_concat_l.append(acf_concat)
        unit_id_calc_l.append(unit_id)

    acf_sttc_trial_concat_df = pd.DataFrame(np.array(acf_sttc_trial_concat_l), columns=acf_cols)
    acf_sttc_trial_concat_df.insert(0, 'unit_id', unit_id_calc_l)

    print('NaNs in acf {}'.format(acf_sttc_trial_concat_df.isnull().any().any()))
    # a bare `.head(3)` inside an if-block is never displayed by the notebook, so print the preview
    print(acf_sttc_trial_concat_df.head(3))

    acf_sttc_trial_concat_df.to_pickle(results_folder + 'non_binned\\' + area + '\\acf\\acf_sttc_trial_concat_1000ms_with_empty_50ms_20lags_df.pkl')

#### Per trial: using Pearson

In [None]:
# Per-trial Pearson ACF on the binned data. Trials with at most one non-zero bin are skipped.
# Progress messages, warnings and the NaN check are written to a log file instead of the notebook.
# todo I use df now but I can also use just a list here? (in trial average I use df to get all trials for a specific unit)
if calc_pearsonr_per_trial:
    acf_trial_pearsonr_l, unit_id_pearsonr_l, trial_id_pearsonr_l, condition_id_pearsonr_l, spike_count_pearsonr_l, fr_hz_pearsonr_l = [],[],[],[],[],[]

    # materialise the columns once; doing this inside the loop rebuilds the arrays for every row
    binned_counts = sua_binned_df[bin_cols].values
    unit_ids = sua_binned_df['unit_id'].values
    trial_ids = sua_binned_df['trial_id'].values
    condition_ids = sua_binned_df['condition_id'].values
    spike_counts = sua_binned_df['spike_count'].values
    fr_hz_values = sua_binned_df['fr_hz'].values

    log_path = results_folder + 'binned\\' + area + '\\acf\\acf_per_trial_pearsonr_cal_log.txt'
    # the context managers close the log file and restore stdout even if the calculation raises
    with open(log_path, 'w') as log_file, contextlib.redirect_stdout(log_file):
        for idx in range(len(sua_binned_df)):
            print('Processing unit {}, trial {}'.format(unit_ids[idx], trial_ids[idx]))
            trial_counts = binned_counts[idx, :]
            n_non_zero_bins = np.count_nonzero(trial_counts)
            if n_non_zero_bins <= 1:
                print('WARNING: trial has {} non zero bins, nothing to correlate, skipping...'.format(n_non_zero_bins))
                continue

            acf_pearsonr_ = acf_pearsonr(trial_counts, n_lags_=n_lags, verbose_=False)
            acf_trial_pearsonr_l.append(acf_pearsonr_)
            unit_id_pearsonr_l.append(unit_ids[idx])
            trial_id_pearsonr_l.append(trial_ids[idx])
            condition_id_pearsonr_l.append(condition_ids[idx])
            spike_count_pearsonr_l.append(spike_counts[idx])
            fr_hz_pearsonr_l.append(fr_hz_values[idx])

        # NOTE(review): one column fewer than acf_cols — presumably acf_pearsonr returns n_lags - 1 values; confirm
        acf_per_trial_pearsonr_df = pd.DataFrame(np.array(acf_trial_pearsonr_l), columns=acf_cols[:-1])
        acf_per_trial_pearsonr_df.insert(0, 'unit_id', unit_id_pearsonr_l)
        acf_per_trial_pearsonr_df.insert(1, 'trial_id', trial_id_pearsonr_l)
        acf_per_trial_pearsonr_df.insert(2, 'condition_id', condition_id_pearsonr_l)
        acf_per_trial_pearsonr_df.insert(3, 'spike_count', spike_count_pearsonr_l)
        acf_per_trial_pearsonr_df.insert(4, 'fr_hz', fr_hz_pearsonr_l)

        # still inside the redirect: this line goes to the log file
        print('NaNs in acf {}'.format(acf_per_trial_pearsonr_df.isnull().any().any()))

    # a bare `.head(3)` inside an if-block is never displayed by the notebook, so print the preview
    print(acf_per_trial_pearsonr_df.head(3))

    acf_per_trial_pearsonr_df.to_pickle(results_folder + 'binned\\' + area + '\\acf\\acf_pearsonr_per_trial_1000ms_with_empty_50ms_20lags_df.pkl')

#### Per trial: using ACF formula

In [None]:
# Per-trial ACF on the binned data using statsmodels' acf. Trials with at most one non-zero bin are skipped.
# Progress messages, warnings and the NaN check are written to a log file instead of the notebook.
if calc_acf_proper_per_trial:
    acf_trial_proper_l, unit_id_proper_l, trial_id_proper_l, condition_id_proper_l, spike_count_proper_l, fr_hz_proper_l = [],[],[],[],[],[]

    # materialise the columns once; doing this inside the loop rebuilds the arrays for every row
    binned_counts = sua_binned_df[bin_cols].values
    unit_ids = sua_binned_df['unit_id'].values
    trial_ids = sua_binned_df['trial_id'].values
    condition_ids = sua_binned_df['condition_id'].values
    spike_counts = sua_binned_df['spike_count'].values
    fr_hz_values = sua_binned_df['fr_hz'].values

    log_path = results_folder + 'binned\\' + area + '\\acf\\acf_per_trial_proper_cal_log.txt'
    # the context managers close the log file and restore stdout even if the calculation raises
    with open(log_path, 'w') as log_file, contextlib.redirect_stdout(log_file):
        for idx in range(len(sua_binned_df)):
            print('Processing unit {}, trial {}'.format(unit_ids[idx], trial_ids[idx]))
            trial_counts = binned_counts[idx, :]
            n_non_zero_bins = np.count_nonzero(trial_counts)
            if n_non_zero_bins <= 1:
                print('WARNING: trial has {} non zero bins, nothing to correlate, skipping...'.format(n_non_zero_bins))
                continue

            # NOTE(review): statsmodels' acf returns lags 0..nlags (nlags + 1 values), cut to the series length.
            # The result presumably fits acf_cols only because the number of bins equals n_lags; if the two
            # ever differ, the DataFrame construction below will fail — confirm this coupling is intended.
            acf_proper = acf(trial_counts, nlags=n_lags)
            acf_trial_proper_l.append(acf_proper)
            unit_id_proper_l.append(unit_ids[idx])
            trial_id_proper_l.append(trial_ids[idx])
            condition_id_proper_l.append(condition_ids[idx])
            spike_count_proper_l.append(spike_counts[idx])
            fr_hz_proper_l.append(fr_hz_values[idx])

        acf_proper_df = pd.DataFrame(np.array(acf_trial_proper_l), columns=acf_cols)
        acf_proper_df.insert(0, 'unit_id', unit_id_proper_l)
        acf_proper_df.insert(1, 'trial_id', trial_id_proper_l)
        acf_proper_df.insert(2, 'condition_id', condition_id_proper_l)
        acf_proper_df.insert(3, 'spike_count', spike_count_proper_l)
        acf_proper_df.insert(4, 'fr_hz', fr_hz_proper_l)

        # still inside the redirect: this line goes to the log file
        print('NaNs in acf {}'.format(acf_proper_df.isnull().any().any()))

    # a bare `.head(3)` inside an if-block is never displayed by the notebook, so print the preview
    print(acf_proper_df.head(3))

    acf_proper_df.to_pickle(results_folder + 'binned\\' + area + '\\acf\\acf_proper_per_trial_1000ms_with_empty_50ms_20lags_df.pkl')

#### Per trial: using iSTTC

In [None]:
# Per-trial iSTTC ACF on the non-binned data. Trials with at most one spike value are skipped.
# Progress messages, warnings and the NaN check are written to a log file instead of the notebook.
if calc_isttc_per_trial:
    acf_trial_isttc_l, unit_id_isttc_l, trial_id_isttc_l, condition_id_isttc_l, spike_count_isttc_l, fr_hz_isttc_l = [],[],[],[],[],[]

    log_path = results_folder + 'non_binned\\' + area + '\\acf\\acf_per_trial_isttc_cal_log.txt'
    # the context managers close the log file and restore stdout even if the calculation raises
    with open(log_path, 'w') as log_file, contextlib.redirect_stdout(log_file):
        for csv_row in sua_non_binned_list:
            print('Processing unit {}, trial {}'.format(csv_row[0], csv_row[1]))
            spike_train = np.asarray(csv_row[5:]).astype(int)
            if len(spike_train) <= 1:
                print('WARNING: trial has {} <= 1, nothing to correlate, skipping...'.format(len(spike_train)))
                continue

            acf_isttc = acf_sttc(spike_train, n_lags, bin_size, sttc_dt, trial_len, verbose_=False)
            acf_trial_isttc_l.append(acf_isttc)
            # NOTE(review): the metadata fields are stored as the raw CSV strings here, whereas the
            # binned-data frames hold them as int/float — confirm downstream code expects this
            unit_id_isttc_l.append(csv_row[0])
            trial_id_isttc_l.append(csv_row[1])
            condition_id_isttc_l.append(csv_row[2])
            spike_count_isttc_l.append(csv_row[3])
            fr_hz_isttc_l.append(csv_row[4])

        acf_isttc_df = pd.DataFrame(np.array(acf_trial_isttc_l), columns=acf_cols)
        acf_isttc_df.insert(0, 'unit_id', unit_id_isttc_l)
        acf_isttc_df.insert(1, 'trial_id', trial_id_isttc_l)
        acf_isttc_df.insert(2, 'condition_id', condition_id_isttc_l)
        acf_isttc_df.insert(3, 'spike_count', spike_count_isttc_l)
        acf_isttc_df.insert(4, 'fr_hz', fr_hz_isttc_l)

        # still inside the redirect: this line goes to the log file
        print('NaNs in acf {}'.format(acf_isttc_df.isnull().any().any()))

    # a bare `.head(3)` inside an if-block is never displayed by the notebook, so print the preview
    print(acf_isttc_df.head(3))

    acf_isttc_df.to_pickle(results_folder + 'non_binned\\' + area + '\\acf\\acf_isttc_per_trial_1000ms_with_empty_50ms_20lags_df.pkl')