Calculate autocorrelation per trial (1 ACF per trial):

* calculate Pearson for all trials (all non zero spike trials)
* calculate ACF proper for all trials (also non zero)
* calculate iSTTC for all trials (also non zero) 

In [1]:
import pandas as pd
import numpy as np
import csv
from statsmodels.tsa.stattools import acf
import matplotlib.pyplot as plt
import seaborn as sns
import sys

# import from scripts
import os
os.chdir(os.path.expanduser("D:\\intr_timescales\\isttc\\scripts"))
# os.chdir(os.path.expanduser("C:\\Users\\ipoch\\Documents\\repos\\isttc\\scripts"))
from calculate_acf import acf_pearsonr, acf_sttc

### Get and prep the data

In [15]:
area = 'pfdl' # pfp

In [None]:
# binned data
results_folder = 'Q:\\Personal\\Irina\\projects\\isttc\\isttc\\results\\monkey\\'
save_folder_binned = results_folder + 'fixation_period_1000ms\\binned\\' + area + '\\acf\\'

csv_data_file = results_folder + 'data_' + area + '_fixon_1500ms_fixation_with_empty_binned_50ms.csv'
with open(csv_data_file, newline='') as f:
    reader = csv.reader(f)
    sua_binned_list = list(reader)
    
n_binned_spike_trains = len(sua_binned_list)
print('N spike_trains in {}: {}'.format(area, n_binned_spike_trains))

# transform list to an array and to a dataframe 
sua_binned_array = np.array(sua_binned_list)
sua_binned_array = sua_binned_array[:, :-1-9] # for 1000 calc

bin_cols = ['bin_' + str(i) for i in range(sua_binned_array.shape[1]-3)]
sua_binned_df = pd.DataFrame(sua_binned_array, columns=['unit_id', 'trial_id','condition_id'] + bin_cols)
sua_binned_df = sua_binned_df.astype('int')

n_binned_units = len(sua_binned_df['unit_id'].unique())
print('n units {}'.format(n_binned_units))

sua_binned_df.head(2)

In [16]:
# non-binned data
results_folder = 'Q:\\Personal\\Irina\\projects\\isttc\\results\\monkey\\'
save_folder_non_binned = results_folder + 'fixation_period_1000ms\\non_binned\\' + area + '\\acf\\'

csv_data_file = results_folder + 'data_' + area + '_fixon_1500ms_with_empty_fixation.csv'
with open(csv_data_file, newline='') as f:
    reader = csv.reader(f)
    sua_non_binned_list = list(reader)
    
n_non_binned_spike_trains = len(sua_non_binned_list)
print('N spike_trains in {}: {}'.format(area, n_non_binned_spike_trains))

# transform data to a dict, key is unit_id, values is a list of spike trains (one spike train per trial)
units_dict = {}
for spike_train in sua_non_binned_list:
    spike_train_ = np.asarray(spike_train[3:]).astype(int)
    spike_train_1000 = spike_train_[spike_train_ <= 1000]
    if int(spike_train[0]) in units_dict:
        units_dict[int(spike_train[0])].append(spike_train_1000)
    else:
        units_dict[int(spike_train[0])] = []
        units_dict[int(spike_train[0])].append(spike_train_1000)

n_non_binned_units = len(units_dict)
print('n units {}'.format(n_non_binned_units))

### Calculate autocorrelation

In [4]:
n_lags = 20
acf_cols = ['acf_' + str(i) for i in range(n_lags)]
print('acf_cols {}'.format(acf_cols))

acf_cols ['acf_0', 'acf_1', 'acf_2', 'acf_3', 'acf_4', 'acf_5', 'acf_6', 'acf_7', 'acf_8', 'acf_9', 'acf_10', 'acf_11', 'acf_12', 'acf_13', 'acf_14', 'acf_15', 'acf_16', 'acf_17', 'acf_18', 'acf_19']


#### Calc ACF using Pearson 

In [None]:
old_stdout = sys.stdout
sys.stdout = open(save_folder_binned + 'acf_trial_pearsonr_cal_log.txt', 'w')

acf_trial_pearsonr_l, unit_id_pearsonr_l, trial_id_pearsonr = [],[],[]

for idx in range(len(sua_binned_df)):
#for idx in range(10):
    print('Processing unit {}, trial {}'.format(sua_binned_df['unit_id'].values[idx], sua_binned_df['trial_id'].values[idx]))
    if np.count_nonzero(sua_binned_df[bin_cols].values[idx, :]) <= 1:
        print('WARNING: trial has {} non zero bins, nothing to correlate, skipping...'.format(np.count_nonzero(sua_binned_df[bin_cols].values[idx, :])))
    else:
        # n_lags-1 because of Pearson eq - for lag 19 only one values and P will get 0s in denominator
        acf_pearsonr_ = acf_pearsonr(sua_binned_df[bin_cols].values[idx, :], n_lags_=n_lags-1, verbose_=False) 
        acf_trial_pearsonr_l.append(acf_pearsonr_)
        unit_id_pearsonr_l.append(sua_binned_df['unit_id'].values[idx])
        trial_id_pearsonr.append(sua_binned_df['trial_id'].values[idx])

acf_pearsonr_df = pd.DataFrame(np.array(acf_trial_pearsonr_l), columns=acf_cols[:-1])
acf_pearsonr_df.insert(0, 'unit_id', unit_id_pearsonr_l)
acf_pearsonr_df.insert(1, 'trial_id', trial_id_pearsonr)

print('NaNs in acf {}'.format(acf_pearsonr_df.isnull().any().any()))
acf_pearsonr_df.head(3)

sys.stdout = old_stdout

In [None]:
acf_pearsonr_df.to_pickle(save_folder_binned + 'acf_trial_pearsonr_df_50ms_20lags_df.pkl')

#### Calc ACF using ACF equation

In [None]:
old_stdout = sys.stdout
sys.stdout = open(save_folder_binned + 'acf_trial_proper_cal_log.txt', 'w')

acf_trial_proper_l, unit_id_proper_l, trial_id_proper_l = [],[],[]

for idx in range(len(sua_binned_df)):
    print('Processing unit {}, trial {}'.format(sua_binned_df['unit_id'].values[idx], sua_binned_df['trial_id'].values[idx]))
    if np.count_nonzero(sua_binned_df[bin_cols].values[idx, :]) <= 1:
        print('WARNING: trial has {} non zero bins, nothing to correlate, skipping...'.format(np.count_nonzero(sua_binned_df[bin_cols].values[idx, :])))
    else:
        acf_proper = acf(sua_binned_df[bin_cols].values[idx, :], nlags=n_lags)
        acf_trial_proper_l.append(acf_proper)
        unit_id_proper_l.append(sua_binned_df['unit_id'].values[idx])
        trial_id_proper_l.append(sua_binned_df['trial_id'].values[idx])

acf_proper_df = pd.DataFrame(np.array(acf_trial_proper_l), columns=acf_cols)
acf_proper_df.insert(0, 'unit_id', unit_id_proper_l)
acf_proper_df.insert(1, 'trial_id', trial_id_proper_l)

print('NaNs in acf {}'.format(acf_proper_df.isnull().any().any()))
acf_proper_df.head(3)

sys.stdout = old_stdout

In [None]:
acf_proper_df.to_pickle(save_folder_binned + 'acf_trial_proper_df_50ms_20lags_df.pkl')

#### Calc acf using iSTTC

In [None]:
old_stdout = sys.stdout
sys.stdout = open(save_folder_non_binned + 'acf_trial_isttc_cal_log.txt', 'w')

lag_shift = 50
sttc_dt = 50

acf_trial_isttc_l, unit_id_isttc_l, trial_id_isttc_l = [],[],[]

for idx in range(len(sua_non_binned_list)):
    print('Processing unit {}, trial {}'.format(sua_non_binned_list[idx][0], sua_non_binned_list[idx][1]))
    spike_train_a = np.asarray(sua_non_binned_list[idx][3:]).astype(int)
    spike_train = spike_train_a[spike_train_a <= 1000]
    if len(spike_train) <= 1:
        print('WARNING: trial has {} <= 1, nothing to correlate, skipping...'.format(len(spike_train)))
    else:
        acf_isttc = acf_sttc(spike_train, n_lags, lag_shift, sttc_dt, signal_length_=lag_shift*n_lags, verbose_=False))
        acf_trial_isttc_l.append(acf_isttc)
        unit_id_isttc_l.append(sua_non_binned_list[idx][0])
        trial_id_isttc_l.append(sua_non_binned_list[idx][1])

acf_isttc_df = pd.DataFrame(np.array(acf_trial_isttc_l), columns=acf_cols)
acf_isttc_df.insert(0, 'unit_id', unit_id_isttc_l)
acf_isttc_df.insert(1, 'trial_id', trial_id_isttc_l)

print('NaNs in acf {}'.format(acf_isttc_df.isnull().any().any()))
acf_isttc_df.head(3)

sys.stdout = old_stdout

In [None]:
acf_isttc_df

In [None]:
acf_isttc_df.to_pickle(save_folder_non_binned + 'acf_trial_isttc_df_50ms_20lags_df.pkl')

#### Calc acf using iSTTC (concatenating trails in one signal)

In [7]:
len(units_dict[0])

97

In [17]:
zero_padding_len = 2000 # trial len * 2

unit_id_l = []
spike_train_concat_l = []
signal_len_l = []

for k,v in units_dict.items():
    spike_train_concat = v[0]
    for idx, trial in enumerate(v[1:]):
        spike_train_concat = np.hstack((spike_train_concat, trial+(idx+1)*zero_padding_len))
    unit_id_l.append(k)
    spike_train_concat_l.append(spike_train_concat)
    # signal_len_l.append((idx+2)*2000) # 03.12.2024 this is not accurate?
    signal_len_l.append((len(v))*1000 + (len(v)-1)*zero_padding_len)

In [18]:
acf_cols_isttc_concat = ['acf_' + str(i) for i in range(21)]
acf_cols_isttc_concat

['acf_0',
 'acf_1',
 'acf_2',
 'acf_3',
 'acf_4',
 'acf_5',
 'acf_6',
 'acf_7',
 'acf_8',
 'acf_9',
 'acf_10',
 'acf_11',
 'acf_12',
 'acf_13',
 'acf_14',
 'acf_15',
 'acf_16',
 'acf_17',
 'acf_18',
 'acf_19',
 'acf_20']

In [19]:
old_stdout = sys.stdout
sys.stdout = open(save_folder_non_binned + 'acf_trial_concat_isttc_cal_log_03_12_2024.txt', 'w')

lag_shift = 50
sttc_dt = 50

acf_trial_concat_isttc_l, unit_id_isttc_concat_l = [],[]

for idx in range(len(spike_train_concat_l)):
    print('Processing unit {}'.format(unit_id_l[idx]))
    spike_train = spike_train_concat_l[idx]
    if len(spike_train) <= 1:
        print('WARNING: unit has {} <= 1, nothing to correlate, skipping...'.format(len(spike_train)))
    else:
        acf_isttc = acf_sttc(spike_train, n_lags, lag_shift, sttc_dt, signal_length_=signal_len_l[idx], verbose_=False)
        acf_trial_concat_isttc_l.append(acf_isttc)
        unit_id_isttc_concat_l.append(unit_id_l[idx])

acf_isttc_concat_df = pd.DataFrame(np.array(acf_trial_concat_isttc_l), columns=acf_cols_isttc_concat)
acf_isttc_concat_df.insert(0, 'unit_id', unit_id_isttc_concat_l)

print('NaNs in acf {}'.format(acf_isttc_concat_df.isnull().any().any()))
acf_isttc_concat_df.head(3)

sys.stdout = old_stdout

In [20]:
acf_isttc_concat_df

Unnamed: 0,unit_id,acf_0,acf_1,acf_2,acf_3,acf_4,acf_5,acf_6,acf_7,acf_8,...,acf_11,acf_12,acf_13,acf_14,acf_15,acf_16,acf_17,acf_18,acf_19,acf_20
0,0,1.0,1.000000,0.711980,0.625856,0.556889,0.473234,0.408132,0.351745,0.299456,...,0.130273,0.087089,0.042383,-0.006460,-0.045150,-0.091765,-0.135553,-0.175855,-0.211241,-0.231623
1,1,1.0,1.000000,0.213675,0.203603,0.143217,0.143221,0.123120,0.123110,0.022826,...,0.022817,0.002799,0.022811,0.002793,-0.017211,-0.017214,-0.017218,-0.017221,-0.017224,-0.017227
2,2,1.0,1.000000,0.150423,0.094846,0.113349,0.094834,0.108699,0.104066,0.057884,...,0.034834,0.021021,0.030216,0.067071,0.104063,0.067082,-0.011136,-0.006277,-0.006283,-0.033934
3,3,1.0,0.999277,0.610947,0.530236,0.471707,0.411362,0.366655,0.319712,0.276377,...,0.153980,0.121897,0.082160,0.043000,0.005949,-0.031110,-0.060606,-0.092284,-0.137797,-0.160157
4,4,1.0,1.000000,0.283210,0.260980,0.208359,0.143887,0.114764,0.106044,0.098785,...,0.049662,0.039586,0.029500,0.010854,-0.007736,-0.020591,-0.034840,-0.039118,-0.046205,-0.053174
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
534,539,1.0,1.000000,0.420946,0.325190,0.232685,0.193972,0.172260,0.138541,0.092954,...,0.076638,0.062259,0.057465,0.052673,0.028776,0.007321,-0.006987,-0.021288,-0.040297,-0.045047
535,540,1.0,1.000000,0.352969,0.245126,0.114493,0.030880,-0.010773,-0.019928,-0.019931,...,-0.006009,-0.006013,-0.019947,-0.019950,-0.019953,-0.019956,-0.019959,-0.019962,-0.019965,-0.019969
536,541,1.0,1.000000,0.666762,0.560644,0.458181,0.372080,0.292562,0.243016,0.198815,...,0.056611,0.016969,-0.020856,-0.055443,-0.083107,-0.112654,-0.138805,-0.160150,-0.186587,-0.199400
537,542,1.0,1.000000,0.331758,0.252136,0.170943,0.116426,0.070851,0.049259,0.034773,...,0.018069,0.007344,0.009626,0.000950,-0.034156,-0.029741,-0.025392,-0.038531,-0.047239,-0.047247


In [21]:
acf_isttc_concat_df.to_pickle(save_folder_non_binned + 'acf_trial_isttc_concat_df_50ms_20lags_df_03_12_2024.pkl')