We do not have ground truth here, but we have high-quality units from Neuropixels. The idea is to show that STTC concat 
works better than the trial-average methods (both Pearson and STTC).

Steps:
0. Calculate STTC and ACF on the full signal
1. Take the full signal and randomly pick N trials
2. Calculate Pearson trial-average
3. Calculate STTC trial-average
4. Calculate STTC on concatenated data
5. Compare 2, 3, 4 with the values calculated on the full signal

In [1]:
import csv
import numpy as np
import pandas as pd

from statsmodels.tsa.stattools import acf
#from scipy.optimize import curve_fit, OptimizeWarning

#import warnings

# import from scripts
import os
current_wd = os.getcwd()
os.chdir(os.path.abspath("..\\..\\..\\isttc\\scripts"))
#os.chdir(os.path.abspath("C:\\Users\\ipoch\\Documents\\repos\\isttc\\scripts"))
from calculate_tau import fit_single_exp
from cfg_global import project_folder_path
from calculate_acf import acf_sttc, acf_pearsonr_trial_avg, acf_sttc_trial_avg, acf_sttc_trial_concat
os.chdir(current_wd)

In [2]:
# Folder with the dataset files read and written below, located under the
# project root imported from cfg_global (path uses Windows-style separators).
dataset_folder = project_folder_path + 'results\\allen_mice\\dataset\\'
#fig_folder = project_folder_path + 'results\\allen_mice\\fig_draft_paper\\'

### Get data

In [3]:
# Load the spike-train csv: every row becomes one list of strings in sua_list.
csv_data_file = dataset_folder + 'cut_30min\\sua_list_constrained.csv'
with open(csv_data_file, newline='') as f:
    sua_list = list(csv.reader(f))
print(f'Loaded N units {len(sua_list)}')

Loaded N units 5775


In [4]:
# Load the 50 ms binned csv: every row becomes one list of strings in binned_sua_list.
csv_binned_data_file = dataset_folder + 'cut_30min\\sua_list_constrained_binned_50ms.csv'
with open(csv_binned_data_file, newline='') as f:
    binned_sua_list = list(csv.reader(f))
print(f'Loaded N units {len(binned_sua_list)}')

Loaded N units 5775


### Calculate ACFs

In [5]:
# Parameters shared by the ACF calculations below.
n_lags = 20
fs = 30000  # raw neuropixels

# fs / 1000 is the number of samples per millisecond, so the values below
# are expressed in samples (bin_size and sttc_dt are floats).
signal_len = fs * 60 * 30
bin_size = (fs / 1000) * 50
sttc_dt = (fs / 1000) * 49

# One column name per lag, lag 0 included.
acf_cols = ['acf_' + str(lag_idx) for lag_idx in range(n_lags + 1)]
print('acf_cols {}'.format(acf_cols))

acf_cols ['acf_0', 'acf_1', 'acf_2', 'acf_3', 'acf_4', 'acf_5', 'acf_6', 'acf_7', 'acf_8', 'acf_9', 'acf_10', 'acf_11', 'acf_12', 'acf_13', 'acf_14', 'acf_15', 'acf_16', 'acf_17', 'acf_18', 'acf_19', 'acf_20']


In [6]:
# Switches selecting which computations run.
# calc_acf_full and calc_isttc_full guard the two full-signal ACF cells below.
calc_acf_full = True
calc_isttc_full = True
# NOTE(review): the three trial-based flags are not used in the visible cells;
# presumably they guard trial-based calculations elsewhere - confirm.
calc_pearsonr_trial_avg = False
calc_sttc_trial_avg = False
calc_sttc_trial_concat = False

In [7]:
if calc_acf_full:
    # ACF (statsmodels acf) of every row of binned_sua_list over the full signal.
    # Each row holds 8 metadata fields followed by the binned spike train values.
    acf_full_l = []
    unit_metadata_l = []  # To store values 0-7
    
    for unit_idx, unit in enumerate(binned_sua_list):
        if unit_idx % 100 == 0:
            print(f'Processing unit {unit_idx}')
        # csv values are strings, convert them to int before calculating the ACF
        spike_train_binned_int = np.asarray([int(spike) for spike in unit[8:]])
        spike_train_binned_acf = acf(spike_train_binned_int, nlags=n_lags)
        acf_full_l.append(spike_train_binned_acf)
        unit_metadata_l.append(unit[:8])
    
    acf_full_df = pd.DataFrame(np.array(acf_full_l), columns=acf_cols)
    column_names = ["specimen_id", "session_id", "unit_id", "ecephys_structure_acronym", 'firing_rate', 'amplitude_cutoff', 'isi_violations', 'presence_ratio']
    metadata_df = pd.DataFrame(unit_metadata_l, columns=column_names)
    
    # metadata columns first, then one column per lag
    acf_full_df = pd.concat([metadata_df, acf_full_df], axis=1)
    
    print('NaNs in acf {}'.format(acf_full_df.isnull().any().any()))
    # A bare expression inside an if body is not rendered by the notebook,
    # so the preview has to be printed explicitly.
    print(acf_full_df.head(3))
    
    acf_full_df.to_pickle(dataset_folder + 'cut_30min\\binned\\acf\\acf_full_50ms_20lags_df.pkl')

Processing unit 0
Processing unit 100
Processing unit 200
Processing unit 300
Processing unit 400
Processing unit 500
Processing unit 600
Processing unit 700
Processing unit 800
Processing unit 900
Processing unit 1000
Processing unit 1100
Processing unit 1200
Processing unit 1300
Processing unit 1400
Processing unit 1500
Processing unit 1600
Processing unit 1700
Processing unit 1800
Processing unit 1900
Processing unit 2000
Processing unit 2100
Processing unit 2200
Processing unit 2300
Processing unit 2400
Processing unit 2500
Processing unit 2600
Processing unit 2700
Processing unit 2800
Processing unit 2900
Processing unit 3000
Processing unit 3100
Processing unit 3200
Processing unit 3300
Processing unit 3400
Processing unit 3500
Processing unit 3600
Processing unit 3700
Processing unit 3800
Processing unit 3900
Processing unit 4000
Processing unit 4100
Processing unit 4200
Processing unit 4300
Processing unit 4400
Processing unit 4500
Processing unit 4600
Processing unit 4700
Proc

In [None]:
if calc_isttc_full:
    # ACF via acf_sttc for every row of sua_list over the full signal.
    # Each row holds 8 metadata fields followed by the spike train values.
    acf_isttc_full_l = []
    unit_metadata_l = []  # To store values 0-7
    
    for unit_idx, unit in enumerate(sua_list):
        if unit_idx % 100 == 0:
            print(f'Processing unit {unit_idx}')
        # csv values are strings, convert them to int before calculating the ACF
        spike_train_int = np.asarray([int(spike) for spike in unit[8:]])
        spike_train_acf = acf_sttc(spike_train_int, n_lags, bin_size, sttc_dt, signal_len, verbose_=False)
        acf_isttc_full_l.append(spike_train_acf)
        unit_metadata_l.append(unit[:8])
    
    acf_isttc_full_df = pd.DataFrame(np.array(acf_isttc_full_l), columns=acf_cols)
    column_names = ["specimen_id", "session_id", "unit_id", "ecephys_structure_acronym", 'firing_rate', 'amplitude_cutoff', 'isi_violations', 'presence_ratio']
    metadata_df = pd.DataFrame(unit_metadata_l, columns=column_names)
    
    # metadata columns first, then one column per lag
    acf_isttc_full_df = pd.concat([metadata_df, acf_isttc_full_df], axis=1)
    
    print('NaNs in acf {}'.format(acf_isttc_full_df.isnull().any().any()))
    # A bare expression inside an if body is not rendered by the notebook,
    # so the preview has to be printed explicitly.
    print(acf_isttc_full_df.head(3))

    acf_isttc_full_df.to_pickle(dataset_folder + 'cut_30min\\non_binned\\acf\\acf_isttc_full_50ms_20lags_df.pkl')

Processing unit 0


### Make trials

In [None]:
def get_trials(spike_times_, signal_len_, n_trials_, trial_len_, verbose_=False):
    """Cut randomly placed trials out of a spike train.

    Trial starts are drawn independently with randrange, so trials may overlap.
    Spike times, signal_len_ and trial_len_ must be expressed in the same units.

    :param spike_times_: 1d array-like of spike times
    :param signal_len_: length of the full signal
    :param n_trials_: number of trials to draw
    :param trial_len_: length of one trial, must not exceed signal_len_
    :param verbose_: if True, print the drawn trial starts and ends
    :return: list with n_trials_ arrays, each holding the spike times that fall
        into [trial start, trial end) shifted so that the trial starts at 0
    """
    # Imported here so the function does not depend on an import made in another cell.
    from random import randrange

    spike_times_ = np.asarray(spike_times_)

    # get random trial starts and ends; the last possible start is signal_len_ - trial_len_
    trials_start = [randrange(0, signal_len_ - trial_len_ + 1) for _ in range(n_trials_)]
    trials_end = [trial_start + trial_len_ for trial_start in trials_start]
    trial_intervals = np.vstack((trials_start, trials_end)).T
    if verbose_:
        print('N trials {}, trial len {}, n trial starts {}, \ntrial starts {}, \ntrial ends {}'.format(
            n_trials_, trial_len_, len(trials_start), trials_start, trials_end))

    # get the spikes of every trial and realign them to start with 0
    spikes_trials_realigned_l = []
    for trial_start, trial_end in trial_intervals:
        in_trial = np.logical_and(spike_times_ >= trial_start, spike_times_ < trial_end)
        spikes_trials_realigned_l.append(spike_times_[in_trial] - trial_start)

    return spikes_trials_realigned_l

def bin_trials(spikes_trials_l_, trial_len_, bin_size_):
    """Bin the spike times of every trial into spike counts.

    :param spikes_trials_l_: list of 1d arrays with spike times, one array per trial
    :param trial_len_: length of a trial, same units as the spike times
    :param bin_size_: width of one bin, same units as the spike times
    :return: 2d array (n trials x n bins) with spike counts, where
        n bins = int(trial_len_ / bin_size_); spikes after the last full bin are dropped
    """
    n_bins = int(trial_len_ / bin_size_)
    # Edges are built from the bin_size_ argument (not from a global bin size),
    # so the binning always matches the requested bin width.
    bins_ = np.linspace(0, bin_size_ * n_bins, n_bins + 1).astype(int)

    binned_spikes_trials_l = [np.histogram(trial, bins_)[0] for trial in spikes_trials_l_]
    binned_spikes_trials_2d = np.asarray(binned_spikes_trials_l)

    return binned_spikes_trials_2d

### Run for one trial realization