We do not have ground truth here, but we have the high-quality units from Neuropixels recordings. The idea is to show that
STTC on concatenated trials (STTC concat) works better than the trial-average methods (both Pearson and STTC).

Steps:
0. Calculate STTC and ACF on the full signal (reference values)
1. Take the full signal and randomly pick N trials
2. Calculate Pearson trial-average
3. Calculate STTC trial-average
4. Calculate STTC on concatenated data
5. Compare the results of steps 2, 3 and 4 with the values calculated on the full signal (step 0)

In [1]:
import csv
import numpy as np

from statsmodels.tsa.stattools import acf
from scipy.optimize import curve_fit, OptimizeWarning

import warnings

# import from scripts
import os
os.chdir(os.path.expanduser("D:\\intr_timescales\\isttc\\scripts"))
#os.chdir(os.path.expanduser("C:\\Users\\ipoch\\Documents\\repos\\isttc\\scripts"))
from calculate_acf import acf_sttc, acf_pearsonr_trial_avg, acf_sttc_trial_avg, acf_sttc_trial_concat

### Get data

In [2]:
data_folder = 'Q:\\Personal\\Irina\\projects\\isttc\\'

In [None]:
# Read the whole csv into memory: one row per unit, every value kept as a string.
csv_data_file = f'{data_folder}allen_test_full_v3.csv'
with open(csv_data_file, newline='') as f:
    reader = csv.reader(f)
    sua_list = list(reader)
print(f'Loaded N units {len(sua_list)}')

In [None]:
fs = 30_000  # raw neuropixels sampling rate, in samples per second

In [None]:
# check if there are spikes after 30 mins of recording
# (prints the latest spike, in samples, of every unit that has a spike at or after 30 min)

for unit_row in sua_list:
    # the first four columns are skipped; spike times are read from column 4 onwards
    spike_train_ = np.asarray(unit_row[4:]).astype(float)
    if spike_train_.size == 0:
        # a unit without spikes has no last spike to test (indexing [-1] would raise IndexError)
        continue
    spike_train_fs_int = (spike_train_ * fs).astype(int)  # csv is in sec
    # use the maximum instead of the last element so the check does not rely on sorted spike times
    last_spike_fs = spike_train_fs_int.max()
    if last_spike_fs >= 30 * 60 * fs:
        print(f'spike >= 30 min {last_spike_fs}')

In [None]:
# Restrict every unit to the spikes that occur within the first 30 min of the signal.
sua_list_30min = []

for unit_row in sua_list:
    # spike times start at column 4 and are stored in seconds -> convert to samples
    spike_train_fs_int = (np.asarray(unit_row[4:]).astype(float) * fs).astype(int)
    first_30min_mask = spike_train_fs_int < 30 * 60 * fs
    sua_list_30min.append(spike_train_fs_int[first_30min_mask])

In [None]:
# bin the data: turn every 30 min spike train into spike counts per bin
bin_size = 50 # in ms
fs = 30000 # raw neuropixels
signal_len = 30 * 60 * fs

verbose = False

# The bin edges are the same for every unit, so they are built once, outside the loop.
bin_length_fs = int(fs / 1000 * bin_size)  # bin length in samples
n_bin_edges = int(signal_len / bin_length_fs)  # number of bins (there are n_bin_edges + 1 edges)
# integer arithmetic gives exact edges 0, bin_length_fs, ..., n_bin_edges * bin_length_fs
bins = np.arange(n_bin_edges + 1) * bin_length_fs

sua_list_30min_binned_l = []

for unit_spikes in sua_list_30min:
    binned_spike_train, _ = np.histogram(unit_spikes, bins)

    if verbose:
        print('Binning spike train: bin_length_ms {}, bin_length_fs {}'.format(bin_size, bin_length_fs))
        print('n bins {}, spike bin count: number of spikes in bin - number of bins {}'.format(
            binned_spike_train.shape, np.unique(binned_spike_train, return_counts=True)))
    sua_list_30min_binned_l.append(binned_spike_train)

In [None]:
# Persist both representations so the later cells can start from disk.
sua_30min_path = data_folder + 'results\\allen_mice\\dataset_full_split_check_30min\\sua_list_30min.npy'
sua_30min_binned_path = data_folder + 'results\\allen_mice\\dataset_full_split_check_30min\\sua_list_30min_binned.npy'

# spike trains are stored as an object array (one array per unit)
np.save(sua_30min_path, np.asarray(sua_list_30min, dtype='object'))
np.save(sua_30min_binned_path, sua_list_30min_binned_l, allow_pickle=True)

### Reload binned and non binned data 

In [3]:
def fit_single_exp(ydata_to_fit_, start_idx_=1):
    """
    Fit func_single_exp to data using non-linear least squares and return the decay constant.

    The x axis is the sample index 0, 1, ..., len(ydata_to_fit_) - 1, so the returned tau is
    expressed in units of that index (lags when the input is an autocorrelation function).

    todo check that - important point: by default the fit is done from the first ACF value
    (acf[0] is skipped, it is done like this in the papers, still not sure)

    Failed fits (RuntimeError, ValueError, or any OptimizeWarning / RuntimeWarning, which are
    promoted to exceptions here) are reported with a print and yield np.nan.

    :param ydata_to_fit_: 1d array, the dependent data to fit
    :param start_idx_: int, index to start fitting from
    :return: tau, 1 / b of the fitted a * exp(-b * x) + c, or np.nan if the fit failed
    """
    # One x value per data point. np.linspace(0, n, n).astype(int) produced 0, ..., n - 2, n,
    # i.e. the last point was shifted by one lag.
    t = np.arange(len(ydata_to_fit_))

    with warnings.catch_warnings():
        # turn warnings into exceptions so that questionable fits end up in the handlers below
        warnings.filterwarnings('error')
        try:
            popt, _ = curve_fit(func_single_exp, t[start_idx_:], ydata_to_fit_[start_idx_:], maxfev=5000)
            tau = 1 / popt[1]
        except RuntimeError as e:
            print('RuntimeError: {}'.format(e))
            tau = np.nan
        except OptimizeWarning as o:
            print('OptimizeWarning: {}'.format(o))
            tau = np.nan
        except RuntimeWarning as re:
            print('RuntimeWarning: {}'.format(re))
            tau = np.nan
        except ValueError as ve:
            print('ValueError: {}'.format(ve))
            print('Possible reason: acf contains NaNs, low spike count')
            tau = np.nan

    return tau

In [4]:
# Reload the saved arrays; allow_pickle=True is passed because the spike trains were saved as an object array.
sua_list_30min = np.load(
    data_folder + 'results\\allen_mice\\dataset_full_split_check_30min\\sua_list_30min.npy',
    allow_pickle=True,
)
sua_list_30min_binned = np.load(
    data_folder + 'results\\allen_mice\\dataset_full_split_check_30min\\sua_list_30min_binned.npy',
    allow_pickle=True,
)

In [5]:
# Sanity check: the spike-time and the binned collections should have the same number of entries.
print(f'len sua {len(sua_list_30min)}, len sua_binned {len(sua_list_30min_binned)}')

len sua 18168, len sua_binned 18168


In [6]:
# Parameters for the autocorrelation calculations below.
# NOTE: unlike the binning cell above (bin_size = 50, in ms), bin_size is expressed in samples here.
fs = 30000 # raw neuropixels
samples_per_ms = fs / 1000
num_lags = 20
bin_size = 50 * samples_per_ms  # 50 ms in samples
sttc_dt = 49 * samples_per_ms  # 49 ms in samples
signal_len = 30 * 60 * fs  # 30 min in samples

In [7]:
def func_exp_abc_like(x, a, tau):
    """Exponential decay without offset: a * exp(-x / tau)."""
    decay = np.exp(-x / tau)
    return a * decay

def func_single_exp_monkey_like(x, a, b, c):
    """Exponential decay with the offset inside the amplitude: a * (exp(-b * x) + c).

    Note that c is scaled by a here (as in the paper), unlike a * exp(-b * x) + c.
    """
    decay_plus_offset = np.exp(-b * x) + c
    return a * decay_plus_offset

def func_single_exp(x, a, b, c):
    """Single exponential decay with additive offset: a * exp(-b * x) + c."""
    decay = np.exp(-b * x)
    return a * decay + c

In [8]:
# Integer lag indices 0, 1, ..., num_lags.
t_axes = np.arange(num_lags + 1)
print(t_axes)

[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20]


In [None]:
# Timescales estimated on the full 30 min signal of every unit, with both methods.
sttc_full_tau_ms_l = []
acf_full_tau_ms_l = []

for k, binned_counts in enumerate(sua_list_30min_binned):
    # autocorrelation of the binned spike counts (statsmodels acf)
    spike_train_binned_acf = acf(binned_counts, nlags=num_lags)

    # isttc: computed on the spike times of the same unit
    spike_train_acf = acf_sttc(sua_list_30min[k], num_lags, lag_shift_=bin_size, sttc_dt_=sttc_dt,
                               signal_length_=signal_len, verbose_=False)

    # fit_single_exp returns tau in units of lags; it is scaled by bin_size afterwards.
    # NOTE(review): bin_size is 50 * (fs / 1000) = 1500 samples in the parameter cell above, so the
    # values stored under "_ms" names look like samples rather than milliseconds - confirm the units.
    spike_train_binned_tau_ms = fit_single_exp(spike_train_binned_acf, start_idx_=1) * bin_size
    spike_train_tau_ms = fit_single_exp(spike_train_acf, start_idx_=1) * bin_size

    acf_full_tau_ms_l.append(spike_train_binned_tau_ms)
    sttc_full_tau_ms_l.append(spike_train_tau_ms)



In [None]:
# save lists with taus (one value per unit for each method)
# Fix: the first call had a stray comma ("sttc_full_tau_ms_l, , allow_pickle=True"), a SyntaxError.
np.save(data_folder + 'results\\allen_mice\\dataset_full_split_check_30min\\sttc_full_tau_ms_l.npy',
        sttc_full_tau_ms_l, allow_pickle=True)
np.save(data_folder + 'results\\allen_mice\\dataset_full_split_check_30min\\acf_full_tau_ms_l.npy',
        acf_full_tau_ms_l, allow_pickle=True)

### Make trials

In [None]:
def get_trials(spike_times_, signal_len_, n_trials_, trial_len_, verbose_=False):
    """
    Cut n_trials_ randomly placed trials of length trial_len_ out of a spike train.

    Trial starts are drawn independently with random.randrange from
    [0, signal_len_ - trial_len_], so trials may overlap. A spike belongs to a trial when
    trial_start <= spike < trial_start + trial_len_. Spike times of every trial are shifted so
    that the trial starts at 0.

    :param spike_times_: 1d array-like of spike times, same units as signal_len_ and trial_len_
    :param signal_len_: int, length of the full signal
    :param n_trials_: int, number of trials to draw
    :param trial_len_: int, length of one trial
    :param verbose_: bool, print the drawn trial starts and ends
    :return: list with n_trials_ 1d arrays of realigned spike times
    :raises ValueError: from randrange, when trial_len_ is longer than signal_len_
    """
    # imported locally so the function does not depend on an import made elsewhere in the file
    from random import randrange

    # accept lists as well as arrays; boolean masking below needs an ndarray
    spike_times_ = np.asarray(spike_times_)

    # get random trial starts and ends
    trials_start = [randrange(0, signal_len_ - trial_len_ + 1) for _ in range(n_trials_)]
    trials_end = [trial_start + trial_len_ for trial_start in trials_start]
    trial_intervals = np.vstack((trials_start, trials_end)).T
    if verbose_:
        # the last label used to read "trial starts" although the trial ends are printed
        print('N trials {}, trail len {}, n trial starts {}, \ntrial starts {}, \ntrial ends {}'.format(
            n_trials_, trial_len_, len(trials_start), trials_start, trials_end))

    # select the spikes of every trial and realign them to the trial start in one pass
    spikes_trials_realigned_l = []
    for trial_start, trial_end in trial_intervals:
        in_trial = np.logical_and(spike_times_ >= trial_start, spike_times_ < trial_end)
        spikes_trials_realigned_l.append(spike_times_[in_trial] - trial_start)

    return spikes_trials_realigned_l

def bin_trials(spikes_trials_l_, trial_len_, bin_size_):
    """
    Bin the spike times of every trial into spike counts.

    All trials share the same bin edges 0, bin_size_, ..., n_bins * bin_size_ with
    n_bins = int(trial_len_ / bin_size_); a trailing partial bin is not included.

    :param spikes_trials_l_: list of 1d arrays, spike times per trial (aligned to trial start)
    :param trial_len_: length of one trial, same units as the spike times
    :param bin_size_: length of one bin, same units as the spike times
    :return: 2d array of spike counts, n_trials x n_bins
    """
    n_bin_edges = int(trial_len_ / bin_size_)  # number of bins; there are n_bin_edges + 1 edges
    # Fix: the edges must be built from the bin_size_ argument; the global bin_size was used before.
    bins_ = np.linspace(0, bin_size_ * n_bin_edges, n_bin_edges + 1).astype(int)

    binned_spikes_trials_l = [np.histogram(trial, bins_)[0] for trial in spikes_trials_l_]
    binned_spikes_trials_2d = np.asarray(binned_spikes_trials_l)

    return binned_spikes_trials_2d

### Run for one trial realization