Prepare summary datasets.

In [1]:
import os
import pickle

import numpy as np
import pandas as pd

from isttc.scripts.cfg_global import project_folder_path

In [2]:
# Folder paths are built with os.path.join so the notebook is not tied to
# Windows path separators. The trailing '' component keeps a trailing separator,
# because later cells build file paths by plain string concatenation
# (e.g. results_folder + 'lv_df.pkl').
dataset_folder = os.path.join(project_folder_path, 'synthetic_dataset', '')
results_folder = os.path.join(
    project_folder_path, 'results', 'synthetic', 'results', 'param_fr_alpha_tau', ''
)

### Helpers

In [3]:
def calculate_acf_decline_flag(acf_, start_idx=1, end_idx=4):
    """Tell whether the ACF is non-increasing over the window [start_idx, end_idx).

    :param acf_: sequence of autocorrelation values (anything sliceable that
        np.diff accepts).
    :param start_idx: first index of the inspected window (inclusive).
    :param end_idx: last index of the inspected window (exclusive).
    :return: True when every consecutive step inside the window is <= 0.
        A window with fewer than two values has no steps and therefore yields
        True; a NaN inside the window yields False.
    """
    window = acf_[start_idx:end_idx]
    steps = np.diff(window)
    return (steps <= 0).all()

def get_tau_df(acf_dict_, signal_len_, method_, alphas_, fr_values_, taus_ms_, bin_size_ms_=50):
    """Build a one-row-per-unit DataFrame of fitted timescales for one method.

    :param acf_dict_: dict mapping unit_id -> dict with keys
        'taus' (dict with 'tau', 'tau_lower', 'tau_upper', 'fit_r_squared')
        and 'acf' (sequence of autocorrelation values).
    :param signal_len_: value stored in the 'duration_s' column.
    :param method_: label stored in the 'method' column.
    :param alphas_: per-unit values stored in 'alpha'.
    :param fr_values_: per-unit values stored in 'fr'.
    :param taus_ms_: per-unit ground-truth timescales stored in 'tau_ms_true'.
    :param bin_size_ms_: factor converting the fitted 'tau' to milliseconds
        (default 50, the value that used to be hard-coded; presumably the
        lag/bin width in ms, as the '50ms' result file names suggest).
    :return: DataFrame with the fit columns plus 'method', 'tau_ms',
        'duration_s', 'fr', 'alpha', 'tau_ms_true', 'tau_diff_abs',
        'tau_diff_rel' (percent) and 'ci_width'.

    NOTE: alphas_, fr_values_ and taus_ms_ are assigned positionally, so they
    must be ordered like the iteration order of acf_dict_.
    Also prints the number of rows that contain at least one NaN.
    """
    rows = []
    for unit_id, unit_data in acf_dict_.items():
        taus = unit_data['taus']
        rows.append({
            'unit_id': unit_id,
            'tau': taus['tau'],
            'tau_lower': taus['tau_lower'],
            'tau_upper': taus['tau_upper'],
            'fit_r_squared': taus['fit_r_squared'],
            'acf_decline': calculate_acf_decline_flag(unit_data['acf'], start_idx=1, end_idx=4)
        })
    tau_df = pd.DataFrame(rows)
    tau_df['method'] = method_
    tau_df['tau_ms'] = tau_df['tau'] * bin_size_ms_
    tau_df['duration_s'] = signal_len_
    tau_df['fr'] = fr_values_
    tau_df['alpha'] = alphas_
    tau_df['tau_ms_true'] = taus_ms_
    tau_df['tau_diff_abs'] = np.abs(tau_df['tau_ms'] - tau_df['tau_ms_true'])
    tau_df['tau_diff_rel'] = tau_df['tau_diff_abs'] / tau_df['tau_ms_true'] * 100
    # CI width uses tau_lower / tau_upper as stored (not rescaled by bin_size_ms_)
    tau_df['ci_width'] = np.abs(tau_df['tau_upper'] - tau_df['tau_lower'])

    # Report (but keep) rows with missing values; filtering happens downstream
    n_rows_with_nan = int(tau_df.isna().any(axis=1).sum())
    print(f'N rows with NaNs {n_rows_with_nan}')

    return tau_df

def get_trials_plot_df(trial_dict_, method_, alphas_, fr_values_, taus_ms_, n_iteration_=None, bin_size_ms_=50):
    """Build a DataFrame of fitted timescales for a trial-based method.

    :param trial_dict_: dict mapping unit_id -> dict with keys 'taus' (sequence
        of dicts with 'tau', 'tau_lower', 'tau_upper', 'fit_r_squared', one per
        trial realization) and 'acf' (sequence of ACF arrays, one per realization).
    :param method_: label stored in the 'method' column.
    :param alphas_: per-unit values stored in 'alpha'.
    :param fr_values_: per-unit values stored in 'fr'.
    :param taus_ms_: per-unit ground-truth timescales stored in 'tau_ms_true'.
    :param n_iteration_: optional container indexed by unit_id giving the single
        realization to keep per unit. When None, every realization of every unit
        becomes its own row.
    :param bin_size_ms_: factor converting the fitted 'tau' to milliseconds
        (default 50, the value that used to be hard-coded; presumably the
        lag/bin width in ms, as the '50ms' result file names suggest).
    :return: DataFrame with the fit columns plus 'method', 'tau_ms', 'fr',
        'alpha', 'tau_ms_true', 'tau_diff_abs', 'tau_diff_rel' (percent) and
        'ci_width'.
    :raises ValueError: when n_iteration_ is None and a per-unit sequence does
        not have one entry per unit of trial_dict_.

    NOTE: alphas_, fr_values_ and taus_ms_ are matched to units by position, so
    they must be ordered like the iteration order of trial_dict_.
    Prints the number of rows containing NaNs only when there are any.
    """
    records = []
    # Position (in trial_dict_ iteration order) of the unit behind each row;
    # needed to repeat the per-unit parameters when a unit yields several rows.
    row_unit_pos = []
    for unit_pos, (unit_id, unit_data) in enumerate(trial_dict_.items()):
        taus = unit_data['taus']
        acfs = unit_data['acf']

        if n_iteration_ is not None:
            # only one trial realization per unit
            idx = n_iteration_[unit_id]
            trials = [(taus[idx], acfs[idx])]
        else:
            # all trial realizations for this unit
            trials = zip(taus, acfs)

        for tau_dict, acf_array in trials:
            records.append({
                'unit_id': unit_id,
                'tau': tau_dict['tau'],
                'tau_lower': tau_dict['tau_lower'],
                'tau_upper': tau_dict['tau_upper'],
                'fit_r_squared': tau_dict['fit_r_squared'],
                'acf_decline': calculate_acf_decline_flag(acf_array, start_idx=1, end_idx=4),
                'method': method_,
            })
            row_unit_pos.append(unit_pos)

    def _per_row(values, name):
        """Return per-unit `values` in a form that can be assigned to the rows."""
        # One row per unit (or a scalar): direct assignment is already correct.
        if n_iteration_ is not None or np.ndim(values) == 0:
            return values
        # Several rows per unit: repeat each unit's value for all of its rows.
        values = np.asarray(values)
        if len(values) != len(trial_dict_):
            raise ValueError(
                f'{name} has {len(values)} entries but trial_dict_ has {len(trial_dict_)} units'
            )
        return values[np.asarray(row_unit_pos, dtype=int)]

    tau_df = pd.DataFrame.from_records(records)
    tau_df['tau_ms'] = tau_df['tau'] * bin_size_ms_
    tau_df['fr'] = _per_row(fr_values_, 'fr_values_')
    tau_df['alpha'] = _per_row(alphas_, 'alphas_')
    tau_df['tau_ms_true'] = _per_row(taus_ms_, 'taus_ms_')
    tau_df['tau_diff_abs'] = np.abs(tau_df['tau_ms'] - tau_df['tau_ms_true'])
    tau_df['tau_diff_rel'] = tau_df['tau_diff_abs'] / tau_df['tau_ms_true'] * 100
    # CI width uses tau_lower / tau_upper as stored (not rescaled by bin_size_ms_)
    tau_df['ci_width'] = np.abs(tau_df['tau_upper'] - tau_df['tau_lower'])

    nan_count = tau_df.isna().any(axis=1).sum()
    if nan_count > 0:
        print(f'N rows with NaNs {nan_count}')

    return tau_df

### Load spike trains

In [4]:
# Read the pickled synthetic dataset: the spike trains plus the parameter
# containers stored next to them under fixed keys.
with open(dataset_folder + 'spike_trains.pkl', 'rb') as f:
    data = pickle.load(f)

spike_trains, alphas, fr_values, taus_ms, duration_ms = (
    data[key] for key in ('spike_trains', 'alphas', 'fr_values', 'tau_ms', 'duration_ms')
)

# Quick sanity print: number of trains, last entry of the first train divided
# by 1000 (presumably last spike time, ms -> s) and the stored duration.
n_spike_trains = len(spike_trains)
first_train_len = spike_trains[0][-1] / 1000
print(f'n spike trains {n_spike_trains}, len {first_train_len}, duration_ms {duration_ms}')

# Lv table; merged into the summary datasets by 'unit_id' in the cells below.
lv_df = pd.read_pickle(results_folder + 'lv_df.pkl')
print(f'Lv loaded for n spike trains {len(lv_df)}')

#fs = 1000

n spike trains 100000, len 598.8243581617338, duration_ms 600000
Lv loaded for n spike trains 100000


### ACF, iSTTC, PearsonR, iSTTC trials (concat)

The summary dataset includes only units where all 4 methods have valid (non-NaN) rows and fit_r_squared ≥ 0.

In [5]:
# Load the fitted-tau dictionaries, one pickle per method.
with open(results_folder + 'tau_isttc_full_50ms_20lags_dict.pkl', 'rb') as f:
    isttc_full_dict = pickle.load(f)
with open(results_folder + 'tau_acf_full_50ms_20lags_dict.pkl', 'rb') as f:
    acf_full_dict = pickle.load(f)
with open(results_folder + 'tau_pearsonr_trial_50ms_20lags_dict.pkl', 'rb') as f:
    pearsonr_trial_avg_dict = pickle.load(f)
with open(results_folder + 'tau_isttc_trial_concat_50ms_20lags_dict.pkl', 'rb') as f:
    sttc_trial_concat_dict = pickle.load(f)

for label, loaded in (
    ('pearsonr_trial_avg_dict', pearsonr_trial_avg_dict),
    ('sttc_trial_concat_dict', sttc_trial_concat_dict),
    ('isttc_full_dict', isttc_full_dict),
    ('acf_full_dict', acf_full_dict),
):
    print(f'len {label} {len(loaded)}')

# Full-signal methods: one row per unit.
acf_tau_full_df = get_tau_df(acf_full_dict, 600, 'acf_full', alphas, fr_values, taus_ms)
isttc_tau_full_df = get_tau_df(isttc_full_dict, 600, 'isttc_full', alphas, fr_values, taus_ms)

# Trial-based methods: take a single trial realization (index 0) for every unit.
random_trials_impl = np.zeros(len(sttc_trial_concat_dict), dtype=int)
pearsontr_trial_avg_plot_df = get_trials_plot_df(
    pearsonr_trial_avg_dict, 'pearsonr_trial_avg', alphas, fr_values, taus_ms, random_trials_impl
)
sttc_trial_concat_plot_df = get_trials_plot_df(
    sttc_trial_concat_dict, 'sttc_trial_concat', alphas, fr_values, taus_ms, random_trials_impl
)

# Long-format table: stack the four methods, drop 'duration_s' (only the
# full-signal frames have it, so it would be NaN for the trial rows) and
# attach Lv per unit.
tau_all_long_df = (
    pd.concat(
        [acf_tau_full_df, isttc_tau_full_df, pearsontr_trial_avg_plot_df, sttc_trial_concat_plot_df],
        ignore_index=True,
    )
    .drop(columns=['duration_s'])
    .merge(lv_df[['unit_id', 'lv']], on='unit_id', how='left')
)
print(f'len tau_all_long_df {len(tau_all_long_df)}')

# Keep only units that have exactly one row for each of the four methods,
# no NaN in any column and fit_r_squared >= 0 for every method.
required = {'acf_full', 'isttc_full', 'pearsonr_trial_avg', 'sttc_trial_concat'}
n_methods = len(required)
tau_all_long_df_clean = (
    tau_all_long_df.groupby('unit_id')
    .filter(lambda g: (
        len(g) == n_methods
        and set(g['method']) == required
        and g.notna().all().all()
        and (g['fit_r_squared'] >= 0).all()
    ))
    .reset_index(drop=True)
)
print(f'len tau_all_long_df_clean {len(tau_all_long_df_clean)}, per method {len(tau_all_long_df_clean)/n_methods}')

# Save the summary dataset as CSV and pickle.
summary_all_stem = results_folder + 'summary_tau_all_long_df'
tau_all_long_df_clean.to_csv(summary_all_stem + '.csv')
tau_all_long_df_clean.to_pickle(summary_all_stem + '.pkl')

len pearsonr_trial_avg_dict 100000
len sttc_trial_concat_dict 100000
len isttc_full_dict 100000
len acf_full_dict 100000
N rows with NaNs 11
N rows with NaNs 0
N rows with NaNs 12393
len tau_all_long_df 400000
len tau_all_long_df_clean 326180, per method 81545.0


### ACF vs iSTTC, full signal

The summary dataset includes only units where both methods have valid (non-NaN) rows and fit_r_squared ≥ 0.

In [7]:
# Load the fitted-tau dictionaries of the two full-signal methods.
with open(results_folder + 'tau_isttc_full_50ms_20lags_dict.pkl', 'rb') as f:
    isttc_full_dict = pickle.load(f)
with open(results_folder + 'tau_acf_full_50ms_20lags_dict.pkl', 'rb') as f:
    acf_full_dict = pickle.load(f)

for label, loaded in (('isttc_full_dict', isttc_full_dict), ('acf_full_dict', acf_full_dict)):
    print(f'len {label} {len(loaded)}')

# One row per unit and method.
acf_tau_full_df = get_tau_df(acf_full_dict, 600, 'acf_full', alphas, fr_values, taus_ms)
isttc_tau_full_df = get_tau_df(isttc_full_dict, 600, 'isttc_full', alphas, fr_values, taus_ms)

# Long-format table: stack both methods and attach Lv per unit.
tau_full_long_df = (
    pd.concat([acf_tau_full_df, isttc_tau_full_df], ignore_index=True)
    .merge(lv_df[['unit_id', 'lv']], on='unit_id', how='left')
)
print(f'len tau_full_long_df {len(tau_full_long_df)}')

# Keep only units that have exactly one row for each of the two methods,
# no NaN in any column and fit_r_squared >= 0 for both methods.
required = {'acf_full', 'isttc_full'}
n_methods = len(required)
tau_full_long_df_clean = (
    tau_full_long_df.groupby('unit_id')
    .filter(lambda g: (
        len(g) == n_methods
        and set(g['method']) == required
        and g.notna().all().all()
        and (g['fit_r_squared'] >= 0).all()
    ))
    .reset_index(drop=True)
)
print(f'len tau_full_long_df_clean {len(tau_full_long_df_clean)}, per method {len(tau_full_long_df_clean)/n_methods}')

# Save the summary dataset as CSV and pickle.
summary_full_stem = results_folder + 'summary_tau_full_long_df'
tau_full_long_df_clean.to_csv(summary_full_stem + '.csv')
tau_full_long_df_clean.to_pickle(summary_full_stem + '.pkl')

len isttc_full_dict 100000
len acf_full_dict 100000
N rows with NaNs 11
N rows with NaNs 0
len tau_full_long_df 200000
len tau_full_long_df_clean 199316, per method 99658.0


### PearsonR vs iSTTC (concat and avg), trials

The summary dataset includes only units where all 3 methods have valid (non-NaN) rows and fit_r_squared ≥ 0. STTC trial avg is not included in the paper figures.

In [8]:
# Load the fitted-tau dictionaries of the three trial-based methods.
with open(results_folder + 'tau_pearsonr_trial_50ms_20lags_dict.pkl', 'rb') as f:
    pearsonr_trial_avg_dict = pickle.load(f)
with open(results_folder + 'tau_sttc_trial_avg_50ms_20lags_dict.pkl', 'rb') as f:
    sttc_trial_avg_dict = pickle.load(f)
with open(results_folder + 'tau_isttc_trial_concat_50ms_20lags_dict.pkl', 'rb') as f:
    sttc_trial_concat_dict = pickle.load(f)

for label, loaded in (
    ('pearsonr_trial_avg_dict', pearsonr_trial_avg_dict),
    ('sttc_trial_avg_dict', sttc_trial_avg_dict),
    ('sttc_trial_concat_dict', sttc_trial_concat_dict),
):
    print(f'len {label} {len(loaded)}')

# For trial-based measures a single trial realization (index 0) is taken per unit.
random_trials_impl = np.zeros(len(sttc_trial_concat_dict), dtype=int)

pearsontr_trial_avg_plot_df = get_trials_plot_df(
    pearsonr_trial_avg_dict, 'pearsonr_trial_avg', alphas, fr_values, taus_ms, random_trials_impl
)
sttc_trial_concat_plot_df = get_trials_plot_df(
    sttc_trial_concat_dict, 'sttc_trial_concat', alphas, fr_values, taus_ms, random_trials_impl
)
sttc_trial_avg_plot_df = get_trials_plot_df(
    sttc_trial_avg_dict, 'sttc_trial_avg', alphas, fr_values, taus_ms, random_trials_impl
)

# Long-format table: stack the three methods and attach Lv per unit.
tau_trials_long_df = (
    pd.concat(
        [pearsontr_trial_avg_plot_df, sttc_trial_concat_plot_df, sttc_trial_avg_plot_df],
        ignore_index=True,
    )
    .merge(lv_df[['unit_id', 'lv']], on='unit_id', how='left')
)
print(f'len tau_trials_long_df {len(tau_trials_long_df)}')

# Keep only units that have exactly one row for each of the three methods,
# no NaN in any column and fit_r_squared >= 0 for every method.
required = {'pearsonr_trial_avg', 'sttc_trial_concat', 'sttc_trial_avg'}
n_methods = len(required)
tau_trials_long_df_clean = (
    tau_trials_long_df.groupby('unit_id')
    .filter(lambda g: (
        len(g) == n_methods
        and set(g['method']) == required
        and g.notna().all().all()
        and (g['fit_r_squared'] >= 0).all()
    ))
    .reset_index(drop=True)
)
print(f'len tau_trials_long_df_clean {len(tau_trials_long_df_clean)}, per method {len(tau_trials_long_df_clean)/n_methods}')

# Save the summary dataset as CSV and pickle.
summary_trials_stem = results_folder + 'summary_tau_trials_long_df'
tau_trials_long_df_clean.to_csv(summary_trials_stem + '.csv')
tau_trials_long_df_clean.to_pickle(summary_trials_stem + '.pkl')

len pearsonr_trial_avg_dict 100000
len sttc_trial_avg_dict 100000
len sttc_trial_concat_dict 100000
N rows with NaNs 12393
len tau_trials_long_df 300000
len tau_trials_long_df_clean 242847, per method 80949.0
