Generating trial datasets using a resampling procedure.

* Number of resampling iterations: 100 (based on bootstrapping stability analysis)
* Number of trials per resampling: N = 40 (chosen to match the number of trials in the experimental monkey dataset)

In [1]:
import csv
import pickle
import numpy as np

# import from scripts
import os

# The helper modules are imported from the project's scripts folder, so the
# working directory is switched there for the duration of the imports only.
current_wd = os.getcwd()
# Build the relative path from components so the separator is not hard-coded.
scripts_dir = os.path.abspath(os.path.join("..", "..", "..", "isttc", "scripts"))
# Local fallback if the relative path does not resolve:
#scripts_dir = os.path.abspath("C:\\Users\\ipoch\\Documents\\repos\\isttc\\scripts")
os.chdir(scripts_dir)
try:
    from cfg_global import project_folder_path
    from spike_train_utils import get_trials, bin_trials
finally:
    # Restore the original working directory even when an import fails, so a
    # failed run does not leave the kernel in the scripts folder.
    os.chdir(current_wd)

In [2]:
# Folder holding the synthetic datasets. The trailing '' component keeps a
# trailing separator, because file names are appended with plain `+` below.
dataset_folder = os.path.join(project_folder_path, 'results', 'synthetic', 'dataset', '')

### Get data

In [None]:
# Read the pickled spike-train dataset.
# NOTE: pickle.load can execute arbitrary code; only load files you trust.
spike_trains_file = dataset_folder + '0_spike_trains_tau100ms_alpha0_3_fr3_5hz_len600sec_1000.pkl'
with open(spike_trains_file, 'rb') as f:
    data = pickle.load(f)

# Unpack the stored fields into notebook-level names.
all_spike_trains, alphas, fr_values, tau_ms, duration_ms = (
    data[key] for key in ('spike_trains', 'alphas', 'fr_values', 'tau_ms', 'duration_ms')
)

# Sanity check: number of trains, last spike time of the first train / 1000, stored duration.
print(f'n spike trains {len(all_spike_trains)}, len {all_spike_trains[0][-1]/1000}, duration_ms {duration_ms}')

In [3]:
# Read the pickled parametric spike-train dataset.
# NOTE: pickle.load can execute arbitrary code; only load files you trust.
spike_trains_par_file = dataset_folder + '1_spike_trains_3params_var_len600sec_100000.pkl'
with open(spike_trains_par_file, 'rb') as f:
    data_parametric = pickle.load(f)

# Unpack the stored fields into notebook-level names (suffix _par = parametric dataset).
all_spike_trains_par, alphas_par, fr_values_par, tau_ms_par, duration_ms_par = (
    data_parametric[key] for key in ('spike_trains', 'alphas', 'fr_values', 'tau_ms', 'duration_ms')
)

# Sanity check: number of trains, last spike time of the first train / 1000, stored duration.
print(f'n spike trains {len(all_spike_trains_par)}, len {all_spike_trains_par[0][-1]/1000}, duration_ms {duration_ms_par}')

n spike trains 100000, len 598.8243581617338, duration_ms 600000


### Make trials 

#### Same n_trials and same trial len

In [None]:
# Resampling configuration: identical trial count and trial length for all units.
fs = 1000      # samples per second (fs / 1000 converts ms to samples below)
bin_size = 50  # in ms

m_iterations = 100  # resampling iterations per unit
n_trials = 40       # trials drawn per iteration

signal_len = int(fs * 60 * 10)       # 10 minutes, in samples
trial_len = int((fs / 1000) * 1000)  # 1000ms trials

In [None]:
# Build the resampled trial datasets: for every unit, m_iterations resamplings
# are stored both as raw trials and as binned trials, keyed unit -> iteration.
trial_dict = {}
trial_binned_dict = {}

# Bin width converted from ms to samples; constant for the whole run.
bin_len_samples = int(bin_size*(fs/1000))

for i in range(len(all_spike_trains)):
    if (i % 100) == 0:
        print(f'Processing unit {i}')
    unit_trial_dict = {}
    unit_trial_binned_dict = {}

    # Spike times truncated to integers. This does not depend on the iteration,
    # so it is built once per unit instead of once per resampling.
    # NOTE(review): assumes get_trials does not modify `spikes` in place - confirm in spike_train_utils.
    spikes = np.asarray([int(spike) for spike in all_spike_trains[i]])

    for m in range(m_iterations):
        spikes_trials = get_trials(spikes, signal_len, n_trials, trial_len, verbose_=False)
        spikes_trials_binned = bin_trials(spikes_trials, trial_len, bin_len_samples)
        unit_trial_dict[m] = spikes_trials
        unit_trial_binned_dict[m] = spikes_trials_binned

    trial_dict[i] = unit_trial_dict
    trial_binned_dict[i] = unit_trial_binned_dict

In [None]:
# Bundle the resampled trials with per-unit metadata (every unit shares the same
# n_trials and trial_len here), then pickle the raw and the binned versions.
trial_dict_full = {
    'trial_dict': trial_dict,
    'n_trials': np.ones(len(trial_dict), dtype=int) * n_trials,
    'trial_lens': np.ones(len(trial_dict), dtype=int) * trial_len,
}
trial_binned_dict_full = {
    'trial_dict': trial_binned_dict,
    'n_trials': np.ones(len(trial_binned_dict), dtype=int) * n_trials,
    'trial_lens': np.ones(len(trial_binned_dict), dtype=int) * trial_len,
}

# Raw trials are written first, binned trials second.
for out_name, payload in (
    ('0_trial_tau100ms_alpha0_3_fr3_5hz_len600sec_1000_dict.pkl', trial_dict_full),
    ('0_trial_binned_tau100ms_alpha0_3_fr3_5hz_len600sec_1000_dict.pkl', trial_binned_dict_full),
):
    with open(dataset_folder + out_name, 'wb') as f:
        pickle.dump(payload, f)

#### Varying n_trials and trial len

In [None]:
# Resampling configuration: trial count and trial length both vary per unit.
fs = 1000
bin_size = 50 # in ms
m_iterations = 100
signal_len = int(fs * 60 * 10)

# Inclusive ranges the per-unit values are sampled from.
n_trials_min = 10
n_trials_max = 100
trial_len_min = 1000
trial_len_max = 5*1000

# Seeded generator so the per-unit draws are reproducible.
global_rng = np.random.default_rng(42)
# for v1 datasets
# n_trials_all = global_rng.integers(n_trials_min,
#                               n_trials_max,
#                               size=len(all_spike_trains),
#                               endpoint=True)

# for v2 datasets
# Candidate grids: trial counts in steps of 10, trial lengths in steps of 500.
possible_n_trials = np.arange(n_trials_min, n_trials_max + 1, 10)
possible_trial_len = np.arange(trial_len_min, trial_len_max + 1, 500)

# Draw order matters for reproducibility: trial counts first, then trial lengths.
n_trials_all = global_rng.choice(possible_n_trials, size=len(all_spike_trains))
trial_lens_all = global_rng.choice(possible_trial_len, size=len(all_spike_trains))

In [None]:
# Build the resampled trial datasets with a per-unit trial count and trial
# length: for every unit, m_iterations resamplings are stored both as raw
# trials and as binned trials, keyed unit -> iteration.
trial_dict = {}
trial_binned_dict = {}

# Bin width converted from ms to samples; constant for the whole run.
bin_len_samples = int(bin_size*(fs/1000))

for i in range(len(all_spike_trains)):
    if (i % 100) == 0:
        print(f'Processing unit {i}')
    unit_trial_dict = {}
    unit_trial_binned_dict = {}

    # Per-unit settings drawn earlier from the seeded generator.
    n_trials = n_trials_all[i]
    trial_len = trial_lens_all[i]

    # Spike times truncated to integers. This does not depend on the iteration,
    # so it is built once per unit instead of once per resampling.
    # NOTE(review): assumes get_trials does not modify `spikes` in place - confirm in spike_train_utils.
    spikes = np.asarray([int(spike) for spike in all_spike_trains[i]])

    for m in range(m_iterations):
        spikes_trials = get_trials(spikes, signal_len, n_trials, trial_len, verbose_=False)
        spikes_trials_binned = bin_trials(spikes_trials, trial_len, bin_len_samples)
        unit_trial_dict[m] = spikes_trials
        unit_trial_binned_dict[m] = spikes_trials_binned

    trial_dict[i] = unit_trial_dict
    trial_binned_dict[i] = unit_trial_binned_dict

In [None]:
# Bundle the resampled trials with the per-unit trial counts and trial lengths,
# then pickle the raw and the binned versions (v2 files).
trial_dict_full = {
    'trial_dict': trial_dict,
    'n_trials': n_trials_all,
    'trial_lens': trial_lens_all,
}
trial_binned_dict_full = {
    'trial_dict': trial_binned_dict,
    'n_trials': n_trials_all,
    'trial_lens': trial_lens_all,
}

# Raw trials are written first, binned trials second.
for out_name, payload in (
    ('0_trial_var_tau100ms_alpha0_3_fr3_5hz_len600sec_1000_dict_v2.pkl', trial_dict_full),
    ('0_trial_binned_var_tau100ms_alpha0_3_fr3_5hz_len600sec_1000_dict_v2.pkl', trial_binned_dict_full),
):
    with open(dataset_folder + out_name, 'wb') as f:
        pickle.dump(payload, f)

#### Varying n_trials

In [None]:
# Resampling configuration: trial count varies per unit, trial length is fixed.
fs = 1000
bin_size = 50 # in ms
m_iterations = 100
signal_len = int(fs * 60 * 10)

trial_len = 1000
n_trials_min = 10
n_trials_max = 100

# Seeded generator so the per-unit trial counts are reproducible.
global_rng = np.random.default_rng(42)
# Candidate trial counts: n_trials_min..n_trials_max inclusive, in steps of 10.
possible_n_trials = np.arange(n_trials_min, n_trials_max + 1, 10)
n_trials_all = global_rng.choice(possible_n_trials, size=len(all_spike_trains))

In [None]:
# Display the candidate trial counts that n_trials_all is sampled from.
possible_n_trials

In [None]:
# Build the resampled trial datasets with a per-unit trial count and a fixed
# trial length: for every unit, m_iterations resamplings are stored both as raw
# trials and as binned trials, keyed unit -> iteration.
trial_dict = {}
trial_binned_dict = {}

# Bin width converted from ms to samples; constant for the whole run.
bin_len_samples = int(bin_size*(fs/1000))

for i in range(len(all_spike_trains)):
    if (i % 100) == 0:
        print(f'Processing unit {i}')
    unit_trial_dict = {}
    unit_trial_binned_dict = {}

    # Per-unit trial count drawn earlier from the seeded generator.
    n_trials = n_trials_all[i]

    # Spike times truncated to integers. This does not depend on the iteration,
    # so it is built once per unit instead of once per resampling.
    # NOTE(review): assumes get_trials does not modify `spikes` in place - confirm in spike_train_utils.
    spikes = np.asarray([int(spike) for spike in all_spike_trains[i]])

    for m in range(m_iterations):
        spikes_trials = get_trials(spikes, signal_len, n_trials, trial_len, verbose_=False)
        spikes_trials_binned = bin_trials(spikes_trials, trial_len, bin_len_samples)
        unit_trial_dict[m] = spikes_trials
        unit_trial_binned_dict[m] = spikes_trials_binned

    trial_dict[i] = unit_trial_dict
    trial_binned_dict[i] = unit_trial_binned_dict

In [None]:
# Bundle the resampled trials with the per-unit trial counts and the shared
# trial length, then pickle the raw and the binned versions (v1 files).
trial_dict_full = {
    'trial_dict': trial_dict,
    'n_trials': n_trials_all,
    'trial_lens': np.ones(len(trial_dict), dtype=int) * trial_len,
}
trial_binned_dict_full = {
    'trial_dict': trial_binned_dict,
    'n_trials': n_trials_all,
    'trial_lens': np.ones(len(trial_binned_dict), dtype=int) * trial_len,
}

# Raw trials are written first, binned trials second.
for out_name, payload in (
    ('0_trial_var_tau100ms_alpha0_3_fr3_5hz_len600sec_1000_dict_v1.pkl', trial_dict_full),
    ('0_trial_binned_var_tau100ms_alpha0_3_fr3_5hz_len600sec_1000_dict_v1.pkl', trial_binned_dict_full),
):
    with open(dataset_folder + out_name, 'wb') as f:
        pickle.dump(payload, f)

#### Trials based on parametric dataset (same n trials and trial len)

In [10]:
# Resampling configuration for the parametric dataset: a single resampling
# per unit with identical trial count and trial length for all units.
fs = 1000      # samples per second (fs / 1000 converts ms to samples below)
bin_size = 50  # in ms

m_iterations = 1  # resampling iterations per unit
n_trials = 100    # trials drawn per iteration

signal_len = int(fs * 60 * 10)       # 10 minutes, in samples
trial_len = int((fs / 1000) * 1000)  # 1000ms trials

In [11]:
# Build the resampled trial datasets for the parametric spike trains: for every
# unit, m_iterations resamplings are stored both as raw trials and as binned
# trials, keyed unit -> iteration.
trial_dict = {}
trial_binned_dict = {}

# Bin width converted from ms to samples; constant for the whole run.
bin_len_samples = int(bin_size*(fs/1000))

for i in range(len(all_spike_trains_par)):
    if (i % 100) == 0:
        print(f'Processing unit {i}')
    unit_trial_dict = {}
    unit_trial_binned_dict = {}

    # Spike times truncated to integers. This does not depend on the iteration,
    # so it is built once per unit instead of once per resampling.
    # NOTE(review): assumes get_trials does not modify `spikes` in place - confirm in spike_train_utils.
    spikes = np.asarray([int(spike) for spike in all_spike_trains_par[i]])

    for m in range(m_iterations):
        spikes_trials = get_trials(spikes, signal_len, n_trials, trial_len, verbose_=False)
        spikes_trials_binned = bin_trials(spikes_trials, trial_len, bin_len_samples)
        unit_trial_dict[m] = spikes_trials
        unit_trial_binned_dict[m] = spikes_trials_binned

    trial_dict[i] = unit_trial_dict
    trial_binned_dict[i] = unit_trial_binned_dict

Processing unit 0
Processing unit 100
Processing unit 200
Processing unit 300
Processing unit 400
Processing unit 500
Processing unit 600
Processing unit 700
Processing unit 800
Processing unit 900
Processing unit 1000
Processing unit 1100
Processing unit 1200
Processing unit 1300
Processing unit 1400
Processing unit 1500
Processing unit 1600
Processing unit 1700
Processing unit 1800
Processing unit 1900
Processing unit 2000
Processing unit 2100
Processing unit 2200
Processing unit 2300
Processing unit 2400
Processing unit 2500
Processing unit 2600
Processing unit 2700
Processing unit 2800
Processing unit 2900
Processing unit 3000
Processing unit 3100
Processing unit 3200
Processing unit 3300
Processing unit 3400
Processing unit 3500
Processing unit 3600
Processing unit 3700
Processing unit 3800
Processing unit 3900
Processing unit 4000
Processing unit 4100
Processing unit 4200
Processing unit 4300
Processing unit 4400
Processing unit 4500
Processing unit 4600
Processing unit 4700
Proc

In [12]:
# Bundle the resampled trials with per-unit metadata (every unit shares the same
# n_trials and trial_len here), then pickle the raw and the binned versions.
# Each bundle sizes its metadata from its own trial dictionary, matching the
# other save cells in this notebook.
trial_dict_full = {'trial_dict': trial_dict,
                   'n_trials': np.ones(len(trial_dict)).astype(int)*n_trials,
                   'trial_lens': np.ones(len(trial_dict)).astype(int)*trial_len}
trial_binned_dict_full = {'trial_dict': trial_binned_dict,
                   'n_trials': np.ones(len(trial_binned_dict)).astype(int)*n_trials,
                   'trial_lens': np.ones(len(trial_binned_dict)).astype(int)*trial_len}

with open(dataset_folder + '1_trial_3params_var_len600sec_100000_100trials_dict.pkl', 'wb') as f:
    pickle.dump(trial_dict_full, f)

with open(dataset_folder + '1_trial_binned_3params_var_len600sec_100000_100trials_dict.pkl', 'wb') as f:
    pickle.dump(trial_binned_dict_full, f)