In [1]:
import numpy as np
import pandas as pd
import warnings
with warnings.catch_warnings():
    warnings.filterwarnings("ignore",category=FutureWarning)
    import h5py
from tqdm import tqdm_notebook as tqdm
import gc
import numpy.ma as ma

In [2]:
def create_spike_matrix(f):
    """
    Takes in an HDF5 file produced by the BrainGrid simuation, extracts information about time 
    and location of spikes and converts it into a dataframe with the following columns: time_step 
    (time step when the spike happened), xloc, yloc (coordinates of the spike).
    
    Args: 
        f(HDF5): loaded HDF5 file to read data from.
    
    Returns:
        spikes_loc(pandas dataframe): dataframe that contains spiking data. The dataframe has the 
            following columns: time_step (time step when the spike happened), xloc, yloc 
            (coordinates of the spike).
    """
    
    # get the spikes time steps and coordinates data from the hdf5 file and convert the data 
    # into numpy arrays
    spikes = np.array(f['/spikesProbedNeurons'], dtype='uint32')
    xloc = np.array(f['/xloc'], dtype='uint8')
    yloc = np.array(f['/yloc'], dtype='uint8')
    idx = np.array(f['/starterNeurons'], dtype='uint16')
    ids = np.array(f['/probedNeurons'], dtype='uint16')
    starter = np.zeros((10000,))
    starter[idx] = 1
    # m is the max number of spikes per neuron, n is the number of neurons in the simulation
    m, n = spikes.shape
    # transform the spikes matrix:
    # 1. Traspose so that each row is a sequence of spikes of one neuron (instead of a column)
    # 2. Flattem the matrix 
    # 3. Reshape into a 2d array from (m*n, ) to (m*n, 1). This is needed for further processing. 
    spikes = np.transpose(spikes).flatten().reshape(m*n, 1)
    # create a mask to remove non-spikes (where time step=0)
    mask = ma.masked_equal(spikes, 0).reshape(m*n, )
    # Transform the coordinate vectors:
    # 1. Make the vectors match the time step sequence. Repeat the values so that first m values 
    # in the x and y vectors are the coordinates of the first neuron
    # 2. Remove the coordinates corresponding to non-spikes
    # 3. Concatenate x and y, the result is an array xy of shape (m*n, 2)
    xloc = np.compress(mask, np.repeat(xloc, m).reshape(m*n, 1), axis=0)
    yloc = np.compress(mask, np.repeat(yloc, m).reshape(m*n, 1), axis=0)
    xy = np.concatenate((xloc, yloc), axis=1)
    # delete xloc and yloc to free memory
    del xloc
    del yloc
    gc.collect()
    starter = np.compress(mask, np.repeat(starter, m).reshape(m*n, 1), axis=0)
    ids = np.compress(mask, np.repeat(ids, m).reshape(m*n, 1), axis=0)
    t = np.concatenate((ids, starter), axis=1)
    xyis = np.concatenate((xy, t), axis=1)
    del xy
    del t
    del starter 
    del ids
    gc.collect()
    # Remove non-spikes from the time step array, concatenate it to xy, 
    # and convert the result into a dataframe
    spikes_loc = pd.DataFrame(np.concatenate((np.compress(mask, spikes, axis=0), xyis), axis=1))
    # delete spikes to free the memory
    del spikes
    gc.collect()
    spikes_loc.rename(columns={0:'time_step', 1:'xloc', 2:'yloc', 3:'id', 4:'starter'}, inplace=True)
    spikes_loc.sort_values(by='time_step', inplace=True)
    spikes_loc.reset_index(drop=True, inplace=True)
    spikes_loc['time_step'] = spikes_loc['time_step'].astype('int32')
    spikes_loc['id'] = spikes_loc['id'].astype('int16')
    spikes_loc['xloc'] = spikes_loc['xloc'].astype('uint8')
    spikes_loc['yloc'] = spikes_loc['yloc'].astype('uint8')
    spikes_loc['starter'] = spikes_loc['starter'].astype('uint8')
    
    return spikes_loc

In [3]:
#load an hdf5 file with the simulation data
f = h5py.File('/home/NETID/lundvm/data/tR_1.0--fE_0.90.h5', 'r')
print(list(f.keys()), '\n')
print(list(f.values()))

['Tsim', 'burstinessHist', 'neuronThresh', 'neuronTypes', 'probedNeurons', 'radiiHistory', 'ratesHistory', 'simulationEndTime', 'spikesHistory', 'spikesProbedNeurons', 'starterNeurons', 'xloc', 'yloc'] 

[<HDF5 dataset "Tsim": shape (1,), type "<f4">, <HDF5 dataset "burstinessHist": shape (60000,), type "<i4">, <HDF5 dataset "neuronThresh": shape (10000,), type "<f4">, <HDF5 dataset "neuronTypes": shape (10000,), type "<i4">, <HDF5 dataset "probedNeurons": shape (10000,), type "<i4">, <HDF5 dataset "radiiHistory": shape (601, 10000), type "<f4">, <HDF5 dataset "ratesHistory": shape (601, 10000), type "<f4">, <HDF5 dataset "simulationEndTime": shape (1,), type "<f4">, <HDF5 dataset "spikesHistory": shape (6000000,), type "<i4">, <HDF5 dataset "spikesProbedNeurons": shape (375898, 10000), type "<u8">, <HDF5 dataset "starterNeurons": shape (1000,), type "<i4">, <HDF5 dataset "xloc": shape (10000,), type "<i4">, <HDF5 dataset "yloc": shape (10000,), type "<i4">]


In [4]:
%%time
# create the spiking data matrix
spikes_loc = create_spike_matrix(f)

CPU times: user 7min 25s, sys: 3min 1s, total: 10min 26s
Wall time: 7min 51s


In [5]:
spikes_loc.shape

(570189562, 5)

In [6]:
spikes_loc = spikes_loc[spikes_loc['starter'] == 0]

In [7]:
xloc = np.array(f['/xloc'], dtype='uint8').reshape(10000,1)
yloc = np.array(f['/yloc'], dtype='uint8').reshape(10000,1)
xy = np.concatenate((xloc, yloc), axis=1)

In [8]:
neurons = pd.DataFrame(xy)
neurons = neurons.astype(int)

In [9]:
neurons.head()

Unnamed: 0,0,1
0,0,0
1,1,0
2,2,0
3,3,0
4,4,0


In [11]:
np.savetxt('/home/NETID/lundvm/data/neurons.csv', neurons, fmt='%i', delimiter=',')

In [12]:
spikes_loc.head()

Unnamed: 0,time_step,xloc,yloc,id,starter
20588242,96709709,41,92,9241,0
20588247,96709728,41,93,9341,0
20588253,96709741,40,92,9240,0
20588254,96709743,42,91,9142,0
20588255,96709743,40,91,9140,0


In [20]:
spikes_loc.drop(columns=['xloc', 'yloc'], inplace=True)

In [21]:
spikes_loc

Unnamed: 0,time_step,id
20588242,96709709,9241
20588247,96709728,9341
20588253,96709741,9240
20588254,96709743,9142
20588255,96709743,9140
20588258,96709748,9342
20588259,96709749,9340
20588261,96709754,9440
20588263,96709756,9242
20588265,96709760,9442


In [22]:
spikes_loc.to_csv('/home/NETID/lundvm/data/allSpikeTime.csv', header=False, index=False)

In [14]:
y = np.bincount(spikes_loc['time_step'].values)
ii = np.nonzero(y)[0]
spikes_time_count = np.vstack((ii,y[ii])).T

In [16]:
t = pd.DataFrame(spikes_time_count)

In [17]:
t

Unnamed: 0,0,1
0,96709709,1
1,96709728,1
2,96709741,1
3,96709743,2
4,96709748,1
5,96709749,1
6,96709754,1
7,96709756,1
8,96709760,1
9,96709762,1


In [None]:
t.to_csv('/home/NETID/lundvm/data/allSpikeTimeCount.csv', header=False, index=False)

In [None]:
spikes_time_count.shape

In [None]:
bursts = pd.read_csv()

In [None]:
import csv
import pandas as pd
path = '/home/NETID/lundvm/data/'
file = 'allSpikeTime.csv'
f = open(path+file,'rt')
reader = csv.reader(f)

In [None]:
for row in reader:
    print(row)

In [None]:
min_ibi = min(bursts['IBI'].values)
max_ibi = max(bursts['IBI'].values)
ibi_mean = np.mean(bursts['IBI'].values)
ibi_std = np.std(bursts['IBI'].values)
burst_start_mask = np.in1d(spike_count[0], bursts['StartT'].values)
num_bursts = bursts['StartT'].values.shape[0]