This notebook extracts spike and neuron data from the HDF5 file and saves it in csv files. The following files are created:
* <b>neurons.csv:</b> contains x and y coordinates of neurons, row index in the csv == neuron index
* <b>allSpikeTime.csv:</b> contains the following columns, in this order: spike timestep, xloc, yloc (coordinates of the spiking neuron), neuron id, starter (1 if the spiking neuron is a starter neuron, 0 otherwise; type: int).
* <b>allSpikeTimeCount.csv:</b> contains spike timesteps and count of spikes per timestep

Author: Mariia Lundvall (lundvm@uw.edu) <br>
Date: 07/04/2019

In [1]:
import numpy as np
import pandas as pd
import warnings
with warnings.catch_warnings():
    warnings.filterwarnings("ignore",category=FutureWarning)
    import h5py
from tqdm import tqdm_notebook as tqdm
import gc
import numpy.ma as ma

<b>NOTE: Update the paths prior to running this notebook.</b>

In [2]:
# input: path to the HDF5 file with the simulation data
hdf5_path = '/home/NETID/lundvm/data/tR_1.0--fE_0.90.h5'
# output: path of the csv file to save the neuron data (x and y coordinates) to
neurons_csv = '/home/NETID/lundvm/data/neurons_test_070419.csv'
# output: path of the csv file to save the spike data to
spikes_csv = '/home/NETID/lundvm/data/allSpikeTime_test_070419.csv'
# output: path of the csv file to save the per-timestep spike counts to
spike_count_csv = '/home/NETID/lundvm/data/allSpikeTimeCount_test_070419.csv'

In [3]:
def create_spike_matrix(f):
    """
    Takes in an HDF5 file produced by the BrainGrid simulation, extracts information about the
    time and location of spikes and converts it into a dataframe with one row per spike.

    The number of neurons is taken from the data itself, so simulations of any size are supported.

    Args:
        f(HDF5): loaded HDF5 file to read data from. The following datasets are read:
            '/spikesProbedNeurons' (2d, shape (m, n): column j holds the spike time steps of the
            j-th neuron, unused slots are 0), '/xloc', '/yloc', '/probedNeurons' (one value per
            neuron) and '/starterNeurons' (positions of the starter neurons).

    Returns:
        spikes_loc(pandas dataframe): dataframe that contains spiking data, sorted by time_step.
            The dataframe has the following columns, in this order:
            time_step (time step when the spike happened, type: int32),
            xloc, yloc (coordinates of the spiking neuron, type: uint8),
            id (neuron id, type: int16),
            starter (1 if the neuron is a starter neuron, 0 otherwise; type: uint8).
    """

    # get the spike time steps and the per-neuron data from the hdf5 file and convert the data
    # into numpy arrays
    spikes = np.array(f['/spikesProbedNeurons'], dtype='uint32')
    xloc = np.array(f['/xloc'], dtype='uint8')
    yloc = np.array(f['/yloc'], dtype='uint8')
    starter_idx = np.array(f['/starterNeurons'], dtype='uint16')
    ids = np.array(f['/probedNeurons'], dtype='uint16')

    # m is the max number of spikes per neuron, n is the number of neurons in the simulation
    m, n = spikes.shape

    # per-neuron starter flag; sized from the data instead of a hard-coded neuron count
    # NOTE(review): assumes '/starterNeurons' holds 0-based positions along the neuron axis,
    # as the original indexing did -- confirm against the simulator output
    starter = np.zeros(n, dtype='uint8')
    starter[starter_idx] = 1

    # Locate the actual spikes (time step != 0). np.nonzero scans the transposed view in
    # row-major order, i.e. neuron by neuron and, within a neuron, slot by slot. This gives the
    # same ordering as transposing + flattening + filtering, but without building any
    # m*n-sized temporary arrays.
    neuron_pos, slot = np.nonzero(spikes.T)
    time_step = spikes[slot, neuron_pos]
    # the full spike matrix is no longer needed; drop the references to free the memory
    del spikes
    del slot

    # look up the per-neuron values for every spike and assemble the dataframe
    spikes_loc = pd.DataFrame(
        {
            'time_step': time_step.astype('int32'),
            'xloc': xloc[neuron_pos],
            'yloc': yloc[neuron_pos],
            'id': ids[neuron_pos].astype('int16'),
            'starter': starter[neuron_pos],
        },
        columns=['time_step', 'xloc', 'yloc', 'id', 'starter'],
    )

    # stable sort: spikes that share a time step keep a deterministic (neuron position) order
    spikes_loc = spikes_loc.sort_values(by='time_step', kind='mergesort').reset_index(drop=True)

    return spikes_loc

In [4]:
# load the hdf5 file with the simulation data (opened read-only);
# the handle is left open because later cells read datasets from it
f = h5py.File(hdf5_path, 'r')
# show the names of the top-level entries, followed by the entries themselves
print(list(f.keys()), '\n')
print(list(f.values()))

['Tsim', 'burstinessHist', 'neuronThresh', 'neuronTypes', 'probedNeurons', 'radiiHistory', 'ratesHistory', 'simulationEndTime', 'spikesHistory', 'spikesProbedNeurons', 'starterNeurons', 'xloc', 'yloc'] 

[<HDF5 dataset "Tsim": shape (1,), type "<f4">, <HDF5 dataset "burstinessHist": shape (60000,), type "<i4">, <HDF5 dataset "neuronThresh": shape (10000,), type "<f4">, <HDF5 dataset "neuronTypes": shape (10000,), type "<i4">, <HDF5 dataset "probedNeurons": shape (10000,), type "<i4">, <HDF5 dataset "radiiHistory": shape (601, 10000), type "<f4">, <HDF5 dataset "ratesHistory": shape (601, 10000), type "<f4">, <HDF5 dataset "simulationEndTime": shape (1,), type "<f4">, <HDF5 dataset "spikesHistory": shape (6000000,), type "<i4">, <HDF5 dataset "spikesProbedNeurons": shape (375898, 10000), type "<u8">, <HDF5 dataset "starterNeurons": shape (1000,), type "<i4">, <HDF5 dataset "xloc": shape (10000,), type "<i4">, <HDF5 dataset "yloc": shape (10000,), type "<i4">]


In [5]:
%%time
# create the spiking data matrix (one row per spike) from the open HDF5 file;
# the %%time cell magic reports how long this step takes
spikes_loc = create_spike_matrix(f)

CPU times: user 7min 31s, sys: 2min 26s, total: 9min 57s
Wall time: 7min 13s


In [6]:
# drop all starter neurons (optional)
# spikes_loc = spikes_loc[spikes_loc['starter'] == 0]
# drop 'xloc', 'yloc', and 'starter' columns (optional, update column names as necessary)
# spikes_loc.drop(columns=['xloc', 'yloc', 'starter'], inplace=True)

In [7]:
# save spike data to a csv; the file is written without a header row and without the
# dataframe index, so the columns are identified by position only
spikes_loc.to_csv(spikes_csv, header=False, index=False)

In [8]:
# extract neuron data and save it to a csv (one row per neuron: x, y)
# reshape(-1, 1) turns each coordinate vector into a column regardless of how many
# neurons the simulation has (no hard-coded neuron count)
xloc = np.array(f['/xloc'], dtype='uint8').reshape(-1, 1)
yloc = np.array(f['/yloc'], dtype='uint8').reshape(-1, 1)
# place x and y side by side: shape (number of neurons, 2)
xy = np.concatenate((xloc, yloc), axis=1)
neurons = pd.DataFrame(xy).astype(int)
# written without a header; the row position in the file is the neuron index
np.savetxt(neurons_csv, neurons, fmt='%i', delimiter=',')

In [9]:
# create a dataframe with counts of spikes per timestep:
# column 0 is the time step, column 1 is the number of spikes at that time step.
# np.unique returns the distinct time steps in ascending order together with how often each
# occurs, so only time steps that actually have spikes are listed and no array as long as
# the largest time step has to be allocated
time_steps, counts = np.unique(spikes_loc['time_step'].values, return_counts=True)
spikes_time_count = np.vstack((time_steps, counts)).T
spike_count_df = pd.DataFrame(spikes_time_count)
# save the dataframe to a csv (no header row, no index column)
spike_count_df.to_csv(spike_count_csv, header=False, index=False)