# Print contents

In [23]:
from pynwb import NWBHDF5IO

def list_nwb_components(nwb_file_path):
    with NWBHDF5IO(nwb_file_path, 'r') as io:
        nwbfile = io.read()

        print("Acquisition Groups:")
        acquisition = nwbfile.acquisition
        for name in acquisition:
            print(f" - {name}")

        if hasattr(nwbfile, 'stimulus'):
            print("\nStimulus Groups:")
            stimulus = nwbfile.stimulus
            for name in stimulus:
                print(f" - {name}")

# Path to your .nwb file
nwb_file_path = 'data1.nwb'

# List all components
list_nwb_components(nwb_file_path)


Acquisition Groups:
 - ElectricalSeries

Stimulus Groups:
 - TimeSeries_amp_settle
 - TimeSeries_charge_recovery
 - TimeSeries_compliance_limit
 - TimeSeries_stimulation


In [24]:
from pynwb import NWBHDF5IO

# Replace with the path to your .nwb file
nwb_file_path = 'data1.nwb'

# Function to print details of a TimeSeries object
def print_timeseries_details(name, timeseries):
    print(f"\n{name}:")
    print(f" - Comments: {timeseries.comments}")
    print(f" - Description: {timeseries.description}")
    print(f" - Unit: {timeseries.unit}")
    print(f" - Data shape: {timeseries.data.shape}")
    print(f" - Timestamps shape: {timeseries.timestamps.shape if timeseries.timestamps else 'No timestamps'}")

# Open the .nwb file using PyNWB
with NWBHDF5IO(nwb_file_path, 'r') as io:
    nwbfile = io.read()

    # Access and print details of the acquisition group
    print("Acquisition Groups:")
    acquisition = nwbfile.acquisition
    for name, timeseries in acquisition.items():
        print_timeseries_details(name, timeseries)

    # Access and print details of the stimulus group
    if hasattr(nwbfile, 'stimulus'):
        print("\nStimulus Groups:")
        for name, timeseries in nwbfile.stimulus.items():
            print_timeseries_details(name, timeseries)


Acquisition Groups:

ElectricalSeries:
 - Comments: voltage data recorded from the amplifiers of an Intan Technologies chip
 - Description: voltage data recorded from the amplifiers of an Intan Technologies chip
 - Unit: volts
 - Data shape: (65280, 64)
 - Timestamps shape: (65280,)

Stimulus Groups:

TimeSeries_amp_settle:
 - Comments: amplifier settle activity of an Intan Technologies chip
 - Description: amplifier settle activity of an Intan Technologies chip
 - Unit: digital event
 - Data shape: (65280, 64)
 - Timestamps shape: (65280,)

TimeSeries_charge_recovery:
 - Comments: charge recovery activity of an Intan Technologies chip
 - Description: charge recovery activity of an Intan Technologies chip
 - Unit: digital event
 - Data shape: (65280, 64)
 - Timestamps shape: (65280,)

TimeSeries_compliance_limit:
 - Comments: compliance limit activity of an Intan Technologies chip
 - Description: compliance limit activity of an Intan Technologies chip
 - Unit: digital event
 - Data sha

### Shape

In [30]:
from pynwb import NWBHDF5IO
import numpy as np

def extract_electrical_series_data(nwb_file_path):
    with NWBHDF5IO(nwb_file_path, 'r') as io:
        nwbfile = io.read()

        seq_len = 65280  # Number of time points
        electrodes = 64  # Total number of electrodes (8x8 grid)

        # Extract data from the ElectricalSeries component
        electrical_series = nwbfile.acquisition.get('ElectricalSeries')
        if electrical_series is None:
            raise ValueError("ElectricalSeries component not found.")

        data = np.array(electrical_series.data[:])
        
        # Check the shape of the data
        if data.ndim != 2 or data.shape[1] != electrodes:
            raise ValueError("Incorrect data shape in ElectricalSeries")

        # Reshape the data to (seq_len, 8, 8)
        reshaped_data = data.reshape((seq_len, 8, 8))
        return reshaped_data

# Path to your .nwb file
nwb_file_path = 'data1.nwb'

# Extract and reshape ElectricalSeries data
electrical_series_data = extract_electrical_series_data(nwb_file_path)
print("ElectricalSeries dataset shape:", electrical_series_data.shape)

ElectricalSeries dataset shape: (65280, 8, 8)


### Print first 10 rows of data

In [33]:
from pynwb import NWBHDF5IO
import numpy as np

def print_first_10_rows_electrical_series(nwb_file_path):
    with NWBHDF5IO(nwb_file_path, 'r') as io:
        nwbfile = io.read()

        # Access ElectricalSeries data
        electrical_series = nwbfile.acquisition.get('ElectricalSeries')
        if electrical_series is None:
            print("ElectricalSeries component not found.")
            return

        # Extract and reshape data
        data = np.array(electrical_series.data[:])
        seq_len, electrodes = data.shape
        reshaped_data = data.reshape((seq_len, 8, 8))

        # Print the first 10 rows
        print("\nFirst 10 rows of ElectricalSeries:")
        for row in range(10):
            print(f"Timepoint {row + 1}:\n{reshaped_data[row, :, :]}")

# Path to your .nwb file
nwb_file_path = 'data1.nwb'  # Replace with the correct file path

# Print the first 10 rows of ElectricalSeries
print_first_10_rows_electrical_series(nwb_file_path)



First 10 rows of ElectricalSeries:
Timepoint 1:
[[ 2.53500002e-06  7.21500010e-06 -3.89999997e-07  3.90000014e-06
   4.29000011e-06 -7.01999988e-06 -1.13099995e-05  2.73000001e-06]
 [ 3.50999994e-06  1.40399998e-05 -7.01999988e-06  1.36500000e-06
   3.11999997e-06 -2.14500005e-06  1.95000007e-06  9.75000034e-07]
 [ 2.53500002e-06  7.21500010e-06 -3.89999997e-07  3.90000014e-06
   4.29000011e-06 -7.01999988e-06 -1.13099995e-05  2.73000001e-06]
 [ 3.50999994e-06  1.40399998e-05 -7.01999988e-06  1.36500000e-06
   3.11999997e-06 -2.14500005e-06  1.95000007e-06  9.75000034e-07]
 [ 2.53500002e-06  7.21500010e-06 -3.89999997e-07  3.90000014e-06
   4.29000011e-06 -7.01999988e-06 -1.13099995e-05  2.73000001e-06]
 [ 3.50999994e-06  1.40399998e-05 -7.01999988e-06  1.36500000e-06
   3.11999997e-06 -2.14500005e-06  1.95000007e-06  9.75000034e-07]
 [ 2.53500002e-06  7.21500010e-06 -3.89999997e-07  3.90000014e-06
   4.29000011e-06 -7.01999988e-06 -1.13099995e-05  2.73000001e-06]
 [ 3.50999994e-06  1

### Check for non zeros

In [32]:
def check_for_nonzero_values(datasets):
    for component, data in datasets.items():
        non_zero_count = np.count_nonzero(data)
        total_elements = data.size
        print(f"{component}:")
        print(f" - Total non-zero elements: {non_zero_count}")
        print(f" - Total elements: {total_elements}")
        print(f" - Percentage of non-zero elements: {100 * non_zero_count / total_elements:.2f}%\n")

# Call the function to check for non-zero values
check_for_nonzero_values(datasets)

ElectricalSeries:
 - Total non-zero elements: 4173640
 - Total elements: 4177920
 - Percentage of non-zero elements: 99.90%

TimeSeries_amp_settle:
 - Total non-zero elements: 0
 - Total elements: 4177920
 - Percentage of non-zero elements: 0.00%

TimeSeries_charge_recovery:
 - Total non-zero elements: 0
 - Total elements: 4177920
 - Percentage of non-zero elements: 0.00%

TimeSeries_compliance_limit:
 - Total non-zero elements: 0
 - Total elements: 4177920
 - Percentage of non-zero elements: 0.00%

TimeSeries_stimulation:
 - Total non-zero elements: 0
 - Total elements: 4177920
 - Percentage of non-zero elements: 0.00%



# Next step

In [12]:

#Reads In Data to (N, 1, 8, 8, S) array

import numpy as np
import glob
import os

def load_raw_data(filename: str):
    # Load the npz file
    loaded = np.load(filename)
    
    # Extract the data for each timepoint and store it in a list
    data = []
    for i in range(4177920):  # Adjust this number based on the actual number of timepoints in your data
        timepoint_key = f'timepoint_{i}'  # Construct the key name
        if timepoint_key in loaded:
            timepoint_data = loaded[timepoint_key]
            data.append(timepoint_data)
        else:
            break  # Exit the loop if the key is not found
    return np.array(data)



def create_array(dirPath:str, offset:int,listFiles:list):
    """
    INPUT:
    dirPath: [str] path to directory
    offset: [int] time delay before before recording after a stimulus
    OUTPUT: 
    dataset: [np.array] an array of shape (N,8,8,3001) containing the data
    """
    # number of pre and post stimulation files 
    nbr_files = int(len(listFiles)/2)
    total_nbr_stim_per_file,nbr_stim_per_electrode,nbr_electrodes,nbr_neurospheres,seq_len = recording_parameters(dirPath,offset)

    # create two arrays of size (N,8,8,3001), each one corresponding to one of the class
    raw1_reshaped = np.zeros((total_nbr_stim_per_file*nbr_files,nbr_electrodes,nbr_electrodes,seq_len+1))
    raw2_reshaped = np.zeros((total_nbr_stim_per_file*nbr_files,nbr_electrodes,nbr_electrodes,seq_len+1))

    # fill these arrays with corresponding values from files
    for start_exp_index in range(nbr_files):
        print(f'experiment number:{start_exp_index}')
        raw1 = load_raw_data(f'{dirPath}/exp_{start_exp_index}_0_{offset}.npz')
        raw2 = load_raw_data(f'{dirPath}/exp_{start_exp_index}_1_{offset}.npz')

        # reshape (80,8,8,3000)
        raw1_one_file = np.zeros((total_nbr_stim_per_file,nbr_electrodes,nbr_electrodes,raw1[0].shape[2]))
        raw2_one_file = np.zeros((total_nbr_stim_per_file,nbr_electrodes,nbr_electrodes,raw2[0].shape[2]))

        # iterate through electrode stimulated and neurospheres
        for electrode in range(nbr_electrodes):
            #nbr_neurospheres = int(raw1[electrode].shape[1]/8)
            # N: number of reptition of the stimulus
            N = raw1[1].shape[0]
            for i in range(nbr_neurospheres):
                j = nbr_electrodes*i
                raw1_one_file[N*i:N*(i+1),electrode] = raw1[electrode][:N,j:j+nbr_electrodes]
                raw2_one_file[N*i:N*(i+1),electrode] = raw2[electrode][:N,j:j+nbr_electrodes]
        raw1_reshaped[total_nbr_stim_per_file*start_exp_index:total_nbr_stim_per_file*(start_exp_index+1),:,:,:seq_len] = raw1_one_file
        raw2_reshaped[total_nbr_stim_per_file*start_exp_index:total_nbr_stim_per_file*(start_exp_index+1),:,:,:seq_len] = raw2_one_file

    # append label
    print("append label")
    raw1_reshaped[:,:,:,seq_len] = np.zeros((raw1_reshaped.shape[0],nbr_electrodes,nbr_electrodes))
    raw2_reshaped[:,:,:,seq_len] = np.ones((raw2_reshaped.shape[0],nbr_electrodes,nbr_electrodes))

    #return full dataset
    print("return dataset")
    dataset = np.zeros((total_nbr_stim_per_file*nbr_files*2,nbr_electrodes,nbr_electrodes,seq_len+1))
    dataset[:total_nbr_stim_per_file*nbr_files] = raw1_reshaped
    dataset[total_nbr_stim_per_file*nbr_files:] = raw2_reshaped
    return dataset.astype(np.float32)

def recording_parameters(dirPath:str,offset:int):
    """
    INPUT:
    dirPath: [str] path to directory
    offset: [int] time delay before recording after a stimulus
    OUTPUT:
    total_nbr_stim_per_file: [int] number of electrodes time the number of stimulation per electrode
    nbr_stim_per_electrode: [int] number of time the experiment is repeated throughout a file (9)
    nbr_electrodes: [int] always 8. 
    nbr_neurospheres: [int] number of neurospheres considered (4 or 8)
    seq_len: [int] length of of the data in the time dimension (3000)
    """
    #file_data = load_raw_data(f'{dirPath}/exp_{0}_0_{offset}.npz')
    file_data = load_raw_data(dirPath)
    nbr_stim_per_electrode = file_data[1].shape[0]
    nbr_electrodes = 8
    nbr_neurospheres = int(file_data[1].shape[1]/8)
    total_nbr_stim_per_file = nbr_stim_per_electrode*nbr_neurospheres
    seq_len = file_data[1].shape[2]
    return total_nbr_stim_per_file,nbr_stim_per_electrode,nbr_electrodes,nbr_neurospheres,seq_len

def convertFlatRaw4x3x4x3(raw: np.array):
    """
    INPUT:
    raw: [np.array] array of shape (N,1,8,8,M) with N the number of trials and M the length of the sequence
    OUTPUT:
    array4x3: [np.array] array of shape (N,1,4,3,4,3,M)
    """
    map8 = np.array([[1,0],[0,1],[1,1],[1,2],[2,2],[2,1],[3,1],[2,0]])
    array4x3 = torch.zeros((raw.shape[0],1,4,3,4,3,raw.shape[-1]),dtype=torch.float32)
    for i in range(8):
        for j in range(8):
            array4x3[:,0,map8[i,0],map8[i,1],map8[j,0],map8[j,1],:] = raw[:,0,i,j,:]
    return array4x3

def getListFiles(dirPath):
    listFiles = filter(os.path.isfile,glob.glob(f'{dirPath}/*.npz'))
    listFiles = sorted(listFiles, key=os.path.getmtime)
    return listFiles

def read_in_data(dirPath):
    offset_trigger_ms = 5
    listFiles = getListFiles(dirPath)
    dataset = create_array(dirPath, offset_trigger_ms, listFiles)
    print("converting spatial position")
    dataset = convertFlatRaw4x3x4x3(dataset)
    dataset = np.expand_dims(dataset,1)
    return dataset

# Replace 'your/directory/path' with the actual path where your .npz files are located
directory_path = '/home/vincent/AAA_projects/UnlimitedResearchCooperative/Synthetic_Intelligence_Labs/human-cortical-organoid-signal-analysis/IntanToNWBtoNPZ/ElectricalSeries.npz'
read_in_data(directory_path)



IndexError: index 1 is out of bounds for axis 0 with size 0

# Second

In [9]:
import numpy as np
import torch
import pywt

def prepare_electrical_series_data(filename, segment_length=3000):
    # Load the data
    loaded = np.load(filename)
    data = loaded['data']  # Ensure 'data' is the correct key in your npz file

    # Calculate the number of full segments
    num_segments = data.shape[0] // segment_length

    # Reshape the data into segments of 3000 data points, discard the remainder
    reshaped_data = data[:num_segments * segment_length].reshape(num_segments, segment_length, -1)

    return reshaped_data

# Replace with your actual file path
file_path = 'ElectricalSeries.npz'
prepared_data = prepare_electrical_series_data(file_path)
reshaped_data = prepared_data.transpose(0, 2, 1)

In [10]:
# Print the shape of the reshaped data
print("Shape of reshaped data:", reshaped_data.shape)

# Print the first 5 rows of the reshaped data
print("First 5 rows of reshaped data:")
print(reshaped_data[:5])

Shape of reshaped data: (21, 64, 3000)
First 5 rows of reshaped data:
[[[ 2.5350000e-06 -5.0700000e-06  1.5600000e-06 ...  1.2558000e-04
    1.2129000e-04  1.2675000e-04]
  [ 7.2150001e-06  5.4600000e-06  5.6549998e-06 ...  1.2733499e-04
    1.1934000e-04  1.2382500e-04]
  [-3.9000000e-07 -2.9250000e-06  1.1505000e-05 ...  1.2480000e-04
    1.2070500e-04  1.2148500e-04]
  ...
  [-2.1450001e-06  1.2480000e-05  8.1899998e-06 ...  1.2207000e-04
    1.2675000e-04  1.2148500e-04]
  [ 1.9500001e-06 -1.5600000e-06 -1.7550000e-06 ...  1.2499500e-04
    1.1953500e-04  1.2285000e-04]
  [ 9.7500003e-07  8.3850000e-06  9.5550004e-06 ...  1.1563500e-04
    1.2850499e-04  1.2090000e-04]]

 [[ 1.1992500e-04  1.3571999e-04  1.2051000e-04 ...  3.1200001e-05
    4.2315001e-05  3.2759999e-05]
  [ 1.3162500e-04  1.2324000e-04  1.2480000e-04 ...  4.4655000e-05
    3.3540000e-05  3.9194998e-05]
  [ 1.2733499e-04  1.2714000e-04  1.2285000e-04 ...  3.7829999e-05
    3.8220001e-05  4.2510001e-05]
  ...
  [ 1.1

In [15]:
import numpy as np
import pywt
import os
import glob

def load_raw_data(filename: str):
    list_array = []
    for i in range(8):
        list_array.append(f't{i}')
    
    loaded = np.load(filename)
    raws = {}
    for i, aname in enumerate(list_array):
        raws[i] = loaded[aname]
    
    return raws

# convert (N,1,8,8,M) to (N,1,4,3,4,3,M)
def convertFlatRaw4x3x4x3(raw: np.array):
    """
    INPUT:
    raw: [np.array] array of shape (N,1,8,8,M) with N the number of trials and M the length of the sequence
    OUTPUT:
    array4x3: [np.array] array of shape (N,1,4,3,4,3,M)
    """
    map8 = np.array([[1,0],[0,1],[1,1],[1,2],[2,2],[2,1],[3,1],[2,0]])
    array4x3 = torch.zeros((raw.shape[0],1,4,3,4,3,raw.shape[-1]),dtype=torch.float32)
    for i in range(8):
        for j in range(8):
            array4x3[:,0,map8[i,0],map8[i,1],map8[j,0],map8[j,1],:] = raw[:,0,i,j,:]
    return array4x3

# compute important parameters to create the dataset
def recording_parameters(raw_data, offset:int):
    """
    INPUT:
    raw_data: [dict] dictionary containing raw data arrays
    offset: [int] time delay before recording after a stimulus
    OUTPUT:
    total_nbr_stim_per_file: [int] number of electrodes time the number of stimulation per electrode
    nbr_stim_per_electrode: [int] number of time the experiment is repeated throughout a file (9)
    nbr_electrodes: [int] always 8. 
    nbr_neurospheres: [int] number of neurospheres considered (4 or 8)
    seq_len: [int] length of of the data in the time dimension (3000)
    """
    # Assuming raw_data[1] is structured in a way that these indices make sense
    nbr_stim_per_electrode = raw_data[1].shape[0]
    nbr_electrodes = 8
    nbr_neurospheres = int(raw_data[1].shape[1] / 8)
    total_nbr_stim_per_file = nbr_stim_per_electrode * nbr_neurospheres
    seq_len = raw_data[1].shape[2]
    return total_nbr_stim_per_file, nbr_stim_per_electrode, nbr_electrodes, nbr_neurospheres, seq_len

# create an array of shape (N,8,8,3000)
def create_array(filename: str, offset: int):
    """
    INPUT:
    filename: [str] path to the .npz file
    offset: [int] time delay before recording after a stimulus
    OUTPUT: 
    dataset: [np.array] an array of shape (N,8,8,3001) containing the data
    """
    raw_data = load_raw_data(filename)
    total_nbr_stim_per_file, nbr_stim_per_electrode, nbr_electrodes, nbr_neurospheres, seq_len = recording_parameters(raw_data, offset)


    # create two arrays of size (N,8,8,3001), each one corresponding to one of the class
    raw1_reshaped = np.zeros((total_nbr_stim_per_file*nbr_files,nbr_electrodes,nbr_electrodes,seq_len+1))
    raw2_reshaped = np.zeros((total_nbr_stim_per_file*nbr_files,nbr_electrodes,nbr_electrodes,seq_len+1))

    # fill these arrays with corresponding values from files
    for start_exp_index in range(nbr_files):
        print(f'experiment number:{start_exp_index}')
        raw1 = load_raw_data(f'{dirPath}/exp_{start_exp_index}_0_{offset}.npz')
        raw2 = load_raw_data(f'{dirPath}/exp_{start_exp_index}_1_{offset}.npz')

        # reshape (80,8,8,3000)
        raw1_one_file = np.zeros((total_nbr_stim_per_file,nbr_electrodes,nbr_electrodes,raw1[0].shape[2]))
        raw2_one_file = np.zeros((total_nbr_stim_per_file,nbr_electrodes,nbr_electrodes,raw2[0].shape[2]))

        # iterate through electrode stimulated and neurospheres
        for electrode in range(nbr_electrodes):
            #nbr_neurospheres = int(raw1[electrode].shape[1]/8)
            # N: number of reptition of the stimulus
            N = raw1[1].shape[0]
            for i in range(nbr_neurospheres):
                j = nbr_electrodes*i
                raw1_one_file[N*i:N*(i+1),electrode] = raw1[electrode][:N,j:j+nbr_electrodes]
                raw2_one_file[N*i:N*(i+1),electrode] = raw2[electrode][:N,j:j+nbr_electrodes]
        raw1_reshaped[total_nbr_stim_per_file*start_exp_index:total_nbr_stim_per_file*(start_exp_index+1),:,:,:seq_len] = raw1_one_file
        raw2_reshaped[total_nbr_stim_per_file*start_exp_index:total_nbr_stim_per_file*(start_exp_index+1),:,:,:seq_len] = raw2_one_file

    # append label
    print("append label")
    raw1_reshaped[:,:,:,seq_len] = np.zeros((raw1_reshaped.shape[0],nbr_electrodes,nbr_electrodes))
    raw2_reshaped[:,:,:,seq_len] = np.ones((raw2_reshaped.shape[0],nbr_electrodes,nbr_electrodes))

    #return full dataset
    print("return dataset")
    dataset = np.zeros((total_nbr_stim_per_file*nbr_files*2,nbr_electrodes,nbr_electrodes,seq_len+1))
    dataset[:total_nbr_stim_per_file*nbr_files] = raw1_reshaped
    dataset[total_nbr_stim_per_file*nbr_files:] = raw2_reshaped
    return dataset.astype(np.float32)


# reduce the dimension of the recordings using wavelet transforms (3000->750)
def wavelet_filter(data:np.array):
    w1 = data[:,:,:,:,:-1]
    (w1,_)=pywt.dwt(w1,wavelet='db4',axis=-1,mode='per')
    (w1,_)=pywt.dwt(w1,wavelet='db4',axis=-1,mode='per')
    return np.concatenate((w1,data[:,:,:,:,[-1]]),axis=-1)
     

# Use ElectricalSeries.npz file
file_path = 'ElectricalSeries.npz'
offset_ms = 5
dataset = create_array(file_path, offset_ms)
dataset = np.expand_dims(dataset, 1)
rng = np.random.default_rng(seed=0)
rng.shuffle(dataset, axis=0)
dataset = wavelet_filter(dataset).astype(np.float32)

print("Dataset Shape:", dataset.shape)

KeyError: 't0 is not a file in the archive'