# NWB to NPZ

In [3]:
import numpy as np
import os
import glob  
from pynwb import NWBHDF5IO
import torch

def extract_and_save_npz(nwb_file_path, output_dir):
    with NWBHDF5IO(nwb_file_path, 'r') as io:
        nwbfile = io.read()

        # Extract acquisition groups
        acquisition_data = {name: np.array(timeseries.data[:]) for name, timeseries in nwbfile.acquisition.items()}
        
        # Extract stimulus groups if present
        stimulus_data = {name: np.array(timeseries.data[:]) for name, timeseries in nwbfile.stimulus.items()} if hasattr(nwbfile, 'stimulus') else {}

        # Combine and save the data to .npz files
        for name, data in {**acquisition_data, **stimulus_data}.items():
            np.savez_compressed(os.path.join(output_dir, f'{name}.npz'), data=data)

        print("Data extraction and saving completed.")

def load_raw_data(filename: str):
    with np.load(filename) as loaded:
        # Assuming the .npz file contains an array with the key 'data'
        data = loaded['data']
    return data

def create_array(dirPath:str, offset:int,listFiles:list):
    """
    INPUT:
    dirPath: [str] path to directory
    offset: [int] time delay before before recording after a stimulus
    OUTPUT: 
    dataset: [np.array] an array of shape (N,8,8,3001) containing the data
    """
    # number of pre and post stimulation files 
    nbr_files = int(len(listFiles)/2)
    total_nbr_stim_per_file,nbr_stim_per_electrode,nbr_electrodes,nbr_neurospheres,seq_len = recording_parameters(dirPath,offset)


    # create two arrays of size (N,8,8,3001), each one corresponding to one of the class
    raw1_reshaped = np.zeros((total_nbr_stim_per_file*nbr_files,nbr_electrodes,nbr_electrodes,seq_len+1))
    raw2_reshaped = np.zeros((total_nbr_stim_per_file*nbr_files,nbr_electrodes,nbr_electrodes,seq_len+1))

    # fill these arrays with corresponding values from files
    for start_exp_index in range(nbr_files):
        print(f'experiment number: {start_exp_index}')
        filename1 = f'{dirPath}/exp_{start_exp_index}_0_{offset}.npz'
        filename2 = f'{dirPath}/exp_{start_exp_index}_1_{offset}.npz'

        if not os.path.exists(filename1) or not os.path.exists(filename2):
            print(f"Files not found: {filename1} or {filename2}")
            continue  # Skip this iteration if files do not exist

        raw1 = load_raw_data(filename1)
        raw2 = load_raw_data(filename2)

        # iterate through electrode stimulated and neurospheres
        for electrode in range(nbr_electrodes):
            #nbr_neurospheres = int(raw1[electrode].shape[1]/8)
            # N: number of reptition of the stimulus
            N = raw1[1].shape[0]
            for i in range(nbr_neurospheres):
                j = nbr_electrodes*i
                raw1_one_file[N*i:N*(i+1),electrode] = raw1[electrode][:N,j:j+nbr_electrodes]
                raw2_one_file[N*i:N*(i+1),electrode] = raw2[electrode][:N,j:j+nbr_electrodes]
        raw1_reshaped[total_nbr_stim_per_file*start_exp_index:total_nbr_stim_per_file*(start_exp_index+1),:,:,:seq_len] = raw1_one_file
        raw2_reshaped[total_nbr_stim_per_file*start_exp_index:total_nbr_stim_per_file*(start_exp_index+1),:,:,:seq_len] = raw2_one_file

    # append label
    print("append label")
    raw1_reshaped[:,:,:,seq_len] = np.zeros((raw1_reshaped.shape[0],nbr_electrodes,nbr_electrodes))
    raw2_reshaped[:,:,:,seq_len] = np.ones((raw2_reshaped.shape[0],nbr_electrodes,nbr_electrodes))

    #return full dataset
    print("return dataset")
    dataset = np.zeros((total_nbr_stim_per_file*nbr_files*2,nbr_electrodes,nbr_electrodes,seq_len+1))
    dataset[:total_nbr_stim_per_file*nbr_files] = raw1_reshaped
    dataset[total_nbr_stim_per_file*nbr_files:] = raw2_reshaped
    return dataset.astype(np.float32)

def recording_parameters(dirPath: str, offset: int):
    """
    INPUT:
    dirPath: [str] path to directory containing .npz files
    offset: [int] time delay before recording after a stimulus (unused in this function)
    OUTPUT:
    total_nbr_stim_per_file: [int] total number of stimulations per file
    nbr_stim_per_electrode: [int] number of times the experiment is repeated within a file
    nbr_electrodes: [int] number of electrodes (usually 8)
    nbr_neurospheres: [int] number of neurospheres considered
    seq_len: [int] length of the data in the time dimension
    """

    npz_files = [f for f in os.listdir(dirPath) if f.endswith('.npz')]
    if not npz_files:
        raise FileNotFoundError("No .npz files found in the directory")

    first_file = os.path.join(dirPath, npz_files[0])
    file_data = load_raw_data(first_file)

    if not isinstance(file_data, np.ndarray):
        raise ValueError("Loaded data is not a numpy array.")
    if file_data.ndim != 2:
        raise ValueError(f"Unexpected number of dimensions in the data: {file_data.ndim}")

    # Assuming first dimension is trials and second dimension is time points
    nbr_trials = file_data.shape[0]
    time_points = file_data.shape[1]

    # Assuming 8 electrodes, 1 neurosphere, and 1 stimulation per electrode
    nbr_stim_per_electrode = 1
    nbr_electrodes = 8
    nbr_neurospheres = 1
    total_nbr_stim_per_file = nbr_trials
    seq_len = time_points

    return total_nbr_stim_per_file, nbr_stim_per_electrode, nbr_electrodes, nbr_neurospheres, seq_len


def convertFlatRaw4x3x4x3(raw: np.array):
    """
    INPUT:
    raw: [np.array] array of shape (N,1,8,8,M) with N the number of trials and M the length of the sequence
    OUTPUT:
    array4x3: [np.array] array of shape (N,1,4,3,4,3,M)
    """
    map8 = np.array([[1,0],[0,1],[1,1],[1,2],[2,2],[2,1],[3,1],[2,0]])
    array4x3 = torch.zeros((raw.shape[0],1,4,3,4,3,raw.shape[-1]),dtype=torch.float32)
    for i in range(8):
        for j in range(8):
            array4x3[:,0,map8[i,0],map8[i,1],map8[j,0],map8[j,1],:] = raw[:,0,i,j,:]
    return array4x3

def getListFiles(dirPath):
    listFiles = filter(os.path.isfile,glob.glob(f'{dirPath}/*.npz'))
    listFiles = sorted(listFiles, key=os.path.getmtime)
    return listFiles
        
def read_in_data(dirPath):
    offset_trigger_ms = 5
    listFiles = getListFiles(dirPath)
    dataset = create_array(dirPath, offset_trigger_ms, listFiles)
    print("converting spatial position")
    dataset = convertFlatRaw4x3x4x3(dataset)
    dataset = np.expand_dims(dataset,1)
    return dataset

# Define the path to your .nwb file and the output directory for .npz files
nwb_file_path = '/home/vincent/AAA_projects/UnlimitedResearchCooperative/Synthetic Intelligence/human-cortical-organoid-signal-analysis/IntanToNWBtoNPZ/data1.nwb'  # path to your .nwb file
output_dir = '/home/vincent/AAA_projects/UnlimitedResearchCooperative/Synthetic Intelligence/human-cortical-organoid-signal-analysis/IntanToNWBtoNPZ'  # path to the directory where you want to save .npz files

# Ensure the output directory exists
os.makedirs(output_dir, exist_ok=True)

# Call the function to extract data from .nwb and save as .npz
extract_and_save_npz(nwb_file_path, output_dir)

# Now call the read_in_data function with the directory containing your .npz files
dataset = read_in_data(output_dir)

Data extraction and saving completed.
experiment number: 0
Files not found: /home/vincent/AAA_projects/UnlimitedResearchCooperative/Synthetic Intelligence/human-cortical-organoid-signal-analysis/IntanToNWBtoNPZ/exp_0_0_5.npz or /home/vincent/AAA_projects/UnlimitedResearchCooperative/Synthetic Intelligence/human-cortical-organoid-signal-analysis/IntanToNWBtoNPZ/exp_0_1_5.npz
experiment number: 1
Files not found: /home/vincent/AAA_projects/UnlimitedResearchCooperative/Synthetic Intelligence/human-cortical-organoid-signal-analysis/IntanToNWBtoNPZ/exp_1_0_5.npz or /home/vincent/AAA_projects/UnlimitedResearchCooperative/Synthetic Intelligence/human-cortical-organoid-signal-analysis/IntanToNWBtoNPZ/exp_1_1_5.npz
append label
return dataset
converting spatial position


IndexError: too many indices for array: array is 4-dimensional, but 5 were indexed