# Print contents

In [23]:
from pynwb import NWBHDF5IO

def list_nwb_components(nwb_file_path):
    """Print the entry names of the acquisition and stimulus groups of an NWB file.

    Args:
        nwb_file_path: Path of the .nwb file, opened read-only.

    Returns:
        None. The names are written to stdout, one per line.
    """
    with NWBHDF5IO(nwb_file_path, 'r') as reader:
        contents = reader.read()

        print("Acquisition Groups:")
        for entry_name in contents.acquisition:
            print(f" - {entry_name}")

        # Nothing more to list when the file object exposes no stimulus group.
        if not hasattr(contents, 'stimulus'):
            return

        print("\nStimulus Groups:")
        for entry_name in contents.stimulus:
            print(f" - {entry_name}")

# Path to the .nwb file to inspect (relative, so it is resolved against the
# kernel's current working directory)
nwb_file_path = 'data1.nwb'

# Print the acquisition and stimulus entry names found in that file
list_nwb_components(nwb_file_path)


Acquisition Groups:
 - ElectricalSeries

Stimulus Groups:
 - TimeSeries_amp_settle
 - TimeSeries_charge_recovery
 - TimeSeries_compliance_limit
 - TimeSeries_stimulation


In [24]:
from pynwb import NWBHDF5IO

# Replace with the path to your .nwb file
nwb_file_path = 'data1.nwb'


def print_timeseries_details(name, timeseries):
    """Print a short summary of one TimeSeries-like object.

    Args:
        name: Label printed as the heading of the summary.
        timeseries: Object exposing ``comments``, ``description``, ``unit``,
            ``data`` (with a ``shape``) and ``timestamps`` (an object with a
            ``shape``, or ``None`` when the series has no explicit timestamps).

    Returns:
        None. The summary is written to stdout.
    """
    print(f"\n{name}:")
    print(f" - Comments: {timeseries.comments}")
    print(f" - Description: {timeseries.description}")
    print(f" - Unit: {timeseries.unit}")
    print(f" - Data shape: {timeseries.data.shape}")
    # Compare against None explicitly: truth-testing an array raises for more
    # than one element and reports an empty or all-zero array as "missing".
    timestamps = timeseries.timestamps
    timestamps_shape = timestamps.shape if timestamps is not None else 'No timestamps'
    print(f" - Timestamps shape: {timestamps_shape}")

# Open the .nwb file read-only using PyNWB; the `with` block closes it on exit.
# Note: the name `io` is bound to the NWBHDF5IO handle here, not the stdlib module.
with NWBHDF5IO(nwb_file_path, 'r') as io:
    nwbfile = io.read()

    # Print the summary of every entry in the acquisition group
    print("Acquisition Groups:")
    acquisition = nwbfile.acquisition
    for name, timeseries in acquisition.items():
        print_timeseries_details(name, timeseries)

    # Print the same summary for every entry in the stimulus group, when the
    # file object exposes one
    if hasattr(nwbfile, 'stimulus'):
        print("\nStimulus Groups:")
        for name, timeseries in nwbfile.stimulus.items():
            print_timeseries_details(name, timeseries)


Acquisition Groups:

ElectricalSeries:
 - Comments: voltage data recorded from the amplifiers of an Intan Technologies chip
 - Description: voltage data recorded from the amplifiers of an Intan Technologies chip
 - Unit: volts
 - Data shape: (65280, 64)
 - Timestamps shape: (65280,)

Stimulus Groups:

TimeSeries_amp_settle:
 - Comments: amplifier settle activity of an Intan Technologies chip
 - Description: amplifier settle activity of an Intan Technologies chip
 - Unit: digital event
 - Data shape: (65280, 64)
 - Timestamps shape: (65280,)

TimeSeries_charge_recovery:
 - Comments: charge recovery activity of an Intan Technologies chip
 - Description: charge recovery activity of an Intan Technologies chip
 - Unit: digital event
 - Data shape: (65280, 64)
 - Timestamps shape: (65280,)

TimeSeries_compliance_limit:
 - Comments: compliance limit activity of an Intan Technologies chip
 - Description: compliance limit activity of an Intan Technologies chip
 - Unit: digital event
 - Data sha

### Shape

In [26]:
from pynwb import NWBHDF5IO
import numpy as np

def extract_and_reshape_data(nwb_file_path, components):
    """Load named series from an NWB file and reshape each to (time, 8, 8).

    Args:
        nwb_file_path: Path of the .nwb file, opened read-only.
        components: Iterable of entry names. Each name is looked up in the
            acquisition group first and in the stimulus group second.

    Returns:
        dict mapping each component that was found to a numpy array of shape
        (n_samples, 8, 8), where n_samples is that component's own length.
        Components that are not found are reported on stdout and skipped.

    Raises:
        ValueError: If a component's data is not 2-D with 64 columns.
    """
    grid_shape = (8, 8)  # electrode layout the 64 columns are folded into
    electrodes = grid_shape[0] * grid_shape[1]
    datasets = {}

    with NWBHDF5IO(nwb_file_path, 'r') as io:
        nwbfile = io.read()

        for component in components:
            # Explicit None checks instead of `a or b`, so the lookup never
            # depends on the truthiness of the object that was found.
            timeseries = nwbfile.acquisition.get(component)
            if timeseries is None:
                timeseries = nwbfile.stimulus.get(component)
            if timeseries is None:
                print(f"Component {component} not found.")
                continue

            # Copy the data into memory while the file is still open.
            data = np.array(timeseries.data[:])

            # Check the shape of the data
            if data.ndim != 2 or data.shape[1] != electrodes:
                raise ValueError(f"Incorrect data shape in component {component}")

            # Take the number of time points from the data itself rather than
            # a fixed constant, so recordings of any length can be reshaped.
            reshaped_data = data.reshape((data.shape[0], *grid_shape))
            datasets[component] = reshaped_data
            print(f"Processed {component}, reshaped data shape: {reshaped_data.shape}")

    return datasets

# Path to the .nwb file to read (relative to the kernel's working directory)
nwb_file_path = 'data1.nwb'

# Names of the entries to extract; each is looked up in the acquisition group
# and then in the stimulus group by extract_and_reshape_data
components = [
    "ElectricalSeries", "TimeSeries_amp_settle", "TimeSeries_charge_recovery",
    "TimeSeries_compliance_limit", "TimeSeries_stimulation"
]

# Extract the listed components and reshape each one to (time, 8, 8)
datasets = extract_and_reshape_data(nwb_file_path, components)

# Print the shape of every dataset that was found
for component, data in datasets.items():
    print(f"{component} dataset shape: {data.shape}")


Processed ElectricalSeries, reshaped data shape: (65280, 8, 8)
Processed TimeSeries_amp_settle, reshaped data shape: (65280, 8, 8)
Processed TimeSeries_charge_recovery, reshaped data shape: (65280, 8, 8)
Processed TimeSeries_compliance_limit, reshaped data shape: (65280, 8, 8)
Processed TimeSeries_stimulation, reshaped data shape: (65280, 8, 8)
ElectricalSeries dataset shape: (65280, 8, 8)
TimeSeries_amp_settle dataset shape: (65280, 8, 8)
TimeSeries_charge_recovery dataset shape: (65280, 8, 8)
TimeSeries_compliance_limit dataset shape: (65280, 8, 8)
TimeSeries_stimulation dataset shape: (65280, 8, 8)


### Print first 10 rows of each component data

In [27]:
# Function to print the first 10 rows of each dataset
def print_first_10_rows(datasets):
    """Print up to the first 10 time points of every dataset.

    Args:
        datasets: Mapping of component name to an array indexed as
            ``data[time, row, col]``.

    Returns:
        None. Each time point is written to stdout as a 2-D grid.
    """
    max_rows = 10
    for component, data in datasets.items():
        print(f"\nFirst 10 rows of {component}:")
        # Clamp to the rows that exist so short recordings do not raise IndexError.
        for row in range(min(max_rows, len(data))):
            print(f"Timepoint {row + 1}:\n{data[row, :, :]}")  # Print each grid

# Print the first 10 time points of every dataset in `datasets`
# (the dict built by extract_and_reshape_data in the previous section)
print_first_10_rows(datasets)



First 10 rows of ElectricalSeries:
Timepoint 1:
[[ 2.53500002e-06  7.21500010e-06 -3.89999997e-07  3.90000014e-06
   4.29000011e-06 -7.01999988e-06 -1.13099995e-05  2.73000001e-06]
 [ 3.50999994e-06  1.40399998e-05 -7.01999988e-06  1.36500000e-06
   3.11999997e-06 -2.14500005e-06  1.95000007e-06  9.75000034e-07]
 [ 2.53500002e-06  7.21500010e-06 -3.89999997e-07  3.90000014e-06
   4.29000011e-06 -7.01999988e-06 -1.13099995e-05  2.73000001e-06]
 [ 3.50999994e-06  1.40399998e-05 -7.01999988e-06  1.36500000e-06
   3.11999997e-06 -2.14500005e-06  1.95000007e-06  9.75000034e-07]
 [ 2.53500002e-06  7.21500010e-06 -3.89999997e-07  3.90000014e-06
   4.29000011e-06 -7.01999988e-06 -1.13099995e-05  2.73000001e-06]
 [ 3.50999994e-06  1.40399998e-05 -7.01999988e-06  1.36500000e-06
   3.11999997e-06 -2.14500005e-06  1.95000007e-06  9.75000034e-07]
 [ 2.53500002e-06  7.21500010e-06 -3.89999997e-07  3.90000014e-06
   4.29000011e-06 -7.01999988e-06 -1.13099995e-05  2.73000001e-06]
 [ 3.50999994e-06  1

# In case of errors, delete all files in the ready_for_shaping directory

In [18]:
import os

# Directory whose files will be removed.
# NOTE(review): this is an absolute, machine-specific path, while the extraction
# cell writes to the relative 'ready_for_shaping' directory; confirm that both
# refer to the same location before running this cell.
directory_path = '/home/vincent/AAA_projects/UnlimitedResearchCooperative/Synthetic_Intelligence_Labs/human-cortical-organoid-signal-analysis/IntanToNWBtoNPZ/ready_for_shaping'

# List every entry in the directory (raises if the directory does not exist)
files = os.listdir(directory_path)

# Permanently delete each regular file; sub-directories are left untouched.
# WARNING: this is irreversible and not limited to .npz files.
for file in files:
    file_path = os.path.join(directory_path, file)
    if os.path.isfile(file_path):
        os.remove(file_path)

### Alternative (needs work)

In [16]:
from pynwb import NWBHDF5IO
import numpy as np
import os
import glob
import torch as torch

# Define the function to load raw data from .npz files
def load_raw_data(filename: str):
    """Return the array stored under the key 'data' in an .npz archive.

    Args:
        filename: Path of the .npz file to read.

    Returns:
        The array saved under 'data'. It is read while the archive is open,
        and the archive is closed before the function returns.
    """
    with np.load(filename) as archive:
        return archive['data']
    
# Function to extract data from a TimeSeries object
def extract_data(time_series):
    """Copy the payload of a TimeSeries-like object into plain Python/numpy values.

    Args:
        time_series: Object exposing ``data`` and ``timestamps`` (sliceable, the
            latter possibly ``None``) plus ``unit`` and ``comments``.

    Returns:
        dict with keys 'data' (numpy array), 'timestamps' (numpy array, or None
        when the series has no timestamps), 'unit' and 'comments'.
    """
    # Fixes the misspelled `tizme_series` reference, and tests for None
    # explicitly because truth-testing a multi-element array raises ValueError.
    timestamps = time_series.timestamps
    return {
        'data': np.array(time_series.data[:]),
        'timestamps': np.array(timestamps[:]) if timestamps is not None else None,
        'unit': time_series.unit,
        'comments': time_series.comments
    }

# Function to extract and save data from NWB file as NPZ
def extract_and_save_npz(nwb_file_path, output_dir, offset=5):
    """Write every column of every acquisition/stimulus series to .npz files.

    For each series, column ``i`` of its data is saved twice, once per group
    type (0 and 1), under ``exp_{i}_{group_type}_{offset}.npz`` with the key
    'data'.

    NOTE(review): the file name contains neither the group nor the series
    name, so series sharing a column index write to the same path and the
    last one written wins. Each file holds a single 1-D column.

    Args:
        nwb_file_path: Path of the .nwb file, opened read-only.
        output_dir: Existing directory that receives the .npz files.
        offset: Value embedded in the file names (default 5).

    Returns:
        None. A completion message is printed once the file is closed.
    """
    with NWBHDF5IO(nwb_file_path, 'r') as reader:
        contents = reader.read()

        for group_name in ('acquisition', 'stimulus'):
            if not hasattr(contents, group_name):
                continue

            for _series_name, series in getattr(contents, group_name).items():
                samples = extract_data(series)['data']

                for column in range(samples.shape[1]):
                    column_values = samples[:, column]
                    # Two identical copies per column, one per group type
                    for group_type in (0, 1):
                        target = os.path.join(
                            output_dir, f'exp_{column}_{group_type}_{offset}.npz'
                        )
                        np.savez_compressed(target, data=column_values)

    print("Data extraction and saving completed.")

# Path to the .nwb file to convert (relative to the kernel's working directory)
nwb_file_path = 'data1.nwb'

# Directory that receives the extracted .npz files (relative path; adjust as needed)
output_dir = 'ready_for_shaping'  # Adjust this path as needed

# Create the output directory if it is missing; no error if it already exists
os.makedirs(output_dir, exist_ok=True)

# Extract the series from the .nwb file and save them as .npz (default offset of 5)
extract_and_save_npz(nwb_file_path, output_dir)

def create_array(dirPath: str, offset: int, listFiles: list, seq_len: int = 65280):
    """
    Assemble the per-experiment .npz recordings into a single array.

    INPUT:
    dirPath: [str] path to directory
    offset: [int] time delay before recording after a stimulus (only used to build file names)
    listFiles: [list] files found in dirPath; only its length is used, half of it
               being taken as the number of experiments (two group types each)
    seq_len: [int] number of time points read from each file (default 65280)
    OUTPUT:
    dataset: [np.array] a float32 array of shape (N, 8, 8, T) where T is the number of
             time points; slots whose file is missing are left filled with zeros
    RAISES:
    ValueError if a file does not hold at least seq_len frames of 64 values each
    """
    grid_shape = (8, 8)
    frame_size = grid_shape[0] * grid_shape[1]
    nbr_files = len(listFiles) // 2

    # N is the total number of stimulation groups (two per experiment).
    # Allocate float32 up front: the result is returned as float32 anyway, and
    # this avoids holding a float64 buffer of twice the size alongside it.
    N = nbr_files * 2
    dataset = np.zeros((N, *grid_shape, seq_len), dtype=np.float32)

    for start_exp_index in range(nbr_files):
        print(f'experiment number: {start_exp_index}')
        for group_type in [0, 1]:
            filename = f'{dirPath}/exp_{start_exp_index}_{group_type}_{offset}.npz'
            if not os.path.exists(filename):
                print(f"File not found: {filename}")
                continue

            raw_data = np.asarray(load_raw_data(filename))
            # Every frame (one entry along axis 0) must hold exactly 64 values.
            values_per_frame = int(np.prod(raw_data.shape[1:])) if raw_data.ndim >= 2 else 0
            if values_per_frame != frame_size or raw_data.shape[0] < seq_len:
                raise ValueError(f"Incorrect data shape in file {filename}")

            # Reshape all time points at once to (T, 8, 8), then move time to
            # the last axis to match the (8, 8, T) layout of the output slot.
            frames = raw_data[:seq_len].reshape((seq_len, *grid_shape))
            dataset[start_exp_index * 2 + group_type] = np.moveaxis(frames, 0, -1)

    return dataset




def recording_parameters(dirPath: str, offset: int):
    """
    Derive the recording parameters from one .npz file of a directory.

    INPUT:
    dirPath: [str] path to directory containing .npz files
    offset: [int] time delay before recording after a stimulus (unused in this function)
    OUTPUT:
    total_nbr_stim_per_file: [int] first dimension of the inspected file's data
    nbr_stim_per_electrode: [int] fixed to 1
    nbr_electrodes: [int] fixed to 8
    nbr_neurospheres: [int] fixed to 1
    seq_len: [int] second dimension of the inspected file's data
    RAISES:
    FileNotFoundError if the directory holds no .npz file
    ValueError if the loaded data is not a 2-D numpy array
    """
    candidates = []
    for entry in os.listdir(dirPath):
        if entry.endswith('.npz'):
            candidates.append(entry)
    if len(candidates) == 0:
        raise FileNotFoundError("No .npz files found in the directory")

    # Only the first listed file is inspected to determine the shape
    sample = load_raw_data(os.path.join(dirPath, candidates[0]))

    if not isinstance(sample, np.ndarray):
        raise ValueError("Loaded data is not a numpy array.")
    if sample.ndim != 2:
        raise ValueError("Unexpected shape of data in .npz file.")

    # 2-D data is read as (trials, time)
    nbr_trials, seq_len = sample.shape[0], sample.shape[1]

    # Fixed assumptions: 8 electrodes, 1 neurosphere, 1 stimulation per electrode
    return nbr_trials, 1, 8, 1, seq_len


def convertFlatRaw4x3x4x3(raw: np.array):
    """
    Spread an (N, 1, 8, 8, D) array over an (N, 1, 4, 3, 4, 3, D) array.

    Each of the two 8-long axes is mapped onto a 4x3 grid through the same
    lookup table of (row, col) positions. Grid cells that no index maps to
    stay zero. Only index 0 of the second axis is read and written.
    """
    grid_positions = np.array([[1, 0], [0, 1], [1, 1], [1, 2], [2, 2], [2, 1], [3, 1], [2, 0]])
    n_samples, _, _, _, depth = raw.shape  # the input must be exactly 5-D
    remapped = np.zeros((n_samples, 1, 4, 3, 4, 3, depth))

    for first_idx, (first_row, first_col) in enumerate(grid_positions):
        for second_idx, (second_row, second_col) in enumerate(grid_positions):
            source = raw[:, 0, first_idx, second_idx, :]
            remapped[:, 0, first_row, first_col, second_row, second_col, :] = source

    return remapped

def getListFiles(dirPath):
    """Return the .npz files directly inside dirPath, oldest modification first.

    Only regular files are kept; directories matching the pattern are dropped.
    """
    matches = glob.glob(f'{dirPath}/*.npz')
    regular_files = [path for path in matches if os.path.isfile(path)]
    regular_files.sort(key=os.path.getmtime)
    return regular_files
        
def _read_flat_dataset(dirPath):
    """Load the .npz recordings of dirPath as a 5-D array of shape (N, 1, 8, 8, T)."""
    offset_trigger_ms = 5
    listFiles = getListFiles(dirPath)
    dataset = create_array(dirPath, offset_trigger_ms, listFiles)
    # create_array returns (N, 8, 8, T); insert the singleton axis that
    # convertFlatRaw4x3x4x3 expects at position 1.
    return np.expand_dims(dataset, 1)


def read_in_data(dirPath):
    """Load the recordings of dirPath and remap them onto the 4x3x4x3 layout.

    The singleton axis is added BEFORE the spatial conversion: the converter
    unpacks a 5-D shape, so passing it the 4-D output of create_array (as the
    previous version did) always raised ValueError.

    Args:
        dirPath: Directory containing the exp_*.npz files.

    Returns:
        Array of shape (N, 1, 4, 3, 4, 3, T).
    """
    dataset = _read_flat_dataset(dirPath)
    print("converting spatial position")
    return convertFlatRaw4x3x4x3(dataset)


# The output directory was created and the .nwb data extracted earlier in this
# cell, so neither step is repeated here.

# Build the flat dataset, then convert it exactly once
dataset = _read_flat_dataset(output_dir)
print("Dataset shape before converting:", dataset.shape)  # 5D: (N, 1, 8, 8, T)
converted_dataset = convertFlatRaw4x3x4x3(dataset)
print("Converted dataset shape:", converted_dataset.shape)  # 7D: (N, 1, 4, 3, 4, 3, T)

Data extraction and saving completed.
Data extraction and saving completed.


ValueError: Incorrect data shape in file ready_for_shaping/exp_0_0_5.npz