# Reshape the data for a single spheroid

# Data extraction

# Extract nwb file contents and save as np

In [1]:
from pynwb import NWBHDF5IO
import numpy as np
import os
import glob
import torch as torch

# Define the function to load raw data from .npz files
def load_raw_data(filename: str):
    with np.load(filename) as loaded:
        # Assuming the .npz file contains an array with the key 'data'
        data = loaded['data']
    return data
    
# Function to extract data from a TimeSeries object
def extract_data(time_series):
    return {
        'data': np.array(time_series.data[:]),
        'timestamps': np.array(time_series.timestamps[:]) if time_series.timestamps else None,
        'unit': time_series.unit,
        'comments': time_series.comments
    }

# Function to extract and save data from NWB file as NPZ
def extract_and_save_npz(nwb_file_path, output_dir, offset=5):  # Added offset parameter with default value
    with NWBHDF5IO(nwb_file_path, 'r') as io:
        nwbfile = io.read()

        # Extract and save acquisition and stimulus data as NPZ
        for group_name in ('acquisition', 'stimulus'):
            if hasattr(nwbfile, group_name):
                group_data = getattr(nwbfile, group_name)
                for name, timeseries in group_data.items():
                    data = extract_data(timeseries)['data']
                    
                    # Generate file names based on your naming convention
                    for i in range(data.shape[1]):
                        for group_type in [0, 1]:  # Assuming you have two types of groups
                            file_name = f'exp_{i}_{group_type}_{offset}.npz'  # offset is now defined
                            # Generate and save data for each file
                            np.savez_compressed(os.path.join(output_dir, file_name), data=data[:, i])

    print("Data extraction and saving completed.")

# Replace with the path to your .nwb file
nwb_file_path = 'data1.nwb'

# Directory to save the extracted data
output_dir = 'ready_for_shaping'  # Adjust this path as needed

# Ensure the output directory exists
os.makedirs(output_dir, exist_ok=True)

# Call the function to extract data from .nwb and save as .npz
extract_and_save_npz(nwb_file_path, output_dir)

def create_array(dirPath:str, offset:int,listFiles:list):
    """
    INPUT:
    dirPath: [str] path to directory
    offset: [int] time delay before before recording after a stimulus
    OUTPUT: 
    dataset: [np.array] an array of shape (N,8,8,3001) containing the data
    """
    # number of pre and post stimulation files 
    nbr_files = int(len(listFiles) / 2)
    total_nbr_stim_per_file, nbr_stim_per_electrode, nbr_electrodes, nbr_neurospheres, seq_len = recording_parameters(dirPath, offset)

    # Initialize two arrays, each corresponding to one of the class
    raw1_reshaped = np.zeros((total_nbr_stim_per_file * nbr_files, nbr_electrodes, nbr_electrodes, seq_len + 1))
    raw2_reshaped = np.zeros((total_nbr_stim_per_file * nbr_files, nbr_electrodes, nbr_electrodes, seq_len + 1))

    for start_exp_index in range(nbr_files):
        print(f'experiment number: {start_exp_index}')
        
        filename1 = f'{dirPath}/exp_{start_exp_index}_0_{offset}.npz'
        filename2 = f'{dirPath}/exp_{start_exp_index}_1_{offset}.npz'

        if not os.path.exists(filename1) or not os.path.exists(filename2):
            print(f"Files not found: {filename1} or {filename2}")
            continue

        raw1 = load_raw_data(filename1)
        raw2 = load_raw_data(filename2)

        # Append label
        print("append label")
        raw1_reshaped[:, :, :, seq_len] = np.zeros((raw1_reshaped.shape[0], nbr_electrodes, nbr_electrodes))
        raw2_reshaped[:, :, :, seq_len] = np.ones((raw2_reshaped.shape[0], nbr_electrodes, nbr_electrodes))

    # Return full dataset in 5D format
    print("return dataset")
    dataset = np.zeros((total_nbr_stim_per_file * nbr_files * 2, 1, nbr_electrodes, nbr_electrodes, seq_len + 1))
    dataset[:total_nbr_stim_per_file * nbr_files, 0, :, :, :] = raw1_reshaped
    dataset[total_nbr_stim_per_file * nbr_files:, 0, :, :, :] = raw2_reshaped

    print("Dataset shape before converting:", dataset.shape)  # Should be (N, 1, 8, 8, M)

    return dataset.astype(np.float32)


def recording_parameters(dirPath: str, offset: int):
    """
    INPUT:
    dirPath: [str] path to directory containing .npz files
    offset: [int] time delay before recording after a stimulus (unused in this function)
    OUTPUT:
    total_nbr_stim_per_file: [int] total number of stimulations per file
    nbr_stim_per_electrode: [int] number of times the experiment is repeated within a file
    nbr_electrodes: [int] number of electrodes (usually 8)
    nbr_neurospheres: [int] number of neurospheres considered
    seq_len: [int] length of the data in the time dimension
    """

    npz_files = [f for f in os.listdir(dirPath) if f.endswith('.npz')]
    if not npz_files:
        raise FileNotFoundError("No .npz files found in the directory")

    first_file = os.path.join(dirPath, npz_files[0])
    file_data = load_raw_data(first_file)

    if not isinstance(file_data, np.ndarray):
        raise ValueError("Loaded data is not a numpy array.")

    # Adjust the logic to handle different data shapes
    if file_data.ndim == 1:
        # Data is 1-dimensional
        nbr_trials = len(file_data)
        time_points = 1  # Assuming each point is a separate trial
    elif file_data.ndim == 2:
        # Data is 2-dimensional
        nbr_trials = file_data.shape[0]
        time_points = file_data.shape[1]
    else:
        raise ValueError(f"Unexpected number of dimensions in the data: {file_data.ndim}")

    # Assuming 8 electrodes, 1 neurosphere, and 1 stimulation per electrode
    nbr_stim_per_electrode = 1
    nbr_electrodes = 8
    nbr_neurospheres = 1
    total_nbr_stim_per_file = nbr_trials
    seq_len = time_points

    return total_nbr_stim_per_file, nbr_stim_per_electrode, nbr_electrodes, nbr_neurospheres, seq_len

def convertFlatRaw4x3x4x3(raw: np.array):
    """
    Convert a 5D numpy array of shape (N, 1, 8, 8, 2) to a 7D numpy array of shape (N, 1, 4, 3, 4, 3, 2).
    """
    map8 = np.array([[1, 0], [0, 1], [1, 1], [1, 2], [2, 2], [2, 1], [3, 1], [2, 0]])
    N, _, _, _, last_dim = raw.shape  # Correctly adjusted to match the shape of the input array
    array4x3 = np.zeros((N, 1, 4, 3, 4, 3, last_dim))

    for i in range(8):
        for j in range(8):
            array4x3[:, 0, map8[i, 0], map8[i, 1], map8[j, 0], map8[j, 1], :] = raw[:, 0, i, j, :]

    return array4x3

def getListFiles(dirPath):
    listFiles = filter(os.path.isfile,glob.glob(f'{dirPath}/*.npz'))
    listFiles = sorted(listFiles, key=os.path.getmtime)
    return listFiles
        
def read_in_data(dirPath):
    offset_trigger_ms = 5
    listFiles = getListFiles(dirPath)
    dataset = create_array(dirPath, offset_trigger_ms, listFiles)
    print("converting spatial position")
    dataset = convertFlatRaw4x3x4x3(dataset)
    dataset = np.expand_dims(dataset,1)
    return dataset

# Ensure the output directory exists
os.makedirs(output_dir, exist_ok=True)

# Call the function to extract data from .nwb and save as .npz
extract_and_save_npz(nwb_file_path, output_dir, offset=5)  # Pass the offset value here

# After creating the dataset
dataset = read_in_data(output_dir)
print("Dataset shape before converting:", dataset.shape)  # Should now be a 5D array
converted_dataset = convertFlatRaw4x3x4x3(dataset)
print("Converted dataset shape:", converted_dataset.shape)  # Should now be the correct 7D array

Data extraction and saving completed.
Data extraction and saving completed.
experiment number: 0
append label
experiment number: 1
append label
experiment number: 2
append label
experiment number: 3
append label
experiment number: 4
append label
experiment number: 5
append label
experiment number: 6
append label
experiment number: 7
append label
experiment number: 8
append label
experiment number: 9
append label
experiment number: 10
append label
experiment number: 11
append label
experiment number: 12
append label
experiment number: 13
append label
experiment number: 14
append label
experiment number: 15
append label
experiment number: 16
append label
experiment number: 17
append label
experiment number: 18
append label
experiment number: 19
append label
experiment number: 20
append label
experiment number: 21
append label
experiment number: 22
append label
experiment number: 23
append label
experiment number: 24
append label
experiment number: 25
append label
experiment number: 26
ap

ValueError: too many values to unpack (expected 5)

# Delete all files in ready_for_shaping dir incase of errors

In [5]:
import os

# Specify the directory path
directory_path = '/home/vincent/AAA_projects/UnlimitedResearchCooperative/Synthetic_Intelligence_Labs/human-cortical-organoid-signal-analysis/IntanToNWBtoNPZ/ready_for_shaping'

# List all files in the directory
files = os.listdir(directory_path)

# Iterate through the list of files and delete each one
for file in files:
    file_path = os.path.join(directory_path, file)
    if os.path.isfile(file_path):
        os.remove(file_path)


# Print contents

In [2]:
from pynwb import NWBHDF5IO

# Replace with the path to your .nwb file
nwb_file_path = 'data1.nwb'

# Function to print details of a TimeSeries object
def print_timeseries_details(name, timeseries):
    print(f"\n{name}:")
    print(f" - Comments: {timeseries.comments}")
    print(f" - Description: {timeseries.description}")
    print(f" - Unit: {timeseries.unit}")
    print(f" - Data shape: {timeseries.data.shape}")
    print(f" - Timestamps shape: {timeseries.timestamps.shape if timeseries.timestamps else 'No timestamps'}")

# Open the .nwb file using PyNWB
with NWBHDF5IO(nwb_file_path, 'r') as io:
    nwbfile = io.read()

    # Access and print details of the acquisition group
    print("Acquisition Groups:")
    acquisition = nwbfile.acquisition
    for name, timeseries in acquisition.items():
        print_timeseries_details(name, timeseries)

    # Access and print details of the stimulus group
    if hasattr(nwbfile, 'stimulus'):
        print("\nStimulus Groups:")
        for name, timeseries in nwbfile.stimulus.items():
            print_timeseries_details(name, timeseries)


Acquisition Groups:

ElectricalSeries:
 - Comments: voltage data recorded from the amplifiers of an Intan Technologies chip
 - Description: voltage data recorded from the amplifiers of an Intan Technologies chip
 - Unit: volts
 - Data shape: (65280, 64)
 - Timestamps shape: (65280,)

Stimulus Groups:

TimeSeries_amp_settle:
 - Comments: amplifier settle activity of an Intan Technologies chip
 - Description: amplifier settle activity of an Intan Technologies chip
 - Unit: digital event
 - Data shape: (65280, 64)
 - Timestamps shape: (65280,)

TimeSeries_charge_recovery:
 - Comments: charge recovery activity of an Intan Technologies chip
 - Description: charge recovery activity of an Intan Technologies chip
 - Unit: digital event
 - Data shape: (65280, 64)
 - Timestamps shape: (65280,)

TimeSeries_compliance_limit:
 - Comments: compliance limit activity of an Intan Technologies chip
 - Description: compliance limit activity of an Intan Technologies chip
 - Unit: digital event
 - Data sha

NameError: name 'extract_and_save_npz' is not defined