# Reshape the data for a single spheroid

# Data extraction

# Extract NWB file contents and save them as NumPy arrays

### Alternative

In [None]:
from pynwb import NWBHDF5IO
import numpy as np
import os

def _to_electrode_grid(data):
    """
    Reshape one series into a (1, e, e, seq_len) electrode grid.

    The layout assumed originally (electrodes x seq_len) is tried first so that
    inputs which already worked keep producing the same result. If that reshape
    fails and the data is 2-D with a square number of columns, the array is
    treated as time-major (seq_len x electrodes) and transposed first.

    :param data: numpy array holding one series
    :return: array of shape (1, e, e, seq_len)
    :raises ValueError: if the data has fewer than two dimensions or neither
        layout yields a square electrode grid
    """
    if data.ndim < 2:
        # Previously this surfaced as an uncaught IndexError on data.shape[1].
        raise ValueError(f"expected at least 2-D data, got shape {data.shape}")

    n_rows, n_cols = data.shape[0], data.shape[1]
    side = int(np.sqrt(n_rows))
    try:
        return data.reshape((1, side, side, n_cols))
    except ValueError:
        # Fallback for time-major data, e.g. (65280, 64) -> (1, 8, 8, 65280).
        side = int(np.sqrt(n_cols))
        if data.ndim != 2 or side * side != n_cols:
            raise
        return data.T.reshape((1, side, side, n_rows))


def process_nwb_data(nwb_file_path: str):
    """
    Read every series in the 'acquisition' and 'stimulus' groups of an NWB file
    and stack them into one labelled array.

    Each series is reshaped to (1, e, e, seq_len) and a class-label plane is
    appended on the last axis: 0 for 'acquisition', 1 for 'stimulus'. Series
    that cannot be reshaped are reported with a printed message and skipped.

    :param nwb_file_path: Path to the .nwb file
    :return: float32 array of shape (N, e, e, seq_len + 1), one row per series
    :raises ValueError: if no series could be reshaped; numpy also raises
        ValueError when the kept series do not share the same grid size and
        length, because they cannot be concatenated
    """
    samples = []

    with NWBHDF5IO(nwb_file_path, 'r') as io:
        nwbfile = io.read()

        for group_name in ('acquisition', 'stimulus'):
            if not hasattr(nwbfile, group_name):
                continue
            label_value = 0.0 if group_name == 'acquisition' else 1.0

            for name, timeseries in getattr(nwbfile, group_name).items():
                data = np.array(timeseries.data[:])
                print(f"Data shape for {name}: {data.shape}")  # Debugging line

                try:
                    grid = _to_electrode_grid(data)
                except ValueError as e:
                    print(f"Error reshaping data for {name}: {e}")
                    continue

                # One label value per electrode pair, appended after the samples.
                class_label = np.full(grid.shape[:3] + (1,), label_value)
                samples.append(np.concatenate((grid, class_label), axis=-1))

        if not samples:
            raise ValueError("No valid data found in NWB file.")
        dataset = np.concatenate(samples, axis=0)

    return dataset.astype(np.float32)


# Convert dataset to required 7D format
def convert_to_7d_format(dataset: np.array):
    """
    Re-index an 8x8 electrode-pair grid onto a 4x3 x 4x3 layout.

    Each of the 8 indices on the two grid axes is mapped to a (row, col)
    position through ``map8``; positions not listed in ``map8`` stay zero.

    :param dataset: The input dataset of shape (N, 1, 8, 8, M). A 4-D array of
        shape (N, 8, 8, M) is also accepted and gets the singleton axis
        inserted, since that is what process_nwb_data returns.
    :return: Converted float64 dataset of shape (N, 1, 4, 3, 4, 3, M)
    :raises ValueError: if the input is neither 4-D nor 5-D
    """
    dataset = np.asarray(dataset)
    if dataset.ndim == 4:
        dataset = dataset[:, np.newaxis]
    if dataset.ndim != 5:
        raise ValueError(f"Expected a 4-D or 5-D dataset, got shape {dataset.shape}")

    map8 = np.array([[1, 0], [0, 1], [1, 1], [1, 2], [2, 2], [2, 1], [3, 1], [2, 0]])
    N, M = dataset.shape[0], dataset.shape[-1]
    converted_dataset = np.zeros((N, 1, 4, 3, 4, 3, M))

    for i in range(8):
        for j in range(8):
            converted_dataset[:, 0, map8[i, 0], map8[i, 1], map8[j, 0], map8[j, 1], :] = dataset[:, 0, i, j, :]

    return converted_dataset

# Main processing function
def process_data_from_nwb(nwb_file_path: str):
    """
    Run the two-stage pipeline: load the NWB file, then remap it spatially.

    The shape of the array is printed after each stage.

    :param nwb_file_path: Path to the .nwb file
    :return: the array produced by convert_to_7d_format
    """
    raw = process_nwb_data(nwb_file_path)
    print("Dataset shape after initial processing:", raw.shape)

    remapped = convert_to_7d_format(raw)
    print("Converted dataset shape:", remapped.shape)
    return remapped

# Path to the .nwb file used by this cell
nwb_file_path = 'data1.nwb'

# Run the load-and-convert pipeline and keep the result at module level.
# NOTE(review): the path literal is repeated here rather than passing nwb_file_path.
final_dataset = process_data_from_nwb('data1.nwb')

### Primary

In [8]:
from pynwb import NWBHDF5IO
import numpy as np
import os
import glob
import torch as torch

# Define the function to load raw data from .npz files
def load_raw_data(filename: str):
    """
    Return the array stored under the 'data' key of an .npz archive.

    The archive is closed again before the function returns.
    """
    with np.load(filename) as archive:
        return archive['data']
    
# Function to extract data from a TimeSeries object
def extract_data(time_series):
    """
    Copy the contents of a TimeSeries-like object into plain Python/NumPy values.

    :param time_series: object exposing ``data``, ``timestamps``, ``unit`` and
        ``comments`` attributes
    :return: dict with keys 'data' (numpy array), 'timestamps' (numpy array, or
        None when the series has no timestamps), 'unit' and 'comments'
    """
    data = np.array(time_series.data[:])

    # Compare against None explicitly: the truth value of an array-like is not a
    # presence test (a multi-element numpy array raises, an empty one is falsy).
    timestamps = time_series.timestamps
    if timestamps is not None:
        timestamps = np.array(timestamps[:])

    return {
        'data': data,
        'timestamps': timestamps,
        'unit': time_series.unit,
        'comments': time_series.comments,
    }

# Function to extract and save data from NWB file as NPZ
def extract_and_save_npz(nwb_file_path, output_dir, offset=5):
    """
    Write every column of every acquisition/stimulus series to .npz files.

    For column ``i`` of a series, the 1-D slice ``data[:, i]`` is saved under the
    key 'data' to ``exp_{i}_{group_type}_{offset}.npz`` for group_type 0 and 1,
    so both files receive identical contents.

    NOTE(review): the file name does not include the group or the series name,
    so each series overwrites the files written for the previous one.

    :param nwb_file_path: path to the .nwb file to read
    :param output_dir: directory receiving the .npz files; it must already exist
    :param offset: value embedded in the file names
    """
    with NWBHDF5IO(nwb_file_path, 'r') as io:
        nwbfile = io.read()

        for group_name in ('acquisition', 'stimulus'):
            if not hasattr(nwbfile, group_name):
                continue

            for _series_name, series in getattr(nwbfile, group_name).items():
                samples = extract_data(series)['data']

                for column in range(samples.shape[1]):
                    for group_type in (0, 1):
                        target = os.path.join(
                            output_dir, f'exp_{column}_{group_type}_{offset}.npz'
                        )
                        np.savez_compressed(target, data=samples[:, column])

    print("Data extraction and saving completed.")

# Path to the .nwb file to export (edit to point at your own recording)
nwb_file_path = 'data1.nwb'

# Directory that receives the exported .npz files (relative to the working directory)
output_dir = 'ready_for_shaping'  # Adjust this path as needed

# Create the output directory if it is missing; an existing one is left alone
os.makedirs(output_dir, exist_ok=True)

# Export the NWB contents as .npz files, using the default offset of the function
extract_and_save_npz(nwb_file_path, output_dir)

def create_array(dirPath: str, offset: int, listFiles: list):
    """
    Assemble the per-experiment .npz files into one labelled array.

    INPUT:
    dirPath: [str] path to directory
    offset: [int] value used in the file names (exp_{index}_{group}_{offset}.npz)
    listFiles: [list] files found in dirPath; only its length is used, half of it
               being taken as the number of experiments
    OUTPUT:
    dataset: [np.array] float32 array of shape
             (stims_per_file * n_experiments * 2, n_electrodes, n_electrodes, seq_len + 1);
             the last slot of the final axis holds the group type as label.
             Rows belonging to a missing file are left at zero.
    """
    n_experiments = len(listFiles) // 2
    stims_per_file, _, n_electrodes, _, seq_len = recording_parameters(dirPath, offset)

    # One contiguous run of stims_per_file rows per (experiment, group) pair.
    out = np.zeros(
        (stims_per_file * n_experiments * 2, n_electrodes, n_electrodes, seq_len + 1)
    )

    for exp_index in range(n_experiments):
        print(f'experiment number: {exp_index}')
        for group_type in (0, 1):
            path = f'{dirPath}/exp_{exp_index}_{group_type}_{offset}.npz'
            if not os.path.exists(path):
                print(f"File not found: {path}")
                continue

            first_row = (exp_index * 2 + group_type) * stims_per_file
            rows = slice(first_row, first_row + stims_per_file)
            out[rows, :, :, :seq_len] = load_raw_data(path)
            out[rows, :, :, seq_len] = group_type

    return out.astype(np.float32)

def recording_parameters(dirPath: str, offset: int):
    """
    Derive the recording parameters from one .npz file in dirPath.

    The file inspected is the first .npz name in sorted order. os.listdir
    returns names in arbitrary order, so sorting keeps the choice (and therefore
    the returned parameters) stable between runs.

    INPUT:
    dirPath: [str] path to directory containing .npz files
    offset: [int] time delay before recording after a stimulus (unused in this function)
    OUTPUT:
    total_nbr_stim_per_file: [int] length of the first axis of the inspected array
    nbr_stim_per_electrode: [int] fixed to 1
    nbr_electrodes: [int] fixed to 8
    nbr_neurospheres: [int] fixed to 1
    seq_len: [int] length of the second axis for 2-D data, 1 for 1-D data
    RAISES:
    FileNotFoundError: no .npz file exists in dirPath
    ValueError: the loaded object is not a numpy array, or is not 1-D or 2-D
    """
    npz_files = sorted(f for f in os.listdir(dirPath) if f.endswith('.npz'))
    if not npz_files:
        raise FileNotFoundError("No .npz files found in the directory")

    file_data = load_raw_data(os.path.join(dirPath, npz_files[0]))

    if not isinstance(file_data, np.ndarray):
        raise ValueError("Loaded data is not a numpy array.")

    if file_data.ndim == 1:
        # Every element counts as its own trial with a single time point.
        nbr_trials = len(file_data)
        time_points = 1
    elif file_data.ndim == 2:
        nbr_trials, time_points = file_data.shape
    else:
        raise ValueError(f"Unexpected number of dimensions in the data: {file_data.ndim}")

    # Fixed assumptions: 8 electrodes, 1 neurosphere, 1 stimulation per electrode.
    nbr_stim_per_electrode = 1
    nbr_electrodes = 8
    nbr_neurospheres = 1
    total_nbr_stim_per_file = nbr_trials
    seq_len = time_points

    return total_nbr_stim_per_file, nbr_stim_per_electrode, nbr_electrodes, nbr_neurospheres, seq_len

def convertFlatRaw4x3x4x3(raw: np.array):
    """
    Scatter a (N, 1, 8, 8, K) array into a zero-filled (N, 1, 4, 3, 4, 3, K) array.

    Index i on each of the two 8-long axes is sent to the (row, col) pair given
    by row i of the layout table; cells the table never addresses stay zero.
    """
    layout = np.array([[1, 0], [0, 1], [1, 1], [1, 2], [2, 2], [2, 1], [3, 1], [2, 0]])
    N, _, _, _, last_dim = raw.shape
    array4x3 = np.zeros((N, 1, 4, 3, 4, 3, last_dim))

    grid_rows, grid_cols = layout[:, 0], layout[:, 1]
    source = np.arange(8)

    # Broadcasting the (8, 1) and (1, 8) index arrays enumerates every (i, j)
    # pair at once, replacing the explicit double loop over the 8x8 grid.
    array4x3[
        :, 0,
        grid_rows[:, None], grid_cols[:, None],
        grid_rows[None, :], grid_cols[None, :],
        :,
    ] = raw[:, 0, source[:, None], source[None, :], :]

    return array4x3

def getListFiles(dirPath):
    """
    Return the .npz files directly inside dirPath, oldest modification time first.

    Directory entries matching the pattern that are not regular files are dropped.
    """
    matches = glob.glob(f'{dirPath}/*.npz')
    regular_files = [path for path in matches if os.path.isfile(path)]
    regular_files.sort(key=os.path.getmtime)
    return regular_files
        
def read_in_data(dirPath):
    """
    Load all .npz files from dirPath and return them in the 4x3 x 4x3 layout.

    INPUT:
    dirPath: [str] directory containing the exp_*.npz files
    OUTPUT:
    dataset: [np.array] array of shape (N, 1, 4, 3, 4, 3, seq_len + 1)
    """
    offset_trigger_ms = 5
    listFiles = getListFiles(dirPath)
    dataset = create_array(dirPath, offset_trigger_ms, listFiles)

    # create_array returns a 4-D array, while convertFlatRaw4x3x4x3 unpacks a
    # 5-D shape, so the singleton axis has to be inserted before converting.
    dataset = np.expand_dims(dataset, 1)

    print("converting spatial position")
    dataset = convertFlatRaw4x3x4x3(dataset)
    return dataset

# Create the output directory if it is missing (repeats the call made earlier in this cell)
os.makedirs(output_dir, exist_ok=True)

# Export the NWB contents as .npz files again, this time passing the offset explicitly
extract_and_save_npz(nwb_file_path, output_dir, offset=5)  # same value as the function default

# Build the dataset from the exported files
dataset = read_in_data(output_dir)
print("Dataset shape before converting:", dataset.shape)  # original note expected a 5D array here; TODO confirm
# NOTE(review): read_in_data itself calls convertFlatRaw4x3x4x3 before returning,
# so this second conversion looks redundant; confirm which stage should own it.
converted_dataset = convertFlatRaw4x3x4x3(dataset)
print("Converted dataset shape:", converted_dataset.shape)  # expected to be the 7D array

Data extraction and saving completed.
Data extraction and saving completed.
experiment number: 0


ValueError: could not broadcast input array from shape (65280,) into shape (65280,8,8,1)

# Delete all files in the ready_for_shaping directory in case of errors

In [9]:
import os

# Directory whose files are deleted below.
# NOTE(review): this is a machine-specific absolute path, not the relative
# 'ready_for_shaping' output_dir used by the export cell; confirm both refer
# to the same directory before running.
directory_path = '/home/vincent/AAA_projects/UnlimitedResearchCooperative/Synthetic_Intelligence_Labs/human-cortical-organoid-signal-analysis/IntanToNWBtoNPZ/ready_for_shaping'

# Names of all entries directly inside the directory
files = os.listdir(directory_path)

# Remove every regular file; sub-directories are left untouched
for file in files:
    file_path = os.path.join(directory_path, file)
    if os.path.isfile(file_path):
        os.remove(file_path)


# Print contents

In [2]:
from pynwb import NWBHDF5IO

# Path to the .nwb file inspected by this cell (edit to point at your own recording)
nwb_file_path = 'data1.nwb'

# Function to print details of a TimeSeries object
def print_timeseries_details(name, timeseries):
    """
    Print the comments, description, unit and array shapes of one series.

    :param name: label printed as the heading of the section
    :param timeseries: object exposing ``comments``, ``description``, ``unit``,
        ``data`` and ``timestamps`` attributes
    """
    print(f"\n{name}:")
    print(f" - Comments: {timeseries.comments}")
    print(f" - Description: {timeseries.description}")
    print(f" - Unit: {timeseries.unit}")
    print(f" - Data shape: {timeseries.data.shape}")

    # Compare against None explicitly: the truth value of an array-like is not a
    # presence test (a multi-element numpy array raises, an empty one is falsy).
    timestamps = timeseries.timestamps
    timestamps_shape = timestamps.shape if timestamps is not None else 'No timestamps'
    print(f" - Timestamps shape: {timestamps_shape}")

# Open the .nwb file read-only; everything below runs while the file is still open
with NWBHDF5IO(nwb_file_path, 'r') as io:
    nwbfile = io.read()

    # Print the details of every series in the acquisition group
    print("Acquisition Groups:")
    acquisition = nwbfile.acquisition
    for name, timeseries in acquisition.items():
        print_timeseries_details(name, timeseries)

    # Print the details of every series in the stimulus group, when the file object has one
    if hasattr(nwbfile, 'stimulus'):
        print("\nStimulus Groups:")
        for name, timeseries in nwbfile.stimulus.items():
            print_timeseries_details(name, timeseries)


Acquisition Groups:

ElectricalSeries:
 - Comments: voltage data recorded from the amplifiers of an Intan Technologies chip
 - Description: voltage data recorded from the amplifiers of an Intan Technologies chip
 - Unit: volts
 - Data shape: (65280, 64)
 - Timestamps shape: (65280,)

Stimulus Groups:

TimeSeries_amp_settle:
 - Comments: amplifier settle activity of an Intan Technologies chip
 - Description: amplifier settle activity of an Intan Technologies chip
 - Unit: digital event
 - Data shape: (65280, 64)
 - Timestamps shape: (65280,)

TimeSeries_charge_recovery:
 - Comments: charge recovery activity of an Intan Technologies chip
 - Description: charge recovery activity of an Intan Technologies chip
 - Unit: digital event
 - Data shape: (65280, 64)
 - Timestamps shape: (65280,)

TimeSeries_compliance_limit:
 - Comments: compliance limit activity of an Intan Technologies chip
 - Description: compliance limit activity of an Intan Technologies chip
 - Unit: digital event
 - Data sha

NameError: name 'extract_and_save_npz' is not defined