In [2]:
import mne
from netCDF4 import Dataset
import json
import dask.array as da
from dask import delayed
import os
from services.utils.timing import TimingContext
print("Imports complete")

# Input EDF recording and an output directory, both located under the
# directory named by the LOCAL_DATA_PATH environment variable. A missing
# variable raises KeyError right here, at module load.
my_edf_file_path, my_parquet_output_dir = (
    os.path.join(os.environ['LOCAL_DATA_PATH'], leaf_name)
    for leaf_name in (
        'test33_HypoactiveHeidi_05_DAY1_PROCESSED.edf',
        "test",
    )
)


def read_signal(edf_file_path, channel_name):
    """Read one channel from an EDF file.

    The file is opened lazily (``preload=False``) and restricted to
    ``channel_name`` before any samples are fetched.

    Args:
        edf_file_path: Path of the EDF file to open.
        channel_name: Name of the channel to pick.

    Returns:
        A ``(samples, sfreq)`` tuple: the first row of the picked data and
        the ``'sfreq'`` entry of the recording's info.
    """
    recording = mne.io.read_raw_edf(edf_file_path, preload=False)
    picked = recording.pick(channel_name)
    samples = picked.get_data()
    # Row 0 is the only picked channel; this yields the 1D sample array.
    first_row = samples[0]
    return first_row, recording.info['sfreq']

@delayed
def delayed_read_signal(edf_file_path, channel_name):
    """Dask-delayed wrapper that forwards both arguments to ``read_signal``."""
    result = read_signal(edf_file_path, channel_name)
    return result

def convert_edf_to_netcdf(edf_file_path, netcdf_file_path):
    """Write every channel of an EDF recording into a NetCDF4 file.

    Each channel becomes its own ``'f4'`` variable named ``signal_<i>`` on its
    own dimension ``time_<i>``, where ``<i>`` is the channel's position in the
    EDF channel list. Every variable carries ``label`` and ``sampling_rate``
    attributes. A file-level ``metadata`` attribute holds a JSON object with
    ``num_signals`` and ``signal_labels``.

    A channel that fails to read is reported and skipped (best effort). The
    file-level metadata is built from the full channel list, so skipped
    channels are still counted and listed there.

    Progress is reported with ``print``.

    Args:
        edf_file_path: Path of the EDF file to read.
        netcdf_file_path: Path of the NetCDF file to create; it is opened in
            ``'w'`` mode.
    """
    print("Starting to read EDF")
    raw = mne.io.read_raw_edf(edf_file_path, preload=False)

    # Extract signal labels and other metadata
    signal_labels = raw.ch_names
    num_signals = len(signal_labels)

    print("EDF read")

    print("Starting to create NetCDF")
    # The context manager closes the file even if writing a channel raises,
    # so a failed run never leaves an open handle behind.
    with Dataset(netcdf_file_path, 'w', format='NETCDF4') as nc_file:
        # Build one lazy read task per channel. Each task is computed on its
        # own inside the loop below, so the reads run one after another and
        # only one channel's samples are materialised at a time.
        signals = [
            delayed_read_signal(edf_file_path, label)
            for label in signal_labels
        ]

        # Process each signal independently
        for i, (label, signal_delayed) in enumerate(zip(signal_labels, signals)):
            print(f"Creating signal {label}")
            try:
                signal_data, sampling_rate = da.compute(signal_delayed)[0]
            except Exception as exc:
                # Best effort: keep converting the remaining channels, but
                # say why this one was skipped instead of hiding the cause.
                print(f"Error reading signal {label}: {exc}")
                continue

            var_name = f'signal_{i}'
            dim_name = f'time_{i}'
            # Each channel gets its own dimension, sized to its own length.
            nc_file.createDimension(dim_name, len(signal_data))
            var = nc_file.createVariable(var_name, 'f4', (dim_name,))
            var[:] = signal_data  # Ensure the data is 1D
            var.setncattr('label', label)
            var.setncattr('sampling_rate', sampling_rate)
            print(f"Signal {label} created")

        # Add metadata as a JSON-encoded attribute
        metadata = {
            'num_signals': num_signals,
            'signal_labels': signal_labels,
        }
        nc_file.setncattr('metadata', json.dumps(metadata))

    print("NetCDF created")

# Run the conversion inside a TimingContext labelled "Main"
# (presumably reports elapsed time; confirm in services.utils.timing).
with TimingContext("Main"):
    convert_edf_to_netcdf(
        my_edf_file_path,
        "./day_1_data.nc",
    )


ModuleNotFoundError: No module named 'pyedflib'