In [1]:
import mne
from netCDF4 import Dataset
import json
import dask.array as da
from dask import delayed
import os, logging
from services.utils.timing import TimingContext
import pyarrow as pa
from services.delta_lake import Duck_Lake
from prefect import flow, task
from prefect_dask import DaskTaskRunner
from dataclasses import dataclass, asdict
from typing import List

# Root logger: install the default handler, then let INFO records through.
logging.basicConfig()
logging.getLogger().setLevel(logging.INFO)

# Lake handle used by the flow's write step.
ducklake = Duck_Lake()

# Input file and output directory both live under the storage root that is
# injected through the environment.
_storage_root = os.environ["CONTAINER_FILE_STORAGE_PATH"]
my_edf_file_path = os.path.join(
    _storage_root, "test33_HypoactiveHeidi_05_DAY1_PROCESSED.edf"
)
my_parquet_output_dir = os.path.join(_storage_root, "test")

# Channel names the flow skips when it extracts signals.
misc_channels = [
    "pitch", "roll", "heading",
    "GyrZ", "MagZ",
    "Tag_On", "Depth",
    "MagX", "MagY",
]

@dataclass
class SignalSchema:
    """One channel read from an EDF file, as produced by ``read_signal``.

    The field names match the column names of the Arrow schema defined in this
    file; ``process_edf`` converts each instance to a dict and builds a
    ``pa.Table`` from those dicts.
    """

    # Name of the channel this record was read from.
    signal_name: str
    # Sampling frequency, copied from raw.info["sfreq"].
    frequency: float
    # Recording start, derived from raw.info["meas_date"].
    # NOTE(review): annotated as float, but the Arrow column this feeds is
    # timestamp('us', tz="UTC") and the stored value is not a plain number --
    # confirm the intended type and tighten the annotation.
    start_time: float
    # Samples of the channel: read_signal stores the first row of get_data().
    # NOTE(review): presumably a NumPy array rather than a Python list -- confirm.
    data: List[float]

@task
def read_signal(
    edf_file_path,
    signal_name,
):
    """Read a single channel from an EDF file into a ``SignalSchema``.

    The file is opened with ``preload=False`` and narrowed to ``signal_name``
    before the samples are fetched.

    Args:
        edf_file_path: Path of the EDF file to open.
        signal_name: Name of the channel to extract.

    Returns:
        SignalSchema holding the channel name, ``raw.info["sfreq"]``,
        ``raw.info["meas_date"]`` and the first row of ``get_data()``.
    """
    raw = mne.io.read_raw_edf(edf_file_path, preload=False)
    samples = raw.pick(signal_name).get_data()

    return SignalSchema(
        signal_name=signal_name,
        frequency=raw.info["sfreq"],
        # Pass the datetime through untouched. Converting it with .timestamp()
        # yields float epoch *seconds*; forcing that number into a
        # timestamp('us') scalar makes Arrow read it as a microsecond count,
        # which lands the value just after 1970-01-01. A datetime is converted
        # by Arrow with the correct unit, and a missing meas_date (None) simply
        # becomes a null instead of raising AttributeError.
        # NOTE(review): MNE documents meas_date as a UTC datetime or None --
        # confirm for the MNE version in use.
        start_time=raw.info["meas_date"],
        # get_data() is indexed at row 0: exactly one channel was picked.
        data=samples[0],
    )


@flow
def process_edf(
    edf_file_path: str,
    schema: pa.Schema,
    misc_channels: List[str] = misc_channels,
):
    """Read every non-misc channel of an EDF file and append it to the lake.

    Each remaining channel is read by one ``read_signal`` task call; the
    results are assembled into a single Arrow table (one row per channel) and
    handed to ``ducklake.write_to_delta``.

    Args:
        edf_file_path: Path of the EDF file to process.
        schema: Arrow schema used both to build the table and for the write.
        misc_channels: Channel names to leave out. Defaults to the
            module-level ``misc_channels`` list; it is only read, never
            modified.
    """
    with TimingContext("EDF Read"):
        # Only the channel names are needed from this handle; every
        # read_signal call opens the file again on its own.
        raw = mne.io.read_raw_edf(edf_file_path, preload=False)

        excluded = set(misc_channels)
        channels_to_use = [ch for ch in raw.ch_names if ch not in excluded]

        rows = []
        for signal_name in channels_to_use:
            signal = read_signal(edf_file_path, signal_name)
            # Shallow copy of the instance fields. dataclasses.asdict() would
            # deep-copy the sample array of every channel, doubling the memory
            # held for what is by far the largest field.
            rows.append(dict(vars(signal)))

        table = pa.Table.from_pylist(rows, schema=schema)

        ducklake.write_to_delta(
            data=table,
            schema=schema,
            mode="append",
            partition_by=['signal_name'],
            name="test",
            description="test"
        )


# Arrow layout of the rows the flow writes: one row per channel.
schema = pa.schema(
    [
        ("signal_name", pa.string()),
        ("frequency", pa.float64()),
        ("start_time", pa.timestamp('us', tz="UTC")),
        ("data", pa.list_(pa.float64())),
    ]
)

# Run the flow on the configured recording inside the "Main" timing context.
with TimingContext("Main"):
    process_edf(edf_file_path=my_edf_file_path, schema=schema)


  warn(


  warn(


Extracting EDF parameters from /data/files/test33_HypoactiveHeidi_05_DAY1_PROCESSED.edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...


  raw = mne.io.read_raw_edf(edf_file_path, preload=False)
  value = np.nanmax([_prefilter_float(x) for x in values])
  raw = mne.io.read_raw_edf(edf_file_path, preload=False)
  value = np.nanmin([_prefilter_float(x) for x in values])
  raw.set_channel_types(channel_types)


Extracting EDF parameters from /data/files/test33_HypoactiveHeidi_05_DAY1_PROCESSED.edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...


  value = np.nanmax([_prefilter_float(x) for x in values])
  value = np.nanmin([_prefilter_float(x) for x in values])
01:21:43.405 | [36mINFO[0m    | Task run 'read_signal-0' - Finished in state [32mCompleted[0m()


Extracting EDF parameters from /data/files/test33_HypoactiveHeidi_05_DAY1_PROCESSED.edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...


  value = np.nanmax([_prefilter_float(x) for x in values])
  value = np.nanmin([_prefilter_float(x) for x in values])
01:21:57.901 | [36mINFO[0m    | Task run 'read_signal-1' - Finished in state [32mCompleted[0m()


Extracting EDF parameters from /data/files/test33_HypoactiveHeidi_05_DAY1_PROCESSED.edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...


  value = np.nanmax([_prefilter_float(x) for x in values])
  value = np.nanmin([_prefilter_float(x) for x in values])
01:22:14.331 | [36mINFO[0m    | Task run 'read_signal-2' - Finished in state [32mCompleted[0m()


Extracting EDF parameters from /data/files/test33_HypoactiveHeidi_05_DAY1_PROCESSED.edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...


  value = np.nanmax([_prefilter_float(x) for x in values])
  value = np.nanmin([_prefilter_float(x) for x in values])
01:22:28.828 | [36mINFO[0m    | Task run 'read_signal-3' - Finished in state [32mCompleted[0m()


Extracting EDF parameters from /data/files/test33_HypoactiveHeidi_05_DAY1_PROCESSED.edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...


01:22:42.551 | [36mINFO[0m    | Task run 'read_signal-4' - Finished in state [32mCompleted[0m()


Extracting EDF parameters from /data/files/test33_HypoactiveHeidi_05_DAY1_PROCESSED.edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...


01:22:57.989 | [36mINFO[0m    | Task run 'read_signal-5' - Finished in state [32mCompleted[0m()


Extracting EDF parameters from /data/files/test33_HypoactiveHeidi_05_DAY1_PROCESSED.edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...


01:23:11.265 | [36mINFO[0m    | Task run 'read_signal-6' - Finished in state [32mCompleted[0m()


Extracting EDF parameters from /data/files/test33_HypoactiveHeidi_05_DAY1_PROCESSED.edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...


01:23:22.171 | [36mINFO[0m    | Task run 'read_signal-7' - Finished in state [32mCompleted[0m()


Extracting EDF parameters from /data/files/test33_HypoactiveHeidi_05_DAY1_PROCESSED.edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...


01:23:31.400 | [36mINFO[0m    | Task run 'read_signal-8' - Finished in state [32mCompleted[0m()


: 

: 

: 