In [1]:
from typing import List
import numpy as np
import pandas as pd
from hdmf.backends.hdf5 import H5DataIO
from pynwb import NWBFile, TimeSeries
from pynwb.file import Subject
from pynwb.ecephys import ElectricalSeries, ElectrodeGroup, LFP
from pynwb.behavior import BehavioralEvents
import nixio
import regex as re
from nwbwidgets import nwb2widget
from usz_neuro_conversion.common import (
    SessionContext,
    NixContext,
    get_metadata_row,
    read_nix,
    get_date,
    write_nwb,
    read_nwb,
    standardize_sex,
)

In [2]:
def convert_nix_to_nwb(subject: int, session: int) -> SessionContext:
    """Convert one NIX recording session into an in-memory NWB file.

    Builds the session context and then runs every writer against it in a
    fixed order. The electrode group returned by the electrode writer is
    handed to the waveform writer at the end.

    Args:
        subject: Subject number of the session to convert.
        session: Session number of the session to convert.

    Returns:
        The session context whose NWB file has been populated.
    """
    ctx = create_context(subject, session)
    write_subject(ctx)
    ieeg_electrode_group = write_ieeg_electrodes(ctx)
    # These writers only need the context; keep them in this order.
    for write_step in (
        write_ieeg_measurements,
        write_trial_data,
        write_behavior,
        write_events,
    ):
        write_step(ctx)
    write_waveforms(ctx, ieeg_electrode_group)
    return ctx

In [3]:
def create_context(subject: int, session: int) -> SessionContext:
    """Open the NIX file of a session and create the matching empty NWB file.

    Session-level metadata (lab, related publications, experiment
    description) is read from the NIX file; the remaining fields are
    hard-coded below.

    Args:
        subject: Subject number.
        session: Session number.

    Returns:
        The session context produced by ``NixContext.to_session_context`` for
        the opened NIX file and the new NWB file.
    """
    # Single source for the project name used by both the NIX lookup and the
    # NWB identifier.
    project = "Human_Amygdala_MUA_sEEG_FearVideo"
    nix_context = NixContext(subject, session, project=project)
    nix = read_nix(nix_context)
    general = nix.sections["General"]
    nwb = NWBFile(
        session_description="Running experiment as described in the experiment description",
        identifier=f"{project}_subject{subject:02}_session{session:02}",
        session_start_time=get_date(nix_context),
        lab=general.props["Recording location"].values[0],
        institution="Universitätsspital Zürich, 8091 Zurich, Switzerland",  # Broken UTF-8 in file
        related_publications=_get_related_publications(nix),
        experimenter="Fedele, Tommaso",  # TODO is this right?
        experiment_description=_get_experiment(nix),
        # NOTE(review): "Memory", "Sternberg" and "Verbal" look carried over
        # from a different task than the fear-video project named above --
        # confirm before publishing.
        keywords=[
            "EEG",
            "iEEG",
            "LFP",
            "Epilepsy",
            "Memory",
            "Sternberg",
            "Verbal",
        ],  # TODO
    )
    return nix_context.to_session_context(nix, nwb)

In [4]:
def _get_experiment(nix: nixio.File) -> str:
    task = nix.sections["Task"].props
    task_name = task["Task name"].values[0]
    # Broken UTF-8 in file
    task_desc = "We used a paradigm comprising of a series of dynamic videos, which has been already validated in previous clinical investigations (Schacher et al., 2006). The videos were all silent and consisted of dynamic fearful faces and dynamic neutral landscapes, presented in an alternating order, in a block design. The paradigm included eight blocks of 75 short video clips (2–3 s) of fearful faces (aversive condition) and nine blocks of 72 short video clips (2–3 s) of neutral landscapes (neutral condition). Each block lasted 24 s in total, and contained short video clips without any intermission between consecutive videos. Video clips of fearful faces were extracted from thriller and horror movies and contained faces of actors showing fear, without being violent or aggressive. Video clips of neutral landscapes were chosen as a control condition, and were matched to the duration of the fearful faces videos (2–3 s). They included domestic landscapes which are posited to have a low emotional content and visual properties comparable to the emotional videos (Schacher et al., 2006). All videos were only included once. A panel of psychologists had evaluated the stimuli to ensure that they are suitable for the patients and that they do not include any episodes of violence or aggression (Schacher et al., 2006). In particular, we started with a set of 120 videos of fearful faces and reduced that to 72, by excluding videos where: (a) the actor’s face was not continuously visible (b) fear was not clearly recognized on the actor’s face (c) no other emotion was displayed (e.g. anger/surprise) and (d) the display of fear was intense. During electrophysiological recordings the videos were presented to the patients via a laptop screen, while during the fMRI scan they were presented through a tilted overhead mirror. In both cases, patients were instructed to pay attention to the videos and focus on the eyes of the actors during the clips containing faces. 
For the electrophysiological recordings, blocks were separated by a repeated baseline of 2 s taken from a neutral condition."
    task_url = task["Task URL"].values[0]
    return (
        f"Task Name: {task_name}\nTask Description: {task_desc}\nTask URL: {task_url}"
    )

In [5]:
def _get_related_publications(nix: nixio.File) -> List[str]:
    related_publications = (
        nix.sections["General"].sections["Related publications"].props
    )
    dois = related_publications["Publication DOI"].values
    return [doi.strip() for doi in dois]

In [6]:
def write_subject(ctx: SessionContext):
    """Attach the subject record to the NWB file of the context.

    Age and sex come from the metadata row of the session; the age is
    truncated to an integer and written in the form ``P<years>Y``.

    Args:
        ctx: Session context whose ``nwb.subject`` is set.
    """
    row = get_metadata_row(ctx.to_nix_context())
    age, sex = row["Age"], row["Sex"]
    subject = Subject(
        subject_id=f"{ctx.subject:02}",
        age=f"P{int(age)}Y",
        description=_get_subject_description(ctx),
        species="Homo sapiens",
        sex=standardize_sex(sex),
    )
    ctx.nwb.subject = subject

In [7]:
def _get_subject_description(ctx: SessionContext) -> str:
    """Compose the free-text subject description.

    Handedness, pathology and the seizure-onset-zone electrodes come from
    the metadata row of the session; the depth electrodes come from the
    "Subject" section of the NIX file.

    Args:
        ctx: Session context providing the NIX file and metadata lookup.

    Returns:
        Four "<label>: <value>" lines joined by newlines.
    """
    metadata_row = get_metadata_row(ctx.to_nix_context())
    subject_props = ctx.nix.sections["Subject"].props
    fields = (
        ("Handedness", metadata_row["Handedness"]),
        ("Pathology", metadata_row["Pathology"]),
        ("Depth electrodes", subject_props["Depth electrodes"].values[0]),
        (
            "Electrodes in seizure onset zone (SOZ)",
            metadata_row["Electrodes in seizure onset zone (SOZ)"],
        ),
    )
    return "\n".join(f"{label}: {value}" for label, value in fields)

In [13]:
def _get_session_data(ctx: SessionContext) -> nixio.Block:
    return ctx.nix.blocks[f"Data_Subject_{ctx.subject:02}_Session_{ctx.session:02}"]

In [15]:
def write_ieeg_electrodes(ctx: SessionContext) -> ElectrodeGroup:
    """Register the recording device, the iEEG electrode group and its electrodes.

    One electrode is added to the NWB file per row of the electrode table
    built from the NIX file.

    Args:
        ctx: Session context whose NWB file is extended.

    Returns:
        The electrode group that all iEEG electrodes were added to.
    """
    nwb_file = ctx.nwb

    recording_system = nwb_file.create_device(
        name="ATLAS Neurophysiology System",
        manufacturer="Neuralynx, Inc.",
        description="iEEG recording system",
    )

    # A single group holds every iEEG electrode of the session.
    ieeg_group = nwb_file.create_electrode_group(
        name="ieeg",
        description="iEEG electrodes",
        device=recording_system,
        location="Intracranial",
    )

    def register_electrode(row):
        return _add_row_to_ieeg_electrodes(nwb_file, ieeg_group, row)

    # apply() is used only for its side effect of adding one electrode per row.
    _get_ieeg_electrodes(ctx).apply(register_electrode, axis=1)
    return ieeg_group

In [16]:
def _get_ieeg_electrodes(ctx: SessionContext) -> pd.DataFrame:
    """Build a table with one row per iEEG electrode.

    Args:
        ctx: Session context providing the NIX file.

    Returns:
        A DataFrame with the columns "index", "label", "x", "y" and "z";
        the coordinates are copied from the NIX locations array.
    """
    electrode_labels = _get_ieeg_electrode_labels(ctx)
    location_array = _get_ieeg_electrode_locations(ctx)
    # Pre-allocate a buffer of the same shape; read_direct fills it in place.
    coordinates = np.empty(location_array.shape)
    location_array.read_direct(coordinates)
    table = pd.DataFrame(coordinates, columns=["x", "y", "z"])
    table.insert(0, "label", electrode_labels)
    # reset_index() turns the positional index into an explicit "index" column.
    return table.reset_index()

In [17]:
def _get_ieeg_electrode_labels(ctx: SessionContext) -> List[str]:
    """Return the electrode labels stored with the session.

    The labels are taken from the first dimension of the
    "iEEG_Electrode_Manual_Entry" data array in the
    "iEEG electrode information" group.
    """
    electrode_info = _get_session_data(ctx).groups["iEEG electrode information"]
    manual_entry = electrode_info.data_arrays["iEEG_Electrode_Manual_Entry"]
    return manual_entry.dimensions[0].labels

In [18]:
def _get_ieeg_electrode_locations(ctx: SessionContext) -> nixio.DataArray:
    """Return the "iEEG_Electrode_MNI_Coordinates" data array of the session.

    The array is looked up in the "iEEG electrode information" group and
    returned as-is (nothing is read from it here).
    """
    electrode_info = _get_session_data(ctx).groups["iEEG electrode information"]
    return electrode_info.data_arrays["iEEG_Electrode_MNI_Coordinates"]

In [19]:
def _add_row_to_ieeg_electrodes(
        nwb: NWBFile, electrode_group: ElectrodeGroup, row: pd.Series
):
    # Got MNI map: +X is right, +Y is anterior, +Z is superior according to <https://kathleenhupfeld.com/mni-template-coordinate-systems/>
    # But need NWB: +X is posterior, +Y is inferior, +Z is right according to <https://pynwb.readthedocs.io/en/stable/pynwb.file.html#pynwb.file.NWBFile.add_electrode>
    # TODO: Got "Inside/Outside SOZ as well"
    nwb.add_electrode(
        group=electrode_group,
        location=row["label"],
        reference="Common intracranial reference",
        x=-row["y"],
        y=-row["z"],
        z=row["x"],
        filtering="Passband, 0.5 to 1000 Hz",
    )

In [23]:
def write_ieeg_measurements(ctx: SessionContext):
    """Write the iEEG recordings of all trials as one continuous series.

    Every data array of the "iEEG data" group is one trial. The trials are
    concatenated along axis 1 and transposed, and each sample gets an
    explicit timestamp shifted by the trial number times the trial duration.
    The series is stored gzip-compressed inside an LFP container in a new
    "ecephys" processing module.

    Args:
        ctx: Session context whose NWB file is extended.

    Raises:
        IndexError: If a data array name does not match ``_IEEG_RE``.
        ValueError: If the group contains no data arrays (nothing to
            concatenate).
    """
    nwb = ctx.nwb
    ieeg_electrode_indices = list(range(_get_ieeg_electrode_count(ctx)))
    ieeg_table_region = nwb.create_electrode_table_region(
        region=ieeg_electrode_indices,  # reference row indices 0 to N-1
        description="ieeg electrodes",
    )
    trials = _get_session_data(ctx).groups["iEEG data"].data_arrays
    offset = _get_measurement_offset(ctx)
    duration = _get_trial_duration(ctx)
    data = []
    timestamps = []
    for trial in trials:
        trial_number = int(_IEEG_RE.findall(trial.name)[0])
        data.append(trial[:])
        sampling_interval = trial.dimensions[1].sampling_interval
        # Vectorised form of "i * interval - offset + trial_number * duration"
        # for every sample index i; avoids one Python-level float per sample.
        timestamps.append(
            np.arange(trial.shape[1]) * sampling_interval
            - offset
            + trial_number * duration
        )
    # Trials are laid out back to back in the order the group stores them.
    data = np.concatenate(data, axis=1).transpose()
    timestamps = np.concatenate(timestamps)

    compressed_data = H5DataIO(
        data=data,
        compression="gzip",
    )
    electrical_series = ElectricalSeries(
        name="ecephys.ieeg",
        description="iEEG data",
        data=compressed_data,
        electrodes=ieeg_table_region,
        timestamps=timestamps,
    )
    lfp = LFP(electrical_series)
    ecephys_module = nwb.create_processing_module(
        name="ecephys", description="processed extracellular electrophysiology data"
    )
    ecephys_module.add(lfp)

In [24]:
# Matches data array names of the form "iEEG_Data_Trial_<digits>" and captures
# the digits, i.e. the trial number.
_IEEG_RE = re.compile(r"iEEG_Data_Trial_(\d+)")

In [25]:
def _get_ieeg_electrode_count(ctx: SessionContext) -> int:
    """Return how many iEEG electrode labels the session defines."""
    electrode_labels = _get_ieeg_electrode_labels(ctx)
    return len(electrode_labels)

In [26]:
def write_events(ctx: SessionContext):
    """Write the trial events of the session as NWB epochs.

    Every tag of the "Trial events single tags spike times" group except
    those named "Response" becomes an epoch that starts at the event and
    stops at the next event; the last epoch stops at a synthetic "END"
    boundary.

    Args:
        ctx: Session context whose NWB file is extended.

    Raises:
        IndexError: If a tag name does not match ``_EVENT_RE``.
    """
    nwb_file = ctx.nwb
    event_tags = (
        _get_session_data(ctx).groups["Trial events single tags spike times"].tags
    )  # same as EEG and iEEG in this case

    # First parse every tag name, then keep everything but the responses.
    parsed_tags = []
    for tag in event_tags:
        parsed_tags.append((_EVENT_RE.findall(tag.name)[0], tag.position))

    per_trial_events = []
    for (event_name, trial), position in parsed_tags:
        if event_name == "Response":
            continue
        per_trial_events.append((int(trial), event_name, position[0]))
    # Stable sort by trial number only: events of one trial keep file order.
    per_trial_events = sorted(per_trial_events, key=lambda event: event[0])

    offset = _get_measurement_offset(ctx)
    duration = _get_trial_duration(ctx)
    boundaries = []
    for trial_number, event_name, time in per_trial_events:
        boundaries.append((event_name, time - offset + trial_number * duration))
    # NOTE(review): the end boundary is derived from the number of events,
    # not the number of trials -- confirm this is intended.
    boundaries.append(("END", len(boundaries) * duration))

    for position in range(len(boundaries) - 1):
        event_name, start = boundaries[position]
        _, stop = boundaries[position + 1]
        nwb_file.add_epoch(
            start_time=start,
            stop_time=stop,
            tags=event_name,
        )

In [27]:
def write_trial_data(ctx: SessionContext):
    """Add the custom trial columns and one row per trial to the NWB file.

    Trial properties are read from the "Trial properties" subsections of
    the "Session" section of the NIX file.

    Args:
        ctx: Session context whose NWB file is extended.
    """
    # TODO
    nwb = ctx.nwb
    trial_columns = (
        ("trial_number", "The trial number"),
        (
            "set_size",
            "Number of letters shown during encoding period (4, 6, or 8 letters)",
        ),
        (
            "probe_letter",
            "The letter presented to the participant during the retrieval period",
        ),
        (
            "response",
            'The participant\'s answer to the question "Was the letter at hand present in the encoding set?"',
        ),
        (
            "response_time",
            "The time at which the participant responded during the retrieval period",
        ),
        (
            "solution",
            'The solution to the question "Was the letter at hand present in the encoding set?"',
        ),
        (
            "artifact",
            "Whether the trial data was marked as an artifact by the experimenter",
        ),
    )
    for name, description in trial_columns:
        nwb.add_trial_column(name=name, description=description)

    nix = ctx.nix
    trials = nix.sections["Session"].sections["Trial properties"].sections
    offset = _get_measurement_offset(ctx)
    duration = _get_trial_duration(ctx)
    for trial in trials:
        props = trial.props
        trial_number = int(props["Trial number"].values[0])
        # NOTE(review): the other writers in this file shift times by
        # "- offset"; this one adds the offset -- confirm which is intended.
        start_time = offset + trial_number * duration
        stop_time = start_time + duration
        response_time = props["Response time"].values[0] + start_time
        # See https://gin.g-node.org/USZ_NCH/Human_MTL_units_scalp_EEG_and_iEEG_verbal_WM/issues/2#issuecomment-3729
        # The second character of the stringified code carries the answer.
        # It must be compared with the character "1": a str never equals the
        # int 1, which would make every response False.
        response = str(props["Response"].values[0])[1] == "1"
        solution = int(props["Match"].values[0]) == 1
        nwb.add_trial(
            start_time=start_time,
            stop_time=stop_time,
            trial_number=trial_number,
            set_size=int(props["Set size"].values[0]),
            probe_letter=str(props["Probe letter"].values[0]),
            response=response,
            response_time=response_time,
            solution=solution,
            artifact=bool(props["Artifact"].values[0]),
        )

In [28]:
def write_behavior(ctx: SessionContext):
    """Write the participant responses as a behavioral event series.

    Creates a "behavior" processing module holding one boolean sample per
    trial, timestamped at the response time on the concatenated time axis.
    The span from each response to the end of its trial slot is marked as
    an invalid time interval.

    Args:
        ctx: Session context whose NWB file is extended.
    """
    # TODO
    nwb = ctx.nwb
    behavior_module = nwb.create_processing_module(
        name="behavior", description="Data for all trials in this session."
    )
    nix = ctx.nix
    trials = nix.sections["Session"].sections["Trial properties"].sections
    offset = _get_measurement_offset(ctx)
    duration = _get_trial_duration(ctx)
    data = []
    timestamps = []
    for trial in trials:
        props = trial.props
        # Read through .props[...].values[0], the same way write_trial_data
        # reads the trial number.
        trial_number = int(props["Trial number"].values[0])
        # See https://gin.g-node.org/USZ_NCH/Human_MTL_units_scalp_EEG_and_iEEG_verbal_WM/issues/2#issuecomment-3729
        # Compare with the character "1": a str never equals the int 1, which
        # would make every response False.
        data.append(str(props["Response"].values[0])[1] == "1")
        time = props["Response time"].values[0] - offset + trial_number * duration
        timestamps.append(time)
        # Have to do this because everything after the response is fake time inbetween trials because NWB requires all trials to be on one long stretch
        nwb.add_invalid_time_interval(
            start_time=time,
            stop_time=(trial_number + 1.0) * duration,
        )

    time_series = TimeSeries(
        name="response",
        data=data,
        timestamps=timestamps,
        description='The participant\'s answer to the question "Was the letter at hand present in the encoding set?"',
        unit="n/a",  # Might as well use https://github.com/rly/ndx-events, but it's not built-in...
        continuity="instantaneous",
    )

    behavioral_events = BehavioralEvents(
        name="BehavioralEvents.response", time_series=time_series
    )

    behavior_module.add(behavioral_events)

In [29]:
# Matches event tag names of the form "Event_<letters>_..._Trial_<two digits>_..."
# and captures the event name (letters only) and the two-digit trial number.
_EVENT_RE = re.compile(r"Event_([a-zA-Z]+)_.*Trial_(\d\d)_.*")

In [30]:
def write_waveforms(ctx: SessionContext, ieeg_electrode_group: ElectrodeGroup):
    """Add one NWB unit per spike waveform found in the session.

    Waveforms come from the "Spike waveforms" group and spike times from
    the "Spike times" group; both are matched by the unit number parsed
    from the data array names. The per-trial spike times of a unit are
    ordered by trial number and merged onto the concatenated time axis.

    Args:
        ctx: Session context whose NWB file is extended.
        ieeg_electrode_group: Electrode group the units are assigned to.

    Raises:
        IndexError: If a data array name does not match its pattern.
        KeyError: If a unit has a waveform but no spike times.
        AssertionError: If the electrode or channel in a spike-times name
            differs from the one in the unit's waveform name.
    """
    nwb = ctx.nwb
    session = _get_session_data(ctx)
    waveforms = session.groups["Spike waveforms"].data_arrays
    spike_times = session.groups["Spike times"].data_arrays

    unit_to_waveform = {}
    for waveform in waveforms:
        unit, electrode, channel = _WAVEFORM_RE.findall(waveform.name)[0]
        unit_to_waveform[int(unit)] = (electrode, channel, waveform)

    # Trials are keyed by their integer number so they sort numerically
    # (as strings, "10" would sort before "2").
    unit_to_trial_to_spike_times = {}
    for spike_time in spike_times:
        unit, electrode, channel, trial = _SPIKE_TIMES_RE.findall(spike_time.name)[0]
        unit_to_trial_to_spike_times.setdefault(int(unit), {})[int(trial)] = (
            electrode,
            channel,
            spike_time[:],
        )

    for unit, (electrode, channel, waveform) in unit_to_waveform.items():
        trial_to_spike_times = unit_to_trial_to_spike_times[unit]

        spike_times_per_trial = []
        for trial in sorted(trial_to_spike_times):
            electrode_, channel_, trial_spike_times = trial_to_spike_times[trial]
            assert electrode == electrode_
            assert channel == channel_
            spike_times_per_trial.append(trial_spike_times)
        # NOTE(review): _untrialize_irregular_timestamps shifts by the
        # position in this list, not by the parsed trial number -- confirm
        # every unit has an entry for every trial and that this matches the
        # trial numbering used by the other writers.
        unit_spike_times = _untrialize_irregular_timestamps(
            spike_times_per_trial, ctx
        )

        electrode_label = f"m{electrode}{channel}"
        electrode_index = _get_electrode_index(ctx, electrode_label)
        data_point_count = waveform.shape[1]
        sample_interval = waveform.dimensions[1].sampling_interval
        offset = waveform.dimensions[1].offset
        min_time = 0 + offset
        max_time = min_time + data_point_count * sample_interval
        data = waveform[:]
        nwb.add_unit(
            id=unit,
            electrode_group=ieeg_electrode_group,
            electrodes=[electrode_index],
            # Row 0 is written as the mean and row 1 as the SD of the waveform.
            waveform_mean=data[0],
            waveform_sd=data[1],
            # One observation interval per spike, spanning the waveform window.
            obs_intervals=[
                (time + min_time, time + max_time) for time in unit_spike_times
            ],
            spike_times=unit_spike_times,
        )

In [31]:
# Example name: Spike_Waveform_Unit_1_uAHL_2
# Captures the unit number, the electrode letters after the "u" and the
# trailing number (used as the channel by write_waveforms).
_WAVEFORM_RE = re.compile(r"Spike_Waveform_Unit_(\d+)_u([a-zA-Z]+)_(\d+)")
# Example name: Spike_Times_Unit_36_uPHR_1_Trial_16
# Captures the same three fields plus the trial number.
_SPIKE_TIMES_RE = re.compile(r"Spike_Times_Unit_(\d+)_u([a-zA-Z]+)_(\d+)_Trial_(\d+)")

In [32]:
def _untrialize_irregular_timestamps(
        timestamps: List[List[float]], ctx: SessionContext
) -> List[float]:
    """Merge per-trial timestamps onto one continuous time axis.

    Args:
        timestamps: One sequence of timestamps per trial; the position of a
            sequence in this list is used as its trial index (starting at 0).
        ctx: Session context providing the measurement offset and the trial
            duration.

    Returns:
        A flat list in which every timestamp has the offset subtracted and
        ``trial index * trial duration`` added.
    """
    offset = _get_measurement_offset(ctx)
    duration = _get_trial_duration(ctx)
    return [
        time - offset + trial_index * duration
        for trial_index, trial_times in enumerate(timestamps)
        for time in trial_times
    ]

In [33]:
def _get_electrode_index(ctx: SessionContext, electrode: str) -> int:
    return ctx.nwb.electrodes["location"][:].index(electrode)

In [34]:
def _get_trial_duration(ctx: SessionContext) -> float:
    """Return the length of one trial slot on the concatenated time axis.

    This is the "Trial duration" property of the "Session" section minus
    the measurement offset.
    """
    offset = _get_measurement_offset(ctx)
    session_props = ctx.nix.sections["Session"].props
    nominal_duration = session_props["Trial duration"].values[0]
    return nominal_duration - offset

In [35]:
def _get_measurement_offset(ctx: SessionContext) -> float:
    """Return the offset of the second dimension of the first scalp EEG array.

    NOTE(review): the value is read from the "Scalp EEG data" group even
    though the writers in this file handle iEEG and spike data -- presumably
    all recordings share the same offset; confirm.
    """
    scalp_eeg = _get_session_data(ctx).groups["Scalp EEG data"]
    first_array = scalp_eeg.data_arrays[0]
    return first_array.dimensions[1].offset

Main

In [36]:
# Convert subject 1, session 1 and hand the resulting context to write_nwb.
# `context` is reused by the read-back cell that follows.
if __name__ == "__main__":
    context = convert_nix_to_nwb(1, 1)
    write_nwb(context)

In [43]:
# Read the NWB file of the converted session back and open it in the widget
# viewer. Depends on `context` defined by the conversion cell above.
if __name__ == "__main__":
    nwb = read_nwb(context.subject, context.session)
    nwb2widget(nwb)

In [40]:
# NOTE(review): prints a fixed string only; looks like a leftover scratch
# cell -- consider removing it from the finished notebook.
print("Test")

Test


DELETE ANYTHING PYCHARM GENERATES AFTER THIS