In [116]:
from typing import List
import numpy as np
import pandas as pd
from hdmf.backends.hdf5 import H5DataIO
from pynwb import NWBFile
from pynwb.file import Subject
from pynwb.ecephys import ElectricalSeries, ElectrodeGroup, LFP
import nixio
import regex as re
from usz_neuro_conversion.common import (
    SessionContext,
    NixContext,
    get_metadata_row,
    read_nix,
    get_date,
    write_nwb,
    standardize_sex,
    find_nix_files,
)

In [117]:
def convert_nix_to_nwb(subject: int, session: int) -> SessionContext:
    ctx = create_context(subject, session)
    write_subject(ctx)
    add_electrode_columns(ctx)
    ieeg_electrode_group = write_ieeg_electrodes(ctx)
    write_ieeg_measurements(ctx)
    write_trial_data(ctx)
    write_waveforms(ctx, ieeg_electrode_group)
    return ctx

In [118]:
def create_context(subject: int, session: int) -> SessionContext:
    nix_context = NixContext(
        subject, session, project="Human_Amygdala_MUA_sEEG_FearVideo"
    )
    nix = read_nix(nix_context)
    general = nix.sections["General"]
    nwb = NWBFile(
        session_description="Running experiment as described in the the experiment description",
        identifier=f"Human_Amygdala_MUA_sEEG_FearVideo_subject{subject:02}_session{session:02}",
        session_start_time=get_date(nix_context),
        lab=general.props["Recording location"].values[0],
        institution="Universitätsspital Zürich, 8091 Zurich, Switzerland",  # Broken UTF-8 in file
        related_publications=_get_related_publications(nix),
        experimenter="Fedele, Tommaso",  # TODO is this right?
        experiment_description=_get_experiment(nix),
        keywords=[
            "Neuroscience",
            "Electrophysiology",
            "Human",
            "Awake",
            "Local field potential",
            "Neuronal action potential",
            "Spikes",
            "Amygdala",
            "Intracranial EEG",
            "Cognitive task",
            "Dynamic visual stimuli",
            "Aversive stimuli",
            "Epilepsy",
        ],
    )
    return nix_context.to_session_context(nix, nwb)

In [119]:
def _get_experiment(nix: nixio.File) -> str:
    task = nix.sections["Task"].props
    task_name = task["Task name"].values[0]
    # Broken UTF-8 in file
    task_desc = "We used a paradigm comprising of a series of dynamic videos, which has been already validated in previous clinical investigations (Schacher et al., 2006). The videos were all silent and consisted of dynamic fearful faces and dynamic neutral landscapes, presented in an alternating order, in a block design. The paradigm included eight blocks of 75 short video clips (2–3 s) of fearful faces (aversive condition) and nine blocks of 72 short video clips (2–3 s) of neutral landscapes (neutral condition). Each block lasted 24 s in total, and contained short video clips without any intermission between consecutive videos. Video clips of fearful faces were extracted from thriller and horror movies and contained faces of actors showing fear, without being violent or aggressive. Video clips of neutral landscapes were chosen as a control condition, and were matched to the duration of the fearful faces videos (2–3 s). They included domestic landscapes which are posited to have a low emotional content and visual properties comparable to the emotional videos (Schacher et al., 2006). All videos were only included once. A panel of psychologists had evaluated the stimuli to ensure that they are suitable for the patients and that they do not include any episodes of violence or aggression (Schacher et al., 2006). In particular, we started with a set of 120 videos of fearful faces and reduced that to 72, by excluding videos where: (a) the actor’s face was not continuously visible (b) fear was not clearly recognized on the actor’s face (c) no other emotion was displayed (e.g. anger/surprise) and (d) the display of fear was intense. During electrophysiological recordings the videos were presented to the patients via a laptop screen, while during the fMRI scan they were presented through a tilted overhead mirror. In both cases, patients were instructed to pay attention to the videos and focus on the eyes of the actors during the clips containing faces. For the electrophysiological recordings, blocks were separated by a repeated baseline of 2 s taken from a neutral condition."
    task_url = task["Task URL"].values[0]
    return (
        f"Task Name: {task_name}\nTask Description: {task_desc}\nTask URL: {task_url}"
    )

In [120]:
def _get_related_publications(nix: nixio.File) -> List[str]:
    related_publications = (
        nix.sections["General"].sections["Related publications"].props
    )
    dois = related_publications["Publication DOI"].values
    return [doi.strip() for doi in dois]

In [121]:
def write_subject(ctx: SessionContext):
    metadata = get_metadata_row(ctx.to_nix_context())
    age = metadata["Age"]
    sex = metadata["Sex"]
    ctx.nwb.subject = Subject(
        subject_id=f"{ctx.subject:02}",
        age=f"P{int(age)}Y",
        description=_get_subject_description(ctx),
        species="Homo sapiens",
        sex=standardize_sex(sex),
    )

In [122]:
def _get_subject_description(ctx: SessionContext) -> str:
    metadata = get_metadata_row(ctx.to_nix_context())
    subject = ctx.nix.sections["Subject"].props
    handedness = metadata["Handedness"]
    pathology = metadata["Pathology"]
    depth_electrodes = subject["Depth electrodes"].values[0]
    electrodes_in_soz = metadata["Electrodes in seizure onset zone (SOZ)"]
    return f"Handedness: {handedness}\nPathology: {pathology}\nDepth electrodes: {depth_electrodes}\nElectrodes in seizure onset zone (SOZ): {electrodes_in_soz}"

In [123]:
def add_electrode_columns(ctx: SessionContext):
    ctx.nwb.add_electrode_column(
        name="label",
        description="Channel label referenced by other data arrays",
    )
    ctx.nwb.add_electrode_column(
        name="is_inside_soz",
        description="Indicates whether the electrode is inside the seizure onset zone (SOZ)",
    )

In [124]:
def _get_session_data(ctx: SessionContext) -> nixio.Block:
    return ctx.nix.blocks[f"Data_Subject_{ctx.subject:02}_Session_{ctx.session:02}"]

In [125]:
def write_ieeg_electrodes(ctx: SessionContext) -> ElectrodeGroup:
    nwb = ctx.nwb

    device = nwb.create_device(
        name="ATLAS Neurophysiology System",
        manufacturer="Neuralynx, Inc.",
        description="iEEG recording system",
    )

    # create an electrode group for this group
    electrode_group = nwb.create_electrode_group(
        name="ieeg",
        description=f"iEEG electrodes",
        device=device,
        location="Intracranial",
    )

    electrodes = _get_ieeg_electrodes(ctx)
    electrodes.apply(
        lambda row: _add_row_to_ieeg_electrodes(nwb, electrode_group, row), axis=1
    )
    return electrode_group

In [126]:
def _get_ieeg_electrodes(ctx: SessionContext) -> pd.DataFrame:
    labels = _get_ieeg_electrode_labels(ctx)
    anatomical_locations = _get_ieeg_electrode_anatomical_locations(ctx)
    inside_soz = _get_ieeg_electrode_inside_soz(ctx)
    locations = _get_ieeg_electrode_locations(ctx)
    shape = locations.shape if len(locations.shape) > 1 else (1, locations.shape[0])
    locations_array = np.ndarray(shape)
    locations.read_direct(locations_array)
    df = pd.DataFrame(locations_array, columns=["x", "y", "z"])
    df.insert(0, "label", labels)
    df.insert(1, "anatomical_location", anatomical_locations)
    df.insert(2, "inside_soz", inside_soz)
    df = df.astype(
        {"label": "string", "anatomical_location": "string", "inside_soz": "bool"}
    )
    return df.reset_index()

In [127]:
def _get_ieeg_electrode_labels(ctx: SessionContext) -> List[str]:
    return [
        _extract_electrode_label(channel.sources[0].name)
        for channel in _get_session_data(ctx)
        .groups["iEEG electrode information"]
        .data_arrays["iEEG_Electrode_Map"]
        .sources
    ]

In [128]:
def _extract_electrode_label(label: str) -> str:
    first, second = _LABEL_RE.findall(label)[0]
    return f"{first}-{second}"

In [129]:
_LABEL_RE = re.compile(r"m([a-zA-Z\d]+)-m([a-zA-Z\d]+)")

In [130]:
def _get_ieeg_electrode_anatomical_locations(ctx: SessionContext) -> List[str]:
    return [
        channel.sources[1].name
        if channel.sources[1].name != "no_label_found"
        else "unspecific"
        for channel in _get_session_data(ctx)
        .groups["iEEG electrode information"]
        .data_arrays["iEEG_Electrode_Map"]
        .sources
    ]

In [131]:
def _get_ieeg_electrode_inside_soz(ctx: SessionContext) -> List[bool]:
    return [
        channel.sources[2].name == "Inside SOZ"
        for channel in _get_session_data(ctx)
        .groups["iEEG electrode information"]
        .data_arrays["iEEG_Electrode_Map"]
        .sources
    ]

In [132]:
def _get_ieeg_electrode_locations(ctx: SessionContext) -> nixio.DataArray:
    return (
        _get_session_data(ctx)
        .groups["iEEG electrode information"]
        .data_arrays["iEEG_Electrode_MNI_Coordinates"]
    )

In [133]:
def _add_row_to_ieeg_electrodes(
        nwb: NWBFile, electrode_group: ElectrodeGroup, row: pd.Series
):
    # Got MNI map: +X is right, +Y is anterior, +Z is superior according to <https://kathleenhupfeld.com/mni-template-coordinate-systems/>
    # But need NWB: +X is posterior, +Y is inferior, +Z is right according to <https://pynwb.readthedocs.io/en/stable/pynwb.file.html#pynwb.file.NWBFile.add_electrode>

    nwb.add_electrode(
        group=electrode_group,
        label=row["label"],
        location=row["anatomical_location"],
        reference="Common intracranial reference",
        is_inside_soz=row["inside_soz"],
        x=-row["y"] if not np.isnan(-row["y"]) else None,
        y=-row["z"] if not np.isnan(-row["z"]) else None,
        z=row["x"] if not np.isnan(row["x"]) else None,
        filtering="Passband, 0.5 to 1000 Hz",
    )

In [134]:
def write_ieeg_measurements(ctx: SessionContext):
    nwb = ctx.nwb
    ieeg_electrode_indices = list(range(_get_ieeg_electrode_count(ctx)))
    ieeg_table_region = nwb.create_electrode_table_region(
        region=ieeg_electrode_indices,  # reference row indices 0 to N-1
        description="ieeg electrodes",
    )
    trials = _get_session_data(ctx).groups["iEEG data"].data_arrays
    duration = _get_trial_duration(ctx)
    data = []
    timestamps = []
    for trial in trials:
        trial_number = int(_IEEG_RE.findall(trial.name)[0]) - 1
        trial_data = (
            trial[:] if len(trial.shape) > 1 else trial[:].reshape((1, trial.shape[0]))
        )
        data.append(trial_data)
        sampling_interval = trial.dimensions[1].sampling_interval
        times = [
            i * sampling_interval + trial_number * duration
            for i in range(trial_data.shape[1])
        ]
        timestamps.extend(times)
    data = np.concatenate(data, axis=1).transpose()

    compressed_data = H5DataIO(
        data=data,
        compression="gzip",
    )
    sampling_interval = trials[0].dimensions[1].sampling_interval
    electrical_series = ElectricalSeries(
        name="ecephys.ieeg",
        description="iEEG data",
        data=compressed_data,
        electrodes=ieeg_table_region,
        starting_time=0.0,
        rate=sampling_interval,
    )
    lfp = LFP(electrical_series)
    ecephys_module = nwb.create_processing_module(
        name="ecephys", description="processed extracellular electrophysiology data"
    )
    ecephys_module.add(lfp)

In [135]:
_IEEG_RE = re.compile(r"iEEG_Data_Trial_(\d+)")

In [136]:
def _get_ieeg_electrode_count(ctx: SessionContext) -> int:
    return len(_get_ieeg_electrode_labels(ctx))

In [137]:
def write_trial_data(ctx: SessionContext):
    nwb = ctx.nwb
    nwb.add_trial_column(
        name="condition",
        description='Kind of videos presented in trial block. Either "Aversive", i.e. fearful faces, or "Neutral", i.e. neutral landscapes',
    )
    session = _get_session_data(ctx)
    tags = session.groups[
        "Trial events single tags spike times"
    ].tags  # same as iEEG in this case
    tags_by_trial = [(_EVENT_RE.findall(tag.name)[0], tag.position) for tag in tags]
    events = [
        (int(trial) - 1, condition, position[0])
        for (condition, trial), position in tags_by_trial
    ]
    events.sort(key=lambda x: x[0])
    offset = _get_measurement_offset(ctx)
    duration = _get_trial_duration(ctx)
    events = [
        (trial_number, condition, time + trial_number * duration)
        for trial_number, condition, time in events
    ]

    for trial_number, condition, start in events:
        start = start - offset if trial_number > 0 else start
        end = (trial_number + 1) * duration
        nwb.add_trial(
            id=trial_number,
            start_time=start,
            stop_time=end,
            condition=condition,
        )
        # Neutral stimulus between blocks
        # nwb.add_invalid_time_interval(
        #    start_time=end,
        #    stop_time=end - offset,
        # )

In [138]:
# Stimulus_Condition_2_Aversive_Trial_06_Spike_Times
_EVENT_RE = re.compile(r"Stimulus_Condition_\d_([a-zA-Z]+)_Trial_(\d+)_Spike_Times")

In [139]:
def write_waveforms(ctx: SessionContext, ieeg_electrode_group: ElectrodeGroup):
    nwb = ctx.nwb
    session = _get_session_data(ctx)
    waveforms = session.groups["Spike waveforms"].data_arrays
    spike_times = session.groups["Spike times"].data_arrays
    waveforms = [
        (_WAVEFORM_RE.findall(waveform.name)[0], waveform) for waveform in waveforms
    ]
    spike_times = [
        (_SPIKE_TIMES_RE.findall(spike_time.name)[0], spike_time[:])
        for spike_time in spike_times
    ]
    unit_to_waveform = {
        int(unit): (electrode, channel, values)
        for ((unit, electrode, channel), values) in waveforms
    }
    unit_to_trial_to_spike_times = {}
    for (unit, electrode, channel, trial), values in spike_times:
        unit_to_trial_to_spike_times.setdefault(int(unit), {})[trial] = (
            electrode,
            channel,
            values,
        )
    for unit, (electrode, channel, waveform) in unit_to_waveform.items():
        trial_to_spike_times = unit_to_trial_to_spike_times[unit]

        spike_times_for_trials = []
        for trial, (electrode_, channel_, spike_times) in trial_to_spike_times.items():
            assert electrode == electrode_
            assert channel == channel_
            spike_times_for_trials.append((trial, spike_times))
        spike_times_for_trials.sort(key=lambda x: x[0])
        spike_times_for_trials = [
            spike_times for _, spike_times in spike_times_for_trials
        ]
        spike_times_for_trials = _untrialize_irregular_timestamps(
            spike_times_for_trials, ctx
        )

        electrode_label = electrode  # Channel doesn't matter. We only have L and R. See https://www.sciencedirect.com/science/article/pii/S1053811920301920?via%3Dihub#tbl1
        electrode_index = _get_electrode_index(ctx, electrode_label)
        # Don't see a way to include waveform sampling rate...
        offset = _get_measurement_offset(ctx) + waveform.dimensions[1].offset
        data = waveform[:]
        nwb.add_unit(
            id=int(unit),
            electrode_group=ieeg_electrode_group,
            electrodes=[electrode_index],
            waveform_mean=data[0] - offset,
            waveform_sd=data[1],
            spike_times=spike_times_for_trials,
        )

In [140]:
# Spike_Waveform_Unit_1_uAHL_2
_WAVEFORM_RE = re.compile(r"Spike_Waveform_Unit_(\d+)_u([a-zA-Z]+)_(\d+)")
# Spike_Times_Unit_36_uPHR_1_Trial_16
_SPIKE_TIMES_RE = re.compile(r"Spike_Times_Unit_(\d+)_u([a-zA-Z]+)_(\d+)_Trial_(\d+)")

In [141]:
def _untrialize_irregular_timestamps(
        timestamps: List[List[float]], ctx: SessionContext
) -> List[float]:
    offset = _get_measurement_offset(ctx)
    duration = _get_trial_duration(ctx)
    untrialized = []
    for trial, times in enumerate(timestamps):
        times = [time - offset + trial * duration for time in times]
        untrialized.extend(times)
    return untrialized

In [142]:
def _get_electrode_index(ctx: SessionContext, electrode: str) -> int:
    nwb = ctx.nwb
    index = next(
        index
        for index, electrodes in enumerate(nwb.electrodes["label"][:])
        if electrode in electrodes
    )
    return nwb.electrodes["id"][index]

In [143]:
def _get_trial_duration(ctx: SessionContext) -> float:
    return (
        ctx.nix.sections["Session"].props["Trial duration"].values[0]
    )  # Offset already included

In [144]:
def _get_measurement_offset(ctx: SessionContext) -> float:
    return (
        _get_session_data(ctx).groups["iEEG data"].data_arrays[0].dimensions[1].offset
    )

Main

In [145]:
if __name__ == "__main__":
    context = convert_nix_to_nwb(1, 1)
    write_nwb(context)

In [146]:
if __name__ == "__main__":
    project = "Human_Amygdala_MUA_sEEG_FearVideo"
    for subject, sessions in find_nix_files(project).items():
        for session, _ in sessions.items():
            print(f"Converting subject {subject} session {session}")
            try:
                context = convert_nix_to_nwb(subject, session)
                write_nwb(context)
                print("Done")
            except Exception as e:
                print(f"Failed to convert {subject} {session}")
                print(e)
    print("Everything done!")

Converting subject 1 session 1
Done
Converting subject 2 session 1
Done
Converting subject 3 session 1
Done
Converting subject 4 session 1
Done
Converting subject 5 session 1
Done
Converting subject 6 session 1
Done
Converting subject 7 session 1
Done
Converting subject 8 session 1
Done
Converting subject 9 session 1
Done
Everything done!
