# EDF format checks

## There are 3 potential issues related to the conversion of raw signals to EDF:

1. Signal clipping:    the signal is cut off once it exceeds an amplitude
                       threshold (the min-max range set before EDF
                       conversion was too narrow)
2. Bit depth:          the signal shows a stair-like progression (the
                       min-max range set before EDF conversion was too
                       wide, so the amplitude resolution is too coarse)
3. Inverted polarity:  the signal was multiplied by -1

The present script is semi-automated: for each subject and channel,
figures are plotted for visual inspection. Problematic files are
stored in a table.

In [13]:
# Import libraries
import os, fnmatch
import time
import ipywidgets as widgets
from IPython.display import display
from tqdm.notebook import tqdm

import pandas as pd
import numpy as np
import mne
from mne.io.constants import FIFF

import seaborn as sns
import matplotlib.pyplot as plt

In [14]:
def show_channels_histograms(data, ch_name, ax, estimator=lambda x: x):
    """Plot the distribution of one channel's samples as a histogram on ``ax``.

    Parameters
    ----------
    data
        Recording object exposing ``_orig_units`` (indexed by channel name)
        and ``get_data(picks)``. In this notebook it is the object returned
        by ``mne.io.read_raw_edf``.
    ch_name : str
        Name of the channel to plot.
    ax
        Matplotlib axes the histogram is drawn on.
    estimator : callable, optional
        Transformation applied to the rescaled samples before plotting.
        Defaults to the identity (raw amplitudes).
    """
    unit = data._orig_units[ch_name]

    # Rescale the samples for display in the channel's original unit.
    # NOTE(review): presumably get_data() returns volts and every unit other
    # than 'µV' is mV -- confirm for recordings using different units.
    scaler = 1e+6 if unit == 'µV' else 1e+3
    X = data.get_data([ch_name])[0]
    X = estimator(X * scaler)

    # Plot figure
    sns.histplot(
        X,
        stat='percent',
        bins=50,
        kde=True,
        ax=ax
    )

    # Pad both ends of the x-axis by 5 % of the respective extreme value.
    # The padding must extend outwards on both sides: the lower limit is
    # decreased, otherwise the lowest part of the distribution is cropped.
    max_x = X.max() + abs(X.max())*0.05
    min_x = X.min() - abs(X.min())*0.05
    ax.set_xlim(min_x, max_x)
    ax.set_ylabel('Data points distribution (%)')
    ax.set_xlabel(f'Amplitude ({unit})')

In [15]:
# Ask the user for the root folder that contains the EDF files
subfolder = input('>>> input the folder path containing the EDF files')

# Walk the folder tree and collect one (folder, filename) pair per EDF file
filelist = [
    (dirpath, edf_name)
    for dirpath, _subdirs, names in os.walk(subfolder)
    for edf_name in fnmatch.filter(names, '*.edf')
]
print(f'>>> {len(filelist)} EDF files found')

>>> 3 EDF files found


In [18]:
def check_channel(data, ch_name):
    """Show two side-by-side histograms for one channel.

    The left panel plots the channel's samples as they are; the right panel
    plots the absolute difference between consecutive samples.
    """

    def unchanged(samples):
        return samples

    def absolute_steps(samples):
        return np.abs(np.diff(samples))

    _, panels = plt.subplots(1, 2, figsize=(15,10))
    amplitude_ax, step_ax = panels.flatten()

    show_channels_histograms(data, ch_name, amplitude_ax, estimator=unchanged)
    show_channels_histograms(data, ch_name, step_ax, estimator=absolute_steps)
    plt.show()


def select_channels_and_plot(subID):
    """Load the selected EDF file and display a channel picker for it.

    Parameters
    ----------
    subID : str or None
        File name of the EDF file to load, as listed in the module-level
        ``filelist``. ``None`` means that no file is selected; in that case
        a message is printed and nothing is loaded.

    Notes
    -----
    ``filelist`` is searched by file name only: when the same name occurs in
    several folders, the first match is the one that gets loaded.
    """
    # Nothing to load when no file is selected (e.g. the file list is empty).
    if subID is None:
        print('>>> >>> No EDF file selected')
        return

    subfolder = [root for root, curr_subID in filelist if curr_subID == subID][0]

    # Import the data
    print(f'>>> >>> Importing data from Subject {os.path.splitext(subID)[0]}...')
    file_path = os.path.join(subfolder, subID)
    data = mne.io.read_raw_edf(
        file_path,
        preload=True,
        verbose=True
    )
    w_channels = widgets.Select(
        options=list(data.info['ch_names']),
        description='Select channel to check',
        disabled=False
    )

    def check_data_channel(ch_name):
        """Plot the histograms of ``ch_name`` for the file loaded above."""
        print(f'>>> >>> Plotting channel {ch_name} from file {subID}')
        print('(this can take some seconds)')
        check_channel(data, ch_name)

    # Only ch_name is handed to interact: check_data_channel takes no other
    # argument and reads the loaded recording from the enclosing scope.
    widgets.interact(check_data_channel, ch_name=w_channels)

# File picker: one entry per EDF file found, labelled by its file name
w_files = widgets.Select(
    options=[entry[1] for entry in filelist],
    description='Select the EDF file to check',
    disabled=False,
)

# Reload the file and rebuild the channel picker whenever the selection changes
widgets.interact(select_channels_and_plot, subID=w_files)


interactive(children=(Select(description='Select the EDF file to check', options=('02ZB_EEG_clipped.edf', '01H…

<function __main__.select_channels_and_plot(subID)>