# EDF format checks

## There are 3 potential issues related to the conversion of raw signals to EDF:

1. Signal clipping:    signal cut once it exceeds an amplitude threshold
                       (the min-max range set before EDF conversion was 
                       too narrow)
2. Bit depth:          signal shows a stair-like progression (the min-max
                       range set before EDF conversion was too wide)
3. Inverted polarity:  signal multiplied by -1 

This notebook is semi-automated: for each subject and channel, figures are
plotted for visual inspection. A summary table (minimum, maximum and smallest
amplitude step per channel) helps identify problematic files.

In [1]:
# Import libraries
import os
from ipywidgets import AppLayout
import ipywidgets as widgets
from IPython.display import display
from tqdm.notebook import tqdm

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

import mne



In [3]:
# Default figure size (width, height) used by every matplotlib figure below.
plt.rcParams["figure.figsize"] = (8,5)

def plot_channels_histograms(data, ch_name, estimator=lambda x: x):
    """Draw a 500-bin percentage histogram of one channel.

    Parameters
    ----------
    data : object
        Recording exposing ``_orig_units`` and ``get_data`` (presumably the
        object returned by ``mne.io.read_raw_edf`` -- confirm against callers).
    ch_name : str
        Name of the channel to plot.
    estimator : callable, optional
        Transformation applied to the scaled samples before plotting.
        Defaults to the identity.
    """
    original_unit = data._orig_units[ch_name]

    # Samples are rescaled by 1e6 when the original unit is µV and by 1e3
    # for any other unit (presumably mV -- TODO confirm).
    if original_unit == 'µV':
        factor = 1e+6
    else:
        factor = 1e+3

    samples = data.get_data([ch_name])[0]
    values = estimator(samples * factor)

    sns.histplot(values, stat='percent', bins=500)

    # Pad the x axis by 5 % of each extreme so the outermost bins stay visible.
    highest = values.max()
    lowest = values.min()
    upper_limit = highest + abs(highest)*0.05
    lower_limit = lowest - abs(lowest)*0.05
    plt.xlim(lower_limit, upper_limit)

    plt.ylabel('Data points distribution (%)')
    plt.xlabel(f'Amplitude ({original_unit})')

def plot_channels_correlation(data):
    """Draw a heatmap of the Pearson correlation between all channels.

    Parameters
    ----------
    data : object
        Recording exposing ``get_data()`` and ``info['ch_names']``.
    """
    # Transpose so that each channel becomes one DataFrame column.
    samples_by_channel = pd.DataFrame(
        data.get_data().T,
        columns=data.info['ch_names'],
    )
    correlation_matrix = samples_by_channel.corr(method='pearson')
    sns.heatmap(correlation_matrix)

In [4]:
# Border/padding settings shared by every output pane.
layout = dict(border='1px solid', padding='10px')

# Placeholder entries shown first in the dropdowns; selecting them is a no-op.
SELECT_CHANNEL = 'Select channel'
SELECT_FILE = 'Select file'

# One output pane per UI area, created in the same order as they are named.
out_correlations, out_distributions, out_selectors, out_summary = (
    widgets.Output(layout=widgets.Layout(**layout)) for _ in range(4)
)

# Free-text field where the user types the folder containing the EDF files.
folder_path = widgets.Text(
    placeholder='inser the files folder path',
    description='Path:',
    disabled=False,
)

# Dropdowns start empty; their options are filled by the observers.
select_file = widgets.Dropdown(options=[], description='Files:', disabled=False)
select_channel = widgets.Dropdown(options=[], description='Channels:', disabled=False)

# Button that triggers the summary table generation.
button = widgets.Button(
    description='Get summary table',
    button_style='info',
    tooltip='Generates a description table to look for signals of clipping or bit depth',
    icon='list',
    layout=widgets.Layout(margin='10px 5px 5px 5px'),
)

# Non-clickable button used purely as a busy/ready indicator.
status = widgets.Button(
    description='',
    disabled=True,
    button_style='danger',  # 'success', 'info', 'warning', 'danger' or ''
    tooltip='Process is busy',
    icon='spinner',
)

def disable_interactions():
    """Switch the status indicator to 'busy' and lock every input widget."""
    status.icon, status.button_style = 'spinner', 'danger'

    for control in (folder_path, select_file, select_channel, button):
        control.disabled = True

def enable_interactions():
    """Switch the status indicator to 'ready' and unlock every input widget."""
    status.icon, status.button_style = 'check', 'success'

    for control in (folder_path, select_file, select_channel, button):
        control.disabled = False

def show_summary_table(a):
    """Build and display a per-channel summary table for every EDF file.

    For each ``.edf`` file in the folder typed into ``folder_path`` and for
    each of its channels, one row is collected with the file name, channel
    name, original unit, minimum and maximum scaled amplitude, and the
    smallest non-zero absolute difference between consecutive samples
    ("BinGap"). The resulting table is displayed inside ``out_summary``.

    Parameters
    ----------
    a : object
        Argument supplied by ``button.on_click``; unused.
    """
    disable_interactions()
    try:
        with out_summary:
            out_summary.clear_output()

        folder = folder_path.value

        if folder == '':
            with out_summary:
                print('>>> >> No folder was yet selected')
            return

        # A mistyped path would otherwise make os.listdir raise and leave
        # the interface disabled.
        if not os.path.isdir(folder):
            with out_summary:
                print(f'>>> >> {folder} is not a valid folder')
            return

        files = [
            f for f in os.listdir(folder)
            if os.path.isfile(os.path.join(folder, f))
            and f.endswith('.edf')
        ]

        if not files:
            with out_summary:
                print(f'>>> >> No EDF files in folder {folder}')
            return

        subject_summary_rows = []
        with out_summary:
            for filename in files:

                print(f'\n\n>>> >>> Reading file {filename}')

                filepath = os.path.join(folder, filename)
                data = mne.io.read_raw_edf(filepath, preload=True, verbose=True)

                ch_names = data.info['ch_names']
                for ch_name in tqdm(ch_names, total=len(ch_names)):
                    unit = data._orig_units[ch_name]

                    # Rescale by 1e6 for µV channels and by 1e3 for any other
                    # unit (presumably mV -- TODO confirm).
                    scaler = 1e+6 if unit == 'µV' else 1e+3
                    values = data.get_data([ch_name])[0] * scaler

                    # Smallest non-zero step between consecutive samples.
                    steps = np.abs(np.diff(values))
                    steps = steps[steps != 0]
                    # A flat channel has no non-zero step: report NaN rather
                    # than failing on the minimum of an empty array.
                    bin_gap = steps.min() if steps.size else np.nan

                    subject_summary_rows.append([
                        filename,
                        ch_name,
                        unit,
                        values.min(),
                        values.max(),
                        bin_gap,
                    ])

            df = pd.DataFrame(
                subject_summary_rows,
                columns=['File', 'Channel', 'Unit', 'Min', 'Max', 'BinGap']
            )

            # Drop the reading logs and progress bars before showing the table.
            out_summary.clear_output()
            display(df)
    finally:
        # Always hand control back to the user, whatever happened above.
        enable_interactions()
    
# Clicking the button generates the summary table.
button.on_click(show_summary_table)

# Render the input controls, in order, inside the selectors pane.
with out_selectors:
    display(folder_path, select_file, select_channel, button, status)

# Selectors on top; distributions and correlations side by side below.
a = AppLayout(
    header=out_selectors,
    left_sidebar=None,
    center=out_distributions,
    right_sidebar=out_correlations,
    footer=None,
    pane_widths=['0', '50%', '50%'],
)

display(a)
# The summary pane is shown separately, after the application layout.
display(out_summary)

def clean_channels():
    """Reset the channel-level UI: clear the distributions pane and empty the channel dropdown."""
    out_distributions.clear_output()
    # NOTE(review): emptying the options presumably changes the dropdown
    # value too, which would fire select_channel_trigger -- confirm.
    select_channel.options = []

def clean_file():
    """Reset the file-level UI: channel state plus the correlations pane."""
    
    clean_channels()
    out_correlations.clear_output()

def clean_folder():
    """Reset the folder-level UI: file state, the summary pane and the file dropdown."""

    clean_file()
    out_summary.clear_output()
    # NOTE(review): emptying the options presumably changes the dropdown
    # value too, which would fire select_file_trigger -- confirm.
    select_file.options = []

###### folder selection triggers ######
def load_filelist(change):
    """Refresh the file dropdown after the folder path has changed.

    Parameters
    ----------
    change : object
        Change notification whose ``new`` attribute holds the folder path
        just typed by the user.
    """
    disable_interactions()
    clean_folder()

    folder = change.new

    # Only list the folder when the typed path is an existing directory.
    if os.path.isdir(folder):
        edf_names = [
            name for name in os.listdir(folder)
            if os.path.isfile(os.path.join(folder, name))
            and name.endswith('.edf')
        ]

        # The placeholder entry always comes first.
        if edf_names:
            select_file.options = [SELECT_FILE] + edf_names

    enable_interactions()

########################################

####### Select file triggers ###########
def load_data(filename):
    """Read one EDF file from the currently selected folder.

    Parameters
    ----------
    filename : str
        Name of the file, relative to the folder typed into ``folder_path``.

    Returns
    -------
    object
        Whatever ``mne.io.read_raw_edf`` returns, read with ``preload=True``.
    """
    folder = folder_path.value
    full_path = os.path.join(folder, filename)
    return mne.io.read_raw_edf(full_path, preload=True, verbose=True)

def plot_correlations(data, filename):
    """Render the channel correlation heatmap inside ``out_correlations``.

    Parameters
    ----------
    data : object
        Recording passed on to ``plot_channels_correlation``.
    filename : str
        File name used in the progress message and the figure title.
    """
    with out_correlations:
        # Show a progress message while the heatmap is being computed.
        out_correlations.clear_output()
        print(f'>>> >>> Plotting channels correlation from file {filename}')
        print('(this can take some seconds)')

        plot_channels_correlation(data)
        plt.title(f'Subject {filename} correlation between all channels')
        plt.tight_layout()

        # Remove the progress message right before showing the figure.
        out_correlations.clear_output()
        plt.show()

def select_file_trigger(change):
    """React to a new selection in the file dropdown.

    Clears the file-level panes, then, unless the placeholder (or nothing) is
    selected, loads the file, fills the channel dropdown and plots the
    channel correlations.

    Parameters
    ----------
    change : object
        Change notification whose ``new`` attribute holds the selected
        file name.
    """
    disable_interactions()
    try:
        clean_file()

        filename = change.new

        if filename is None or filename == SELECT_FILE:
            return

        data = load_data(filename)
        select_channel.options = [SELECT_CHANNEL] + data.info['ch_names']
        plot_correlations(data, filename)
    finally:
        # Re-enable the widgets even if reading the file failed; otherwise
        # the interface would stay locked.
        enable_interactions()


######################################

####### Select channel triggers ######
def select_channel_trigger(change):
    """React to a new selection in the channel dropdown.

    Unless the placeholder (or nothing) is selected, reloads the currently
    selected file and shows two histograms in ``out_distributions``: the
    amplitude distribution and the distribution of absolute differences
    between consecutive samples ("BinGap").

    Parameters
    ----------
    change : object
        Change notification whose ``new`` attribute holds the selected
        channel name.
    """
    disable_interactions()
    try:
        out_distributions.clear_output()

        filename = select_file.value
        ch_name = change.new

        # If no channel has been selected yet, don't plot distributions
        if ch_name is None or ch_name == SELECT_CHANNEL:
            return

        data = load_data(filename)
        with out_distributions:
            # Show a progress message while the figures are being built.
            out_distributions.clear_output()

            print(f'>>> >>> Plotting channel {ch_name} from file {filename}')
            print('(this can take some seconds)')

            plt.figure(1)
            plot_channels_histograms(data, ch_name)
            plt.title(f'{filename} ({ch_name}) Amplitude distribution')
            plt.tight_layout()

            plt.figure(2)
            plot_channels_histograms(
                data,
                ch_name,
                estimator=lambda x: np.abs(np.diff(x))
            )
            plt.title(f'{filename} ({ch_name}) BinGap distribution')
            plt.tight_layout()

            # Remove the progress message right before showing the figures.
            out_distributions.clear_output()
            plt.show()
    finally:
        # Re-enable the widgets even if reading the file failed; otherwise
        # the interface would stay locked.
        enable_interactions()

######################################

# Call the matching handler whenever the 'value' of a widget changes.
folder_path.observe(load_filelist, names='value')
select_file.observe(select_file_trigger, names='value')
select_channel.observe(select_channel_trigger, names='value')

AppLayout(children=(Output(layout=Layout(border='1px solid', grid_area='header', padding='10px')), Output(layo…

Output(layout=Layout(border='1px solid', padding='10px'))