## File type demo

This notebook demonstrates some of the capabilities of the eegyolk library in terms of reading the cnt file type. It also demonstrates the importance of the dataformat argument in the mne library. Previous work had let some arguments in reading cnt files default to preset values.
This notebook specifically demonstrates how whether the format of int32 or int16 influences the understanding of the file

#### Imports

In [1]:
import mne      # toolbox for analyzing and visualizing EEG data
import os       # using operating system dependent functionality (folders)
import pandas   # data analysis and manipulation
import numpy    # numerical computing (manipulating and performing operations on arrays of data)
import copy     # Can Copy and Deepcopy files so original file is untouched.
import glob
import numpy as np
import pandas as pd
from numpy.fft import fft, fftfreq
from scipy import signal

import matplotlib
import matplotlib.pyplot as plt

from mne.time_frequency.tfr import morlet
from mne.viz import plot_filter, plot_ideal_filter


from IPython.display import clear_output
import warnings

import sys

Below eegyolk is importted in it's most updated version
But the cell could be replaced by importing the stable library and then importing the modules of it.

In [2]:

import sys
sys.path.insert(0, '../eegyolk') # path to helper functions
import helper_functions as hf # library useful for eeg and erp data cleaning
#import initialization_functions #library to import data
import epod_helper
import rawf
from config import Config
from rawf import RawData

#### Load EEG files
Change your config file to change where data is coming from

In [3]:
!pwd

/home/cmoore/eegyolk/demos


In [4]:
config = Config()


In [5]:
config.get_directory('data')

'/volume-ceph/DDP_projectfolder/dataset'

In [6]:
#eeg_file_root = "../../volume-ceph/ePodium_projectfolder"
acquired = RawData(config.get_directory('data'), config.get_directory('metadata'))

Note we assume you are looking at bdf files

In [7]:
acquired

<rawf.RawData at 0x7fe5712ab820>

In [8]:
acquired.raw.head()

Unnamed: 0,code,cnt_path,cnt_file,age_group,age_days,age_months,age_years
0,35,/volume-ceph/DDP_projectfolder/dataset/11mnd m...,035_11_jc_mmn36_slp_mmn25_slp,11,331.0,11.033333,0.919444
1,27,/volume-ceph/DDP_projectfolder/dataset/11mnd m...,027_11_jc_mmn25_wk,11,326.0,10.866667,0.905556
2,25,/volume-ceph/DDP_projectfolder/dataset/11mnd m...,025_11_mc_mmn36_wk,11,360.0,12.0,1.0
3,35,/volume-ceph/DDP_projectfolder/dataset/11mnd m...,035_11_jc_mmn36slp_mmn25_slp_2,11,331.0,11.033333,0.919444
4,30,/volume-ceph/DDP_projectfolder/dataset/11mnd m...,030_11_jc_mmn36_wk_mmn25_wk,11,328.0,10.933333,0.911111


In [9]:
data_raw = acquired.as_mne[2]

In [10]:
print('Data type: {}\n\n{}\n'.format(type(data_raw), data_raw))

# Get the sample rate
print('Sample rate:', data_raw.info['sfreq'], 'Hz')

# Get the size of the matrix
print('Size of the matrix: {}\n'.format(data_raw.get_data().shape))

# The mne.info class can be used to learn more about the data.
print(data_raw.info)

Data type: <class 'mne.io.cnt.cnt.RawCNT'>

<RawCNT | 025_11_mc_mmn36_wk.cnt, 64 x 195390 (390.8 s), ~95.5 MB, data loaded>

Sample rate: 500.0 Hz
Size of the matrix: (64, 195390)

<Info | 8 non-empty values
 bads: []
 ch_names: O2, O1, OZ, PZ, P4, CP4, P8, C4, TP8, T8, P7, P3, CP3, CPZ, CZ, ...
 chs: 62 EEG, 2 EOG
 custom_ref_applied: False
 highpass: 0.0 Hz
 lowpass: 250.0 Hz
 meas_date: 2002-03-19 11:50:16 UTC
 nchan: 64
 projs: []
 sfreq: 500.0 Hz
 subject_info: 5 items (dict)
>


# Now we will show as a pandas dataframe

In [11]:
raw_df = data_raw.to_data_frame()
raw_df.head()

Unnamed: 0,time,O2,O1,OZ,PZ,P4,CP4,P8,C4,TP8,...,F2,F6,FC5,F1,AF4,AF8,F5,AF7,AF3,FPZ
0,0,6253748.0,8242944.0,15000950.0,24360910.0,10690540.0,1188921.0,3539428.0,17058890.0,-2513232.0,...,-4592944.0,3691768.0,12936850.0,21315070.0,7617701.0,5630349.0,7997737.0,5734013.0,749085.4,8542372.0
1,2,7166750.0,9477653.0,16187340.0,25425050.0,12539220.0,1337556.0,4167398.0,18106560.0,-898265.7,...,-3960995.0,4459465.0,13811430.0,22133570.0,8093100.0,6695248.0,8453425.0,6465057.0,4346442.0,10169480.0
2,4,6961324.0,9260436.0,15639770.0,25081770.0,11632090.0,1063176.0,3916206.0,17890190.0,-1182595.0,...,-5777863.0,2551495.0,11891890.0,20212370.0,6451839.0,4973270.0,6778696.0,4169124.0,-317753.9,8372844.0
3,6,4096767.0,6105062.0,12251700.0,21683370.0,8577715.0,-1349102.0,1404325.0,14656050.0,-5129023.0,...,-9208469.0,-1083805.0,8166398.0,16551840.0,3271193.0,1370719.0,3622926.0,251190.2,-6650079.0,4553613.0
4,8,2031094.0,4047205.0,10129880.0,19474970.0,6683092.0,-3052541.0,-296903.5,12583460.0,-6982832.0,...,-9569584.0,-1388626.0,7666639.0,16063020.0,2908989.0,804284.2,3326748.0,445368.4,-8068571.0,2350257.0


Now we used the default settings above, let's just switch the settings and compare

In [12]:
def read_raw_agnostic(fname, int_arg):
    """
    For testing purposes
    """
    
    cnt_read_args = {
        'eog': 'auto',
        'data_format': int_arg,
        'date_format': 'dd/mm/yy',
        'verbose': False,
    }
    read = mne.io.read_raw_cnt(
            fname,
            data_format=int_arg,
            preload=True,
            
        )
    return read

In [13]:
n = 2

In [14]:
paths_df = acquired.raw
paths_df.cnt_path[n]

'/volume-ceph/DDP_projectfolder/dataset/11mnd mmn/025_11_mc_mmn36_wk.cnt'

In [15]:
read_on_32 = read_raw_agnostic(paths_df.cnt_path[n], 'int32')


Reading 0 ... 195389  =      0.000 ...   390.778 secs...


  read = mne.io.read_raw_cnt(


In [16]:
raw_df32 = read_on_32.to_data_frame()
raw_df32.head()

Unnamed: 0,time,O2,O1,OZ,PZ,P4,CP4,P8,C4,TP8,...,F2,F6,FC5,F1,AF4,AF8,F5,AF7,AF3,FPZ
0,0,6253748.0,8242944.0,15000950.0,24360910.0,10690540.0,1188921.0,3539428.0,17058890.0,-2513232.0,...,-4592944.0,3691768.0,12936850.0,21315070.0,7617701.0,5630349.0,7997737.0,5734013.0,749085.4,8542372.0
1,2,7166750.0,9477653.0,16187340.0,25425050.0,12539220.0,1337556.0,4167398.0,18106560.0,-898265.7,...,-3960995.0,4459465.0,13811430.0,22133570.0,8093100.0,6695248.0,8453425.0,6465057.0,4346442.0,10169480.0
2,4,6961324.0,9260436.0,15639770.0,25081770.0,11632090.0,1063176.0,3916206.0,17890190.0,-1182595.0,...,-5777863.0,2551495.0,11891890.0,20212370.0,6451839.0,4973270.0,6778696.0,4169124.0,-317753.9,8372844.0
3,6,4096767.0,6105062.0,12251700.0,21683370.0,8577715.0,-1349102.0,1404325.0,14656050.0,-5129023.0,...,-9208469.0,-1083805.0,8166398.0,16551840.0,3271193.0,1370719.0,3622926.0,251190.2,-6650079.0,4553613.0
4,8,2031094.0,4047205.0,10129880.0,19474970.0,6683092.0,-3052541.0,-296903.5,12583460.0,-6982832.0,...,-9569584.0,-1388626.0,7666639.0,16063020.0,2908989.0,804284.2,3326748.0,445368.4,-8068571.0,2350257.0


In [17]:
read_on_16 = read_raw_agnostic(paths_df.cnt_path[n], 'int16')
raw_df16 = read_on_16.to_data_frame()
raw_df16.head()

Reading 0 ... 390779  =      0.000 ...   781.558 secs...


  read = mne.io.read_raw_cnt(


Unnamed: 0,time,O2,O1,OZ,PZ,P4,CP4,P8,C4,TP8,...,F2,F6,FC5,F1,AF4,AF8,F5,AF7,AF3,FPZ
0,0,-274.444902,95.420848,211.836368,125.883886,104.424014,229.393813,166.724743,369.939166,382.993497,...,21.3517,75.96899,126.863087,115.004328,-40.58722,75.54019,107.605393,-40.784313,80.34632,118.103448
1,2,-271.136239,103.968602,220.539587,137.058045,115.462123,241.256002,171.777008,379.843596,390.802608,...,30.650021,85.615846,122.357021,121.769288,-29.706391,86.603284,102.911781,11.503268,94.199134,130.344827
2,4,-268.524137,109.376365,226.631841,144.740279,121.243989,247.535985,174.73868,386.099026,390.629072,...,37.193284,95.262702,113.171579,126.279262,-21.93437,94.0363,96.13212,52.28758,102.164502,145.862068
3,6,-266.086174,112.341913,230.287193,148.4068,122.99607,248.931537,175.087112,389.052979,386.117142,...,40.292725,101.808783,106.23917,128.707709,-18.825562,97.666378,71.621037,66.753811,100.779221,155.172413
4,8,-270.962099,106.236374,224.717133,141.42295,114.410874,239.162675,167.770039,380.886168,374.490243,...,32.716315,96.640824,100.866553,122.28967,-27.288429,88.677614,33.202956,44.444443,86.406926,148.275862


In [18]:
# The big difference

In [19]:
(raw_df32 -raw_df16).sum().sum()

-37583506229279.79

### Youch!

So here we saw an example of how reading the same file on int32 and int16 will give us dramatically different results. We must know which we have when we use cnt files.

## This is no floating point error!

So is there anything we can do to figure out which kind of files we had?
Why, yes of course. First of all on a few files in the DDP dataset there is information on this
in the metadata (numchannels). Second of all, we can be a bit sneaky, and ask ourselves if
32 is a possibility. When we look at the number of bit and bytes inside the file for the
information, is this a number that we could divide by 32? Well it turns out when you peel the header off of
the files you can calculate the number of bits/bytes inside. And they are (all tested so far) with a bit
number divisible by 32 (or a byte number divisible by 4). But wait, that means de-facto , they
are also divisble by 16. So we can say probably the files should have been read as 32; but
we can not say with 100% certainty. This is not a problem outside the cnt format, but for cnt formatted
data, back to cell 1.

In [20]:
4*8

32

So we now have to rely on subject matter expertise. Which values are, when read correctly in range for a proper EEG? And obviosly, we can't simply look at one single file. Let's take a look at multiple ones.

In [24]:
eeg_files= []
for file in acquired.raw.cnt_path:
    eeg_files.append(file)
# eeg_files

In [31]:
## This cell will speak what you need for widgets, probably into even the workspce
# !pip install ipympl

In [29]:
%matplotlib widget
import ipywidgets as widgets

In [30]:
list_of_numbers_strung = []
for i in range(len(eeg_files)):
    list_of_numbers_strung.append(str(i))


btn = widgets.Dropdown(
    options=list_of_numbers_strung,
    value='0',
    description='Picked File:',
    disabled=False,
)
display(btn)

Dropdown(description='Picked File:', options=('0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '1…

In [35]:
number_chosen = int(btn.value)
file_chosen = eeg_files[number_chosen] 
print("The file you chose is:",file_chosen)

The file you chose is: /volume-ceph/DDP_projectfolder/dataset/47mnd mmn/119_47_jr_mmn39.cnt


In [42]:
file_read_32 = read_raw_agnostic(file_chosen, 'int32')
file_read_16 = read_raw_agnostic(file_chosen, 'int16')
file_read_32_df = file_read_32.to_data_frame()
file_read_16_df = file_read_16.to_data_frame()

Reading 0 ... 236399  =      0.000 ...   472.798 secs...


  read = mne.io.read_raw_cnt(


Reading 0 ... 472799  =      0.000 ...   945.598 secs...


  read = mne.io.read_raw_cnt(


In [43]:
file_read_32_df.head()


Unnamed: 0,time,O2,O1,OZ,PZ,P4,CP4,P8,C4,TP8,...,F2,F6,FC5,F1,AF4,AF8,F5,AF7,AF3,FPZ
0,0,-3647282.0,-946016.6,-364158.9,-2831550.0,355121.5,-2145599.0,-1813052.0,-2327413.0,-7075182.0,...,-9032509.0,-3107359.0,-4888243.0,-2026217.0,-4869230.0,-4058429.0,-3986451.0,-6287892.0,-6257822.0,-4501096.0
1,2,-4137389.0,-1914826.0,-1456431.0,-3516607.0,-1443688.0,-2933057.0,-2451614.0,-2781545.0,-7222614.0,...,-10243640.0,-4463286.0,-5252068.0,-2902689.0,-5763816.0,-4831450.0,-4237747.0,-5018904.0,-5791325.0,-4795134.0
2,4,-4262757.0,-3407928.0,-5245229.0,-4224515.0,-3059170.0,-4622093.0,-3671741.0,-2940496.0,-6916488.0,...,-10752990.0,-5129944.0,-5308945.0,-3323850.0,-6363973.0,-6025082.0,-4340556.0,-4618769.0,-6599156.0,-4840374.0
3,6,-4570499.0,-3681480.0,-6872252.0,-4544205.0,-2302980.0,-5044352.0,-4002433.0,-3212974.0,-6564967.0,...,-10571880.0,-4971754.0,-5354416.0,-3471827.0,-6194118.0,-7002726.0,-4420515.0,-4390131.0,-6940495.0,-4286237.0
4,8,-4581895.0,-2541707.0,-4482917.0,-4167393.0,870717.1,-3994403.0,-3044589.0,-3258382.0,-6825749.0,...,-9360755.0,-3796625.0,-5229324.0,-2800244.0,-5571320.0,-7400603.0,-4591854.0,-5613407.0,-4824246.0,-4433246.0


In [44]:
file_read_16_df.head()

Unnamed: 0,time,O2,O1,OZ,PZ,P4,CP4,P8,C4,TP8,...,F2,F6,FC5,F1,AF4,AF8,F5,AF7,AF3,FPZ
0,0,-60.869569,-55.82609,-18.57639,-14.634147,-70.454545,-5.746625,-29.404087,-43.135556,-55.536332,...,-82.037997,-48.620689,-33.824802,-57.316545,-91.576671,-100.780565,-48.104574,-85.651986,-89.409726,-65.573774
1,2,-68.34783,-60.34783,-23.611112,-18.118467,-73.251747,-3.656943,-33.057849,-46.946729,-61.418685,...,-85.492229,-61.724138,-34.171723,-60.790275,-93.131747,-95.576749,-43.921568,-96.118624,-90.451393,-68.852462
2,4,-72.86957,-63.304352,-35.069446,-29.442509,-75.524475,-22.464081,-41.061328,-53.529665,-61.937716,...,-87.564768,-70.862069,-35.212486,-63.395573,-96.069112,-84.822196,-38.169934,-93.327521,-91.319449,-70.75065
3,6,-70.956526,-64.521743,-47.395836,-43.205576,-75.524475,-53.112751,-55.154411,-60.112602,-60.553633,...,-88.428326,-73.448276,-37.987855,-64.611378,-96.760257,-76.322631,-35.729847,-88.966422,-93.576393,-73.339089
4,8,-67.652178,-65.217395,-56.076392,-52.26481,-73.951048,-80.452757,-68.37755,-64.270246,-58.477508,...,-88.601037,-79.827586,-41.630526,-64.785065,-94.514036,-71.812657,-38.867101,-91.059749,-94.791671,-75.064714
