# Magics, Imports, and Versions

In [2]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline

import json
import operator
from pathlib import Path
import pyedflib
from subprocess import call
import sys
import typing
import urllib

import fastai
from fastai.vision import *
from fastai.callbacks.hooks import *
from fastai.utils.mem import *

In [3]:
print(f'__Python VERSION: {sys.version}')

try:
    print(f'__pyTorch VERSION: {torch.__version__}')
    PYTORCH = True
except: 
    print("Pytorch Not Installed")
    PYTORCH = False

try:
    print(f'__fastai VERSION: {fastai.__version__}')
except:
    print("fastai Not Installed")
    
print('__CUDA VERSION')

! nvcc --version

if PYTORCH:
    print(f'__CUDNN VERSION: {torch.backends.cudnn.version()}')
    print(f'__Number CUDA Devices: {torch.cuda.device_count()}')
    
print(f'__Devices')

try:
    call(["nvidia-smi", "--format=csv", "--query-gpu=index,name,driver_version,memory.total,memory.used,memory.free"])
    print(f'Active CUDA Device: GPU {torch.cuda.current_device()}')

    print (f'Available devices: {torch.cuda.device_count()}')
    print (f'Current cuda device {torch.cuda.current_device()}')
except:
    print("No GPUs Found")

__Python VERSION: 3.7.1 (default, Dec 14 2018, 19:28:38) 
[GCC 7.3.0]
__pyTorch VERSION: 1.0.1.post2
__fastai VERSION: 1.0.53.dev0
__CUDA VERSION
nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2018 NVIDIA Corporation
Built on Sat_Aug_25_21:08:01_CDT_2018
Cuda compilation tools, release 10.0, V10.0.130
__CUDNN VERSION: 7402
__Number CUDA Devices: 1
__Devices
Active CUDA Device: GPU 0
Available devices: 1
Current cuda device 0


# Load in Data Dict

In [4]:
# Resolve the project directories relative to the current working directory.
# Path.cwd() gives the same location as Path(os.getcwd()) without relying on
# `os`, which this notebook never imports by name.
here = Path.cwd()
data_path = here.parent / 'data'
raw_path = data_path / 'raw' / 'v1.5.0/edf'

In [5]:
# Read the saved data dictionary (JSON) from the directory above `here`.
data_dict_file = here.parent / 'data_dict.json'
with open(data_dict_file, 'r') as handle:
    data_dict = json.load(handle)

In [6]:
# Display everything stored under one key to see what its records look like.
data_dict['00004151']

[{'date': '2007/09/28',
  'session': '002',
  'segment': '002',
  'total_segments': '003',
  'labels': ['bckg'],
  'durations': ['256.0000'],
  'channels': '41',
  'number_obs': '64000',
  'sample_rate': '250.0',
  'config': 'le',
  'type': 'dev_test',
  'loc': '/home/jupyter/tuh_eeg/data/raw/v1.5.0/edf/dev_test/02_tcp_le/041/00004151/s002_2007_09_28/00004151_s002_t002.edf',
  'label_file': '/home/jupyter/tuh_eeg/data/raw/v1.5.0/edf/dev_test/02_tcp_le/041/00004151/s002_2007_09_28/00004151_s002_t002.tse'},
 {'date': '2007/09/28',
  'session': '002',
  'segment': '001',
  'total_segments': '003',
  'labels': ['bckg'],
  'durations': ['490.0000'],
  'channels': '41',
  'number_obs': '122500',
  'sample_rate': '250.0',
  'config': 'le',
  'type': 'dev_test',
  'loc': '/home/jupyter/tuh_eeg/data/raw/v1.5.0/edf/dev_test/02_tcp_le/041/00004151/s002_2007_09_28/00004151_s002_t001.edf',
  'label_file': '/home/jupyter/tuh_eeg/data/raw/v1.5.0/edf/dev_test/02_tcp_le/041/00004151/s002_2007_09_28/000

# Observations per Electrode Config

In [7]:
# Flatten the dictionary into one 'config' value per entry, in iteration order.
configs = [
    record['config']
    for records in data_dict.values()
    for record in records
]

In [8]:
# Tally how many entries carry each config value.
collections.Counter(configs)

Counter({'le': 535, 'ar': 4074, 'a': 1001})

## Total Time per Electrode Config

In [9]:
# Group each entry's last 'durations' value (as a float) under its config.
# Note: this rebinds `configs` from the flat list built earlier to a dict of lists.
configs = defaultdict(list)
for records in data_dict.values():
    for record in records:
        bucket = configs[record['config']]
        bucket.append(float(record['durations'][-1]))

In [10]:
# Sum the collected values for each config, inserting keys in sorted order.
# A plain dict replaces `defaultdict()`: with no default factory it never
# supplied defaults, so it only obscured that this is an ordinary mapping.
summary = {config: sum(values) for config, values in sorted(configs.items())}
for config, config_total in summary.items():
    print(config, config_total)

a 452039.0
ar 2376345.0
le 493132.0


In [11]:
# Print each key's share of the grand total, largest share first.
total = sum(summary.values())
ranked = sorted(summary.items(), key=lambda pair: pair[1], reverse=True)
for key, value in ranked:
    print(f'{key} {(value/total)*100:.2f}%')

ar 71.54%
le 14.85%
a 13.61%


# Time Breakdown for each Label

In [12]:
# Split every entry's time across its labels.
# Each 'durations' value is treated as a running end point, so a label's own
# span is the gap between its value and the previous one (starting from 0).
time_breakdown = collections.defaultdict(list)
for records in data_dict.values():
    for record in records:
        previous_end = 0.0
        for label, raw_end in zip(record['labels'], record['durations']):
            # Convert once and reuse for both the difference and the carry-over.
            current_end = float(raw_end)
            # Called `span`, not `time`, so the notebook-global name `time`
            # is not rebound to a float by this loop.
            span = current_end - previous_end
            time_breakdown[label].append(span)
            previous_end = current_end

In [13]:
# Collapse each key's list of spans into a single total.
# A plain dict replaces `defaultdict()`, which had no default factory and so
# behaved like a regular dict anyway.
summary = {group: sum(spans) for group, spans in time_breakdown.items()}

# Show the totals ordered by key.
sorted(summary.items())

[('absz', 851.9839999999983),
 ('bckg', 3092339.9311999874),
 ('cpsz', 35342.824),
 ('fnsz', 122307.46089999966),
 ('gnsz', 60024.922299999984),
 ('mysz', 1312.0),
 ('spsz', 2145.8244000000004),
 ('tcsz', 5879.5585),
 ('tnsz', 1311.4947)]

In [14]:
# Express every total as a percentage of the overall sum, descending.
total = sum(summary.values())
by_share = sorted(summary.items(), key=lambda item: item[1], reverse=True)

for key, value in by_share:
    print(f'{key} {(value/total)*100:.2f}%')

bckg 93.10%
fnsz 3.68%
gnsz 1.81%
cpsz 1.06%
tcsz 0.18%
spsz 0.06%
mysz 0.04%
tnsz 0.04%
absz 0.03%


# Channel Breakdown

In [15]:
# Group each entry's last 'durations' value (as a float) by its integer
# channel count. Note: `time_breakdown` is rebound here and is now keyed by
# channel count rather than by label.
time_breakdown = defaultdict(list)
for records in data_dict.values():
    for record in records:
        bucket = time_breakdown[int(record['channels'])]
        bucket.append(float(record['durations'][-1]))

In [16]:
# Reduce each group's collected values to one total.
# `defaultdict()` without a factory never supplies defaults, so a plain dict
# expresses the same mapping without the misleading type.
summary = {group: sum(values) for group, values in time_breakdown.items()}

# Show the totals ordered by key.
sorted(summary.items())

[(25, 6007.0),
 (26, 4079.0),
 (27, 322983.0),
 (28, 114145.0),
 (29, 315438.0),
 (30, 322669.0),
 (31, 243163.0),
 (32, 326016.0),
 (33, 550259.0),
 (34, 572917.0),
 (35, 5879.0),
 (36, 242326.0),
 (41, 281907.0),
 (128, 1250.0),
 (129, 12478.0)]

In [17]:
# Rank the totals from largest to smallest and print each as a percentage.
total = sum(summary.values())
descending = sorted(summary.items(), key=lambda entry: entry[1], reverse=True)

for key, value in descending:
    print(f'{key} {(value/total)*100:.2f}%')

34 17.25%
33 16.57%
32 9.82%
27 9.72%
30 9.71%
29 9.50%
41 8.49%
31 7.32%
36 7.30%
28 3.44%
129 0.38%
25 0.18%
35 0.18%
26 0.12%
128 0.04%


# Sample Rate (Hz) Breakdown

In [18]:
# Count entries per sample rate; the stored value is parsed as a float and
# truncated to an int before being used as the key.
hertz = collections.defaultdict(int)
for records in data_dict.values():
    for record in records:
        rate = int(float(record['sample_rate']))
        hertz[rate] += 1
hertz.items()

dict_items([(250, 1045), (400, 637), (256, 3750), (512, 127), (1000, 51)])

In [19]:
# Print each sample rate's share of all entries, most common first.
total_hertz = sum(hertz.values())
ranked_rates = sorted(hertz.items(), key=lambda pair: pair[1], reverse=True)
for key, value in ranked_rates:
    print(f'{key} {(value/total_hertz)*100:.2f}%')

256 66.84%
250 18.63%
400 11.35%
512 2.26%
1000 0.91%
