# Magics, Imports, and Versions

In [9]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline

import json
import operator
import numpy as np
from pathlib import Path
import pyedflib
from scipy import signal
from subprocess import call
import sys
import typing
import urllib

import fastai
from fastai.vision import *
from fastai.callbacks.hooks import *
from fastai.utils.mem import *

In [10]:
print(f'__Python VERSION: {sys.version}')

try:
    print(f'__pyTorch VERSION: {torch.__version__}')
    PYTORCH = True
except: 
    print("Pytorch Not Installed")
    PYTORCH = False

try:
    print(f'__fastai VERSION: {fastai.__version__}')
except:
    print("fastai Not Installed")
    
print('__CUDA VERSION')

! nvcc --version

if PYTORCH:
    print(f'__CUDNN VERSION: {torch.backends.cudnn.version()}')
    print(f'__Number CUDA Devices: {torch.cuda.device_count()}')
    
print(f'__Devices')

try:
    call(["nvidia-smi", "--format=csv", "--query-gpu=index,name,driver_version,memory.total,memory.used,memory.free"])
    print(f'Active CUDA Device: GPU {torch.cuda.current_device()}')

    print (f'Available devices: {torch.cuda.device_count()}')
    print (f'Current cuda device {torch.cuda.current_device()}')
except:
    print("No GPUs Found")

__Python VERSION: 3.7.1 (default, Dec 14 2018, 19:28:38) 
[GCC 7.3.0]
__pyTorch VERSION: 1.0.1.post2
__fastai VERSION: 1.0.53.dev0
__CUDA VERSION
nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2018 NVIDIA Corporation
Built on Sat_Aug_25_21:08:01_CDT_2018
Cuda compilation tools, release 10.0, V10.0.130
__CUDNN VERSION: 7402
__Number CUDA Devices: 1
__Devices
Active CUDA Device: GPU 0
Available devices: 1
Current cuda device 0


# Load In Data Dict

In [11]:
# Resolve the project layout relative to the notebook's working directory.
here = Path.cwd()
data_path = here.parent / 'data'
raw_path = data_path / 'raw' / 'v1.5.0/edf'

# Load the JSON index that sits next to the notebook.
with open(here / 'data_dict.json', 'r') as file:
    data_dict = json.load(file)

In [12]:
# Sanity check: display the list of entries stored under one key of data_dict.
data_dict['00004151']

[{'date': '2007/09/28',
  'session': '002',
  'segment': '002',
  'total_segments': '003',
  'labels': ['bckg'],
  'durations': ['256.0000'],
  'channels': '41',
  'number_obs': '64000',
  'sample_rate': '250.0',
  'config': 'le',
  'type': 'dev_test',
  'loc': '/home/jupyter/tuh_eeg/data/raw/v1.5.0/edf/dev_test/02_tcp_le/041/00004151/s002_2007_09_28/00004151_s002_t002.edf',
  'label_file': '/home/jupyter/tuh_eeg/data/raw/v1.5.0/edf/dev_test/02_tcp_le/041/00004151/s002_2007_09_28/00004151_s002_t002.tse'},
 {'date': '2007/09/28',
  'session': '002',
  'segment': '001',
  'total_segments': '003',
  'labels': ['bckg'],
  'durations': ['490.0000'],
  'channels': '41',
  'number_obs': '122500',
  'sample_rate': '250.0',
  'config': 'le',
  'type': 'dev_test',
  'loc': '/home/jupyter/tuh_eeg/data/raw/v1.5.0/edf/dev_test/02_tcp_le/041/00004151/s002_2007_09_28/00004151_s002_t001.edf',
  'label_file': '/home/jupyter/tuh_eeg/data/raw/v1.5.0/edf/dev_test/02_tcp_le/041/00004151/s002_2007_09_28/000

# Ensure Directories

In [13]:
def ensure_dir(directory):
    """Create `directory` (including missing parents) unless it already exists.

    Prints a "Creating ..." message only when the directory is actually made.
    Returns None.
    """
    if os.path.exists(directory):
        # Nothing to do: leave the existing path untouched and stay silent.
        return
    print(f'Creating {directory}')
    os.makedirs(directory)

In [14]:
# Make sure the output tree exists; the parent directory is handled first,
# followed by the train/validation and test sub-directories.
for _subdir in ('augmented', 'augmented/train_val', 'augmented/test'):
    ensure_dir(data_path / _subdir)

In [15]:
# Destination directories read by save_eeg: entries whose 'type' is
# 'dev_test' go to test_path, everything else goes to train_path.
train_path = data_path / 'augmented/train_val'
test_path = data_path / 'augmented/test'

# Design New File Names

In [16]:
# File-name template for each extracted window:
#   f"{config}_{key}_s{session}_d{date}_t{segment}_ts{total_segments}_ch{ch}_h{h}_w{w}_o{o}_st{st}_et{et}_p{p}_tp{tp}"
# Example:
#   le_00004151_s002_d2007_09_28_t002_ts003_ch041_h12_w1_o25_st00005.25_et00006.25_p00008_tp00342

# Define Transformations and Processors

In [64]:
def process_eeg(eeg, key, entry, h=0, w=0, op=0):
    """Cut `eeg` into overlapping fixed-width windows and process each one.

    For every window a file name is built with `build_filename` and
    `write_labels` is called; saving the window itself is currently disabled.

    Parameters
    ----------
    eeg : array indexed as ``eeg[:, sample]``
        Assumed to already be sampled at `h` samples per second (the caller
        passes the output of `down_sample_eeg`) -- TODO confirm.
    key : identifier forwarded unchanged to `build_filename`.
    entry : dict
        ``entry['durations'][-1]`` is read as the total length to cover, in
        the same time unit as `w`.
    h : samples per time unit, used to convert window times to sample indices.
    w : window width (same time unit as the durations).
    op : overlap between consecutive windows, as a percentage of `w`.

    Raises
    ------
    ValueError
        If `w` and `op` give a non-positive step between windows (e.g. the
        default ``w=0`` or ``op >= 100``), which cannot tile the recording.
    """
    length = float(entry['durations'][-1])
    step_value = w - (w * (op / 100))
    if step_value <= 0:
        raise ValueError(
            f'window step must be positive, got {step_value} (w={w}, op={op})'
        )

    # Compute the window start times once; they drive both the piece count
    # and the iteration below.
    window_starts = np.arange(0, length, step_value)
    total_pieces = len(window_starts)

    for piece, start in enumerate(window_starts, start=1):
        end = start + w
        fn = build_filename(key, entry, h, w, op, start, end, piece, total_pieces)
        start_obs = int(start * h)
        end_obs = int(end * h)
        # Windows that run past the end of the recording are shortened by the
        # slice rather than padded.
        new_eeg = eeg[:, start_obs:end_obs]
        # Saving is disabled for now; when re-enabled it should receive this
        # function's `entry`:  save_eeg(new_eeg, fn, entry)
        write_labels(start, end, entry, fn)
        

def write_labels(start, end, entry, fn):
    """Placeholder for writing the labels of one window (not implemented yet).

    At present this only pairs ``entry['durations']`` with ``entry['labels']``
    in a lazy ``zip`` object and discards it: `start`, `end`, and `fn` are
    unused, nothing is written, and the function returns None.
    """
    # TODO: use `ground_truth` together with start/end to emit labels for `fn`.
    ground_truth = zip(entry['durations'],entry['labels'])
    
    
def down_sample_eeg(eeg, entry, h):
    """Resample `eeg` along axis 1 to `h` samples per unit of duration.

    Parameters
    ----------
    eeg : 2-D array whose second axis is the sample axis.
    entry : dict
        ``entry['durations'][-1]`` is read as the total length that `eeg`
        spans (presumably seconds -- TODO confirm against the data dict).
    h : target number of samples per unit of duration.

    Returns
    -------
    Array with the same first dimension as `eeg` and
    ``round(duration * h)`` samples along axis 1.
    """
    duration = float(entry['durations'][-1])
    # Scale first, then round: truncating the duration to whole units before
    # multiplying would squeeze a fractional-length recording into too few
    # samples, so sample index `t * h` would no longer correspond to time `t`.
    num_elems = int(round(duration * h))
    return signal.resample(eeg, num=num_elems, axis=1)


def save_eeg(new_eeg, fn, value):
    """Save `new_eeg` as ``<fn>.npy`` in the test or train/val directory.

    Entries whose ``value['type']`` equals ``'dev_test'`` are written under
    the module-level `test_path`; every other type goes under `train_path`.
    """
    out_dir = test_path if value['type'] == 'dev_test' else train_path
    np.save(f'{out_dir/fn}.npy', new_eeg)

        
def get_eeg(entry):
    """Read every signal of the EDF file at ``entry['loc']`` into one array.

    Returns
    -------
    numpy.ndarray of shape ``(n_signals, n_samples)``, where `n_samples` is
    the sample count reported for the first signal. All signals are therefore
    assumed to have the same length; a file with differing lengths would fail
    on the row assignment.
    """
    # The reader is used as a context manager so the underlying file handle is
    # released even if reading a signal raises.
    with pyedflib.EdfReader(entry['loc']) as f:
        n = f.signals_in_file
        sigbufs = np.zeros((n, f.getNSamples()[0]))
        for i in range(n):
            sigbufs[i, :] = f.readSignal(i)
    return sigbufs
    
    
def build_filename(key, entry, h, w ,op, st, et, p, tp):
    """Build the output file name (without extension) for one window.

    Parameters
    ----------
    key : identifier placed right after the config prefix.
    entry : dict providing 'date', 'session', 'config', 'segment',
        'total_segments' and 'channels' (all strings).
    h, w, op : sampling rate, window width and overlap percentage, embedded
        as given.
    st, et : window start / end, stringified and zero-padded to 8 characters.
    p, tp : piece number and total number of pieces, zero-padded to 5.

    Returns
    -------
    str such as
    ``le_00004151_s002_d2007_09_28_t002_ts003_ch041_h12_w1_o25_st00005.25_et00006.25_p00008_tp00342``.
    """
    date = entry['date'].replace('/', '_')
    session = entry['session']
    config = entry['config']
    segment = entry['segment']
    total_segments = entry['total_segments']
    channels = entry['channels']
    # The overlap field must come from the `op` argument; reading a name that
    # is not a parameter would depend on whatever global happens to exist.
    return (
        f"{config}_{key}_s{session}_d{date}_t{segment}_ts{total_segments}"
        f"_ch{channels.zfill(3)}_h{h}_w{w}_o{op}"
        f"_st{str(st).zfill(8)}_et{str(et).zfill(8)}"
        f"_p{str(p).zfill(5)}_tp{str(tp).zfill(5)}"
    )

In [65]:
# Parameter grid for the sweep: sampling rates (h), window widths (w) and
# overlap percentages (o).
H_VALUES = (12, 24, 48, 64, 96)
W_VALUES = (1, 2, 4, 6, 8)
O_VALUES = (25, 50, 75)

for key, value in data_dict.items():
    # Only this one key is processed for now; every other key is skipped.
    if key != '00004151':
        continue
    for entry in value:
        eeg = get_eeg(entry)
        print(eeg.shape)
        for h in H_VALUES:
            # Resample once per rate, then window it for every (w, o) pair.
            dn_sampled = down_sample_eeg(eeg, entry, h)
            print(h, dn_sampled.shape)
            for w, o in itertools.product(W_VALUES, O_VALUES):
                process_eeg(dn_sampled, key, entry, h, w, o)

(41, 64000)
12 (41, 3072)
24 (41, 6144)
48 (41, 12288)
64 (41, 16384)
96 (41, 24576)
(41, 122500)
12 (41, 5880)
24 (41, 11760)
48 (41, 23520)
64 (41, 31360)
96 (41, 47040)
(41, 110750)
12 (41, 5316)
24 (41, 10632)
48 (41, 21264)
64 (41, 28352)
96 (41, 42528)
(41, 355750)
12 (41, 17076)
24 (41, 34152)
48 (41, 68304)
64 (41, 91072)
96 (41, 136608)


# EDF Contents (preserve?)

['__class__', '__del__', '__delattr__', '__dict__', '__dir__', '__doc__', '__enter__', '__eq__', '__exit__', '__format__', '__ge__', '__getattribute__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__setstate__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', '_close', '_convert_string', '_get_float', 'admincode', 'annotations_in_file', 'birthdate', 'check_open_ok', 'datarecord_duration', 'datarecords_in_file', 'digital_max', 'digital_min', 'equipment', 'file_duration', 'file_info', 'file_info_long', 'file_name', 'gender', 'getAdmincode', 'getBirthdate', 'getDigitalMaximum', 'getDigitalMinimum', 'getEquipment', 'getFileDuration', 'getGender', 'getHeader', 'getLabel', 'getNSamples', 'getPatientAdditional', 'getPatientCode', 'getPatientName', 'getPhysicalDimension', 'getPhysicalMaximum', 'getPhysicalMinimum', 'getPrefilter', 'getRecordingAdditional', 'getSampleFrequencies', 'getSampleFrequency', 'getSignalHeader', 'getSignalHeaders', 'getSignalLabels', 'getStartdatetime', 'getTechnician', 'getTransducer', 'handle', 'load_datarecord', 'make_buffer', 'open', 'patient', 'patient_additional', 'patientcode', 'patientname', 'physical_dimension', 'physical_max', 'physical_min', 'prefilter', 'readAnnotations', 'readSignal', 'read_annotation', 'read_digital_signal', 'readsignal', 'recording_additional', 'samplefrequency', 'samples_in_datarecord', 'samples_in_file', 'signal_label', 'signals_in_file', 'startdate_day', 'startdate_month', 'startdate_year', 'starttime_hour', 'starttime_minute', 'starttime_second', 'technician', 'transducer']