# Processing TVB Simulated FFT Maps

Here, I run through the FFT computed data to create the images that will be fed into the deep learning pipeline.

In [18]:
import sys
sys.path.append('../../dnn/')
sys.path.append('../dnn/')
import time
import numpy as np
# np.random.seed(1234)
import math as m
import os

import processing
import processing.preprocessfft
from processing.util import DataHandler
import peakdetect

# import DNN frameworks
import tensorflow as tf
import keras
from sklearn.decomposition import PCA

import ntpath

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [19]:
datadir = '/Volumes/ADAM LI/pydata/output/outputfft/tvbsim/full/win500_step250/'

testfilename = 'id008_gc_dist-1.0_fftmodel.npz'

data = np.load(os.path.join(datadir, testfilename), encoding='bytes')
metadata = data['metadata'].item()
print(data.keys())
print(metadata.keys())

['phase', 'timepoints', 'freqs', 'power', 'metadata']
dict_keys(['gainmat', 'x0norm', b'stepsize', 'pz', 'onsettimes', 'seeg_xyz', b'winsize', 'x0ez', 'regions_centers', 'regions', 'epiparams', 'offsettimes', b'seizoffsets', 'x0pz', 'ez', 'pzindices', 'patient', 'samplerate', b'seizonsets', 'ezindices', 'chanlabels'])


# Run Through the Simulated FFT Computations

- get all the datafiles
- compute into images

In [38]:
# get all datafiles for the fft maps
fftdatadir = '/Volumes/ADAM LI/pydata/output/outputfft/tvbsim/full/'
# Get ALL datafiles from all downstream files
datafiles = []
for root, dirs, files in os.walk(fftdatadir):
    for file in files:
        if '.DS' not in file:
            datafiles.append(os.path.join(root, file))
print(len(datafiles))
# print(datafiles[7:])

expname = 'expfull'
trainimagedir = os.path.join('/Volumes/ADAM LI/pydata/dnn/traindata_fft/', 
                        expname)
if not os.path.exists(trainimagedir):
    os.makedirs(trainimagedir)
print(trainimagedir)

18
/Volumes/ADAM LI/pydata/dnn/traindata_fft/expfull


In [27]:
# establish frequency bands
freqbands = {
        'dalpha':[0,15],
        'beta':[15,30],
        'gamma':[30,90],
        'high':[90,200],
    }
postprocessfft = processing.preprocessfft.PreProcess(freqbands=freqbands)

winsizems = 500
stepsizems = 250
typetransform = 'fourier'
mtbandwidth = 4
mtfreqs = []
def path_leaf(path):
    head, tail = ntpath.split(path)
    return tail or ntpath.basename(head)
def decodebytes(metadata):
    def convert(data):
        if isinstance(data, bytes):  return data.decode('ascii')
        if isinstance(data, dict):   return dict(map(convert, data.items()))
        if isinstance(data, tuple):  return map(convert, data)
        return data
    try:
        metadata = {k.decode("utf-8"): (v.decode("utf-8") if isinstance(v, bytes) else v) for k,v in metadata.items()}
    except AttributeError:
        print('trying to convert metadata bytes to unicode')
    for key in metadata.keys():
        metadata[key] = convert(metadata[key])
    return metadata

In [40]:
# define the data handler 
datahandler = DataHandler()
pca = PCA(n_components=2)

# rawdatadir = '/Volumes/ADAM LI/pydata/convertedtng/'
checkrawdata = lambda patient: os.path.join(rawdatadir, patient)

for idx, datafile in enumerate(datafiles):
    print(idx)
    # perform file identification
    dirname = os.path.dirname(datafile)
    filename = path_leaf(datafile)
    fileid = filename.split('_fftmodel')[0]
    patient = '_'.join(fileid.split('_')[0:2])
    
    # load in the data for this fft computation
    fftdata = np.load(datafile, encoding='bytes')
    power = fftdata['power']
    freqs = fftdata['freqs']
    timepoints = fftdata['timepoints']
    metadata = fftdata['metadata'].item()
    
    # extract the metadata needed
    metadata = decodebytes(metadata) 
    print(metadata.keys())
    onset_times = metadata['onsettimes']
    offset_times = metadata['offsettimes']
    seeg_labels = metadata['chanlabels']
    seeg_xyz = metadata['seeg_xyz']
    samplerate = metadata['samplerate']
    
    # get indices of channels that we have seeg_xyz for
    power = np.abs(power)
    
    # get overlapping indices on seeg with xyz
    xyzinds = [i for i,x in enumerate(seeg_labels) if any(thing==x for thing in seeg_labels)]
    seeg_xyz = seeg_xyz[xyzinds,:]
    
    print("Patient is: ", patient)
    print("file id is: ", fileid)
#     print(dirname)
#     print("Filename loaded is: ", filename)
    print("\n\n")
    print(seeg_labels.shape)
    print(power.shape)
    assert power.shape[0] == seeg_xyz.shape[0]
    assert power.shape[0] == len(seeg_labels)
    
    # postprocess fft into bins
    power = postprocessfft.binFrequencyValues(power, freqs)

    # project xyz data
    seeg_xyz = pca.fit_transform(seeg_xyz)
    
#     print(seeg_xyz.shape)
#     print(power.shape)
    # Tensor of size [samples, freqbands, W, H] containing generated images.
    image_tensor = datahandler.gen_images(seeg_xyz, power, 
                            n_gridpoints=32, normalize=False, augment=False, 
                            pca=False, std_mult=0., edgeless=False)
    print(image_tensor.shape)
    # compute ylabels    
    ylabels = datahandler.computelabels(onset_times, offset_times, timepoints)
    # instantiate metadata hash table
    metadata = dict()
    metadata['chanlabels'] = seeg_labels
    metadata['seeg_xyz'] = seeg_xyz
    metadata['ylabels'] = ylabels
    metadata['samplerate'] = samplerate
    metadata['timepoints'] = timepoints
    
    # save image and meta data
    imagefilename = os.path.join(trainimagedir, filename.split('.npz')[0])
    print(image_tensor.shape)
    print('saved at ', imagefilename)
    np.savez_compressed(imagefilename, image_tensor=image_tensor, metadata=metadata)

    

0
trying to convert metadata bytes to unicode
dict_keys(['gainmat', 'x0norm', b'stepsize', 'pz', 'onsettimes', 'seeg_xyz', b'winsize', 'x0ez', 'regions_centers', 'regions', 'epiparams', 'offsettimes', b'seizoffsets', 'x0pz', 'ez', 'pzindices', 'patient', 'samplerate', b'seizonsets', 'ezindices', 'chanlabels'])
(103,)
(103, 251, 159)
Patient is:  id008_gc
file id is:  id008_gc_dist11.0



(103,)
(103, 251, 159)
(103, 4, 159)
(103, 2)
(103, 4, 159)
(159, 103)
(159, 4, 32, 32)9/159
(159, 4, 32, 32)
saved at  /Volumes/ADAM LI/pydata/dnn/traindata_fft/expfull/id008_gc_dist11.0_fftmodel
1
trying to convert metadata bytes to unicode
dict_keys(['gainmat', 'x0norm', b'stepsize', 'pz', 'onsettimes', 'seeg_xyz', b'winsize', 'x0ez', 'regions_centers', 'regions', 'epiparams', 'offsettimes', b'seizoffsets', 'x0pz', 'ez', 'pzindices', 'patient', 'samplerate', b'seizonsets', 'ezindices', 'chanlabels'])
(103,)
(103, 251, 159)
Patient is:  id008_gc
file id is:  id008_gc_dist13.0



(103,)
(103, 251, 159