In [None]:
from __future__ import print_function
import time

import pandas as pd
import numpy as np
from functools import reduce
import math
import scipy.io
from scipy.interpolate import griddata
from sklearn.preprocessing import scale

In [None]:
def get_frame_intervals(rate, samples, frame_duration, overlap):
    """
    Get the intervals of each frame in the dataset
    :param rate: sampling rate of device (in Hz)
    :param samples: number of samples in the session
    :param frame_duration: desired duration of frame in seconds
    :param overlap: a float that represents a fraction of the frame that can overlap [0, 1)

    :return: list of (start, end) index tuples, one per frame that fits entirely
        inside the session
    :raises ValueError: if the frame size and overlap do not move the window
        forward by at least one sample per frame
    """

    samples_per_frame = rate * frame_duration

    # Distance between the starts of two consecutive frames, in whole samples.
    step = int(samples_per_frame) - int(overlap * samples_per_frame)
    if step <= 0:
        # A non-advancing window would make the loop below run forever.
        raise ValueError(
            "frame of " + str(samples_per_frame) + " samples with overlap "
            + str(overlap) + " does not advance the window"
        )

    intervals = []
    start = 0
    while start + samples_per_frame <= samples:
        intervals.append((start, int(start + samples_per_frame)))
        start += step

    return intervals

In [None]:
# Quick look at the intervals for a 3 s frame at 128 Hz without overlap:
# the full list, then the start and end index of the first frame.
intv = get_frame_intervals(128.0, 1000, 3, 0)
print(intv, intv[0][0], intv[0][1], sep="\n")

In [None]:
# Scratch check of the frequency axis for a 10-sample snippet sampled at 128 Hz.
length = 10
T = length / 128.0          # duration covered by the snippet
lst = np.arange(length)     # bin indices 0 .. length-1
freqs = (lst / T)[:length // 2]  # keep only the first half of the bins

print(freqs)

In [None]:
def get_fft(snippet, rate):
    """
    Compute the first half of the length-normalised FFT magnitude of a snippet
    :param snippet: a data frame of a particular channel i.e. electrode
    :param rate: sampling rate of data

    :return freqs: an array with the frequency of each retained FFT bin
    :return Y: an array with the magnitude of each retained FFT bin
    """

    n_samples = len(snippet)
    half = n_samples // 2  # only the first half of the bins is returned

    # The snippet covers n_samples / rate units of time; bin k sits at k / duration.
    duration = float(n_samples) / float(rate)
    freqs = (np.arange(n_samples) / duration)[:half]

    # Normalise the transform by the snippet length before taking magnitudes.
    spectrum = np.fft.fft(snippet) / float(n_samples)

    return freqs, np.abs(spectrum[:half])

In [None]:
def get_EEG_bands(freqs, Y):
    """
    Average the FFT values that fall inside each EEG frequency band
    :param freqs: frequencies of FFT
    :param Y: value of FFT

    :return: delta, theta, alpha, beta, and gamma averages
    """
    # (lower, upper) edges, ordered delta, theta, alpha, beta, gamma.
    band_edges = ((0, 4), (4, 8), (8, 14), (14, 32), (32, 50))

    averages = []
    for lower, upper in band_edges:
        # The lower edge is excluded and the upper edge is included.
        in_band = (freqs > lower) & (freqs <= upper)
        averages.append(Y[in_band].mean())

    return tuple(averages)

In [None]:
# Scratch cell: array indexing and reshaping experiments.
w, h = 8, 5
matrix = np.zeros((h, w), dtype=int)

print(matrix)
print(matrix.shape)
# Set three entries of the first row in one assignment.
matrix[0, [0, 1, 7]] = [5, 6, 9]
print(matrix)
print(matrix.shape)
print("list")

# First five columns of every row, then the row and column counts.
for lst in matrix:
    print(lst[:5])
print(*matrix.shape, sep="\n")

# Two toy frames, each transposed from [2, 5] to [5, 2].
tmp1 = np.array([[1, 2, 3, 4, 5], [2, 3, 4, 5, 6]]).T
tmp2 = np.array([[0] * 5, [1] * 5]).T
tmp3 = np.stack([tmp1, tmp2])
print("frames")
print(tmp3)
print("frame 0 i.e. sample 0")
print(tmp3[0])
print("band 0 (delta) of frame 0")
print(tmp3[0, 0])
print("electrode 0 of band 0 (delta) of frame 0")
print(tmp3[0, 0, 0])
# Flatten every frame into a single feature row.
tmp3 = tmp3.reshape(len(tmp3), -1)
print(tmp3)

In [None]:
def get_frames(dataframe, rate, frame_duration, overlap):
    """
    Get the data of each frame in the dataframe
    :param dataframe: array of data with all channels (i.e. electrodes) of a session
        shape: [m-electrodes, eeg signals in topological order ==> time-based]
    :param rate: sampling rate of device (in Hz)
    :param frame_duration: desired frame duration in seconds
    :param overlap: a float that represents a fraction of the frame that can overlap [0, 1)

    :return: np-array of delta, theta, alpha, beta, and gamma averages for each electrode (i.e. channel) of each time step (i.e. frame interval)
        shape: [m-frames, n-bands, o-electrodes]
        (an empty array when no complete frame fits in the session)
    """

    intervals = get_frame_intervals(rate, dataframe.shape[1], frame_duration, overlap)

    frames = []
    for start, end in intervals:
        # One row of band averages per electrode: [o-electrodes, n-bands].
        # get_fft needs the sampling rate to build its frequency axis.
        frame = [
            get_EEG_bands(*get_fft(channel[start:end], rate))
            for channel in dataframe  # channel ==> electrode
        ]
        # Transpose to [n-bands, o-electrodes] to match the documented layout.
        frames.append(np.transpose(np.array(frame)))
    return np.array(frames)

In [None]:
def gen_images(locations, features, img_size, normalize=True):
    """
    Generate EEG images given the electrode locations in 2D space and feature values for each electrode
    :param locations: an array containing the x and y coordinate locations of each electrode
        shape: [# of electrodes, 2]
    :param features: the feature matrix
        shape: [# of frames i.e. intervals, features]
        format of features: [delta1, delta2, ..., deltaN, theta1, theta2, ..., thetaN, ..., gamma1, gamma2, ..., gammaN]
    :param img_size: number of pixels in the output images
    :param normalize: flag for whether to normalize each band over all samples or not

    TODO in Future:
    :param augment: flag for generating augmented images
    :param pca: flag for PCA based data augmentation
    :param std_mult: multiplier for added noise
    :param n_components: number of components in PCA to retain for augmentation

    :return: the generated images
        shape: [samples, colors, width, height]
    """
    locations = np.asarray(locations)
    features = np.asarray(features)
    n_electrodes = locations.shape[0]

    # check whether feature size is divisible by the number of electrodes i.e. channels
    assert features.shape[1] % n_electrodes == 0

    samples = features.shape[0]
    colors = features.shape[1] // n_electrodes  # number of bands

    # Split the flat feature matrix into one [samples, electrodes] block per band.
    # Each slice must keep all electrodes so it lines up with `locations`.
    feats = [
        features[:, color * n_electrodes:(color + 1) * n_electrodes]
        for color in range(colors)
    ]

    # Regular img_size x img_size grid spanning the bounding box of the electrodes.
    grid_x, grid_y = np.mgrid[
        min(locations[:, 0]):max(locations[:, 0]):img_size*1j,
        min(locations[:, 1]):max(locations[:, 1]):img_size*1j
    ]

    interpolations = [np.zeros([samples, img_size, img_size]) for _ in range(colors)]

    for sample in range(samples):
        for color in range(colors):
            # Grid points that cannot be interpolated are filled with NaN here
            # and zeroed after the optional normalization below.
            interpolations[color][sample, :, :] = griddata(
                locations, feats[color][sample, :], (grid_x, grid_y),
                method='cubic', fill_value=np.nan
            )
        print("Interpolating " + str(sample+1) + " of " + str(samples))

    for color in range(colors):
        if normalize:
            # Scale only the interpolated pixels so the NaN fill does not skew the band.
            valid = ~np.isnan(interpolations[color])
            interpolations[color][valid] = scale(interpolations[color][valid])
        interpolations[color] = np.nan_to_num(interpolations[color])

    # [colors, samples, w, h] -> [samples, colors, w, h]
    return np.swapaxes(np.asarray(interpolations), 0, 1)

In [None]:
def data_pipeline(filenames, labels, img_size, locations, rate, frame_duration, overlap):
    """
    Get the array of frames and their respective labels
    :param filenames: list of strings for each input datafile (needs to be formatted)
    :param labels: list of labels for each input datafile (in-order)
    :param img_size: int size of output images in the form (n, n)
    :param locations: an array containing the x and y coordinate locations of each electrode
        shape: [# of electrodes, 2]
    :param rate: sampling rate of device (in Hz)
    :param frame_duration: desired frame duration in seconds
    :param overlap: a float that represents a fraction of the frame that can overlap [0, 1)

    :return data: np-array of image frames (unshuffled)
    :return Y: np-array of labels for each frame respectively (1 or 0)
    """

    print("Data Pipeline: Generating...")

    # Per-file results are collected here and joined once at the end, so the
    # raw file contents can never be mixed up with the accumulated images.
    image_batches = []
    label_batches = []
    for i, file in enumerate(filenames):
        print("Processing file: " + file + " [" + str(i+1) + " of " + str(len(filenames)) + "]")
        raw_data = np.genfromtxt(file, delimiter=',')

        # TODO Now: format csv files
        formatted_data = raw_data

        X = get_frames(formatted_data, rate, frame_duration, overlap)
        # Flatten [frames, bands, electrodes] into one feature row per frame.
        X = X.reshape(len(X), -1)

        images = gen_images(np.array(locations), X, img_size, normalize=False)
        print(str(len(images)) + " frames were generated with label " + str(labels[i]) + ".")

        image_batches.append(images)
        # Every frame of a file gets that file's label.
        label_batches.append(np.ones(len(images)) * labels[i])

    if not image_batches:
        # No input files: return a pair of empty arrays.
        return np.array([]), np.array([])

    return np.concatenate(image_batches, axis=0), np.concatenate(label_batches, axis=0)

In [None]:
# Input sessions and the label of each one, in the same order.
filenames = [
    'data/ED001_T.csv',
    'data/ED001_N.csv',
    'data/ED002_T.csv',
    'data/ED002_N.csv',
]
labels = [1, 0, 1, 0]

# One (x, y) position per electrode.
location_2d = [(-2.0, 2.0), (1.0, 3.0), (-1.0, 3.0), (4.0, 1.0)]

# Image and framing settings.
img_size = 28
rate = 128.0
frame_duration = 1.0
overlap = 0.25

X, Y = data_pipeline(
    filenames=filenames,
    labels=labels,
    img_size=img_size,
    locations=location_2d,
    rate=rate,
    frame_duration=frame_duration,
    overlap=overlap,
)

In [None]:
import matplotlib.pyplot as plt

# Each frame in X holds one image per band ([colors, width, height]), which
# imshow cannot draw as a single picture; show the first band of the first frame.
plt.imshow(X[0][0])