# Overview

This is a Jupyter notebook (work in progress) to visualize the data gathered during our testing with the SATURN patch.

The basic data filtering approach will be as follows:

For each dataset containing a time series of voltage readings:
1. Trim the dataset's beginning and end for "dead zones" where there is no data.
2. Smooth the dataset by taking the moving average.
3. Split the dataset into individual segments, since one dataset typically contains 5 individual readings of 1 gesture.
    
Then the data can be visualized. We're trying a few different approaches:
* [All Data](#All-Data) contains a list of the raw waveforms for all datasets. Useful for debugging.
* [All Data, Sliced and Overlaid](#All-Data--Sliced-and-Overlaid) contains a chart for each waveform containing all the segments, adjusted to be the same length and overlaid upon each other. Useful for validating that input data is consistent.
* [Segments by Gesture](#Segments-by-Gesture) contains a chart for each gesture. In each gesture chart, the segmented data from each configuration of SATURN (ex. with backing material, without backing material, large pad, small pad, etc.) is overlaid. This will help us determine what SATURN configurations produce the most differentiable signals.

# Imports and Utility Functions

In [2]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import itertools
import os
from numpy.fft import fft, fftfreq, ifft, rfft

# Returns a list of the datasets in the data directory.
# Each dataset in this list is a dict with three attributes:
#   Dir: directory under data/ occupied by this dataset
#   File: filename without extension
#   Path: relative path to .csv
def get_available_datasets():
    data = []

    for datafile in filter(lambda x: x[-4:] == '.csv',
        list(itertools.chain(*[[root+'/'+file for file in files]
        for root, _, files in os.walk('./data')]))):
        
        n = datafile.split('/')
        data.append({
            'Dir': n[-2],
            'File': n[-1].split('.')[0],
            'Path': datafile
        })

    return data

# Returns a dataset's contents as a list.
# If `raw` is not set, the results will be filtered and normalized.
def load_dataset(datum, raw=False):
    dataset = pd.read_csv(datum['Path'], names=['V'], header=None)
    dataset = list(dataset['V'])
    if not raw:  # normalize and filter the data
        dataset = normalize_dataset(dataset)
        dataset = trim_dataset(dataset)
        dataset = moving_average(dataset, 20)
        #dataset = segment_dataset(dataset, threshold=0.01, min_spacing=100)
    return dataset

# Normalize a dataset by dividing by max amplitude.
def normalize_dataset(data):
    return np.divide(data, np.max(data))

# Removes all leading and trailing points that fall under a certain threshold.
# The idea is to remove the irrelevant start and end portions of our data.
# Returns the relevant slice.
def trim_dataset(data, threshold=0.01):
    i = 0
    while abs(data[i]) < threshold and i < len(data):
        i += 1
    j = len(data) - 1
    while abs(data[j]) < threshold and j > i:
        j -= 1
    return data[i:j]

# Returns a list of the contiguous segments of the dataset.
# A contiguous segment is defined as some slice separated by at least 
# `min_spacing` points where the signal is less than `threshold` from 
# the next segment.
# Returns a list of lists (segments)
def segment_dataset(data, threshold=0.01, min_spacing=100):
    i = 0
    spaces = 0
    intervals = []
    start = None
    while i < len(data):
        while abs(data[i]) < threshold and i < len(data): 
            i += 1
        start = i
        spaces = 0
        end = i
        while i < len(data):
            if abs(data[i]) < threshold:
                spaces += 1
            else:
                spaces = 0
                end = i
            if spaces > min_spacing:
                intervals.append((start, end))
                start = None
                end = None
    if start and not end:
        intervals.append((start, i))
    print(intervals)
    return [data[interval[0]:interval[1]] for interval in intervals]

# Given a `data` as a list and a `window_size` number, it will return
# `data` with each point averaged with the previous `window_size` points.
def moving_average(data, window_size):
    res = []
    for (i,t) in enumerate(data):
        if i == 0:
            continue
        start = i - window_size
        if start < 0:
            start = 0
        end = i
        sl = data[start:end]
        x = np.average(sl)
        res.append(x)
    return res

data = get_available_datasets()

plt.rcParams["figure.figsize"] = (20,8)  # change size of charts

# All Data

In [None]:
for (i, datum) in enumerate(data):
    df = load_dataset(datum)
    plt.figure(i)
    plt.ylabel('Signal (V)')
    plt.xlabel('Time (ms)')
    w = plt.plot([float(i)*(float(1)/50) for i in range(0, len(df))], df, linewidth=1.0)
    plt.title(datum['File'] + ' (' + datum['Dir'] + ')')

# Waveforms by Gesture

In [None]:
g = {}
for (i, datum) in enumerate(data):
    if datum['File'] not in g:
        g[datum['File']] = []
    g[datum['File']].append(datum)
for (i, gesture) in enumerate(g.keys()):
    gdata = g[gesture]
    plt.figure(i)
    plt.ylabel('Signal (V)')
    plt.xlabel('Time (ms)')
    for datum in gdata:
        df = load_dataset(datum)
        w = plt.plot([float(i)*(float(1)/50) for i in range(0, len(df))], df, linewidth=1.0, label=datum['Dir'])
    plt.title('All ' + gesture + ' Gestures')

FFT for each Data

In [None]:
# datasets_dict = get_available_datasets()
# for (i, datum) in enumerate(datasets_dict):
#     cur_dataset = load_dataset(datum, raw=False)
#     time_range = len(cur_dataset)/50000

#     freqs = fftfreq(len(cur_dataset))

#     # only include positive frequencies
#     mask = freqs > 0
#     fft_vals = fft(cur_dataset)
    
#     # range of x-values (time), one coordinate per data
#     x = np.linspace(0, time_range, len(cur_dataset))

#     # true theoretical fft
#     fft_theo = 2.0* np.abs(fft_vals/len(cur_dataset))

#     plt.figure(i)
    
#     plt.plot(freqs[mask], fft_theo[mask], label = 'true fft values')
#     plt.title('FFT values' +  datum['Dir'])


# Segmentation (finding Gestures)

In [7]:
def findGesturesOfAllData():
    datasets_dict = get_available_datasets()
    map_dataname_to_gestures_indices_list = {}
    
    for (i, datum) in enumerate(datasets_dict):
        cur_dataset = load_dataset(datum, raw=False)
        total_time_range = len(cur_dataset)/50000

        sample_win_size = 50000
        start_win = 0
        end_win = 49999
        
        map_start_to_fft_sum = {}
        signals_list = []
        
        # list of sums of all fft bin for each window used on this dataset 
        # each window represents one second of the signal
        list_of_fftSums = []

        print('____', datum['Path'])

        while start_win < len(cur_dataset):
            
            # each window covers an event occuring in one second of time
            window_data = cur_dataset[start_win:end_win]
            
            freqs = fftfreq(len(window_data))
            ##sum_of_fftBins = sum(np.abs(freqs))

            # getting sum of fft bins, then the sum of their frequency values
            
            fft_vals = rfft(window_data)
            # fftTheo = 2.0* np.abs(fft_vals/len(cur_dataset))
            ## sum_of_values = sum(fft_theo)

            sum_of_fftValues = sum(np.abs(fft_vals))

            # scale smaller slices - no idea if this is accurate
            sum_of_fftValues *= (sample_win_size / (end_win - start_win))
            
            # appending sum of the fftbins to list 
            list_of_fftSums += [sum_of_fftValues]
            
            # update the window size to include 50000 samples, half new and half old.
            start_win += 25000
            end_win = min(end_win + 25000, len(cur_dataset))

            print(sum_of_fftValues)
        
        threshold = 45000
        print("Threshold: " + str(threshold))
        fftSums = list_of_fftSums
        i = 0
        gestures = []
        while i < len(fftSums):
            if fftSums[i] > threshold:
                start = i
                while i < len(fftSums) and fftSums[i] > threshold:
                    i += 1
                end = i
                gestures += [(25000*start, 25000*end)]
            i += 1
        
        print(gestures)
        map_dataname_to_gestures_indices_list[datum['Path']] = gestures
        
        #go to next dataset

# returns the list of gesture events happening in dataset
# data: a data file object, can be opened with call to load_dataset()
def findGesturesInGivenData(data):
    cur_dataset = load_dataset(data, raw=False)
    time_range = len(cur_dataset)/50000

    win_size = 50000
    start_win = 0
    end_win = 49999
        
    map_start_to_fft_sum = {}
    signals_list = []
        
    # list of sums of all fft bin for each window used on this dataset 
    # each window represents one second of the signal
    list_of_fftSums = []

        
    while end_win < len(cur_dataset):
            
        # each window covers an event occuring in one second of time
        window_data = cur_dataset[start_win:end_win]
        freqs = fftfreq(len(window_data))
            
        # fft_vals = fft(window_data)
        
        # getting sum of fft bins, then the sum of their frequency values
        sum_of_fftBins = sum(np.abs(freqs))
        ## sum_of_values = sum(fft_theo)
            
            
        # appending sum of the fftbins to list 
        list_of_fftSums += [sum_of_fftBins]

        # update the window size to include 50000 samples, half new and half old.
        start_win += 25000
        end_win += 25000


    list_of_gesture_starts = []
    # variables to toggle as any local maxima or minima are found in list_of_fftSums.
    gesture_start_found = False
    gesture_end_found = False
    for i in range(len(list_of_fftSums)):

        # We will compare against prev. and next sums to determine if a new
        # gesture is starting or ending
        if i + 1 < len(list_of_fftSums):
            if len(list_of_gesture_starts) == 0:
                if (list_of_fftSums[i+1] <= list_of_fftSums[i] and 
                    list_of_fftSums[i-1] < list_of_fftSums[i]):

                    gesture_end_found = False
                    gesture_start_found = True

                    # we assume gesture ends in this window so data in l._o._g._s. has a consistent type always
                    list_of_gesture_starts += [(i*25000, i*25000 + 50000)]

            elif (gesture_start_found and list_of_fftSums[i+1] >= list_of_fftSums[i] and 
                list_of_fftSums[i-1] > list_of_fftSums[i]):

                # if end of gesture is found (a local minimum in list of sums) record it in end of tuple
                # of last element in l._o._g._s.
                gesture_end_found = True
                gesture_start_found = False
                temp = list_of_gesture_starts[-1][0]
                list_of_gesture_starts[-1] = (temp, (i)*25000 + 25000)
                temp = None

            elif (gesture_end_found and list_of_fftSums[i+1] <= list_of_fftSums[i] and
                list_of_fftSums[i-1] < list_of_fftSums[i]):

                gesture_end_found = False
                gesture_start_found = True

                # we assume gesture ends in this window so data in l._o._g._s. has a consistent type always
                list_of_gesture_starts += [(i*25000, i*25000 + 50000)]

    return list_of_gesture_starts

findGesturesOfAllData()


____ ./data/large-pad-squares/swipedown.csv
34955.04569514119
50343.317103499794
38576.5674289331
34761.77327294871
49206.43774378407
42195.7871663869
37156.56037026114
53012.76640250455
40472.66537112599
60117.13147956917
76082.41886118482
56873.07484900819
70931.70533528575
80700.56991178845
49069.02774240325
Threshold: 45000
[(25000, 50000), (100000, 125000), (175000, 200000), (225000, 375000)]
____ ./data/large-pad-squares/swipeup.csv
1046.6254787520083
452.77278952121156
83459.23565455971
126766.14272344697
93198.56176207084
27696.102294113945
32590.098055799255
50843.16095571861
54677.27038860873
38376.383259664675
56231.26369783253
44295.134631362766
43064.975634801274
43875.295336709736
40607.994621570164
64672.24535810333
55990.160496064476
56345.119470800855
66892.4245829863
38667.9450524529
Threshold: 45000
[(50000, 125000), (175000, 225000), (250000, 275000), (375000, 475000)]
____ ./data/large-pad-squares/swiperight.csv
839.5374578711537
957.2740464176945
26658.48878002964

____ ./data/large-pad-circledots/swipeup.csv
1183.0065890612289
21551.444003920704
46161.40344883298
47095.44935487183
21181.72357258213
47478.34287154007
53782.45246271674
40804.75004746286
47256.45823665138
72440.66232007257
66951.93793946976
53185.3380491635
80856.10487769902
79521.48620429248
42510.547691594795
55827.569579809504
76658.09642790892
59838.25959070393
3010.1184731585113
Threshold: 45000
[(50000, 100000), (125000, 175000), (200000, 350000), (375000, 450000)]
____ ./data/large-pad-circledots/swiperight.csv
3852.212143772096
1070.2461135940216
915.314657142003
1079.2348757235318
36603.72593860129
44026.020941211995
38815.65835788648
34153.58097049951
35316.06213771357
42732.27766752153
30047.46833176414
40004.02631450296
44103.88512786965
42524.816416130765
40255.91589617979
42536.71409366005
52300.022770127085
40520.65005243995
39257.486325487815
63997.11587590303
51863.8827156914
8843.238892103462
4398.905443218401
Threshold: 45000
[(400000, 425000), (475000, 525000)]
