# Overview

This is a Jupyter notebook (work in progress) to visualize the data gathered during our testing with the SATURN patch.

The basic data filtering approach will be as follows:

For each dataset containing a time series of voltage readings:
1. Trim the dataset's beginning and end for "dead zones" where there is no data.
2. Smooth the dataset by taking the moving average.
3. Split the dataset into individual segments, since one dataset typically contains 5 individual readings of 1 gesture.
    
Then the data can be visualized. We're trying a few different approaches:
* [All Data](#All-Data) contains a list of the raw waveforms for all datasets. Useful for debugging.
* [All Data, Sliced and Overlaid](#All-Data--Sliced-and-Overlaid) contains a chart for each waveform containing all the segments, adjusted to be the same length and overlaid upon each other. Useful for validating that input data is consistent.
* [Segments by Gesture](#Segments-by-Gesture) contains a chart for each gesture. In each gesture chart, the segmented data from each configuration of SATURN (ex. with backing material, without backing material, large pad, small pad, etc.) is overlaid. This will help us determine what SATURN configurations produce the most differentiable signals.

# Imports and Utility Functions

In [2]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import itertools
import os
from numpy.fft import fft, fftfreq, ifft, rfft

# Walks ./data and describes every .csv file found there.
# Each entry of the returned list is a dict with three keys:
#   Dir:  name of the directory that directly contains the file
#   File: filename up to (not including) its first '.'
#   Path: relative path to the .csv, built as root + '/' + filename
def get_available_datasets():
    found = []

    for root, _, files in os.walk('./data'):
        for filename in files:
            path = root + '/' + filename
            if not path.endswith('.csv'):
                continue

            parts = path.split('/')
            descriptor = {
                'Dir': parts[-2],
                'File': parts[-1].split('.')[0],
                'Path': path
            }
            found.append(descriptor)

    return found

# Reads the single-column CSV referenced by datum['Path'] and returns its
# readings as a list.
# With `raw` set, the readings are returned exactly as read. Otherwise they
# are normalized, trimmed and smoothed with a 20-point moving average.
def load_dataset(datum, raw=False):
    frame = pd.read_csv(datum['Path'], names=['V'], header=None)
    readings = list(frame['V'])
    if raw:
        return readings

    # Filtering pipeline: normalize -> trim -> smooth.
    # (Segmentation via segment_dataset is currently not part of the pipeline.)
    normalized = normalize_dataset(readings)
    trimmed = trim_dataset(normalized)
    return moving_average(trimmed, 20)

# Scales a dataset so that its largest value becomes 1.
# Every point is divided by np.max(data).
# NOTE(review): this uses the largest signed value, not the largest absolute
# value - confirm that is intended for signals that swing negative.
def normalize_dataset(data):
    peak = np.max(data)
    return np.true_divide(data, peak)

# Removes all leading and trailing points whose magnitude falls under
# `threshold`. The idea is to remove the irrelevant start and end portions
# of our data.
#   data:      sliceable sequence of readings (list or numpy array)
#   threshold: points with abs(value) < threshold count as "dead"
# Returns the relevant slice, from the first to the last point at or above
# the threshold (both included). Returns an empty slice if no point reaches
# the threshold or if `data` is empty.
def trim_dataset(data, threshold=0.01):
    n = len(data)

    # Bounds are checked *before* indexing so an all-quiet dataset does not
    # run off the end of the sequence.
    i = 0
    while i < n and abs(data[i]) < threshold:
        i += 1

    j = n - 1
    while j > i and abs(data[j]) < threshold:
        j -= 1

    # j is the index of the last relevant point, so the slice end is j + 1.
    return data[i:j + 1]

# Returns a list of the contiguous segments of the dataset.
# A segment starts at the first point whose magnitude is at or above
# `threshold` and ends after the last such point that is followed by at
# least `min_spacing` consecutive points below `threshold` (or by the end of
# the data). Shorter quiet gaps stay inside the segment.
#   data:        sliceable sequence of readings
#   threshold:   points with abs(value) < threshold count as quiet
#   min_spacing: number of consecutive quiet points that separates segments
# Returns a list of slices of `data` (one per segment); empty if the data
# contains no point at or above the threshold.
def segment_dataset(data, threshold=0.01, min_spacing=100):
    intervals = []
    start = None        # first active index of the segment being built
    last_active = None  # most recent active index inside that segment
    quiet_run = 0       # consecutive quiet points seen since last_active

    for idx, value in enumerate(data):
        if abs(value) >= threshold:
            if start is None:
                start = idx
            last_active = idx
            quiet_run = 0
        elif start is not None:
            quiet_run += 1
            if quiet_run >= min_spacing:
                # The gap is long enough: close the segment at its last
                # active point (slice end is exclusive, hence the + 1).
                intervals.append((start, last_active + 1))
                start = None

    # `is not None` matters here: a segment may legitimately start at index 0.
    if start is not None:
        intervals.append((start, last_active + 1))

    return [data[begin:end] for (begin, end) in intervals]

# Smooths `data` with a trailing moving average.
# For every index i >= 1 the output holds the mean of the up-to-`window_size`
# points that come *before* i (data[i - window_size:i], clamped at 0).
# Index 0 has no preceding points and is skipped, so the result is one
# element shorter than `data`.
def moving_average(data, window_size):
    return [
        np.average(data[max(idx - window_size, 0):idx])
        for idx, _ in enumerate(data)
        if idx != 0
    ]

# Discover every dataset under ./data once; the plotting cells below iterate
# over this list.
data = get_available_datasets()

plt.rcParams["figure.figsize"] = (20,8)  # change size of charts

# All Data

In [None]:
# One figure per dataset, showing the filtered waveform.
for (i, datum) in enumerate(data):
    df = load_dataset(datum)

    # Sample index -> x coordinate: each sample advances the axis by 1/50.
    timestamps = [float(n)*(float(1)/50) for n in range(len(df))]

    plt.figure(i)
    plt.title('{} ({})'.format(datum['File'], datum['Dir']))
    plt.xlabel('Time (ms)')
    plt.ylabel('Signal (V)')
    w = plt.plot(timestamps, df, linewidth=1.0)

# Waveforms by Gesture

In [None]:
# Group the dataset descriptors by gesture name (the CSV filename without
# its extension), so recordings of the same gesture from different
# directories end up together.
g = {}
for datum in data:
    g.setdefault(datum['File'], []).append(datum)

# One figure per gesture, overlaying the waveform from every directory.
for (i, gesture) in enumerate(g):
    gdata = g[gesture]
    plt.figure(i)
    plt.ylabel('Signal (V)')
    plt.xlabel('Time (ms)')
    for datum in gdata:
        df = load_dataset(datum)
        # Sample index -> x coordinate: each sample advances the axis by 1/50.
        w = plt.plot([float(n)*(float(1)/50) for n in range(0, len(df))], df, linewidth=1.0, label=datum['Dir'])
    # Each line is labelled with its source directory; without a legend the
    # overlaid waveforms cannot be told apart.
    plt.legend()
    plt.title('All ' + gesture + ' Gestures')

# FFT for Each Dataset

In [None]:
# datasets_dict = get_available_datasets()
# for (i, datum) in enumerate(datasets_dict):
#     cur_dataset = load_dataset(datum, raw=False)
#     time_range = len(cur_dataset)/50000

#     freqs = fftfreq(len(cur_dataset))

#     # only include positive frequencies
#     mask = freqs > 0
#     fft_vals = fft(cur_dataset)
    
#     # range of x-values (time), one coordinate per data
#     x = np.linspace(0, time_range, len(cur_dataset))

#     # true theoretical fft
#     fft_theo = 2.0* np.abs(fft_vals/len(cur_dataset))

#     plt.figure(i)
    
#     plt.plot(freqs[mask], fft_theo[mask], label = 'true fft values')
#     plt.title('FFT values' +  datum['Dir'])


# Segmentation (finding Gestures)

In [47]:
def find_gestures_in_all_datasets(path_filter=""):
    """Yield the detected gestures for every dataset whose path matches.

    Generator. For each available dataset whose 'Path' contains
    `path_filter` (the default empty string matches everything), the path is
    printed, the filtered dataset is loaded, and the result of
    find_gestures_in_dataset (called with its default parameters) is yielded.
    """
    for datum in get_available_datasets():
        if path_filter in datum['Path']:
            print('____', datum['Path'])
            filtered = load_dataset(datum, raw=False)
            yield find_gestures_in_dataset(filtered)
        
      
# Little test function to empirically determine the best sample_win_size and
# threshold for each dataset whose path contains `path_filter`.
#
# For every matching dataset it grid-searches window sizes (10 values between
# 180 and 200) and thresholds (40 values between 0 and 80), keeping the first
# combination for which len(find_gestures_in_dataset(...)) is closest to 16,
# and prints the winning parameters.
# NOTE(review): find_gestures_in_dataset returns a flat list with two indices
# (start, end) per detected event, so the "16" target and the printed count
# refer to boundary indices, not events - confirm this is intended.
#
# Returns a dict mapping each dataset path to its best gesture list.
def find_gestures_in_new_small_pad_datasets(path_filter="small-pad-"):
    # Original author's note: 500Hz (presumably the sampling rate of these
    # recordings - TODO confirm).
    target_gestures = 16
    map_dataname_to_gestures_indices_list = {}

    for datum in get_available_datasets():
        if path_filter not in datum['Path']:
            continue
        print('____', datum['Path'])
        dataset = load_dataset(datum, raw=False)

        best_gestures = []
        best_threshold = 0
        best_win_size = 0
        for sample_win_size in np.linspace(180, 200, num=10):
            sample_win_size = int(sample_win_size)
            for threshold in np.linspace(0, 80, num=40):
                gestures = find_gestures_in_dataset(dataset, sample_win_size=sample_win_size, threshold=threshold)
                # Accept a candidate if it is strictly closer to the target,
                # or if nothing has been detected so far.
                closer = abs(len(gestures) - target_gestures) < abs(len(best_gestures) - target_gestures)
                if closer or len(best_gestures) == 0:
                    best_gestures = gestures
                    best_threshold = threshold
                    best_win_size = sample_win_size

        print("Best threshold for " + datum['Path'] + ": " + str(best_threshold) + " and win_size=" + str(best_win_size) + " with " + str(len(best_gestures)) + " gestures")
        map_dataname_to_gestures_indices_list[datum['Path']] = best_gestures

    # Hand the results back instead of discarding them.
    return map_dataname_to_gestures_indices_list

# Returns the gesture events detected in a dataset.
#
# The dataset is scanned with a sliding window; for each window the sum of
# the magnitudes of its real-FFT bins is computed, and consecutive windows
# whose sum exceeds `threshold` are merged into one event.
#
#   cur_dataset:     a list of readings
#   sample_win_size: number of samples per window
#   overlap:         fraction of the window size by which the window advances
#                    each step (step = round(sample_win_size * overlap))
#   threshold:       a window is "active" when its FFT magnitude sum is
#                    strictly greater than this value
#
# Returns a flat list [start0, end0, start1, end1, ...] of sample indices,
# i.e. two entries per detected event. Each index is the step size times the
# index of the first active window (start) or of the first window after the
# active run (end).
# Raises ValueError if the step size rounds to zero or less, since the
# window could then never advance.
def find_gestures_in_dataset(cur_dataset, sample_win_size=50000, overlap=.5, threshold=45000):
    slide_amount = round(sample_win_size * overlap)
    if slide_amount <= 0:
        raise ValueError(
            "sample_win_size * overlap must round to a positive step, got "
            + str(slide_amount))

    n_samples = len(cur_dataset)
    start_win = 0
    end_win = sample_win_size

    # One entry per window: the sum of the FFT bin magnitudes of that window.
    fft_sums = []
    while start_win < n_samples:
        window_data = cur_dataset[start_win:end_win]

        # Builtin sum() is kept on purpose so the floating-point results stay
        # identical to those the thresholds were tuned against.
        window_sum = sum(np.abs(rfft(window_data)))

        # Scale smaller slices - no idea if this is accurate.
        # NOTE(review): this uses the nominal span (end_win - start_win); for
        # a dataset shorter than one window the first slice is shorter than
        # that span and is not scaled up - confirm whether that matters.
        window_sum *= np.round(sample_win_size / (end_win - start_win))

        fft_sums.append(window_sum)

        # Advance the window by one step; the end is clamped to the dataset.
        start_win += slide_amount
        end_win = min(end_win + slide_amount, n_samples)

    # Merge runs of consecutive active windows into (start, end) boundaries.
    gestures = []
    i = 0
    while i < len(fft_sums):
        if fft_sums[i] > threshold:
            start = i
            while i < len(fft_sums) and fft_sums[i] > threshold:
                i += 1
            end = i
            gestures += [int(slide_amount*start), int(slide_amount*end)]
        i += 1

    return gestures

# Determines the best thresholds: runs the small-pad parameter sweep, which
# prints, for each matching dataset, the threshold and window size whose
# detection count is closest to the target.
find_gestures_in_new_small_pad_datasets()


____ ./data/small-pad-crazy/swipedown.csv
Best threshold for ./data/small-pad-crazy/swipedown.csv: 10.256410256410255 and win_size=180 with 16 gestures
____ ./data/small-pad-crazy/swipeup.csv
Best threshold for ./data/small-pad-crazy/swipeup.csv: 38.97435897435897 and win_size=186 with 16 gestures
____ ./data/small-pad-crazy/swiperight.csv
Best threshold for ./data/small-pad-crazy/swiperight.csv: 32.82051282051282 and win_size=180 with 16 gestures
____ ./data/small-pad-crazy/swipeleft.csv
Best threshold for ./data/small-pad-crazy/swipeleft.csv: 36.92307692307692 and win_size=180 with 16 gestures


# Classification (which gesture?) 

In [None]:
def find_gestures_in_all_datasets(path_filter="small-pad-crazy"):