# Overview

This is a Jupyter notebook (work in progress) to visualize the data gathered during our testing with the SATURN patch.

The basic data filtering approach will be as follows:

For each dataset containing a time series of voltage readings:
1. Trim the dataset's beginning and end for "dead zones" where there is no data.
2. Smooth the dataset by taking the moving average.
3. Split the dataset into individual segments, since one dataset typically contains 5 individual readings of 1 gesture.
    
Then the data can be visualized. We're trying a few different approaches:
* [All Data](#All-Data) contains a list of the raw waveforms for all datasets. Useful for debugging.
* [All Data, Sliced and Overlaid](#All-Data--Sliced-and-Overlaid) contains a chart for each waveform containing all the segments, adjusted to be the same length and overlaid upon each other. Useful for validating that input data is consistent.
* [Segments by Gesture](#Segments-by-Gesture) contains a chart for each gesture. In each gesture chart, the segmented data from each configuration of SATURN (ex. with backing material, without backing material, large pad, small pad, etc.) is overlaid. This will help us determine what SATURN configurations produce the most differentiable signals.

# Imports and Utility Functions

In [8]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import itertools
import os
from numpy.fft import fft, fftfreq, ifft

plt.rcParams["figure.figsize"] = (20,8)  # change size of charts

# Returns a list of the datasets in the data directory.
# Each dataset in this list is a dict with three attributes:
#   Dir: directory under data/ occupied by this dataset
#   File: filename without extension
#   Path: relative path to .csv
def get_available_datasets():
    data = []

    for datafile in filter(lambda x: x[-4:] == '.csv',
        list(itertools.chain(*[[root+'/'+file for file in files]
        for root, _, files in os.walk('./data')]))):
        
        n = datafile.split('/')
        data.append({
            'Dir': n[-2],
            'File': n[-1].split('.')[0],
            'Path': datafile
        })

    return data

# Returns a dataset's contents as a list.
# If `raw` is not set, the results will be filtered and normalized.
def load_dataset(datum):
    dataset = pd.read_csv(datum['Path'], names=['V'], header=None)
    dataset = list(dataset['V'])
    return dataset

# Generator that, given a dataset, yields gestures
def sliding_window(signal, window_length=50000, overlap=.5, fft_bins=5):
    N = len(signal)
    start, end = 0, min(window_length, N)
    while end <= N:
        print(start, end)
        window = signal[start:end]
        freqs = fft(window)  # todo: do we need to use a norm= normalization?
        freq_bins = np.histogram(freqs, bins=fft_bins)
        print(freq_bins)
        start = end - (window_length * overlap)
        end = start + window_length
        print(start, end)
        break


data = get_available_datasets()
sliding_window(load_dataset(data[0]))

0 50000
(array([    1,     0,     0,     0, 49999]), array([-49.51832339+0.j        , -38.98671889+0.05266512j,
       -28.45511438+0.10533024j, -17.92350988+0.15799537j,
        -7.39190538+0.21066049j,   3.13969913+0.26332561j]))
25000.0 75000.0


  indices = f_indices.astype(np.intp)


# All Data

In [None]:
for (i, datum) in enumerate(data):
    df = load_dataset(datum)
    plt.figure(i)
    plt.ylabel('Signal (V)')
    plt.xlabel('Time (ms)')
    w = plt.plot([float(i)*(float(1)/50) for i in range(0, len(df))], df, linewidth=1.0)
    plt.title(datum['File'] + ' (' + datum['Dir'] + ')')