In [None]:
%load_ext autoreload
%autoreload 2

import sys


In [None]:
import os

import pandas

from software.dataset.hussain2021 import load_data, load_meta
from firmware.process import GravitySplitter

In [None]:
# Location of the dataset files, given relative to the current working directory
dataset_path = './data/hx5kkkbr3j-1'
# Read the dataset through the project's hussain2021 loader
data = load_data(dataset_path)
# Preview only the first rows
data.head()

In [None]:
# Metadata derived from the loaded data; later cells join it onto the samples via 'filename'
meta = load_meta(data)
meta

In [None]:
# Accelerometer only: keep rows with a complete x/y/z reading, discard the magnetometer and
# gyroscope channels, then attach the metadata of each file.
acc = (
    data
    .dropna(subset=['acc_x', 'acc_y', 'acc_z'])
    .drop(columns=['mag_x', 'mag_y', 'mag_z', 'gyro_x', 'gyro_y', 'gyro_z'])
    .reset_index()
    .merge(meta, left_on='filename', right_on='filename')
)

# Restrict to a single recording configuration:
# - setting 'S2': has the more specific protocol (pause for a few seconds in between different
#   regions and bring the brush to a reference point)
# - sensor location 'A': mounted on the brush
# - brush 'M': manual brushing, not electric
acc = acc[
    (acc.setting == 'S2')
    & (acc.sensor_location == 'A')
    & (acc.brush == 'M')
]


In [None]:
def resample(df, freq='1min', func='median', group='filename', time='time', numeric_only=True):
    """Resample a time series independently within each group.

    Parameters:
        df: DataFrame holding `group` and `time` either as columns or as index levels.
            The existing index is turned into columns before resampling.
        freq: pandas frequency string giving the size of the resampling bins.
        func: aggregation applied to the samples of each bin.
        group: name of the column identifying the individual series.
        time: name of the datetime-like column to resample on.
        numeric_only: forwarded to the aggregation.

    Returns:
        DataFrame indexed by (`group`, `time`) with one row per group and time bin.
    """
    by_time = df.reset_index().set_index(time)
    resampler = by_time.groupby(group, observed=True).resample(freq)
    aggregated = resampler.agg(func, numeric_only=numeric_only)

    return aggregated.reset_index().set_index([group, time])

# Resample to 50 Hz: 20 ms bins, averaged within each file.
# The (filename, time) index returned by resample() is flattened back into columns.
acc_re = resample(acc, freq='20ms', func='mean').reset_index()
acc_re

In [None]:
# Number of distinct filenames left after filtering and resampling
acc_re.filename.nunique()

In [None]:

import array

# TODO: downsample to 50 Hz?
# TODO: use micropython to process
def process(df, samplerate=200, groupby='filename', columns=['acc_x', 'acc_y', 'acc_z'], time='time'):
    """Run GravitySplitter over every group and append its gravity/motion outputs.

    Each group is sorted by `time` and fed sample by sample, in order, through its own
    GravitySplitter instance. After every sample the splitter's `gravity` and `motion`
    values are recorded.

    Parameters:
        df: DataFrame with `groupby` and `time` available as index levels or columns.
            Its index must identify rows uniquely, since the results are joined back on it.
        samplerate: passed to GravitySplitter; should match the actual rate of the data in `df`.
        groupby: key identifying the individual series.
        columns: the three input columns handed to the splitter for each sample.
        time: key used to order the samples within a group.

    Returns:
        `df` with the additional columns gravity_x/y/z and motion_x/y/z.
    """
    # Work on a private list so any sequence of names can be used for column selection
    columns = list(columns)
    names = ['gravity_x', 'gravity_y', 'gravity_z', 'motion_x', 'motion_y', 'motion_z']

    def process_timeseries(g):
        g = g.sort_values(time)

        # A new splitter is created for every group, so nothing carries over between groups
        splitter = GravitySplitter(samplerate=samplerate)

        rows = []
        # itertuples over just the needed columns avoids building a Series per row
        for sample in g[columns].itertuples(index=False, name=None):
            xyz = array.array('f', sample)
            splitter.process(xyz)
            rows.append(list(splitter.gravity) + list(splitter.motion))

        return pandas.DataFrame(rows, columns=names, index=g.index)

    groups = df.groupby(groupby, as_index=False)
    f = groups.apply(process_timeseries, include_groups=False)
    # apply() prepends a per-group level; drop it so the index lines up with df again
    f.index = f.index.droplevel(0)

    return pandas.merge(df, f, left_index=True, right_index=True)

# Work on a fixed random subset of the files; the seed keeps the selection the same on every run
ss = pandas.Series(acc_re.filename.unique())
# sample() raises when asked for more items than exist, so cap the request at what is available
ss = ss.sample(n=min(42, len(ss)), random_state=1)
sub = acc_re[acc_re.filename.isin(ss)]
# acc_re was resampled to 20 ms bins, so tell the splitter the data is 50 Hz
# instead of relying on process()'s 200 Hz default
pp = process(sub.set_index(['filename', 'time']), samplerate=50)
pp

In [None]:
def vector_magnitude(vectors, axis=None):
    """Return the norm of `vectors` as computed by numpy.linalg.norm.

    Parameters:
        vectors: array-like holding the vector components.
        axis: axis along which the components of one vector are laid out.
            With None, the norm over the whole input is returned.
    """
    return numpy.linalg.norm(vectors, axis=axis)


In [None]:
# Turn the index of the processed samples back into columns, then attach the metadata of each file
mm = pp.reset_index().merge(meta, left_on='filename', right_on='filename')
mm

In [None]:
import plotly.express

# Gravity components over elapsed time, one facet row per file
plotly.express.line(
    mm,
    x='elapsed',
    y=['gravity_x', 'gravity_y', 'gravity_z'],
    facet_row='filename',
    width=1000,
    height=1000,
)
                    


In [None]:
# Motion components over elapsed time, one facet row per file
plotly.express.line(
    mm,
    x='elapsed',
    y=['motion_x', 'motion_y', 'motion_z'],
    facet_row='filename',
    width=1000,
    height=1000,
)
                    


In [None]:
import librosa
import numpy
from matplotlib import pyplot as plt

In [None]:
def make_spectrogram(df, sr=50, hop_length=16, columns=None):
    """Build one combined dB spectrogram from several columns of a single time series.

    The rows are ordered by 'elapsed' and rows missing any of `columns` are dropped.
    Each column is transformed with an STFT (window of 4 hops), converted to magnitude and
    then to dB, and the per-column results are stacked along the frequency axis.

    Parameters:
        df: DataFrame with an 'elapsed' column and the columns named in `columns`.
        sr: accepted for reference but not used in the computation.
        hop_length: STFT hop in samples; the FFT size is 4 * hop_length.
        columns: columns to transform; defaults to ['acc_x', 'acc_y', 'acc_z'].

    Returns:
        2D array whose rows are the frequency bins of all columns, one column after another.
    """
    if columns is None:
        columns = ['acc_x', 'acc_y', 'acc_z']

    ordered = df.sort_values('elapsed').dropna(subset=columns)
    window = 4*hop_length

    def channel_db(name):
        # Magnitude spectrogram of a single column, expressed in dB
        transformed = librosa.stft(ordered[name].values, n_fft=window, hop_length=hop_length)
        return librosa.amplitude_to_db(numpy.abs(transformed))

    # Combined spectrogram, concatenated on the frequency axis
    return numpy.concatenate([channel_db(name) for name in columns])

# One combined accelerometer spectrogram per file, held in a single-column frame
specs = (
    mm.groupby('filename')
    .apply(make_spectrogram, include_groups=False)
    .to_frame()
    .rename(columns={0: 'spectrogram'})
)
specs

In [None]:
    # NOTE(review): this looks like a leftover fragment - no visible cell reads this module-level
    # `columns`. The leading indentation is presumably only accepted because IPython dedents
    # cells; in a plain .py export it would be an IndentationError. Confirm, then dedent or delete.
    columns = ['motion_x', 'motion_y', 'motion_z']

In [None]:
def to_spectrum(S):
    """Collapse a 2D spectrogram into a spectrum by averaging along axis 1.

    Returns a Series with one value per row of `S`, indexed 0..n-1.
    """
    averaged = numpy.mean(S, axis=1)
    bins = numpy.arange(len(averaged))
    return pandas.Series(averaged, index=bins)


def compute_spectrums(df, sr=50, hop_length=16):
    """Compute a mean-normalized spectrum per file for each motion axis.

    For each of motion_x, motion_y and motion_z, a spectrogram is computed per 'filename'
    with make_spectrogram, averaged over time with to_spectrum, and shifted so that every
    spectrum has zero mean across its frequency bins.

    Parameters:
        df: DataFrame with 'filename', 'elapsed' and the motion_x/y/z columns.
        sr: samplerate of the data in Hz, used to label the frequency bins.
        hop_length: STFT hop passed to make_spectrogram, which uses an FFT size of 4 hops.

    Returns:
        Long-format DataFrame with the columns 'filename', 'frequency', 'value' and 'axis'.
    """
    # Same for every axis: bin frequencies for the FFT size that make_spectrogram uses
    freqs = librosa.fft_frequencies(n_fft=4*hop_length, sr=sr)

    dfs = []
    for axis in ['x', 'y', 'z']:
        c = f'motion_{axis}'

        specs = (
            df.groupby('filename')
            .apply(make_spectrogram, include_groups=False, sr=sr, hop_length=hop_length, columns=[c])
            .to_frame()
            .rename(columns={0: 'spectrogram'})
        )
        # One row per file, one column per frequency bin
        ss = specs.spectrogram.apply(to_spectrum)
        ss.columns = freqs

        # Normalize: subtract the mean level of each file's spectrum
        N = numpy.mean(ss.values, axis=1, keepdims=True)
        ss = ss - N

        sf = pandas.melt(ss.reset_index(), id_vars='filename', var_name='frequency')
        sf['axis'] = axis

        dfs.append(sf)

    out = pandas.concat(dfs)
    return out

# Normalized per-axis motion spectrums for every file in mm, in long format for plotting
spectrums = compute_spectrums(mm)
#spectrums


In [None]:
# One facet row per axis, one line per file
fig = plotly.express.line(
    spectrums,
    x='frequency',
    y='value',
    color='filename',
    facet_row='axis',
    width=1500,
    height=800,
)
# Shade the band from 2.0 to 5.0 on the frequency axis, drawn behind the traces
# NOTE(review): presumably the band of interest for brushing motion - confirm
fig.add_vrect(x0=2.0, x1=5.0, fillcolor="green", opacity=0.2, line_width=0, layer='below')

In [None]:
# Import the submodule explicitly: a bare `import librosa` does not expose `librosa.display`
# in every librosa release.
import librosa.display

# One figure per row of specs
for filename, spec in specs.iterrows():
    fig, ax = plt.subplots(1, figsize=(20, 5))
    librosa.display.specshow(ax=ax, data=spec.spectrogram)
    # Title each figure with its row label so it can be traced back to its file
    ax.set_title(str(filename))

In [None]:
# Gravity estimate in the y/z plane, one color per file
fig = plotly.express.scatter(
    mm,
    x='gravity_y',
    y='gravity_z',
    color='filename',
    opacity=0.2,
    width=800,
    height=800,
)
fig.update_traces(marker={'size': 5.0})
# The legend is hidden; with one entry per file it would crowd the plot
fig.update_layout(showlegend=False)

In [None]:
# Pairwise scatter of the motion components on a 5% sample of the rows.
# The sample is seeded so the figure is identical on every run.
fig = plotly.express.scatter_matrix(mm.sample(frac=0.05, random_state=1),
    dimensions=["motion_x", "motion_y", "motion_z"],
    color="subject",
    height=1200,
    width=1200,
    opacity=0.2,
)
fig.update_traces(marker=dict(size=3.0))
fig.show()