In [None]:
%load_ext autoreload
%autoreload 2

import sys

In [None]:
import os
import array
import re

import pandas
import numpy

import plotly.express
import librosa
from matplotlib import pyplot as plt

from software.features.spectral import design_bandpass, plot_bandpass
from software.features.featureutils import resample, rms, vector_magnitude
from software.dataset.combine import load_data, read_labels, parse_video_filename, apply_labels

In [None]:
# Input locations, given relative to the working directory of the notebook
sensor_data_path = 'data/jonnor-brushing-1/har_record/'
labels_path  = 'data/jonnor-brushing-1/labels/project-7-at-2024-12-31-23-50-84589958.csv'

# Passed to design_bandpass() as `sr` (presumably the sensor sample rate in Hz -- confirm)
samplerate = 50
# Lower / upper edge of the band-pass filter (presumably in Hz -- confirm)
lower = 2.0
upper = 5.0

In [None]:
# Design the band-pass filter between `lower` and `upper`, using `samplerate` as `sr`.
# NOTE(review): bandpass_sos is not referenced again in the visible cells -- confirm it is still needed.
bandpass_sos = design_bandpass(lower=lower, upper=upper, sr=samplerate, order=2)
# Uncomment to plot the designed filter
#plot_bandpass(bandpass_sos, lower=lower, upper=upper, sr=samplerate);

In [None]:
# Read the sensor recordings, keeping only the first row for each 'time' value
data = load_data(sensor_data_path).drop_duplicates(subset=['time'])

# Quick look at the available columns and the first rows
for preview in (data.columns, data.head(5)):
    print(preview)

In [None]:
# Read the label export, derive `gdrive_id` by stripping the 'ucexportdownloadid'
# marker from the filename, then drop the columns that are no longer needed
labels = read_labels(labels_path)
labels = (
    labels
    .assign(gdrive_id=labels['filename'].str.replace('ucexportdownloadid', ''))
    .drop(columns=['data_url', 'file', 'filename'])
)
labels

In [None]:
# Read the video listing and derive, per video, its start time (parsed from the
# filename) and the Google Drive id (taken from the download URL)
videos_path = 'data/jonnor-brushing-1/videos.csv'
videos = pandas.read_csv(videos_path)
videos['file_time'] = videos.filename.apply(parse_video_filename)
gdrive_prefix = 'https://drive.google.com/uc?export=download&id='
# regex=False is required: the prefix contains '?' and '.', which are regex
# metacharacters. With pandas < 2.0 (where regex defaults to True) the literal
# prefix would not match and gdrive_id would silently keep the full URL.
videos['gdrive_id'] = videos['data_url'].str.replace(gdrive_prefix, '', regex=False)
videos = videos.drop(columns=['data_url'])
videos

In [None]:
# Pair every label with its video through the shared Drive id (default inner join)
mm = pandas.merge(labels, videos, left_on='gdrive_id', right_on='gdrive_id')

# Columns from the label export that are not used further on
unused_columns = ['updated_at', 'created_at', 'lead_time', 'annotator', 'annotation_id', 'activity', 'channel', 'id']
mm = mm.drop(columns=unused_columns)

# Label start/end are offsets in seconds; turn them into absolute times by adding
# the video time and the per-label alignment offset
align_label = pandas.to_timedelta(mm['label_alignment'], unit='s')
video_origin = mm['file_time'] + align_label
mm['start_time'] = video_origin + pandas.to_timedelta(mm['start'], unit='s')
mm['end_time'] = video_origin + pandas.to_timedelta(mm['end'], unit='s')

# Constant key, used later as the `groupby` column when applying labels
mm['dummy_filename'] = 'only-one-sensor'

# The relative columns and the join key have served their purpose
mm = mm.drop(columns=['file_time', 'start', 'end', 'gdrive_id'])
mm

In [None]:
# Sum of the 'duration' column for every (video filename, label class) pair
mm.groupby(['filename', 'class'])['duration'].sum()

In [None]:
def apply_sessions(data, labels, pad_start='30s', pad_end='30s'):
    """
    Tag sensor rows with the session (label filename) they belong to.

    For every distinct 'filename' in labels, the session spans from the earliest
    'start_time' minus pad_start to the latest 'end_time' plus pad_end, clipped
    to the time range covered by data. Rows inside that span get the filename
    written to the 'session' column. If spans overlap, the later group wins.

    Parameters:
        data: DataFrame with 'time' available as a column or as the index
        labels: DataFrame with 'filename', 'start_time' and 'end_time' columns
        pad_start: padding before the first label, anything pandas.Timedelta accepts
        pad_end: padding after the last label, anything pandas.Timedelta accepts

    Returns:
        A new DataFrame indexed by 'time' (sorted), always containing a 'session'
        column. Rows outside every session have a missing value there.
    """

    pad_start = pandas.Timedelta(pad_start)
    pad_end = pandas.Timedelta(pad_end)

    df = data.reset_index().set_index('time').sort_index()

    for idx, ll in labels.groupby('filename'):
        s = ll['start_time'].min() - pad_start
        e = ll['end_time'].max() + pad_end
        # Clip the padded span to the data that actually exists
        s = max(s, df.index.min())
        e = min(e, df.index.max())
        print(s, e, idx)

        # also associate the "session"
        df.loc[s:e, 'session'] = idx

    # Without any label group the loop above never creates the column, and
    # callers doing dropna(subset='session') would fail with KeyError.
    if 'session' not in df.columns:
        df['session'] = None

    return df

# apply_labels() is told to group by 'dummy_filename'; the same constant value is
# set on the sensor data here and on the labels (mm), so everything forms one group
pre = data.assign(dummy_filename='only-one-sensor')
#pre = pre.sort_values('time')
labeled = apply_labels(pre, mm, groupby='dummy_filename', start='start_time', end='end_time')

ss = (
    apply_sessions(labeled, mm)
    .assign(is_brushing=lambda frame: frame['class'].isin(['brushing']))
    # drop unlabeled
    # FIXME: include the sessions without videos??
    .dropna(subset='session')
)
ss

In [None]:
# Display the merged label table again for reference
mm

In [None]:

def plot_session(sensors, labels, title=None, height=300, aspect=3.0):
    """
    Plot the x/y/z sensor traces of one session with its labels overlaid.

    Each label is drawn as a shaded vertical band between its 'start_time' and
    'end_time', colored by class and annotated with the class name.

    Parameters:
        sensors: DataFrame with 'x', 'y', 'z' columns, and 'time' available as a
            column after reset_index()
        labels: DataFrame with 'class', 'start_time' and 'end_time' columns
        title: figure title
        height: figure height, passed on to plotly
        aspect: width / height ratio of the figure

    Returns:
        The plotly figure
    """

    time_column = 'time'
    columns = ['x', 'y', 'z']
    width = height * aspect
    annotation_column = 'class'

    # TODO: plot other values than raw XYZ.
    # Like motion, from removing gravity. Maybe overall RMS, delta rms, band-passed 2-5hz instead of
    sel = sensors.reset_index()
    fig = plotly.express.line(sel,
                              x=time_column,
                              y=columns,
                              width=width,
                              height=height,
                              title=title,
                             )
    fig.update_traces(connectgaps=False)
    fig.update_layout(showlegend=False)

    colors = plotly.express.colors.qualitative.Plotly

    # One color per class. Wrap around the palette: pairing with zip() would stop
    # at the palette length and leave later classes without a color (KeyError below).
    types = labels[annotation_column].unique()
    annotation_colors = { v: colors[i % len(colors)] for i, v in enumerate(types) }

    # FIXME: put this on the bottom
    for idx, l in labels.iterrows():
        color = annotation_colors[l[annotation_column]]
        fig.add_vrect(x0=l['start_time'],
                x1=l['end_time'],
                line_width=0,
                fillcolor=color,
                opacity=0.3,
                label=dict(
                    text=l[annotation_column],
                    textposition="start",
                    font=dict(size=10, color="black"),
                    yanchor="top",
                ),
         )

    return fig

# Plot every session together with the labels that belong to it
labels_by_file = mm.set_index('filename')
for session, session_data in ss.groupby('session'):
    print(session)
    # Select with a list so the result is always a DataFrame. A scalar key returns
    # a Series when the session has exactly one label, which plot_session() cannot
    # handle (it calls .unique() on a column and .iterrows()).
    ll = labels_by_file.loc[[session]]
    fig = plot_session(session_data, ll, title=session)
    fig.show()

In [None]:
# Write the labeled, session-tagged sensor data to a Parquet file in the working directory
ss.to_parquet('combined.parquet')

In [None]:

# NOTE(review): `pp` is not defined in any visible cell, so this cell raises NameError on
# Restart & Run All -- presumably it came from a removed cell or another notebook; confirm.
# NOTE(review): this rebinds `mm`, which the earlier cells use for the merged label table.
mm = pp.reset_index()
#mm = pandas.merge(mm, filename_meta, left_on='filename', right_on='filename')
# Combine the three per-axis columns into one magnitude column via vector_magnitude(),
# once for the motion_* columns and once for the bandpass_motion_* columns
mm['motion_mag'] = vector_magnitude(mm[['motion_x', 'motion_y', 'motion_z']], axis=1)
mm['bandpass_motion_mag'] = vector_magnitude(mm[['bandpass_motion_x', 'bandpass_motion_y', 'bandpass_motion_z']], axis=1)
mm

In [None]:


# Passed as `freq` to resample() (presumably the resampling period -- confirm)
freq = '200ms'

# NOTE(review): `sub` is not defined in any visible cell, so this cell raises NameError on
# Restart & Run All -- confirm where it is supposed to come from.
# Resample bandpass_motion_mag with rms() as the aggregation function, and norm_gravity_pca0 with 'mean'
re1 = resample(sub[['bandpass_motion_mag']], freq=freq, func=lambda a, **kwargs: rms(a.values, axis=0))
re2 = resample(sub[['norm_gravity_pca0']], freq=freq, func='mean')

# Join the two resampled frames on their index, then join the result with `sub` on the index
m = pandas.merge(re1, re2, right_index=True, left_index=True)
m = pandas.merge(m, sub, right_index=True, left_index=True)
m

In [None]:
# Largest value of `elapsed` in the merged frame
# NOTE(review): no visible cell creates `elapsed`; presumably it is a column coming from `sub` -- confirm
print(m.elapsed.max())