# Analysing steps in a short walk using acceleration and rotation

In this notebook we examine a recording of a short walk from the Arduino IMU, with the aim of extracting the step count, the cadence, and timings including ground time and step duration.

We implement three techniques outlined in the paper `A comprehensive comparison of simple step counting techniques using wrist- and ankle-mounted accelerometer and gyroscope signals` by Matthew Rudy and Joseph Mahoney - [https://www.researchgate.net/publication/325451208_A_comprehensive_comparison_of_simple_step_counting_techniques_using_wrist-_and_ankle-mounted_accelerometer_and_gyroscope_signals](https://www.researchgate.net/publication/325451208_A_comprehensive_comparison_of_simple_step_counting_techniques_using_wrist-_and_ankle-mounted_accelerometer_and_gyroscope_signals).

* Peak-finding
* Fast Fourier Transform (FFT)
* Autocorrelation

Each of these methods allows us to count steps. The peak-finding method also identifies where the steps occur in the timeseries, so this in turn allows us to isolate steps and calculate such things as ground time and step duration.

## The IMU

The Arduino Nano inertial measurement unit gives us acceleration, measured in `g`s (`1g = 9.8m/s/s`), and rotation (angular velocity). In this notebook we demonstrate how to extract steps from either acceleration or rotation.

## The data

We expect a CSV file with columns for time, 3 axes of acceleration, and 3 axes of gyroscopic rotation.

## Setup

In [None]:
import pandas as pd
import seaborn as sns
from scipy.integrate import cumtrapz
from scipy.signal import butter, filtfilt, periodogram, spectrogram, find_peaks
import matplotlib.pyplot as plt
sns.set(rc={'figure.figsize':(11, 4)})
import numpy as np
import gpxpy
from xml.etree import ElementTree as ET
from datetime import timedelta

## Data import

In [None]:
# Input recording and the GPX file that the export cell writes at the end.
data_file_path = "../data/WearableMyFoot 1km backgarden run.csv"
output_file_path = "../data/WearableMyFoot 1km backgarden run.gpx"

# Load the recording and give its seven columns short names:
# time, three acceleration axes (a*) and three gyroscope axes (g*).
df = pd.read_csv(data_file_path)
df.columns = ["time", "aX", "aY", "aZ", "gX", "gY", "gZ"]

# Re-base the time column so that the recording starts at zero.
df["time"] = df["time"] - df["time"].min()
df

## Key parameters

In [None]:
# Parameters shared by the cells below.
# Wall-clock time used as the start of the exported track.
timestamp = pd.Timestamp("2020-09-04T14:34:10")
# Hz, sampling frequency (presumably one sample every 10 ms -- confirm against the logger).
fs = 1000 / 10
# Length of the recording, in the same unit as df.time.
total_time = df["time"].max() - df["time"].min()

## Helper functions

In [None]:
def get_time_period(a, b):
    """
    Select the rows of the global ``df`` that fall in the half-open window [a, b).

    a, b -- time in seconds (``df.time`` is compared against these values times 1000)
    """
    start, stop = a * 1000, b * 1000
    in_window = (df.time >= start) & (df.time < stop)
    return df.loc[in_window]

def low_pass(data, fc, fs):
    w = fc / (fs / 2) # Normalize the frequency
    b, a = butter(5, w, 'low')
    return filtfilt(b, a, data)

def get_magnitude(data, cutoff_frequency=None, fs=None):
    """
    Row-wise Euclidean norm of ``data``, optionally low-pass filtered.

    data -- 2-D array-like; the norm is taken across axis 1 (e.g. x, y, z columns)
    cutoff_frequency -- if given, the norm is passed through low_pass with this cutoff
    fs -- sampling frequency, only used when cutoff_frequency is given
    """
    squared_sum = (data ** 2).sum(axis=1)
    norm = np.sqrt(squared_sum)
    if cutoff_frequency is not None:
        return low_pass(norm, cutoff_frequency, fs)
    return norm

def peak_detection_steps(data, pos_kwargs=None, neg_kwargs=None, plot=False):
    """
    Count steps as the number of peaks in a 1-D signal.

    data -- 1-D signal (e.g. an acceleration or rotation magnitude)
    pos_kwargs -- keyword arguments forwarded to scipy.signal.find_peaks when
        detecting the peaks that are counted; None means no constraints
    neg_kwargs -- keyword arguments forwarded to find_peaks on the negated
        signal; the resulting troughs are only drawn, never counted
    plot -- if True, draw the signal with its peaks and troughs marked

    Returns the number of peaks found.
    """
    # The None defaults cannot be unpacked with **, so map them to "no constraints".
    pos_kwargs = {} if pos_kwargs is None else pos_kwargs
    neg_kwargs = {} if neg_kwargs is None else neg_kwargs

    peaks, _ = find_peaks(data, **pos_kwargs)
    if plot:
        # find_peaks returns positions; a plain array makes the marker lookup
        # positional even when data is a pandas Series with its own index.
        values = np.asarray(data)
        # Troughs are only needed for the figure, so they are computed here.
        neg_peaks, _ = find_peaks(-values, **neg_kwargs)
        sns.lineplot(x=range(len(values)), y=values)
        sns.scatterplot(x=peaks, y=values[peaks])
        sns.scatterplot(x=neg_peaks, y=values[neg_peaks])
        plt.show()
    return len(peaks)

def fft_dominant_freq(data, fs, plot=False, skip_bins=10):
    """
    Return the frequency with the highest power in the periodogram of data.

    data -- 1-D signal
    fs -- sampling frequency in Hz
    plot -- if True, draw the power spectrum between 0 and 10 Hz
    skip_bins -- number of lowest-frequency periodogram bins to ignore when
        looking for the maximum (default 10)

    Returns the dominant frequency, in the same unit as fs.
    Raises ValueError if no bins are left after skipping skip_bins.
    """
    f, Pxx = periodogram(data, fs=fs)
    if plot:
        # x and y must be passed by keyword in current seaborn releases
        sns.lineplot(x=f, y=Pxx)
        plt.xlim([0.0, 10.0])
        plt.show()
    if len(Pxx) <= skip_bins:
        raise ValueError(
            f"signal too short: periodogram has {len(Pxx)} bins, "
            f"need more than skip_bins={skip_bins}"
        )
    # argmax is relative to the sliced spectrum, so look the result up in the
    # equally sliced frequency axis rather than in the full one.
    return f[skip_bins:][np.argmax(Pxx[skip_bins:])]

def fft_steps(data, dt, fs, plot=False):
    """
    Estimate the number of steps in data from its dominant frequency.

    data -- 1-D signal
    dt -- duration covered by data; treated as milliseconds, since the
        product with the frequency is divided by 1000
    fs -- sampling frequency in Hz
    plot -- forwarded to fft_dominant_freq
    """
    steps_per_second = fft_dominant_freq(data, fs, plot)
    return dt * steps_per_second / 1000

def autocorr(x):
    """
    Correlate a 1-D signal with itself.

    numpy's 'same' mode is used, so the output has the same length as x.
    """
    return np.correlate(x, x, mode='same')

def autocorr_steps(data, plot=False):
    """
    Count steps as the number of local maxima in the autocorrelation of data.

    data -- 1-D signal
    plot -- if True, draw the autocorrelation with its peaks marked
    """
    corr = autocorr(data)
    peak_idx = find_peaks(corr)[0]
    if plot:
        sns.lineplot(x=range(len(corr)), y=corr)
        sns.scatterplot(x=peak_idx, y=corr[peak_idx])
        plt.show()
    return len(peak_idx)

def to_steps_per_minute(step_count, dt):
    """
    Convert a step count over a duration into a cadence in steps per minute.

    step_count -- number of steps counted
    dt -- time in seconds
    """
    steps_per_second = step_count / dt
    return steps_per_second * 60

def get_spm_for_period(a, b, columns, cutoff_frequency, peak_detection_kwargs=None):
    """
    Estimate the cadence in the window [a, b) with all three techniques.

    Reads the module-level ``df`` (through get_time_period) and ``fs``.

    a, b -- window bounds in seconds
    columns -- df columns whose magnitude is analysed, e.g. ["aX", "aY", "aZ"]
    cutoff_frequency -- low-pass cutoff passed on to get_magnitude
    peak_detection_kwargs -- keyword arguments for peak_detection_steps
        (pos_kwargs, neg_kwargs, plot); None means none

    Returns a pandas Series with the entries peak_detection_spm, fft_spm and
    autocorrelation_spm.
    """
    # A None default avoids sharing one mutable dict between calls.
    if peak_detection_kwargs is None:
        peak_detection_kwargs = {}

    # Nominal window length; a window that runs past the end of the recording
    # is still treated as b - a seconds long.
    dt = b - a
    window = get_time_period(a, b).loc[:, columns]
    magnitude = get_magnitude(window, cutoff_frequency=cutoff_frequency, fs=fs)

    n_peak_steps = peak_detection_steps(magnitude, **peak_detection_kwargs)
    dominant_freq = fft_dominant_freq(magnitude, fs=fs)
    n_autocorr_steps = autocorr_steps(magnitude)
    return pd.Series({
        "peak_detection_spm": to_steps_per_minute(n_peak_steps, dt),
        # the dominant frequency is in steps per second already
        "fft_spm": dominant_freq * 60,
        "autocorrelation_spm": to_steps_per_minute(n_autocorr_steps, dt),
    })

# Measuring steps from acceleration

In [None]:
# Acceleration on each axis over the whole recording.
sns.lineplot(x=df["time"], y=df["aX"], label="aX")
sns.lineplot(x=df["time"], y=df["aY"], label="aY")
sns.lineplot(x=df["time"], y=df["aZ"], label="aZ")

In [None]:
# Zoom in on the first nine samples of the recording.
_df = df.head(9)
sns.lineplot(x=_df["time"], y=_df["aX"], label="aX")
sns.lineplot(x=_df["time"], y=_df["aY"], label="aY")
sns.lineplot(x=_df["time"], y=_df["aZ"], label="aZ")

In [None]:
# Mean acceleration over the samples held in _df, used as the gravity estimate
# (presumably the sensor is at rest during those samples -- confirm).
gravity_vector = _df[["aX", "aY", "aZ"]].mean()
# Show the estimate together with its squared length.
(gravity_vector, gravity_vector.pow(2).sum())

In [None]:
# Subtract the gravity estimate from each axis and scale by 9.8 (g -> m/s/s).
# NOTE: this rewrites df in place, so running the cell twice applies it twice.
df.loc[:, ["aX", "aY", "aZ"]] = df.loc[:, ["aX", "aY", "aZ"]].sub(gravity_vector).mul(9.8)

In [None]:
# Spectrogram of the unfiltered acceleration magnitude.
f, t, Sxx = spectrogram(get_magnitude(df[["aX", "aY", "aZ"]], fs=fs), fs=fs)
plt.pcolormesh(t, f, Sxx, shading='gouraud')
plt.xlabel('Time [sec]')
plt.ylabel('Frequency [Hz]')
plt.show()

In [None]:
# Step counts over the whole recording from the unfiltered acceleration magnitude.
aMagnitude = get_magnitude(df[["aX", "aY", "aZ"]], cutoff_frequency=None, fs=fs)
n_peak_steps = peak_detection_steps(
    aMagnitude,
    pos_kwargs=dict(prominence=2, distance=3),
    neg_kwargs=dict(prominence=1),
    plot=True,
)
n_fft_steps = fft_steps(aMagnitude, dt=total_time, fs=fs, plot=True)
n_autocorr_steps = autocorr_steps(aMagnitude, plot=True)
print(n_peak_steps, n_fft_steps, n_autocorr_steps)
# The same three counts as steps per minute; total_time / 1000 is the duration in seconds.
print(*(
    to_steps_per_minute(n_steps, total_time / 1000)
    for n_steps in (n_peak_steps, n_fft_steps, n_autocorr_steps)
))

In [None]:
# Close-up of 100 consecutive magnitude samples, taken from position 10000 onwards.
# NOTE(review): assumes the recording holds at least 10100 samples -- otherwise
# x and y differ in length.
sns.lineplot(x=range(100), y=aMagnitude[10000:10100])

In [None]:
# Cadence per 10-second window from the acceleration magnitude, low-passed at 1.8 Hz.
peak_detection_kwargs = dict(
    pos_kwargs=dict(prominence=2, distance=200),
    neg_kwargs=dict(prominence=1),
)

spms = []
dt = 10
for a in np.arange(0, df["time"].max() / 1000, dt):
    b = a + dt
    spm = get_spm_for_period(
        a,
        b,
        ["aX", "aY", "aZ"],
        cutoff_frequency=1.8,
        peak_detection_kwargs=peak_detection_kwargs,
    )
    # the window start (in seconds) becomes the row label of spm_df
    spm.name = a
    spms.append(spm)
spm_df = pd.DataFrame(spms)
spm_df.plot()
spm_df.describe()

# Measuring steps from rotation

In [None]:
# Rotation on each gyroscope axis over the whole recording.
sns.lineplot(x=df["time"], y=df["gX"], label="gX")
sns.lineplot(x=df["time"], y=df["gY"], label="gY")
sns.lineplot(x=df["time"], y=df["gZ"], label="gZ")

In [None]:
# Spectrogram of the unfiltered rotation magnitude.
f, t, Sxx = spectrogram(get_magnitude(df[["gX", "gY", "gZ"]], fs=fs), fs=fs)
plt.pcolormesh(t, f, Sxx, shading='gouraud')
plt.xlabel('Time [sec]')
plt.ylabel('Frequency [Hz]')
plt.show()

In [None]:
# Step counts over the whole recording from the rotation magnitude, low-passed at 2.0 Hz.
gMagnitude = get_magnitude(df.loc[:,["gX", "gY", "gZ"]], cutoff_frequency=2.0, fs=fs)
n_peak_steps = peak_detection_steps(
    gMagnitude,
    pos_kwargs={
        "prominence": 2,
        "distance": 3
    },
    neg_kwargs={
        "prominence": 1
    },
    plot=True
)
# fft_steps needs the duration of the whole signal (same unit as df.time) so
# that its count covers the full recording, matching the per-minute
# conversion below and the acceleration analysis.
n_fft_steps = fft_steps(gMagnitude, dt=total_time, fs=fs, plot=True)
n_autocorr_steps = autocorr_steps(gMagnitude, plot=True)
print(n_peak_steps, n_fft_steps, n_autocorr_steps)
# The same three counts as steps per minute; total_time / 1000 is the duration in seconds.
print(
    to_steps_per_minute(n_peak_steps, total_time / 1000),
    to_steps_per_minute(n_fft_steps, total_time / 1000),
    to_steps_per_minute(n_autocorr_steps, total_time / 1000),
)

In [None]:
# Cadence per 1-second window from the rotation magnitude, low-passed at 2.0 Hz.
# The resulting spm_df is what the GPX export below writes out.
peak_detection_kwargs = dict(
    pos_kwargs=dict(height=(50, None)),
    neg_kwargs=dict(prominence=1),
)

spms = []
dt = 1
for a in np.arange(0, df["time"].max() / 1000, dt):
    b = a + dt
    spm = get_spm_for_period(
        a,
        b,
        ["gX", "gY", "gZ"],
        cutoff_frequency=2.0,
        peak_detection_kwargs=peak_detection_kwargs,
    )
    # the window start (in seconds) becomes the row label of spm_df
    spm.name = a
    spms.append(spm)
spm_df = pd.DataFrame(spms)
spm_df.plot()
spm_df.describe()


# Export GPX
Contains a track with cadence

In [None]:
def get_cadence_extension(cadence):
    """
    Build the TrackPointExtension XML element that carries a cadence value.

    cadence -- cadence to store; values above 254 are written as 254 and the
        value is truncated to an integer

    Returns the TrackPointExtension element with a single cad child.
    """
    prefix = "gpxtrx:"
    # Fully qualified alternative to the prefix:
    # "{http://www.garmin.com/xmlschemas/TrackPointExtension/v1}"

    # Schema only permits integers up to 254
    capped = cadence if cadence <= 254 else 254

    extension = ET.Element(prefix + "TrackPointExtension")
    ET.SubElement(extension, prefix + "cad").text = str(int(capped))
    return extension

def get_point(time, cadence):
    """
    Create a GPX track point holding a timestamp and a cadence extension.

    time -- value assigned to the point's time
    cadence -- cadence passed to get_cadence_extension
    """
    point = gpxpy.gpx.GPXTrackPoint()
    point.time = time
    point.extensions = [get_cadence_extension(cadence)]
    return point

def get_gpx(data):
    """
    Build a GPX document with one track and one segment, one point per row of data.

    data -- pandas DataFrame with a ``time`` column and an ``fft_spm`` column;
        fft_spm is written as the cadence of each point

    Returns the gpxpy GPX object.
    """
    gpx = gpxpy.gpx.GPX()
    # The cad element is defined by Garmin's TrackPointExtension v1 schema, so
    # the prefix used by get_cadence_extension has to resolve to that namespace
    # (GpxExtensions/v3 has no cad element).
    gpx.nsmap["gpxtrx"] = 'http://www.garmin.com/xmlschemas/TrackPointExtension/v1'
    track = gpxpy.gpx.GPXTrack()
    gpx.tracks.append(track)
    segment = gpxpy.gpx.GPXTrackSegment()
    track.segments.append(segment)
    # Only two columns are needed, so iterate them directly instead of whole rows.
    segment.points = [
        get_point(time, cadence)
        for time, cadence in zip(data["time"], data["fft_spm"])
    ]
    return gpx
    
# Give every window an absolute time: the recording start plus the window's
# offset in seconds (the index of spm_df).
spm_df.loc[:,"time"] = [timestamp + timedelta(seconds=x) for x in spm_df.index]
# Write the file as UTF-8, the encoding named in the XML declaration that
# gpxpy emits, rather than the platform default; plain 'w' is enough because
# the file is never read back here.
with open(output_file_path, 'w', encoding="utf-8") as f:
    f.write(get_gpx(spm_df).to_xml())