In [None]:
import pandas as pd
import seaborn as sns
sns.set(rc={'figure.figsize':(11, 4)})
from ipca import IncrementalPCA
from scipy.integrate import cumtrapz, trapz
import matplotlib.pyplot as plt
from scipy.signal import find_peaks
import numpy as np
from datetime import timedelta
import gpxpy
from xml.etree import ElementTree as ET

## Data import

In [None]:
# Load the raw sensor log and give its seven columns short names
# (the a*/g* names presumably denote accelerometer and gyroscope axes — TODO confirm).
data_file_path = "../data/wmf-up-n-down.csv"
df = pd.read_csv(data_file_path)
df.columns = [
    "time",
    "aX", "aY", "aZ",
    "gX", "gY", "gZ",
]
# Re-base the time axis so that the earliest sample is at 0.
df["time"] -= df["time"].min()
df

In [None]:
# Scale the three acceleration columns by 9.8 — presumably converting readings
# expressed in g into m/s² (TODO confirm the sensor's units).
# NOTE(review): the columns are overwritten in place, so re-running this cell
# scales the data a second time.
df.loc[:,["aX", "aY", "aZ"]] = df.loc[:,["aX", "aY", "aZ"]] * 9.8

Since we do not know the direction of movement (the device can be mounted in any orientation), we perform principal component analysis (PCA) on the three acceleration axes. The 0th component should ideally lie along the direction of movement, assuming that the acceleration varies most in that direction.

In [None]:
# Transform the three acceleration columns with PCA and store the result in
# the new columns pca0, pca1 and pca2.
# NOTE(review): `IncrementalPCA` is imported from the `ipca` package; the
# meaning of the positional argument 50 is not visible in this notebook —
# TODO confirm against that package's documentation.
df.loc[:,["pca0", "pca1", "pca2"]] = IncrementalPCA(50).fit_transform(df.loc[:,["aX", "aY", "aZ"]].values)

In [None]:
def get_peaks(data, distance=20, height=(35, None)):
    """Locate peaks in a 1-D signal and bracket them with the signal's end points.

    Parameters
    ----------
    data : sequence of float
        Signal to search (e.g. one PCA component of the acceleration).
    distance : int, optional
        Minimum number of samples between neighbouring peaks, passed to
        ``scipy.signal.find_peaks``. Defaults to 20.
    height : number or (min, max) tuple, optional
        Required peak height, passed to ``scipy.signal.find_peaks``. The
        default ``(35, None)`` keeps peaks of at least 35 with no upper bound.

    Returns
    -------
    numpy.ndarray
        Integer sample indices: ``0``, then every detected peak, then
        ``len(data) - 1``.
    """
    peaks, _ = find_peaks(data, distance=distance, height=height)
    # The first and last sample indices are added as sentinels so that callers
    # can form intervals that cover the whole signal.
    return np.hstack([[0], peaks, [len(data) - 1]])

def get_step_ranges(data):
    """Split a signal into per-step index ranges centred on its peaks.

    The boundaries returned by ``get_peaks`` (signal start, peaks, signal end)
    are averaged pairwise to obtain midpoints; each pair of consecutive
    midpoints then forms one ``[start, end]`` range.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(n_ranges, 2)`` whose values are rounded to
        whole numbers; callers cast to ``int`` before indexing.
    """
    boundaries = get_peaks(data)
    centres = (boundaries[:-1] + boundaries[1:]) / 2
    starts, ends = centres[:-1], centres[1:]
    return np.column_stack((starts, ends)).round()

# Split the first principal component into per-step index ranges; the shape
# shown below is (number of ranges, 2).
steps = get_step_ranges(df.pca0)
steps.shape

In [None]:
# Inspect one step range (row 100 of `steps`) in isolation.
# NOTE(review): raises IndexError when fewer than 101 ranges were detected.
step = steps[100]
a, b = step.astype(int)
# Integrate pca0 over time (time is divided by 1000 and the result multiplied by
# 3.6 — presumably ms -> s and m/s -> km/h; TODO confirm units).
# NOTE(review): `cumtrapz` is deprecated/removed in recent SciPy releases in
# favour of `cumulative_trapezoid` — confirm against the SciPy version in use.
# NOTE(review): `.iloc[a:b]` excludes row b, whereas `get_speed` slices with
# `.loc[a:b]`, which includes it.
velocity = cumtrapz(df.pca0.iloc[a:b], x=df.time.iloc[a:b] / 1000) * 3.6
# Largest absolute value of the integrated signal within this range.
display(pd.Series(velocity).abs().max())
# Plot the pca0 samples of the range, then the integrated curve.
df.pca0.iloc[a:b].plot()
plt.show()
sns.lineplot(x=range(len(velocity)), y=velocity)
plt.show()

In [None]:
def get_speed(data, time, step):
    """Return the peak absolute speed reached within one step range.

    The signal is integrated over time with the cumulative trapezoidal rule
    and the largest absolute value of the integral is returned.

    Parameters
    ----------
    data : pandas.Series
        Signal to integrate (the callers pass the ``pca0`` column).
    time : pandas.Series
        Sample times sharing ``data``'s index. They are divided by 1000 before
        integrating — presumably milliseconds to seconds (TODO confirm).
    step : numpy.ndarray
        Two-element ``[start, end]`` array of index labels; cast to ``int``.
        The slice is label-based, so both ends are included.

    Returns
    -------
    float
        Maximum absolute integrated value multiplied by 3.6 (presumably
        m/s to km/h), or ``0.0`` when the range holds fewer than two samples
        and nothing can be integrated.
    """
    # `scipy.integrate.cumtrapz` was removed in SciPy 1.14; its replacement is
    # imported locally so this function does not depend on the removed name.
    from scipy.integrate import cumulative_trapezoid

    a, b = step.astype(int)
    signal = data.loc[a:b].to_numpy()
    seconds = time.loc[a:b].to_numpy() / 1000
    if len(signal) < 2:
        # The cumulative integral would be empty and np.max would raise.
        return 0.0
    velocity = cumulative_trapezoid(signal, x=seconds) * 3.6
    return np.max(np.abs(velocity))

# Sliding-window speed estimate: for every window, average the peak speed of
# all step ranges that lie completely inside the window.
velocity = []
window_size = 1000
step_size = 100
for i in np.arange(-window_size+step_size, len(df), step_size):
    # `.loc` slices by label and includes both ends; the first windows start
    # at negative labels and therefore cover fewer rows.
    _df = df.loc[i:i+window_size]
    _steps = [x for x in steps if (int(x[0]) in _df.index) and (x[1] in _df.index)]
    _speeds = [get_speed(_df.pca0, _df.time, x) for x in _steps]
    velocity.append({
        "time": _df.time.iloc[-1],
        # np.mean([]) returns NaN but emits a RuntimeWarning; produce the NaN
        # explicitly for windows without a complete step (replaced by 0 below).
        "velocity": np.mean(_speeds) if _speeds else np.nan,
    })
velocity = pd.DataFrame(velocity).fillna(0.0)
sns.lineplot(x=velocity.time/1000, y=velocity.velocity)

In [None]:
# Cumulative distance: each window's speed times the time elapsed since the
# previous window, summed up. Time is divided by 1000 and speed by 3.6 —
# presumably ms -> s and km/h -> m/s, giving metres (TODO confirm units).
distance = (velocity.time.diff() * velocity.velocity / 1000).fillna(0).cumsum() / 3.6
distance

In [None]:
def round_to_seconds(timestamp):
    """Drop the microsecond part of ``timestamp``.

    Despite the name this truncates rather than rounds: the result is the
    same instant with its ``microsecond`` field subtracted away.
    """
    sub_second = timedelta(microseconds=timestamp.microsecond)
    return timestamp - sub_second

# Build the per-second table that is exported to GPX.
# Everything is derived from the `velocity` frame (columns `time` and
# `velocity`) so the cell runs on a fresh kernel without relying on names
# that no cell defines.
timestamp = pd.to_datetime("2020-09-22T16:02:36.516Z")
time_steps = velocity.time
# Cumulative distance: speed times elapsed time between consecutive windows
# (time / 1000 and speed / 3.6 — presumably ms -> s and km/h -> m/s; TODO confirm).
distance = (velocity.time.diff() * velocity.velocity / 1000).fillna(0).cumsum() / 3.6
timestamps = [round_to_seconds(timestamp + timedelta(seconds=x)) for x in time_steps / 1000]
out_df = pd.DataFrame({
    "time": timestamps,
    "speed": velocity.velocity.astype(float),
    "course": distance.astype(float),
}).dropna()
# Several windows can fall into the same whole second; average them.
out_df = out_df.groupby("time").mean().reset_index()

In [None]:
def get_speed_extension(speed, distance):
    """Build a ``gpxtpx:TrackPointExtension`` XML element.

    The element gets three children, in this order: ``gpxtpx:speed`` holding
    ``str(speed)``, ``gpxtpx:course`` holding ``str(distance)`` and
    ``gpxtpx:cad`` holding the constant ``30``.
    """
    prefix = "gpxtpx:"
    extension = ET.Element(f"{prefix}TrackPointExtension")

    speed_node = ET.SubElement(extension, f"{prefix}speed")
    speed_node.text = str(speed)

    course_node = ET.SubElement(extension, f"{prefix}course")
    course_node.text = str(distance)

    cadence_node = ET.SubElement(extension, f"{prefix}cad")
    # Schema only permits integers up to 254
    cadence_node.text = str(30)

    return extension

def get_nmea_speed(speed):
    """Build an ``nmea:speed`` XML element whose text is ``str(speed)``."""
    prefix = "nmea:"
    speed_node = ET.Element(f"{prefix}speed")
    speed_node.text = str(speed)
    return speed_node

def get_point(time, speed, distance):
    """Create a GPX track point carrying a timestamp and speed extensions.

    The point is constructed without arguments; only its ``time`` and its
    ``extensions`` (an ``nmea:speed`` element followed by a
    ``gpxtpx:TrackPointExtension`` element) are set.
    """
    track_point = gpxpy.gpx.GPXTrackPoint()
    track_point.time = time
    track_point.extensions = [
        get_nmea_speed(speed),
        get_speed_extension(speed, distance),
    ]
    return track_point

def get_gpx(timestamps, speed, distance):
    """Assemble a GPX document with one track holding one segment.

    timestamps -- iterable of point times
    speed      -- iterable of speeds, one per timestamp
    distance   -- iterable of distances, one per timestamp

    The three iterables are zipped, so the number of points equals the
    length of the shortest one. The ``gpxtpx`` and ``nmea`` namespaces are
    registered on the document for the per-point extensions.
    """
    segment = gpxpy.gpx.GPXTrackSegment()
    segment.points = [
        get_point(*sample) for sample in zip(timestamps, speed, distance)
    ]

    track = gpxpy.gpx.GPXTrack()
    track.segments.append(segment)

    gpx = gpxpy.gpx.GPX()
    gpx.nsmap["gpxtpx"] = 'http://www.garmin.com/xmlschemas/TrackPointExtension/v2'
    gpx.nsmap["nmea"] = 'http://trekbuddy.net/2009/01/gpx/nmea'
    gpx.tracks.append(track)
    return gpx
    
output_file_path = "../data/wmf-up-n-down.gpx"

# Serialise before opening the file: opening in write mode truncates it, so
# building the document first keeps an existing file intact if this step fails.
# `out_df.speed` is divided by 3.6 — presumably km/h -> m/s (TODO confirm units).
gpx_xml = get_gpx(out_df.time, out_df.speed.values / 3.6, out_df.course.values).to_xml()

# Write with an explicit encoding instead of the platform default.
with open(output_file_path, "w", encoding="utf-8") as f:
    f.write(gpx_xml)