In [1]:
import pandas as pd
import numpy as np
from xml.dom.minidom import parse, parseString

In [2]:
def get_data(input_gpx):
    # TODO: you may use your code from exercise 3 here.
    # create function to extract cols 
    def element_to_data(ele):
        lat = float(ele.getAttribute("lat"))
        lon = float(ele.getAttribute("lon"))
        dtime = ele.getElementsByTagName('time')
        dtime = pd.to_datetime(dtime[0].firstChild.nodeValue, utc=True)

        return lat, lon, dtime

    # read file
    gps_doc = parse(input_gpx)
    # get 'trkpt' element from file 
    gps_elements = gps_doc.getElementsByTagName('trkpt')
    # get data from elment and create df with cols 
    gps_data = pd.DataFrame(list(map(element_to_data, gps_elements)),
                             columns=['lat', 'lon', 'timestamp'])
    return gps_data


In [3]:
gps = get_data('walk1/gopro.gpx')
accl = pd.read_json('walk1/accl.ndjson.gz', lines=True, convert_dates=['timestamp'])[['timestamp', 'x']]

phone = pd.read_csv('walk1/phone.csv.gz')[['time', 'gFx', 'Bx', 'By']]
first_time = accl['timestamp'].min()
phone['timestamp'] = first_time + pd.to_timedelta(phone['time'], unit='sec')


In [4]:
gps

Unnamed: 0,lat,lon,timestamp
0,49.278656,-123.016106,2022-06-08 18:12:09.844000+00:00
1,49.278609,-123.016093,2022-06-08 18:12:09.899142+00:00
2,49.278635,-123.016249,2022-06-08 18:12:09.954285+00:00
3,49.278625,-123.016099,2022-06-08 18:12:10.009428+00:00
4,49.278669,-123.016219,2022-06-08 18:12:10.064571+00:00
...,...,...,...
7853,49.277593,-123.015828,2022-06-08 18:19:22.970998+00:00
7854,49.277523,-123.015720,2022-06-08 18:19:23.026141+00:00
7855,49.277496,-123.015769,2022-06-08 18:19:23.081284+00:00
7856,49.277516,-123.015880,2022-06-08 18:19:23.136427+00:00


In [5]:
# unify the times, aggregate using 4 second bins 
accl['dt'] = accl['timestamp'].round("4S")
gps['dt'] = gps['timestamp'].round("4S")
phone['dt'] = phone['timestamp'].round("4S")

In [6]:
# group on the rounded-times, and average all of the other values 
accl = accl.groupby(['dt']).mean()
gps = gps.groupby(['dt']).mean()
phone = phone.groupby(['dt']).mean()

In [7]:
accl

Unnamed: 0_level_0,x
dt,Unnamed: 1_level_1
2022-06-08 18:12:08+00:00,0.008074
2022-06-08 18:12:12+00:00,-0.241065
2022-06-08 18:12:16+00:00,0.401996
2022-06-08 18:12:20+00:00,0.717109
2022-06-08 18:12:24+00:00,0.889633
...,...
2022-06-08 18:19:08+00:00,0.502401
2022-06-08 18:19:12+00:00,0.705140
2022-06-08 18:19:16+00:00,0.644120
2022-06-08 18:19:20+00:00,0.115184


In [8]:
gps

Unnamed: 0_level_0,lat,lon
dt,Unnamed: 1_level_1,Unnamed: 2_level_1
2022-06-08 18:12:08+00:00,49.278633,-123.016149
2022-06-08 18:12:12+00:00,49.278659,-123.016139
2022-06-08 18:12:16+00:00,49.278659,-123.016095
2022-06-08 18:12:20+00:00,49.278638,-123.016024
2022-06-08 18:12:24+00:00,49.278600,-123.015985
...,...,...
2022-06-08 18:19:08+00:00,49.277550,-123.016160
2022-06-08 18:19:12+00:00,49.277542,-123.016076
2022-06-08 18:19:16+00:00,49.277552,-123.015960
2022-06-08 18:19:20+00:00,49.277549,-123.015865


In [9]:
phone

Unnamed: 0_level_0,time,gFx,Bx,By
dt,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2022-06-08 18:12:08+00:00,0.094858,-0.006360,-11.407500,2.880000
2022-06-08 18:12:12+00:00,2.231188,-0.041580,-18.578124,5.555953
2022-06-08 18:12:16+00:00,6.125778,0.025079,-18.899868,1.150763
2022-06-08 18:12:20+00:00,10.149935,0.060139,-15.033802,-8.815840
2022-06-08 18:12:24+00:00,14.140882,0.080922,-13.939018,-11.510971
...,...,...,...,...
2022-06-08 18:18:52+00:00,402.149981,0.033399,-20.264264,3.550042
2022-06-08 18:18:56+00:00,406.147973,0.053450,-5.007277,15.213546
2022-06-08 18:19:00+00:00,410.151322,0.124351,-0.613833,15.572054
2022-06-08 18:19:04+00:00,414.151236,0.113568,-13.364730,9.301373


In [10]:
accl[:106]

Unnamed: 0_level_0,x
dt,Unnamed: 1_level_1
2022-06-08 18:12:08+00:00,0.008074
2022-06-08 18:12:12+00:00,-0.241065
2022-06-08 18:12:16+00:00,0.401996
2022-06-08 18:12:20+00:00,0.717109
2022-06-08 18:12:24+00:00,0.889633
...,...
2022-06-08 18:18:52+00:00,0.532375
2022-06-08 18:18:56+00:00,0.776529
2022-06-08 18:19:00+00:00,1.334968
2022-06-08 18:19:04+00:00,1.218110


In [11]:
# find the offset with the highest cross-correlation 
cor_rel = phone['gFx'].dot(accl['x'][:106])


In [12]:
np.linspace(-5.0, 5.0, 101)
    

array([-5. , -4.9, -4.8, -4.7, -4.6, -4.5, -4.4, -4.3, -4.2, -4.1, -4. ,
       -3.9, -3.8, -3.7, -3.6, -3.5, -3.4, -3.3, -3.2, -3.1, -3. , -2.9,
       -2.8, -2.7, -2.6, -2.5, -2.4, -2.3, -2.2, -2.1, -2. , -1.9, -1.8,
       -1.7, -1.6, -1.5, -1.4, -1.3, -1.2, -1.1, -1. , -0.9, -0.8, -0.7,
       -0.6, -0.5, -0.4, -0.3, -0.2, -0.1,  0. ,  0.1,  0.2,  0.3,  0.4,
        0.5,  0.6,  0.7,  0.8,  0.9,  1. ,  1.1,  1.2,  1.3,  1.4,  1.5,
        1.6,  1.7,  1.8,  1.9,  2. ,  2.1,  2.2,  2.3,  2.4,  2.5,  2.6,
        2.7,  2.8,  2.9,  3. ,  3.1,  3.2,  3.3,  3.4,  3.5,  3.6,  3.7,
        3.8,  3.9,  4. ,  4.1,  4.2,  4.3,  4.4,  4.5,  4.6,  4.7,  4.8,
        4.9,  5. ])

In [14]:
# remake phone['time'] as phone['time'] + offset
phone = pd.read_csv('walk1/phone.csv.gz')[['time', 'gFx', 'Bx', 'By']]
phone['time'] = phone['time'] + cor_rel
phone['timestamp'] = first_time + pd.to_timedelta(phone['time'], unit='sec')
phone['dt'] = phone['timestamp'].round("4S")
phone = phone.groupby(['dt']).mean()

In [15]:
phone

Unnamed: 0_level_0,time,gFx,Bx,By
dt,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2022-06-08 18:12:12+00:00,3.723264,-0.022693,-18.601846,3.764600
2022-06-08 18:12:16+00:00,6.186828,-0.038942,-18.361438,5.931839
2022-06-08 18:12:20+00:00,10.160546,0.040224,-19.146821,-1.629324
2022-06-08 18:12:24+00:00,14.167457,0.075039,-12.666269,-10.506874
2022-06-08 18:12:28+00:00,18.162674,0.065432,-16.082451,-10.711484
...,...,...,...,...
2022-06-08 18:18:56+00:00,406.142206,0.014521,-19.845864,3.892510
2022-06-08 18:19:00+00:00,410.131078,0.087564,-1.364599,17.331532
2022-06-08 18:19:04+00:00,414.143503,0.126201,-0.598713,15.650628
2022-06-08 18:19:08+00:00,418.148795,0.080248,-16.925999,6.395530


In [32]:
# combine three dfs
combined = phone.join(accl)
combined = combined.join(gps)
combined = combined.reset_index()

In [33]:
combined[['lat', 'lon', 'dt']]


Unnamed: 0,lat,lon,dt
0,49.278659,-123.016139,2022-06-08 18:12:12+00:00
1,49.278659,-123.016095,2022-06-08 18:12:16+00:00
2,49.278638,-123.016024,2022-06-08 18:12:20+00:00
3,49.278600,-123.015985,2022-06-08 18:12:24+00:00
4,49.278549,-123.015958,2022-06-08 18:12:28+00:00
...,...,...,...
101,49.277431,-123.016238,2022-06-08 18:18:56+00:00
102,49.277485,-123.016246,2022-06-08 18:19:00+00:00
103,49.277538,-123.016229,2022-06-08 18:19:04+00:00
104,49.277550,-123.016160,2022-06-08 18:19:08+00:00
