In [1]:
import sys
import pandas as pd
import numpy as np
from xml.dom.minidom import parse, parseString

In [2]:
# Parse the GPX file (an XML document) into an xml.dom.minidom Document.
data = parse("walk1.gpx")

In [3]:
data

<xml.dom.minidom.Document at 0x7f17048c54c8>

In [4]:
# Collect every <trkpt> element in the document; their lat/lon attributes
# are read when the coordinate frame is filled in.
parse_result = data.getElementsByTagName("trkpt")

In [5]:
# Empty frame with the two coordinate columns; it is populated from the
# parsed track points in the next cell.
points_df = pd.DataFrame(columns=['lat', 'lon'])

In [6]:
# Build the coordinate frame in one shot from a list of records.
# Growing a DataFrame row by row with DataFrame.append copies the whole frame
# on every iteration (quadratic) and the method no longer exists in
# pandas >= 2.0. getAttribute returns strings, so the values are converted to
# float here to give the columns a numeric dtype from the start.
points_df = pd.DataFrame(
    [
        {
            "lat": float(element.getAttribute("lat")),
            "lon": float(element.getAttribute("lon")),
        }
        for element in parse_result
    ],
    columns=["lat", "lon"],  # keeps both columns even when there are no track points
)

In [7]:
points_df

Unnamed: 0,lat,lon
0,49.28015799,-123.00528338
1,49.28022235,-123.00543652
2,49.28023114,-123.00560132
3,49.2803905,-123.0064075
4,49.28021094,-123.00596532
5,49.2803988,-123.0061052
6,49.28016157,-123.00631066
7,49.2803988,-123.0061052
8,49.28019034,-123.0065485
9,49.280222,-123.0066558


In [8]:
def haversine(lat1, lon1, lat2, lon2):
    """Great-circle distance in metres between two points given in degrees.

    Uses the haversine formula with an Earth diameter of 12742 km. Every
    operation is a NumPy ufunc, so the arguments may be scalars or equally
    shaped arrays / pandas Series; NaN inputs produce NaN.

    Args:
        lat1, lon1: latitude and longitude of the first point, in degrees.
        lat2, lon2: latitude and longitude of the second point, in degrees.

    Returns:
        The distance in metres (scalar or array-like, matching the inputs).
    """
    earth_diameter_km = 12742
    half_dlat = np.deg2rad(lat2 - lat1) / 2
    half_dlon = np.deg2rad(lon2 - lon1) / 2
    a = np.sin(half_dlat) * np.sin(half_dlat)
    b = np.cos(np.deg2rad(lat1)) * np.cos(np.deg2rad(lat2))
    c = np.sin(half_dlon) * np.sin(half_dlon)
    # Floating-point rounding can push the root a hair above 1 for
    # (near-)antipodal points, where arcsin would return NaN; cap it at 1.
    root = np.minimum(np.sqrt(a + b * c), 1.0)
    return earth_diameter_km * np.arcsin(root) * 1000

In [9]:
# Small hand-written sample of (lat, lon) pairs; the cells below compute
# haversine distances between consecutive rows of this frame.
points = pd.DataFrame(
    [
        (49.28, 123.00),
        (49.26, 123.10),
        (49.26, 123.05),
    ],
    columns=['lat', 'lon'],
)

In [10]:
points

Unnamed: 0,lat,lon
0,49.28,123.0
1,49.26,123.1
2,49.26,123.05


In [11]:
# Pair every row with the coordinates of the row before it: shifting down by
# one position leaves NaN in the first row, which has no predecessor.
for coord in ("lat", "lon"):
    points[coord + "2"] = points[coord].shift(periods=1)

In [12]:
points

Unnamed: 0,lat,lon,lat2,lon2
0,49.28,123.0,,
1,49.26,123.1,49.28,123.0
2,49.26,123.05,49.26,123.1


In [13]:
# haversine is written purely with NumPy ufuncs, so it can take whole columns
# at once; this avoids a Python-level call per row via DataFrame.apply.
# The first row has no previous point (NaN in lat2/lon2), so its distance is NaN.
points["distance"] = haversine(points["lat"], points["lon"], points["lat2"], points["lon2"])

In [14]:
points

Unnamed: 0,lat,lon,lat2,lon2,distance
0,49.28,123.0,,,
1,49.26,123.1,49.28,123.0,7588.595484
2,49.26,123.05,49.26,123.1,3628.443408


In [15]:
# Add up the per-step distances; NaN entries (the first row has no previous
# point) are skipped, which is the default behaviour of Series.sum.
step_distances = points["distance"]
total_distance = step_distances.sum(skipna=True)

In [16]:
total_distance

11217.038892179959

In [17]:
from pykalman import KalmanFilter

In [18]:
# Seed the filter with the first recorded point. Cast explicitly so the
# initial state is numeric even if points_df still holds the raw attribute
# strings when this cell runs.
initial_state = points_df.iloc[0].astype(float)
# Diagonal covariances: the lat and lon components are treated as independent.
# NOTE(review): these variances are expressed in the same units as the data
# (presumably squared degrees) -- confirm that 0.95 / 0.1 are the intended
# magnitudes for GPS noise.
observation_covariance = np.diag([0.95, 0.95])
transition_covariance = np.diag([0.1, 0.1])
# Identity transition: the predicted next state equals the current state.
transition = [[1, 0], [0, 1]]

In [19]:
# Assemble the Kalman filter from the parameters defined above.
kf = KalmanFilter(
    # state evolution model
    transition_matrices=transition,
    transition_covariance=transition_covariance,
    # measurement noise
    observation_covariance=observation_covariance,
    # starting estimate; the observation covariance doubles as its uncertainty
    initial_state_mean=initial_state,
    initial_state_covariance=observation_covariance,
)

In [20]:
# NOTE: this binds a second name to the same DataFrame object (no copy is
# made), so column assignments made through smooth_points also change
# points_df.
smooth_points = points_df

In [21]:
# Make sure both coordinate columns are float64 before they are handed to the
# Kalman smoother.
for coord in ("lat", "lon"):
    smooth_points[coord] = smooth_points[coord].astype(float)

In [22]:
# kf.smooth returns a pair; only the first element (the smoothed lat/lon
# estimates) is used below. The second element is bound to a named throwaway
# rather than `_`, because assigning to `_` shadows IPython's "last output"
# variable in the notebook namespace.
smooth_points2, _smoothed_covariances = kf.smooth(smooth_points)

In [23]:
smooth_points2

array([[  49.28021655, -123.0056586 ],
       [  49.28022887, -123.0057376 ],
       [  49.28024189, -123.00584829],
       [  49.28025603, -123.00598497],
       [  49.28025602, -123.00607718],
       [  49.28026076, -123.00618116],
       [  49.28025097, -123.00629313],
       [  49.28025058, -123.00640326],
       [  49.2802346 , -123.00654477],
       [  49.28022327, -123.00668588],
       [  49.28021208, -123.00683016],
       [  49.28019873, -123.00697433],
       [  49.28017777, -123.00715352],
       [  49.28015569, -123.00734488],
       [  49.28014096, -123.00752049],
       [  49.28012903, -123.00770394],
       [  49.28011739, -123.0078791 ],
       [  49.280116  , -123.00806023],
       [  49.28011724, -123.00825899],
       [  49.28011764, -123.00844341],
       [  49.2801226 , -123.00862214],
       [  49.28011489, -123.00880338],
       [  49.28013664, -123.0089675 ],
       [  49.28015835, -123.00913029],
       [  49.2801752 , -123.00930049],
       [  49.28019316, -1

In [24]:
# Wrap the smoothed (n, 2) array in a DataFrame and label the columns right
# away, instead of creating integer-labelled columns that need a rename.
smooth_df = pd.DataFrame(smooth_points2, columns=["lat", "lon"])

In [25]:
# Map the positional column labels 0 and 1 to "lat" and "lon".
column_labels = {0: "lat", 1: "lon"}
smooth_df = smooth_df.rename(column_labels, axis="columns")

In [26]:
smooth_df

Unnamed: 0,lat,lon
0,49.280217,-123.005659
1,49.280229,-123.005738
2,49.280242,-123.005848
3,49.280256,-123.005985
4,49.280256,-123.006077
5,49.280261,-123.006181
6,49.280251,-123.006293
7,49.280251,-123.006403
8,49.280235,-123.006545
9,49.280223,-123.006686
