In [1]:
import math
import numpy as np
import pandas as pd 
import matplotlib.pyplot as plt
from pykalman import KalmanFilter
from xml.dom.minidom import parse, parseString


In [2]:
gps_doc = parse('walk1.gpx')
elements = gps_doc.getElementsByTagName('trkpt')

In [3]:
elements.item(0).getAttribute("lat"), elements.item(0).getAttribute("lon")

('49.2787398989', '-123.0161041655')

In [4]:
t = elements.item(0).getElementsByTagName('time')
t[0].firstChild.nodeValue

'2022-06-08T18:12:08Z'

In [5]:
# read data from gpx file 
# create function to extract cols 
def element_to_data(ele):
    lat = float(ele.getAttribute("lat"))
    lon = float(ele.getAttribute("lon"))
    dtime = ele.getElementsByTagName('time')
    dtime = pd.to_datetime(dtime[0].firstChild.nodeValue, utc=True)
    
    return lat, lon, dtime

# read file
gps_doc = parse('walk1.gpx')
# get 'trkpt' element from file 
gps_elements = gps_doc.getElementsByTagName('trkpt')
# get data from elment and create df with cols 
gps_data = pd.DataFrame(list(map(element_to_data, gps_elements)),
                         columns=['lat', 'lon', 'datetime'])
# set datetime as index
gps_data = gps_data.set_index('datetime')
gps_data.head()

Unnamed: 0_level_0,lat,lon
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1
2022-06-08 18:12:08+00:00,49.27874,-123.016104
2022-06-08 18:12:12+00:00,49.278679,-123.016151
2022-06-08 18:12:16+00:00,49.278673,-123.01612
2022-06-08 18:12:20+00:00,49.27857,-123.015954
2022-06-08 18:12:24+00:00,49.278584,-123.015928


In [6]:
# read data from csv file and add to gps df 
csv_data = pd.read_csv('walk1.csv', parse_dates=['datetime']).set_index('datetime')
gps_data['Bx'] = csv_data['Bx']
gps_data['By'] = csv_data['By']
gps_data.head()

Unnamed: 0_level_0,lat,lon,Bx,By
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2022-06-08 18:12:08+00:00,49.27874,-123.016104,0.0,0.0
2022-06-08 18:12:12+00:00,49.278679,-123.016151,-18.596086,5.550491
2022-06-08 18:12:16+00:00,49.278673,-123.01612,-18.862977,1.294468
2022-06-08 18:12:20+00:00,49.27857,-123.015954,-15.199905,-8.701877
2022-06-08 18:12:24+00:00,49.278584,-123.015928,-13.832881,-11.560094


In [7]:
# functions 'deg2rad' & 'distance' referenced from https://stackoverflow.com/questions/27928/calculate-distance-between-two-latitude-longitude-points-haversine-formula/21623206
def deg2rad(deg) :
  return deg * (math.pi/180)

# write a function to calculate distance 
def distance(points):
    R = 6371
    points['lat1'] = points['lat'].shift(-1)
    points['lon1'] = points['lon'].shift(-1)
    points = points.dropna().copy()

    points['dLat'] = deg2rad((points['lat1']-points['lat']))
    points['dLon'] = deg2rad((points['lon1']-points['lon']))

    points['a'] = np.sin(points['dLat']/2) * np.sin(points['dLat']/2) + np.cos(deg2rad(points['lat'])) * np.cos(deg2rad(points['lat1'])) * np.sin(points['dLon']/2) * np.sin(points['dLon']/2)

    points['c'] = 2 * np.arctan2(np.sqrt(points['a']), np.sqrt(1-points['a']))

    points['d'] = R * points['c'] * 1000

    return(points['d'].sum())
    


In [8]:
# calculate unfiltered distance
points = gps_data[['lat', 'lon']]

dist = distance(points).round(6)
print(f'Unfiltered distance: {dist:.2f}')

Unfiltered distance: 1095.76


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  points['lat1'] = points['lat'].shift(-1)


In [12]:
# prepare data for kalman
kalman_data = gps_data[['lat', 'lon', 'Bx', 'By']]
initial_state = kalman_data.iloc[0]
# observation_covariance expresses how much you believe the sensors
# if the sensor is very accurate, small values should be used
observation_covariance = np.diag([0.2, 0.2, 1, 1]) ** 2 # TODO: shouldn't be zero
# transition_covariance expresses how accurate your prediction is
transition_covariance = np.diag([0.5, 0.5, 1, 1]) ** 2 # TODO: shouldn't be zero
transition = [[1, 0, 0.0000006, 0.00000029], [0, 1, -0.00000043, 0.00000012], [0, 0, 1, 0], [0, 0, 0, 1]] # TODO: shouldn't (all) be zero

# apply kalman smoothing 
kf = KalmanFilter(
    initial_state_mean=initial_state,
    initial_state_covariance=observation_covariance,
    observation_covariance=observation_covariance,
    transition_covariance=transition_covariance,
    transition_matrices=transition
)
kalman_smoothed, covar = kf.smooth(kalman_data)


In [13]:
# calculate unfiltered distance
points1 = pd.DataFrame(kalman_smoothed[:, 0:2], columns=['lat', 'lon'])

dist1 = distance(points1).round(6)
print(f'Unfiltered distance: {dist1:.2f}')


Unfiltered distance: 898.58


In [11]:
df = pd.DataFrame({'x': [1, 2, 5, 8, 4]})
df['x1'] = df['x'].shift(-1)
df = df.dropna().copy()
df['distance'] = (df['x1'] - df['x']).abs()
print(df)

   x   x1  distance
0  1  2.0       1.0
1  2  5.0       3.0
2  5  8.0       3.0
3  8  4.0       4.0
