# GPX data analysis

Prerequisites:

In [None]:
import os

from glob import glob

import matplotlib.pyplot as plt
import numpy             as np
import pandas            as pd

import gmplot
import gpxpy

import seawater as sw

import mplleaflet

from folium import Map, CircleMarker, PolyLine

Load single file:

In [None]:
# Parse a single GPX file and report how many tracks, segments and points it
# contains. The context manager closes the file handle as soon as gpxpy has
# parsed it, instead of leaving it open for the rest of the session.
with open('Dog walk.gpx', 'r') as gpx_file:
    gpx = gpxpy.parse(gpx_file)

print("{} track(s)".format(len(gpx.tracks)))
track = gpx.tracks[0]

print("{} segment(s)".format(len(track.segments)))
segment = track.segments[0]

print("{} point(s)".format(len(segment.points)))

# Flatten the parsed GPX into one row per track point. Every row repeats the
# values of the track and segment that the point belongs to.
run_data = []

for track_idx, track in enumerate(gpx.tracks):

    # Track-level values: computed once, shared by all rows of this track.
    track_fields = [
        track_idx,
        track.name,
        track.get_time_bounds().start_time,
        track.length_3d(),
        track.get_duration(),
        track.get_moving_data().max_speed,
    ]

    for seg_idx, segment in enumerate(track.segments):

        # Segment-level values: shared by all rows of this segment.
        segment_fields = [seg_idx, segment.length_3d()]

        for point_idx, point in enumerate(segment.points):

            print("--> point.extensions =", point.extensions)

            point_fields = [
                point.time,
                point.latitude,
                point.longitude,
                point.elevation,
                segment.get_speed(point_idx),
            ]
            run_data.append(track_fields + segment_fields + point_fields)
    
# Column labels, listed in the same order as the values in each run_data row.
column_list = [
    'Index', 'Name', 'Time', 'Length', 'Duration', 'Max_Speed',  # per track
    'Segment_Index', 'Segment_Length',                           # per segment
    'Point_Time', 'Point_Latitude', 'Point_Longitude',           # per point
    'Point_Elevation', 'Point_Speed',
]

# Build the point-level table and show its first rows.
df = pd.DataFrame(data=run_data, columns=column_list)
print(df.head())

Create functions to load list of files:

In [None]:
def load_run_data(gpx_path, filter=""):
    """Load every track point from the GPX files found in a directory.

    Args:
        gpx_path: Directory that is searched (non-recursively) for GPX files.
        filter: File-name prefix; only files matching ``filter + "*.gpx"``
            are read. The name shadows the builtin ``filter`` but is kept so
            that existing keyword callers keep working.

    Returns:
        A list holding one 15-item list per track point: file index, file
        base name, track index, track name, track start time, track 3-D
        length, track duration, track maximum moving speed, segment index,
        segment 3-D length, and the point's time, latitude, longitude,
        elevation and speed. The order matches ``column_list1``.
    """
    # glob() returns paths in arbitrary order; sorting keeps the file index
    # assigned to each file reproducible between runs.
    gpx_files = sorted(glob(os.path.join(gpx_path, filter + "*.gpx")))

    run_data = []

    for file_idx, gpx_file in enumerate(gpx_files):

        print("--> Processing file", file_idx)

        # Close the handle once the file is parsed instead of leaking one
        # open file per GPX file.
        with open(gpx_file, 'r') as gpx_handle:
            gpx = gpxpy.parse(gpx_handle)

        file_name = os.path.basename(gpx_file)

        for track_idx, track in enumerate(gpx.tracks):

            print("---> Processing track", track_idx)

            # Track-level values are computed once and repeated on each row.
            track_name     = track.name
            track_time     = track.get_time_bounds().start_time
            track_length   = track.length_3d()
            track_duration = track.get_duration()
            track_speed    = track.get_moving_data().max_speed

            for seg_idx, segment in enumerate(track.segments):

                print("----> Processing segment", seg_idx)

                segment_length = segment.length_3d()

                for point_idx, point in enumerate(segment.points):

                    run_data.append([file_idx,
                                     file_name,
                                     track_idx,
                                     track_name,
                                     track_time,
                                     track_length,
                                     track_duration,
                                     track_speed,
                                     seg_idx,
                                     segment_length,
                                     point.time,
                                     point.latitude,
                                     point.longitude,
                                     point.elevation,
                                     segment.get_speed(point_idx)])

    return run_data

# Labels for load_run_data() rows: two file-level columns, then column_list.
column_list1 = ['File_Index', 'File_Name', *column_list]

def load_run_data2(gpx_path, filter="", skip_names=("Flying with Bill",)):
    """Load one summary row per track from the GPX files in a directory.

    Args:
        gpx_path: Directory that is searched (non-recursively) for GPX files.
        filter: File-name prefix; only files matching ``filter + "*.gpx"``
            are read. The name shadows the builtin ``filter`` but is kept so
            that existing keyword callers keep working.
        skip_names: Track names to leave out of the result. The default
            reproduces the previously hard-coded exclusion.

    Returns:
        A list holding one 8-item list per kept track: file index, file base
        name, track index, track name, track start time, track 3-D length,
        track duration and track maximum moving speed. The order matches
        ``column_list2``.
    """
    # glob() returns paths in arbitrary order; sorting keeps the file index
    # assigned to each file reproducible between runs.
    gpx_files = sorted(glob(os.path.join(gpx_path, filter + "*.gpx")))

    run_data = []

    for file_idx, gpx_file in enumerate(gpx_files):

        print("--> Processing file", file_idx)

        # Close the handle once the file is parsed instead of leaking one
        # open file per GPX file.
        with open(gpx_file, 'r') as gpx_handle:
            gpx = gpxpy.parse(gpx_handle)

        file_name = os.path.basename(gpx_file)

        for track_idx, track in enumerate(gpx.tracks):

            track_name = track.name

            # Decide whether to skip before computing any track statistics,
            # so no work is spent on tracks that are discarded.
            if track_name in skip_names:
                continue

            run_data.append([file_idx,
                             file_name,
                             track_idx,
                             track_name,
                             track.get_time_bounds().start_time,
                             track.length_3d(),
                             track.get_duration(),
                             track.get_moving_data().max_speed])

    return run_data

# Labels for load_run_data2() rows: file-level columns, then track-level ones.
column_list2 = [
    'File_Index', 'File_Name',
    'Index', 'Name', 'Time', 'Length', 'Duration', 'Max_Speed',
]

Load list of files:

In [None]:
# Point-level load of every GPX file under ./data/. To get one row per track
# instead, swap in the two commented-out alternatives below.
data = load_run_data('./data/', "")
#data = load_run_data2('./data/', "")

df = pd.DataFrame(data=data, columns=column_list1)
#df = pd.DataFrame(data=data, columns=column_list2)

# Store the frame on disk so it can be reloaded without re-parsing the files.
df.to_pickle("garmindataframe.pkl")

print(df.shape, df.head(), sep="\n")

In [None]:
#with pd.option_context('display.max_rows', None, 'display.max_columns', 3):
#    print(df['Name'])

# 'Length' and 'Duration' are track-level values that load_run_data() repeats
# on every point row, so collapse to one row per (file, track) before summing;
# otherwise each track is counted once per point. Frames built with
# load_run_data2() already hold one row per track and are left unchanged.
per_track = df.drop_duplicates(subset=['File_Index', 'Index'])

# NOTE(review): the original totals were sliced with [1:], i.e. they skipped
# the first row. That is kept here as "skip the first track" -- confirm that
# excluding it is intended.
per_track = per_track.iloc[1:]

# Series.sum() skips missing values by default.
total_length = per_track['Length'].sum()
total_duration = per_track['Duration'].sum()

print('Total distance as summed between points in track = {:8.2f} miles'.format(total_length*0.000621371))

print('Total distance as summed between points in track = {:8.2f} km'.format(total_length*0.001))

print('Total time     as summed between points in track = {:8.2f} hours'.format(total_duration/3600))

In [None]:
# Reload the stored frame and show its dimensions and first rows.
df = pd.read_pickle("garmindataframe.pkl")

print(df.shape, df.head(), sep="\n")

Create plots:

In [None]:
# Draw all recorded points as one thin line on a dark canvas without axes.
fig = plt.figure(facecolor='0.1')
ax = fig.add_axes(plt.Axes(fig, [0., 0., 1., 1.]))  # axes span the whole figure
ax.set_axis_off()
ax.set_aspect('equal')
ax.plot(df['Point_Longitude'], df['Point_Latitude'],
        color='deepskyblue', lw=0.3, alpha=0.9)
plt.show()

In [None]:
# Heat map of all points, written to test.html. The map is centred on the
# coordinates of the row labelled 0, with an initial zoom level of 15.
center_lat = df.loc[0, 'Point_Latitude']
center_lon = df.loc[0, 'Point_Longitude']

gmap = gmplot.GoogleMapPlotter(center_lat, center_lon, 15)
gmap.heatmap(df['Point_Latitude'], df['Point_Longitude'], maxIntensity=50)
gmap.draw("test.html")

In [None]:
# Direction of travel between consecutive points. The angles are converted
# from degrees to radians and given a leading 0 before use -- presumably
# because sw.dist yields one angle per pair of points (TODO confirm).
_, angles = sw.dist(df['Point_Latitude'], df['Point_Longitude'])
angles = np.r_[0, np.radians(angles)]

# Arrow length: each point's speed as a fraction of the highest speed.
speed = df['Point_Speed']
r = speed / speed.max()
df['u'], df['v'] = r * np.cos(angles), r * np.sin(angles)

# Rows with any missing value are dropped before plotting.
df = df.dropna()
fig, ax = plt.subplots()

# Route as a wide translucent line.
ax.plot(df['Point_Longitude'], df['Point_Latitude'],
        color='darkorange', linewidth=5, alpha=0.5)

# One arrow for every `sub`-th remaining row.
sub = 50
arrows = df.iloc[::sub]
ax.quiver(arrows['Point_Longitude'], arrows['Point_Latitude'],
          arrows['u'], arrows['v'],
          color='deepskyblue', alpha=0.8, scale=10)

# Save to an html file; mplleaflet.display(fig=fig) would instead show the
# map inline in Jupyter, where it takes up the full width.
mplleaflet.show(path='mpl.html', tiles='esri_aerial')

In [None]:
# Folium map centred on the mean position of all points. The frame's columns
# are 'Point_Latitude' / 'Point_Longitude'; the previous df.Latitude and
# df.Longitude lookups raised AttributeError because no such columns exist.
mymap = Map(location=[df['Point_Latitude'].mean(), df['Point_Longitude'].mean()],
            zoom_start=14)
#PolyLine(df[['Point_Latitude','Point_Longitude']].values, color="red", weight=2.5, opacity=1).add_to(mymap)

# Add one small red marker per point, reporting progress every 500 points.
for i, coord in enumerate(df[['Point_Latitude', 'Point_Longitude']].values):
    if i % 500 == 0:
        print("i=", i)
    CircleMarker(location=[coord[0], coord[1]], radius=1, color='red').add_to(mymap)
#mymap   # shows map inline in Jupyter but takes up full width
mymap.save('fol.html')  # saves to html file for display below