https://towardsdatascience.com/how-tracking-apps-analyse-your-gps-data-a-hands-on-tutorial-in-python-756d4db6715d
https://towardsdatascience.com/how-to-make-your-pandas-loop-71-803-times-faster-805030df4f06
https://realpython.com/fast-flexible-pandas/

In [1]:
!pip install haversine



In [2]:
import gpxpy
import matplotlib.pyplot as plt
import datetime
from geopy import distance
from math import sqrt, floor
import numpy as np
import pandas as pd
import haversine
import os
import re

In [3]:
from os import listdir
from os.path import isfile, join
path = os.path.join(os.path.abspath(''), 'tracks')
allfiles = [f for f in listdir(path) if isfile(join(path, f))]

# Probleemgevallen verwijderen:
# while '20120704_192600.gpx' in allfiles: allfiles.remove('20120704_192600.gpx')
while '20170429_123929.gpx' in allfiles: allfiles.remove('20170429_123929.gpx')
while '.DS_Store' in allfiles: allfiles.remove('.DS_Store')

print(allfiles)

['20150201_105601.gpx', '20180325_090235.gpx', '20121127_191701.gpx', '20160518_165805.gpx', 'Nieuwe Asics GT4000.gpx', '20180819_084758.gpx', '20190407_084824.gpx', '20150411_085108.gpx', '20120827_192731.gpx', '20191012_091115.gpx', '20190928_082644.gpx', '20140405_082329.gpx', '20191117_101539.gpx', '20191120_194720.gpx', '20170401_080131.gpx', '20121008_175600.gpx', '20190203_094709.gpx', '20160825_163509.gpx', '20170429_113634.gpx', 'Running 5_4_12 10_44 pm.gpx', '20121109_204410.gpx', '20160703_092613.gpx', '20150818_161456.gpx', '20190317_094119.gpx', '20171008_074753.gpx', '20121215_102614.gpx', '20151219_094829.gpx', '20120704_192600.gpx', '20190528_184901.gpx', '20180303_095013.gpx', '20170925_183951.gpx', '20150811_162530.gpx', 'Running 5_15_12 10_04 pm.gpx', '20190130_182900.gpx', '20190120_090601.gpx', '20140215_085202.gpx', '10k met de Enschede Marathon 2019.gpx', '20190521_170801.gpx', 'Running 5_19_12 9_42 am.gpx', '20140329_104844.gpx', '20190507_172037.gpx', '20190402

In [4]:
sections = [1000,(1000*1.60934),3000,5000,(5000*1.60934),10000,15000,(10000*1.60934),20000,21097.5,25000,30000,40000,42195]
# sections = [15000]

df_final = pd.DataFrame(columns=['time', 'distance', 'minutes_per_kilometer'])

for file in allfiles:
    
    print('File:', file)
    
    path = os.path.join(os.path.abspath(''), 'tracks', file)

    gpx_file = open(path, 'r')
    gpx = gpxpy.parse(gpx_file)
    
    df = pd.DataFrame(columns=['lon', 'lat', 'alt', 'time'])
    
    # -----------------------------------------------------
    # Inlezen van alle files geeft problemen, omdat de bestanden verschillen van elkaar.
    
    if re.search('^Running', file) is None: # Als de bestandsnaam niet begint met 'Running', dan is het een Endomondo file
        
        if len(gpx.tracks[0].segments) == 1: # Soms komt het voor dat een bestand maar uit 1 segment bestaat, ook bij Endomondo.
            
            segment = gpx.tracks[0].segments[0]
            
            data = segment.points

            for point in data:
                df = df.append({'lon': point.longitude, 'lat' : point.latitude, 'alt' : point.elevation, 'time' : point.time}, ignore_index=True)
            
        else: 
            for segment in gpx.tracks[0].segments[:-1]: # alle segments, behalve de laatste.

                data = segment.points

                for point in data:
                    df = df.append({'lon': point.longitude, 'lat' : point.latitude, 'alt' : point.elevation, 'time' : point.time}, ignore_index=True)
            
    else: # Als de bestandsnaam wel met 'Running' begint, dan is het een oud bestand. Daarvan moeten we alle segments inlezen.
        
        for segment in gpx.tracks[0].segments: # alle segments, behalve de laatste.

            data = segment.points

            for point in data:
                df = df.append({'lon': point.longitude, 'lat' : point.latitude, 'alt' : point.elevation, 'time' : point.time}, ignore_index=True)

    # -----------------------------------------------------
    
    df = df.sort_values(by=['time'])
    df = df.reset_index()
            
    df = df.fillna(method='ffill')
    df = df.fillna(method='bfill')

    # Create a column with values that are 'shifted' one backwards, so we can create calculations for differences.
    df['lon-start'] = df['lon']
    df['lon-start'].iloc[-1] = np.nan
    df['lon-start'] = np.roll(df['lon-start'], 1)
    df['lat-start'] = df['lat']
    df['lat-start'].iloc[-1] = np.nan
    df['lat-start'] = np.roll(df['lat-start'], 1)
    df['alt-start'] = df['alt']
    df['alt-start'].iloc[-1] = np.nan
    df['alt-start'] = np.roll(df['alt-start'], 1)
    df['time-start'] = df['time']
    df['time-start'].iloc[-1] = np.nan
    df['time-start'] = np.roll(df['time-start'], 1)
    df = df.fillna(method='bfill')

    df['time'] = pd.to_datetime(df['time'], utc=True)
    df['time'] = df['time'].dt.tz_localize(tz=None)
    df['time-start'] = pd.to_datetime(df['time-start'], utc=True)
    df['time-start'] = df['time-start'].dt.tz_localize(tz=None)

    #Calculate distances and time deltas
    df['distance_dis_2d'] = df.apply(lambda x: distance.distance((x['lat-start'], x['lon-start']), (x['lat'], x['lon'])).m, axis = 1)
    df['alt_dif'] = df.apply(lambda x: x['alt-start'] - x['alt'], axis=1)
    df['distance_dis_3d'] = df.apply(lambda x: sqrt(x['distance_dis_2d']**2 + (x['alt_dif'])**2), axis=1)
    df['time_delta'] = df.apply(lambda x: (x['time'] - x['time-start']).total_seconds(), axis=1)
    
    df_selected = df.loc[:, ['distance_dis_3d','time_delta']]

    df_selected['distance_cumsum'] = df_selected['distance_dis_3d'].cumsum()
    df_selected['time_cumsum'] = df_selected['time_delta'].cumsum()
    
    # Hier komt de loop voor sections
    for section in sections:
    
        if df['distance_dis_3d'].sum() < section: # Als deze ronde kleiner is dan de afstand die we zoeken hoeven we ook niet verder te rekenen.
            # print('  -- Afstand die we zoeken niet behaald.')
            continue
            
        print(' - Section:', section)
        df_output = pd.DataFrame(columns=['date', 'section', 'filename', 'time', 'distance', 'minutes_per_kilometer', 'total_distance', 'total_time'])

        for i in range(len(df_selected.index)):

            df_section = df_selected[(df_selected['distance_cumsum'] - df_selected['distance_cumsum'].iat[i]) >= section]
            if(len(df_section.index) != 0):
                time = df_section['time_cumsum'].iat[0] - df_selected['time_cumsum'].iat[i]
                distance_i = df_section['distance_cumsum'].iat[0] - df_selected['distance_cumsum'].iat[i]
                minutes_per_kilometer = (time/60)/(distance_i/1000)
                df_output = df_output.append({'date': df['time'].min(), 'section': section, 'filename': file, 'time': time, 'distance': distance_i, 'minutes_per_kilometer': minutes_per_kilometer, 'total_distance': df['distance_dis_3d'].sum(), 'total_time': df['time_delta'].sum()}, ignore_index=True)

        if df_output.empty == True:
            print('Error?!')
        else:
            s_best = df_output.loc[df_output['minutes_per_kilometer'].idxmin()]
            df_final = df_final.append(s_best)
        
df_final['start_index_best_section'] = df_final.index
df_final = df_final.set_index(['filename','section'])

print('Finished!')

File: 20150201_105601.gpx


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_with_indexer(indexer, value)
	To accept the future behavior, pass 'dtype=object'.
	To keep the old behavior, pass 'dtype="datetime64[ns]"'.
  a = asanyarray(a)


 - Section: 1000
 - Section: 1609.34
 - Section: 3000
 - Section: 5000
File: 20180325_090235.gpx
 - Section: 1000
 - Section: 1609.34
 - Section: 3000
 - Section: 5000
File: 20121127_191701.gpx
 - Section: 1000
 - Section: 1609.34
 - Section: 3000
 - Section: 5000
 - Section: 8046.7
 - Section: 10000
File: 20160518_165805.gpx
 - Section: 1000
 - Section: 1609.34
 - Section: 3000
 - Section: 5000
File: Nieuwe Asics GT4000.gpx
 - Section: 1000
 - Section: 1609.34
 - Section: 3000
 - Section: 5000
File: 20180819_084758.gpx
 - Section: 1000
 - Section: 1609.34
File: 20190407_084824.gpx
 - Section: 1000
 - Section: 1609.34
 - Section: 3000
 - Section: 5000
 - Section: 8046.7
 - Section: 10000
File: 20150411_085108.gpx
 - Section: 1000
 - Section: 1609.34
 - Section: 3000
 - Section: 5000
File: 20120827_192731.gpx
 - Section: 1000
 - Section: 1609.34
 - Section: 3000
 - Section: 5000
 - Section: 8046.7
 - Section: 10000
File: 20191012_091115.gpx
 - Section: 1000
 - Section: 1609.34
 - Sectio

In [5]:
df_final

Unnamed: 0_level_0,Unnamed: 1_level_0,time,distance,minutes_per_kilometer,date,total_distance,total_time,start_index_best_section
filename,section,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
20150201_105601.gpx,1000.00,292.0,1001.160200,4.861027,2015-02-01 10:56:01,7060.048833,2163.0,116
20150201_105601.gpx,1609.34,476.0,1621.970891,4.891169,2015-02-01 10:56:01,7060.048833,2163.0,111
20150201_105601.gpx,3000.00,894.0,3028.741663,4.919535,2015-02-01 10:56:01,7060.048833,2163.0,52
20150201_105601.gpx,5000.00,1498.0,5014.780916,4.978616,2015-02-01 10:56:01,7060.048833,2163.0,1
20180325_090235.gpx,1000.00,288.0,1004.653776,4.777765,2018-03-25 09:02:35,5993.427951,1789.0,302
...,...,...,...,...,...,...,...,...
20191211_081322.gpx,10000.00,3031.0,10000.482595,5.051423,2019-12-11 08:13:22,13303.634864,4053.0,691
20170723_152756.gpx,1000.00,293.0,1017.739969,4.798213,2017-07-23 15:27:56,5000.565117,1504.0,186
20170723_152756.gpx,1609.34,482.0,1628.693267,4.932380,2017-07-23 15:27:56,5000.565117,1504.0,158
20170723_152756.gpx,3000.00,907.0,3021.953078,5.002284,2017-07-23 15:27:56,5000.565117,1504.0,91


In [6]:
df_final.to_csv('test.csv')

In [7]:
df_input = pd.read_csv('test.csv', header=0, index_col=[0,1])

In [8]:
df_final.unstack()

Unnamed: 0_level_0,time,time,time,time,time,time,time,time,time,time,...,start_index_best_section,start_index_best_section,start_index_best_section,start_index_best_section,start_index_best_section,start_index_best_section,start_index_best_section,start_index_best_section,start_index_best_section,start_index_best_section
section,1000.00,1609.34,3000.00,5000.00,8046.70,10000.00,15000.00,16093.40,20000.00,21097.50,...,1000.00,1609.34,3000.00,5000.00,8046.70,10000.00,15000.00,16093.40,20000.00,21097.50
filename,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
10k met de Enschede Marathon 2019.gpx,251.0,418.0,792.0,1329.0,2162.0,2730.0,,,,,...,1600.0,1559.0,686.0,882.0,347.0,22.0,,,,
20120614_192813.gpx,282.0,469.0,878.0,1483.0,2466.0,3105.0,,,,,...,3.0,3.0,3.0,3.0,3.0,2.0,,,,
20120618_202744.gpx,260.0,437.0,826.0,1400.0,,,,,,,...,280.0,245.0,169.0,51.0,,,,,,
20120622_203116.gpx,288.0,481.0,,,,,,,,,...,41.0,1.0,,,,,,,,
20120622_205613.gpx,283.0,477.0,895.0,1511.0,,,,,,,...,270.0,79.0,12.0,28.0,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Running 5_27_12 7_27 pm.gpx,293.0,484.0,907.0,1529.0,2474.0,,,,,,...,518.0,149.0,114.0,122.0,37.0,,,,,
Running 5_4_12 10_44 pm.gpx,304.0,492.0,926.0,1575.0,,,,,,,...,65.0,113.0,29.0,31.0,,,,,,
Singelloop 2016.gpx,260.0,417.0,781.0,1310.0,2117.0,,,,,,...,180.0,174.0,99.0,99.0,6.0,,,,,
Singelloop Enschede.gpx,260.0,424.0,799.0,1335.0,2176.0,,,,,,...,289.0,265.0,208.0,127.0,1.0,,,,,


In [9]:
idx = pd.IndexSlice
df_final.loc[idx[:,1000],'total_distance'].sum() / 1000

2437.894076948313

In [10]:
df_final.loc[idx[:,1000],['date','total_distance']]

Unnamed: 0_level_0,Unnamed: 1_level_0,date,total_distance
filename,section,Unnamed: 2_level_1,Unnamed: 3_level_1
20150201_105601.gpx,1000.0,2015-02-01 10:56:01,7060.048833
20180325_090235.gpx,1000.0,2018-03-25 09:02:35,5993.427951
20121127_191701.gpx,1000.0,2012-11-27 19:17:01,10137.945790
20160518_165805.gpx,1000.0,2016-05-18 16:58:05,7004.598062
Nieuwe Asics GT4000.gpx,1000.0,2019-05-04 18:56:59,7973.808180
...,...,...,...
20190818_081719.gpx,1000.0,2019-08-18 08:17:19,16088.039483
20190824_080355.gpx,1000.0,2019-08-24 08:03:55,16367.937345
20160316_200401.gpx,1000.0,2016-03-16 20:04:01,6239.000309
20191211_081322.gpx,1000.0,2019-12-11 08:13:22,13303.634864


In [11]:
df_total_distance = df_final.loc[idx[:,1000],['date','total_distance']]
df_total_distance

Unnamed: 0_level_0,Unnamed: 1_level_0,date,total_distance
filename,section,Unnamed: 2_level_1,Unnamed: 3_level_1
20150201_105601.gpx,1000.0,2015-02-01 10:56:01,7060.048833
20180325_090235.gpx,1000.0,2018-03-25 09:02:35,5993.427951
20121127_191701.gpx,1000.0,2012-11-27 19:17:01,10137.945790
20160518_165805.gpx,1000.0,2016-05-18 16:58:05,7004.598062
Nieuwe Asics GT4000.gpx,1000.0,2019-05-04 18:56:59,7973.808180
...,...,...,...
20190818_081719.gpx,1000.0,2019-08-18 08:17:19,16088.039483
20190824_080355.gpx,1000.0,2019-08-24 08:03:55,16367.937345
20160316_200401.gpx,1000.0,2016-03-16 20:04:01,6239.000309
20191211_081322.gpx,1000.0,2019-12-11 08:13:22,13303.634864


In [12]:
start_date = pd.to_datetime('2019-01-01', format='%Y-%m-%d')
end_date = pd.to_datetime('2019-12-31', format='%Y-%m-%d')
df_total_distance_2019 = df_total_distance[((df_total_distance['date'] >= start_date) & (df_total_distance['date'] <= end_date))]
df_total_distance_2019

Unnamed: 0_level_0,Unnamed: 1_level_0,date,total_distance
filename,section,Unnamed: 2_level_1,Unnamed: 3_level_1
Nieuwe Asics GT4000.gpx,1000.0,2019-05-04 18:56:59,7973.808180
20190407_084824.gpx,1000.0,2019-04-07 08:48:24,12069.805388
20191012_091115.gpx,1000.0,2019-10-12 09:11:15,21138.745824
20190928_082644.gpx,1000.0,2019-09-28 08:26:44,17954.455656
20191117_101539.gpx,1000.0,2019-11-17 10:15:39,9990.222797
...,...,...,...
20191002_172159.gpx,1000.0,2019-10-02 17:21:59,10470.873454
20191208_115426.gpx,1000.0,2019-12-08 11:54:26,16081.033047
20190818_081719.gpx,1000.0,2019-08-18 08:17:19,16088.039483
20190824_080355.gpx,1000.0,2019-08-24 08:03:55,16367.937345


In [13]:
print('Totale afstand 2019:')
df_total_distance_2019['total_distance'].sum() / 1000

Totale afstand 2019:


838.2463228115006

In [14]:
start_date = pd.to_datetime('2020-01-01', format='%Y-%m-%d')
df_total_distance_2020 = df_total_distance[(df_total_distance['date'] >= start_date)]
print('Totale afstand 2020:')
df_total_distance_2020['total_distance'].sum() / 1000

Totale afstand 2020:


111.39786003262289

In [15]:
start_date = pd.to_datetime('2019-02-07', format='%Y-%m-%d')
end_date = pd.to_datetime('2020-02-06', format='%Y-%m-%d')
df_total_distance_12Months = df_total_distance[((df_total_distance['date'] >= start_date) & (df_total_distance['date'] <= end_date))]
print('Totale afstand Afgelopen 12 maanden:')
df_total_distance_12Months['total_distance'].sum() / 1000

Totale afstand Afgelopen 12 maanden:


895.3811835685991

In [16]:
start_date = pd.to_datetime('2019-12-01', format='%Y-%m-%d')
end_date = pd.to_datetime('2020-01-01', format='%Y-%m-%d')
df_total_test = df_total_distance[((df_total_distance['date'] >= start_date) & (df_total_distance['date'] < end_date))]
print('Totale afstand test:')
df_total_test['total_distance'].sum() / 1000

Totale afstand test:


64.83358674121484

In [17]:
df_total_test

Unnamed: 0_level_0,Unnamed: 1_level_0,date,total_distance
filename,section,Unnamed: 2_level_1,Unnamed: 3_level_1
20191205_185918.gpx,1000.0,2019-12-05 18:59:18,8478.987585
20191217_185651.gpx,1000.0,2019-12-17 18:56:51,9940.295428
20191221_110307.gpx,1000.0,2019-12-21 11:03:07,17029.635818
20191208_115426.gpx,1000.0,2019-12-08 11:54:26,16081.033047
20191211_081322.gpx,1000.0,2019-12-11 08:13:22,13303.634864


20120627_191504.gpx heeft datum 2019-08-15 gekregen.

In [18]:
df_final.loc[idx['20120627_191504.gpx', :], :]

Unnamed: 0_level_0,Unnamed: 1_level_0,time,distance,minutes_per_kilometer,date,total_distance,total_time,start_index_best_section
filename,section,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
20120627_191504.gpx,1000.0,278.0,1015.922101,4.560717,2012-06-27 19:15:04,8595.370196,2464.0,327
20120627_191504.gpx,1609.34,447.0,1616.484544,4.608767,2012-06-27 19:15:04,8595.370196,2464.0,269
20120627_191504.gpx,3000.0,840.0,3012.997734,4.646535,2012-06-27 19:15:04,8595.370196,2464.0,236
20120627_191504.gpx,5000.0,1415.0,5009.209722,4.707995,2012-06-27 19:15:04,8595.370196,2464.0,212
20120627_191504.gpx,8046.7,2292.0,8051.583228,4.744409,2012-06-27 19:15:04,8595.370196,2464.0,17
