In [55]:
import glob
import os
import gpxpy
import random

import pandas as pd

import matplotlib.pyplot as plt
import plotly.express as px
from plotly.subplots import make_subplots
import plotly.graph_objects as go

import haversine as hs

# Maximum number of "Run" activities to load; keeps the notebook quick to re-run.
FILE_LIMIT = 10

# Directory that holds the exported .gpx files. Set the GPX_DATA_DIR
# environment variable to point somewhere else; when it is unset the
# original local path is used, so existing runs behave as before.
DATA_DIR = os.environ.get(
    "GPX_DATA_DIR",
    "/Users/arbatov/gitRepo/gpx-data/data/strava",
)

# NOTE: no pyproj geodesic object is created here. pyproj is not imported in
# this notebook's import block (so referencing it fails on a fresh kernel),
# and the step distances below are computed with the haversine package.
gpx_files = glob.glob(os.path.join(DATA_DIR, "*.gpx"))

# Number of "Run" files loaded so far, and the accumulated rows for the
# DataFrame (one list per row).
file_count = 0
file_contents = []

for gpx_file in gpx_files:
    # gpxpy.parse() returns a fully built GPX object, so the file handle
    # only needs to stay open for the parse itself.
    with open(gpx_file) as file:
        gpx = gpxpy.parse(file)

    # A GPX file can contain no tracks at all; indexing tracks[0] on such a
    # file would raise IndexError, so skip it.
    if not gpx.tracks:
        continue
    track = gpx.tracks[0]

    # Only get the run data now
    activity_type = track.type
    if activity_type != "Run":
        continue

    # Only the first segment of the first track is read. A track without any
    # segment is treated like an empty segment: it contributes no rows.
    gpx_points = track.segments[0].points if track.segments else []

    # One row per consecutive pair of points. The row carries the time and
    # position of the later point plus the haversine distance (in meters)
    # from the previous point, so a track's first point never gets a row of
    # its own.
    for previous_point, point in zip(gpx_points, gpx_points[1:]):
        distance = hs.haversine(
            point1=(previous_point.latitude, previous_point.longitude),
            point2=(point.latitude, point.longitude),
            unit=hs.Unit.METERS
        )

        file_contents.append([
            os.path.basename(gpx_file),
            activity_type,
            point.time,
            point.longitude,
            point.latitude,
            distance
        ])

    # FILE_LIMIT caps the number of "Run" files loaded, not the number of
    # files scanned: skipped files do not increment the counter.
    file_count += 1
    if file_count >= FILE_LIMIT:
        break
        

# Turn the accumulated rows into a table. The column labels follow the order
# in which the values were appended to each row of `file_contents`.
df = pd.DataFrame(
    data=file_contents,
    columns=['filename', 'type', 'time', 'lon', 'lat', 'distance'],
)

# Preview the first five rows.
df.head(5)

Unnamed: 0,filename,type,time,lon,lat,distance
0,3157394637.gpx,Run,2019-12-28 21:40:57+00:00,103.874468,1.297067,1.361685
1,3157394637.gpx,Run,2019-12-28 21:40:58+00:00,103.874458,1.297066,1.145138
2,3157394637.gpx,Run,2019-12-28 21:40:59+00:00,103.874447,1.297065,1.140048
3,3157394637.gpx,Run,2019-12-28 21:41:00+00:00,103.874427,1.297065,2.276692
4,3157394637.gpx,Run,2019-12-28 21:41:01+00:00,103.874406,1.297063,2.284343


In [60]:
# Number of distinct GPX files that contributed rows to `df`.
df['filename'].nunique()

10

In [61]:
# Sum of the per-step distances for each file.
df.groupby('filename')['distance'].sum()

filename
2753647336.gpx     6872.966933
2761730247.gpx     8347.012340
2818479737.gpx     9497.803089
3039897185.gpx     8239.418395
3097022517.gpx     5741.357306
3157394637.gpx    20291.257576
3215277911.gpx     7917.312116
3307575299.gpx     6069.734350
3371480307.gpx    12012.416587
3573272880.gpx     7453.493043
Name: distance, dtype: float64