In [12]:
import pandas as pd
import gpxpy

from geopy.distance import geodesic
from datetime import datetime, timedelta

In [13]:
gpx_file = open('../data/SG200Miles2024.gpx', 'r')
gpx = gpxpy.parse(gpx_file)

In [14]:
data = []
for track in gpx.tracks:
    for segment in track.segments:
        for point in segment.points:
            data.append({'latitude': point.latitude, 'longitude': point.longitude})

In [21]:
df = pd.DataFrame(data)

df['prev_latitude'] = df['latitude'].shift(1)
df['prev_longitude'] = df['longitude'].shift(1)

In [22]:
df.head(3)

Unnamed: 0,latitude,longitude,prev_latitude,prev_longitude
0,1.34228,103.83519,,
1,1.34265,103.83574,1.34228,103.83519
2,1.34276,103.83592,1.34265,103.83574


In [28]:
df['distance'] = df.apply(
    lambda x: geodesic((x['prev_latitude'], x['prev_longitude']), (x['latitude'], x['longitude'])).meters if not pd.isna(x['prev_latitude']) and not pd.isna(x['prev_longitude']) else 0, 
    axis=1,
)
df['accumulated_distance'] = df['distance'].cumsum() / 1000.0

In [29]:
time_at_each_point = []

cutoff_time_hours = 74
total_distance = df['accumulated_distance'].iloc[-1]
required_pace = total_distance / cutoff_time_hours

start_time = datetime(2024, 5, 17, 21, 0, 0) 
current_time = start_time

for i in range(len(df)):
    time_at_each_point.append(current_time)
    required_pace_per_hour = (df['distance'].iloc[i] / 1000.0) / required_pace
    current_time += timedelta(hours=required_pace_per_hour)

df['estimated_time'] = time_at_each_point

In [32]:
df.head(3)

Unnamed: 0,latitude,longitude,prev_latitude,prev_longitude,distance,accumulated_distance,estimated_time
0,1.34228,103.83519,,,0.0,0.0,2024-05-17 21:00:00.000000
1,1.34265,103.83574,1.34228,103.83519,73.616069,0.073616,2024-05-17 21:00:00.000000
2,1.34276,103.83592,1.34265,103.83574,23.438023,0.097054,2024-05-17 21:01:00.983782


In [33]:
df.to_csv(
    '../postgis/data/gpx-route.csv', 
    columns=['latitude','longitude','accumulated_distance','estimated_time'],
    header=False,
    index=False,
)