In [1]:
import os
import gpxpy
import pandas as pd
import numpy as np
from haversine import haversine, Unit

In [2]:
# Directory containing GPX files
gpx_dir = "data/gpx_files"
output_dir = "data/gpx_parsed"

# Ensure the output directory exists
os.makedirs(output_dir, exist_ok=True)

# Function to calculate haversine distance
def haversine_distance(lat1, lon1, lat2, lon2):
    return haversine((lat1, lon1), (lat2, lon2), unit=Unit.METERS)

# Function to parse a single GPX file and calculate derived fields
def parse_gpx_file(file_path):
    with open(file_path, 'r') as gpx_file:
        gpx = gpxpy.parse(gpx_file)

    # Extract metadata (name and link)
    name = gpx.name if gpx.name else "Unknown"
    link = gpx.links[0].href if gpx.links else "No link"

    # Extract track points
    route_info = []
    for track in gpx.tracks:
        for segment in track.segments:
            for point in segment.points:
                route_info.append({
                    "latitude": point.latitude,
                    "longitude": point.longitude,
                    "elevation": point.elevation,
                    "time": point.time,
                    "name": name,
                    "link": link
                })

    # Convert to DataFrame
    route_df = pd.DataFrame(route_info)

    # Calculate haversine distance between consecutive points
    distances = [0]  # First point has no distance
    for i in range(1, len(route_df)):
        distances.append(haversine_distance(
            route_df.iloc[i - 1]['latitude'], route_df.iloc[i - 1]['longitude'],
            route_df.iloc[i]['latitude'], route_df.iloc[i]['longitude']
        ))
    route_df['distance'] = distances

    # Calculate elevation difference
    route_df['elevation_diff'] = route_df['elevation'].diff().fillna(0)

    # Calculate cumulative elevation and cumulative distance
    route_df['cum_elevation'] = route_df['elevation_diff'].cumsum()
    route_df['cum_distance'] = route_df['distance'].cumsum()

    return route_df



In [4]:
# Iterate over all GPX files in the directory
for file_name in os.listdir(gpx_dir):
    if file_name.endswith("2017 Abu Dhabi Tour Stage 2.gpx"):
        file_path = os.path.join(gpx_dir, file_name)
        print(f"Processing {file_name}...")

        # Parse the GPX file and calculate derived fields
        df = parse_gpx_file(file_path)

        # Export to CSV
        output_file = os.path.join(output_dir, f"{os.path.splitext(file_name)[0]}.csv")
        df.to_csv(output_file, index=False)
        print(f"Saved to {output_file}")

print("All GPX files have been processed.")

Processing 2017 Abu Dhabi Tour Stage 2.gpx...


AttributeError: 'GPX' object has no attribute 'links'

In [None]:
# Iterate over all GPX files in the directory
for file_name in os.listdir(gpx_dir):
    if file_name.endswith(".gpx"):
        file_path = os.path.join(gpx_dir, file_name)
        print(f"Processing {file_name}...")

        # Parse the GPX file and calculate derived fields
        df = parse_gpx_file(file_path)

        # Export to CSV
        output_file = os.path.join(output_dir, f"{os.path.splitext(file_name)[0]}.csv")
        df.to_csv(output_file, index=False)
        print(f"Saved to {output_file}")

print("All GPX files have been processed.")