In [8]:
import pandas as pd  # Import pandas for DataFrame creation
import json  # Import json for reading local JSON files
import os
import gpxpy
import xml.etree.ElementTree as ET
import numpy as np
from haversine import haversine, Unit

# Function to extract data from a local JSON file
def extract_data_from_json_file(file_path):
    """Read a local JSON file and pull out altitude, distance and coordinates.

    The file is expected to contain a list of objects. For each object the
    ``altitude`` and ``distance`` values are copied, and latitude/longitude
    are read from the ``k`` and ``A`` keys of its ``position`` object.
    Keys that are missing yield ``None``.

    Args:
        file_path: Path of the JSON file to read.

    Returns:
        A list of dicts with the keys ``altitude``, ``distance``,
        ``latitude`` and ``longitude``. An empty list is returned (after
        printing an error message) when the file does not exist or does not
        contain valid JSON.
    """
    try:
        # JSON text is UTF-8; do not depend on the platform's default encoding.
        with open(file_path, 'r', encoding='utf-8') as file:
            data = json.load(file)
    except FileNotFoundError:
        print(f"Error: File not found at {file_path}")
        return []
    except json.JSONDecodeError as e:
        print(f"Error parsing JSON: {e}")
        return []

    extracted_data = []
    for item in data:  # Assuming the JSON is a list of objects
        # "position" may be absent or explicitly null; both mean "no coordinates".
        position = item.get('position') or {}
        extracted_data.append({
            'altitude': item.get('altitude'),
            'distance': item.get('distance'),
            'latitude': position.get('k'),   # latitude is stored under 'k'
            'longitude': position.get('A'),  # longitude is stored under 'A'
        })

    return extracted_data



In [None]:
# Convert extracted data to a pandas DataFrame
def data_to_dataframe(data):
    """Build a pandas DataFrame from the extracted records.

    Args:
        data: Anything accepted by the ``pd.DataFrame`` constructor, e.g. a
            list of dicts.

    Returns:
        The DataFrame constructed from ``data``.
    """
    frame = pd.DataFrame(data)
    return frame

# Example usage: extract the records from a local JSON file and preview them.
# The DataFrame is only built and printed when the extraction returned at
# least one record; an empty list skips the block below.
file_path = "path_to_your_downloaded_json_file.json"  # Replace with the actual file path
extracted_data = extract_data_from_json_file(file_path)
if extracted_data:
    df = data_to_dataframe(extracted_data)
    print(df.head())  # Display the first few rows of the DataFrame

Error fetching data: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))


In [9]:
# Input directory that holds the GPX files and output directory for the
# parsed results. Both paths are relative, so they resolve against the
# notebook's current working directory.
gpx_dir = "data/gpx_files"
output_dir = "data/gpx_parsed"

# Create the output directory up front; exist_ok=True makes re-running this
# cell safe when the directory already exists.
os.makedirs(output_dir, exist_ok=True)

# Function to calculate haversine distance
def haversine_distance(lat1, lon1, lat2, lon2):
    """Return the distance in meters between two latitude/longitude points.

    Thin wrapper around ``haversine`` from the ``haversine`` package, called
    with ``unit=Unit.METERS``.

    Args:
        lat1: Latitude of the first point.
        lon1: Longitude of the first point.
        lat2: Latitude of the second point.
        lon2: Longitude of the second point.

    Returns:
        The value returned by ``haversine`` for the two points.
    """
    start_point = (lat1, lon1)
    end_point = (lat2, lon2)
    return haversine(start_point, end_point, unit=Unit.METERS)

# Function to parse metadata using ElementTree
def parse_metadata(file_path):
    """Return the author link stored in a GPX file's metadata.

    The file is parsed with ElementTree and the element
    ``metadata/author/link`` is looked up in the GPX 1.1 namespace
    (``http://www.topografix.com/GPX/1/1``).

    Args:
        file_path: Path of the GPX file to read.

    Returns:
        The ``href`` attribute of the link element, or the string
        ``"No link"`` when the element is missing or carries no ``href``.
    """
    namespace = {"default": "http://www.topografix.com/GPX/1/1"}
    root = ET.parse(file_path).getroot()

    link = root.find("default:metadata/default:author/default:link", namespace)
    if link is None:
        return "No link"

    # A <link> element without an href is treated like a missing link instead
    # of raising KeyError.
    return link.get("href", "No link")

# Vectorized haversine implemented with NumPy (avoids the haversine package)
def fast_haversine_np(df):
    """Compute haversine distances between consecutive rows of ``df``.

    Args:
        df: DataFrame with ``latitude`` and ``longitude`` columns in degrees.

    Returns:
        A float NumPy array with one entry per row of ``df``. Entry ``i`` is
        the distance in meters (Earth radius 6,371,000 m) between row
        ``i - 1`` and row ``i``; the first entry is 0. An empty frame yields
        an empty array so the result can always be assigned back as a column.
    """
    # dtype=float also handles object-typed columns, e.g. an empty frame.
    lat = np.radians(df["latitude"].to_numpy(dtype=float))
    lon = np.radians(df["longitude"].to_numpy(dtype=float))

    # Without this guard the leading 0 below would give a length-1 result for
    # a zero-row frame.
    if lat.size == 0:
        return np.zeros(0, dtype=float)

    dlat = np.diff(lat)
    dlon = np.diff(lon)

    a = np.sin(dlat / 2.0) ** 2 + np.cos(lat[:-1]) * np.cos(lat[1:]) * np.sin(dlon / 2.0) ** 2
    # Rounding can push `a` marginally outside [0, 1], which would turn the
    # sqrt/arcsin below into NaN.
    a = np.clip(a, 0.0, 1.0)
    c = 2 * np.arcsin(np.sqrt(a))
    R = 6371000  # Earth radius in meters
    dist = R * c

    # The first point has no predecessor, so its distance is 0.
    return np.concatenate(([0.0], dist))

# Function to parse a single GPX file and calculate derived fields
def parse_gpx_file(file_path):
    """Parse one GPX file into a DataFrame of track points with derived fields.

    Every point of every segment of every track becomes one row with the
    columns ``latitude``, ``longitude``, ``elevation``, ``time``, ``name``
    (the file name without its extension) and ``link`` (the value returned by
    ``parse_metadata``). Four derived columns are added:

    * ``distance``: value from ``fast_haversine_np`` (distance to the
      previous row, 0 for the first row),
    * ``elevation_diff``: elevation change from the previous row, with
      missing differences filled with 0,
    * ``cum_elevation``: running sum of ``elevation_diff``,
    * ``cum_distance``: running sum of ``distance``.

    Args:
        file_path: Path of the GPX file to parse.

    Returns:
        The DataFrame described above. A file without any track points
        yields an empty DataFrame that still has all ten columns.
    """
    # Extract the file name (without extension) as the name
    name = os.path.splitext(os.path.basename(file_path))[0]
    # Parse metadata
    link = parse_metadata(file_path)

    # Parse track points using gpxpy. The encoding is explicit because the
    # platform default is locale-dependent (e.g. not UTF-8 on Windows).
    with open(file_path, 'r', encoding='utf-8') as gpx_file:
        gpx = gpxpy.parse(gpx_file)

    route_info = [
        {
            "latitude": point.latitude,
            "longitude": point.longitude,
            "elevation": point.elevation,
            "time": point.time,
            "name": name,
            "link": link
        }
        for track in gpx.tracks
        for segment in track.segments
        for point in segment.points
    ]

    # Passing the column list keeps the schema stable even when no points
    # were found (a bare DataFrame([]) has no columns at all).
    point_columns = ["latitude", "longitude", "elevation", "time", "name", "link"]
    route_df = pd.DataFrame(route_info, columns=point_columns)

    if route_df.empty:
        # Nothing to derive from; add the derived columns as empty float
        # columns so callers always see the same layout.
        for derived in ("distance", "elevation_diff", "cum_elevation", "cum_distance"):
            route_df[derived] = pd.Series(dtype=float)
        return route_df

    route_df["distance"] = fast_haversine_np(route_df)
    # Calculate elevation difference
    route_df['elevation_diff'] = route_df['elevation'].diff().fillna(0)

    # Calculate cumulative elevation and cumulative distance
    route_df['cum_elevation'] = route_df['elevation_diff'].cumsum()
    route_df['cum_distance'] = route_df['distance'].cumsum()

    return route_df

In [10]:
# Scan the GPX directory, parse each matching file and export it as CSV.
# NOTE(review): although the final message says "All GPX files", the filter
# below only matches file names ending in "giro-abruzzo-2025-stage-1.gpx".
# If every GPX file should be processed, the suffix would need to be ".gpx";
# confirm the intent before changing it.
for file_name in os.listdir(gpx_dir):
    if file_name.endswith("giro-abruzzo-2025-stage-1.gpx"):
        file_path = os.path.join(gpx_dir, file_name)
        print(f"Processing {file_name}...")

        # Parse the GPX file and calculate derived fields
        df = parse_gpx_file(file_path)

        # Export to CSV under the same base name; index=False keeps the row
        # index out of the file.
        output_file = os.path.join(output_dir, f"{os.path.splitext(file_name)[0]}.csv")
        df.to_csv(output_file, index=False)
        print(f"Saved to {output_file}")

print("All GPX files have been processed.")

Processing giro-abruzzo-2025-stage-1.gpx...
Saved to data/gpx_parsed\giro-abruzzo-2025-stage-1.csv
All GPX files have been processed.


In [11]:
# Reload the exported stage CSV from disk (this rebinds `df`) and preview the
# first rows; the trailing expression is what the notebook displays.
df = pd.read_csv("data/gpx_parsed/giro-abruzzo-2025-stage-1.csv")
df.head()

Unnamed: 0,latitude,longitude,elevation,time,name,link,distance,elevation_diff,cum_elevation,cum_distance
0,42.09387,14.63236,82.0,2025-04-15 14:36:39+02:00,giro-abruzzo-2025-stage-1,https://www.la-flamme-rouge.eu/maps/viewtrack/...,0.0,0.0,0.0,0.0
1,42.09623,14.63277,83.0,2025-04-15 14:37:02+02:00,giro-abruzzo-2025-stage-1,https://www.la-flamme-rouge.eu/maps/viewtrack/...,264.591552,1.0,1.0,264.591552
2,42.09696,14.63285,83.0,2025-04-15 14:37:10+02:00,giro-abruzzo-2025-stage-1,https://www.la-flamme-rouge.eu/maps/viewtrack/...,81.440227,0.0,1.0,346.031779
3,42.09756,14.63298,84.0,2025-04-15 14:37:15+02:00,giro-abruzzo-2025-stage-1,https://www.la-flamme-rouge.eu/maps/viewtrack/...,67.573655,1.0,2.0,413.605433
4,42.09814,14.63308,84.0,2025-04-15 14:37:21+02:00,giro-abruzzo-2025-stage-1,https://www.la-flamme-rouge.eu/maps/viewtrack/...,65.018675,0.0,2.0,478.624108
