In [None]:
# Import required libraries
import fitparse
import gpxpy
import gpxpy.gpx
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import folium
import os
from datetime import timezone
import shutil

print("✅ Libraries imported successfully")
print("📦 Required packages: fitparse, gpxpy, pandas, numpy, matplotlib, folium")

def semicircle_to_degrees(semicircle_value):
    """Scale a Garmin semicircle coordinate to decimal degrees.

    The input is multiplied by ``180 / 2**31``. ``None`` (no position
    recorded) is passed through unchanged as ``None``.
    """
    degrees_per_semicircle = 180 / 2**31
    return None if semicircle_value is None else semicircle_value * degrees_per_semicircle

def check_file_exists(filepath):
    """Report whether ``filepath`` exists and print a one-line status.

    Prints the base name and size in KB when the path exists, or a
    "not found" message otherwise.

    Returns:
        True if the path exists, False if it does not.
    """
    if os.path.exists(filepath):
        kilobytes = os.path.getsize(filepath) / 1024
        print(f"✅ Found file: {os.path.basename(filepath)} ({kilobytes:.1f} KB)")
        return True

    print(f"❌ File not found: {filepath}")
    return False

print("🔧 Utility functions loaded")

In [None]:
# Step 1: Analyze FIT file
fit_file_path = os.path.join(data_folder, fit_filename)

# Number of leading records inspected for the GPS coverage estimate
SAMPLE_SIZE = 1000

# Reset up front so later cells never reuse stale records from a previous
# run when the file is missing.
records = []

if not check_file_exists(fit_file_path):
    print(f"Please place your FIT file in the '{data_folder}' folder and update the filename above.")
else:
    print("\n=== ANALYZING FIT FILE ===")

    # Parse FIT file
    fitfile = fitparse.FitFile(fit_file_path)

    # Get all records (main data points)
    records = list(fitfile.get_messages('record'))
    print(f"📊 Total data records: {len(records):,}")

    # Check message types
    message_types = {message.name for message in fitfile.get_messages()}
    unknown_messages = [msg for msg in message_types if msg.startswith('unknown')]
    print(f"⚠️  Unknown message types: {len(unknown_messages)} (these cause Strava issues)")

    # Analyze GPS and data availability on the first SAMPLE_SIZE records
    sampled_records = records[:SAMPLE_SIZE]
    gps_count = 0
    sample_data = []

    for record in sampled_records:
        lat = record.get_value('position_lat')
        lon = record.get_value('position_long')

        # A usable GPS fix needs both coordinates; a half-populated point
        # would otherwise reach the ':.6f' format below as None.
        if lat is None or lon is None:
            continue

        gps_count += 1

        if len(sample_data) < 5:
            sample_data.append({
                'lat': semicircle_to_degrees(lat),
                'lon': semicircle_to_degrees(lon),
                'timestamp': record.get_value('timestamp'),
                'power': record.get_value('power'),
                'heart_rate': record.get_value('heart_rate'),
                'altitude': record.get_value('altitude')
            })

    # Divide by the number of records actually inspected: short files have
    # fewer than SAMPLE_SIZE records, and an empty file has none.
    gps_coverage = gps_count / len(sampled_records) * 100 if sampled_records else 0.0
    print(f"📍 GPS coverage: {gps_coverage:.1f}% (in first {len(sampled_records):,} records)")

    if sample_data:
        print(f"📍 Sample coordinates: {sample_data[0]['lat']:.6f}, {sample_data[0]['lon']:.6f}")
        # NOTE: this range spans only the (up to 5) sample points collected above
        print(f"⏰ Time range: {sample_data[0]['timestamp']} to {sample_data[-1]['timestamp']}")

    print("\n✅ FIT file analysis complete")

In [None]:
# Step 2: Extract GPS and training data
print("=== EXTRACTING GPS AND TRAINING DATA ===")

# Record fields copied as-is next to the converted coordinates
training_fields = ['altitude', 'speed', 'power', 'heart_rate', 'cadence', 'temperature', 'distance']

# Extract all GPS points with training data
gps_data = []
for record in records:
    lat_raw = record.get_value('position_lat')
    lon_raw = record.get_value('position_long')

    # Keep only records that carry a complete position
    if lat_raw is None or lon_raw is None:
        continue

    point = {
        'timestamp': record.get_value('timestamp'),
        'lat': semicircle_to_degrees(lat_raw),
        'lon': semicircle_to_degrees(lon_raw),
    }
    point.update({field: record.get_value(field) for field in training_fields})
    gps_data.append(point)

# Convert to DataFrame for analysis. Explicit columns keep the frame usable
# (no KeyError on column access) even when no GPS points were found.
df = pd.DataFrame(gps_data, columns=['timestamp', 'lat', 'lon'] + training_fields)
print(f"📍 Extracted {len(df):,} GPS points")

if df.empty:
    # Coverage percentages and route statistics are undefined without points
    print("⚠️  No GPS points found - the remaining steps need at least one GPS point")
else:
    # Data coverage analysis
    power_coverage = df['power'].notna().sum() / len(df) * 100
    hr_coverage = df['heart_rate'].notna().sum() / len(df) * 100
    cadence_coverage = df['cadence'].notna().sum() / len(df) * 100

    print(f"⚡ Power data: {power_coverage:.1f}% coverage")
    print(f"❤️  Heart rate: {hr_coverage:.1f}% coverage")
    print(f"🚴 Cadence: {cadence_coverage:.1f}% coverage")

    # Route statistics
    duration = df['timestamp'].max() - df['timestamp'].min()
    max_distance_m = df['distance'].max()
    # NaN is truthy, so a plain truthiness test would let a missing distance
    # through; check for missing data explicitly.
    distance_km = max_distance_m / 1000 if pd.notna(max_distance_m) else 0

    print("\n📊 ROUTE STATISTICS:")
    print(f"🚴 Distance: {distance_km:.1f} km ({distance_km/1.609:.1f} miles)")
    print(f"⏱️  Duration: {duration}")
    print(f"📈 Max elevation: {df['altitude'].max():.0f}m")
    print(f"📉 Min elevation: {df['altitude'].min():.0f}m")

print("\n✅ Data extraction complete")

In [None]:
# Step 3: Visualize the route
print("=== CREATING ROUTE VISUALIZATION ===")

# Fail early with a clear message instead of an IndexError from .iloc[0]
if df.empty:
    raise ValueError("No GPS points available - cannot build the route map")

# Create interactive map centred on the mean position of the track
center_lat = df['lat'].mean()
center_lon = df['lon'].mean()

route_map = folium.Map(
    location=[center_lat, center_lon],
    zoom_start=10,
    tiles='OpenStreetMap'
)

# Add GPS track (column-wise conversion avoids a slow row-by-row iterrows loop)
coordinates = df[['lat', 'lon']].values.tolist()
folium.PolyLine(
    coordinates,
    color='red',
    weight=3,
    opacity=0.8,
    popup=f'GPS Track - {len(coordinates):,} points'
).add_to(route_map)

# Add start/end markers
start_point = df.iloc[0]
end_point = df.iloc[-1]

folium.Marker(
    [start_point['lat'], start_point['lon']],
    popup=f"START: {start_point['timestamp']}",
    icon=folium.Icon(color='green', icon='play')
).add_to(route_map)

folium.Marker(
    [end_point['lat'], end_point['lon']],
    popup=f"END: {end_point['timestamp']}",
    icon=folium.Icon(color='red', icon='stop')
).add_to(route_map)

print(f"🗺️  Map created with {len(coordinates):,} GPS points")
print("📍 Interactive map will display below")

# Display the map (the last expression of the cell is rendered by Jupyter)
route_map

In [None]:
# Step 4: Training data visualization
print("=== TRAINING DATA ANALYSIS ===")

# Create elevation and training data plots
fig, axes = plt.subplots(3, 1, figsize=(15, 12))

# Distance for x-axis. Work on a derived frame so the shared `df` is not
# modified by this cell.
plot_df = df.assign(distance_km=df['distance'] / 1000)

# Elevation profile
axes[0].plot(plot_df['distance_km'], plot_df['altitude'], color='brown', linewidth=1)
axes[0].set_title('Elevation Profile', fontsize=14, fontweight='bold')
axes[0].set_xlabel('Distance (km)')
axes[0].set_ylabel('Elevation (m)')
axes[0].grid(True, alpha=0.3)

# Power data (only rows where a power value was recorded)
power_data = plot_df[plot_df['power'].notna()]
if len(power_data) > 0:
    axes[1].plot(power_data['distance_km'], power_data['power'], color='red', linewidth=1, alpha=0.7)
    axes[1].set_title(f"Power Data (Average: {power_data['power'].mean():.0f}W)", fontsize=14, fontweight='bold')
    axes[1].set_xlabel('Distance (km)')
    axes[1].set_ylabel('Power (W)')
    axes[1].grid(True, alpha=0.3)
else:
    axes[1].text(0.5, 0.5, 'No Power Data Available', ha='center', va='center', transform=axes[1].transAxes)
    axes[1].set_title('Power Data - Not Available', fontsize=14)

# Heart rate data (only rows where a heart rate value was recorded)
hr_data = plot_df[plot_df['heart_rate'].notna()]
if len(hr_data) > 0:
    axes[2].plot(hr_data['distance_km'], hr_data['heart_rate'], color='blue', linewidth=1, alpha=0.7)
    axes[2].set_title(f"Heart Rate (Average: {hr_data['heart_rate'].mean():.0f} bpm)", fontsize=14, fontweight='bold')
    axes[2].set_xlabel('Distance (km)')
    axes[2].set_ylabel('Heart Rate (bpm)')
    axes[2].grid(True, alpha=0.3)
else:
    axes[2].text(0.5, 0.5, 'No Heart Rate Data Available', ha='center', va='center', transform=axes[2].transAxes)
    axes[2].set_title('Heart Rate - Not Available', fontsize=14)

plt.tight_layout()
plt.show()

print("✅ Training data visualization complete")

In [None]:
# Step 5: Create Strava-compatible GPX file with training data
print("=== CREATING ENHANCED GPX FILE ===")

def create_enhanced_gpx(df, output_filename):
    """Write a GPX 1.1 track with GPS data and training extensions.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per track point. Must provide the columns ``lat``, ``lon``,
        ``timestamp``, ``altitude``, ``heart_rate``, ``power``, ``cadence``
        and ``temperature``. Timestamps are formatted with a literal ``Z``
        suffix, so they are assumed to already be in UTC (TODO confirm
        against the FIT parser output).
    output_filename : str
        Path of the GPX file to write (UTF-8, overwritten if it exists).

    Returns
    -------
    tuple of (int, int)
        ``(points_with_power, points_with_hr)``: the number of track points
        for which a positive power / heart-rate value was written.

    Raises
    ------
    ValueError
        If ``df`` contains no rows.
    """
    if len(df) == 0:
        raise ValueError("Cannot create a GPX file: the DataFrame contains no GPS points")

    time_format = '%Y-%m-%dT%H:%M:%SZ'
    point_columns = ['lat', 'lon', 'timestamp', 'altitude',
                     'heart_rate', 'power', 'cadence', 'temperature']

    # Collect the document as a list of fragments and join once at the end;
    # repeated string concatenation is quadratic for long rides.
    parts = ['''<?xml version="1.0" encoding="UTF-8"?>
<gpx version="1.1" creator="FIT GPS Recovery Tool"
     xmlns="http://www.topografix.com/GPX/1/1"
     xmlns:gpxtpx="http://www.garmin.com/xmlschemas/TrackPointExtension/v1"
     xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
     xsi:schemaLocation="http://www.topografix.com/GPX/1/1 http://www.topografix.com/GPX/1/1/gpx.xsd
                         http://www.garmin.com/xmlschemas/TrackPointExtension/v1 http://www.garmin.com/xmlschemas/TrackPointExtensionv1.xsd">
  <metadata>
    <name>Recovered Ride with Training Data</name>
    <desc>GPS track recovered from FIT file with power, heart rate, and cadence</desc>
''']

    # The metadata time is the first point's timestamp, when it has one
    start_time = df['timestamp'].iloc[0]
    if pd.notna(start_time):
        parts.append(f'    <time>{start_time.strftime(time_format)}</time>\n')

    parts.append('''  </metadata>
  <trk>
    <name>Bike Ride</name>
    <type>cycling</type>
    <trkseg>
''')

    points_with_power = 0
    points_with_hr = 0

    # Plain tuples over the needed columns are much cheaper than iterrows()
    rows = df[point_columns].itertuples(index=False, name=None)
    for lat, lon, timestamp, altitude, heart_rate, power, cadence, temperature in rows:
        point_lines = [f'      <trkpt lat="{lat:.6f}" lon="{lon:.6f}">']

        # <ele> and <time> are optional in GPX 1.1: leave them out when the
        # value is missing rather than inventing a 0 m elevation or failing
        # on a missing timestamp.
        if pd.notna(altitude):
            point_lines.append(f'        <ele>{altitude:.1f}</ele>')
        if pd.notna(timestamp):
            point_lines.append(f'        <time>{timestamp.strftime(time_format)}</time>')

        # Training data extensions, written only for values that are present
        # (and positive for heart rate, power and cadence).
        extensions = []
        if pd.notna(heart_rate) and heart_rate > 0:
            extensions.append(f'            <gpxtpx:hr>{int(heart_rate)}</gpxtpx:hr>')
            points_with_hr += 1

        # NOTE(review): <gpxtpx:power> does not appear to be part of the
        # TrackPointExtension v1 schema - confirm the upload target reads it.
        if pd.notna(power) and power > 0:
            extensions.append(f'            <gpxtpx:power>{int(power)}</gpxtpx:power>')
            points_with_power += 1

        if pd.notna(cadence) and cadence > 0:
            extensions.append(f'            <gpxtpx:cad>{int(cadence)}</gpxtpx:cad>')

        if pd.notna(temperature):
            extensions.append(f'            <gpxtpx:atemp>{int(temperature)}</gpxtpx:atemp>')

        if extensions:
            point_lines.append('        <extensions>')
            point_lines.append('          <gpxtpx:TrackPointExtension>')
            point_lines.extend(extensions)
            point_lines.append('          </gpxtpx:TrackPointExtension>')
            point_lines.append('        </extensions>')

        point_lines.append('      </trkpt>')
        # Real newlines between lines (not a literal backslash-n sequence)
        parts.append('\n'.join(point_lines) + '\n')

    # Close GPX
    parts.append('''    </trkseg>
  </trk>
</gpx>''')

    # Write file
    with open(output_filename, 'w', encoding='utf-8') as f:
        f.write(''.join(parts))

    return points_with_power, points_with_hr

# Create the enhanced GPX file
power_points, hr_points = create_enhanced_gpx(df, enhanced_gpx_output)

print(f"✅ Enhanced GPX created: {enhanced_gpx_output}")
print(f"📊 Points with power data: {power_points:,}")
print(f"📊 Points with heart rate: {hr_points:,}")

# File size check (reported in MB)
file_size_mb = os.path.getsize(enhanced_gpx_output) / (1024*1024)
print(f"📁 File size: {file_size_mb:.1f} MB")

In [None]:
# Step 6: Summary and Upload Instructions
print("=== FINAL SUMMARY ===\n")

print("🎉 FIT GPS RECOVERY COMPLETE!\n")

# Recap of what was extracted into `df`
print("📊 RECOVERED DATA:")
print(f"   📍 GPS Points: {len(df):,}")
print(f"   ⚡ Power Coverage: {(df['power'].notna().sum() / len(df) * 100):.1f}%")
print(f"   ❤️  Heart Rate Coverage: {(df['heart_rate'].notna().sum() / len(df) * 100):.1f}%")
print(f"   🚴 Distance: {df['distance'].max()/1609:.1f} miles")
print(f"   ⏱️  Duration: {df['timestamp'].max() - df['timestamp'].min()}\n")

# Output file written by the GPX creation step
print("📁 FILES CREATED:")
print(f"   ✅ {enhanced_gpx_output} - Enhanced GPX with training data")
print(f"   📁 File size: {os.path.getsize(enhanced_gpx_output)/(1024*1024):.1f} MB\n")

print("📤 UPLOAD TO STRAVA:")
print(f"   1. Upload '{enhanced_gpx_output}' to Strava")
print("   2. This file includes GPS + Power + Heart Rate + Cadence data")
print("   3. Strava will process all training metrics normally\n")

print("🔧 TROUBLESHOOTING:")
print("   • If upload fails: Check file size (<25MB for Strava)")
print("   • If missing data: Verify FIT file has the expected data types")
print("   • For other FIT files: Update 'fit_filename' variable and re-run\n")

print("💡 WHY THIS WORKS:")
print("   • Original FIT file has proprietary Wahoo message types")
print("   • Strava rejects files with unknown message types")
print("   • GPX with standard extensions avoids this issue")
print("   • All essential training data is preserved\n")

print("✅ Ready for Strava upload!")

# FIT GPS Recovery Tool

This notebook extracts GPS data from FIT files that Strava rejects due to proprietary message types.

## Problem Solved
- **Issue**: Wahoo ELEMNT FIT files contain unknown message types that Strava rejects
- **Solution**: Extract GPS + power/HR data into standard GPX format with proper extensions
- **Result**: Strava-compatible file with all your training data intact

## Quick Start
1. Place your FIT file in the `data/` folder
2. Update the `fit_filename` variable below
3. Run all cells
4. Upload the generated `enhanced_ride_with_power.gpx` to Strava

In [None]:
# --- Input: edit these to point at your own ride ---------------------------
# Folder that contains the FIT file
data_folder = 'data'
# Name of the FIT file inside `data_folder` - UPDATE THIS for your ride
fit_filename = '2025-09-20-132208-ELEMNT_BOLT_8B4E-170-0[1].fit'

# --- Output: names of the GPX files ----------------------------------------
enhanced_gpx_output = 'enhanced_ride_with_power.gpx'
basic_gpx_output = 'recovered_ride.gpx'