# Garmin Activities Data Exploration

This notebook explores the Activities module from Garmin Connect API.

## Available Activities Methods:
- `get_activities(start, limit)` - Fetch activities with pagination
- `get_activities_fordate(date)` - Activities for a specific date
- `get_last_activity()` - Most recent activity
- `get_activity(activity_id)` - Activity summary
- `get_activity_details(activity_id)` - Comprehensive activity data including splits
- `get_activity_gear(activity_id)` - Gear used in an activity

## Goal:
Extract activities data and create dbt models that can be joined with gear data!

In [None]:
# Cell 1: Imports and Login
from garminconnect import Garmin
from datetime import date, timedelta
import json
import pandas as pd
import os
from getpass import getpass

# Login using stored tokens
# Location the login tokens are read from and, after a fresh login, saved to.
tokenstore = os.path.expanduser("~/.garminconnect")

try:
    print(f"Attempting to login using stored tokens from: {tokenstore}")
    api = Garmin()
    api.login(tokenstore)
    print("‚úÖ Successfully logged in using stored tokens!")
except Exception as token_error:
    # Any failure of the token login falls back to an interactive login, but
    # the cause is reported so that an unrelated problem (for example a
    # network error) is not silently mistaken for missing tokens.
    print("No valid tokens found. Requesting fresh login credentials.")
    print(f"   Reason: {type(token_error).__name__}: {token_error}")
    email = input("Email address: ").strip()
    password = getpass("Password: ")

    api = Garmin(email=email, password=password)
    api.login()
    # Persist the new tokens so the next run can skip the credential prompt.
    api.garth.dump(tokenstore)
    print(f"‚úÖ Login successful! Tokens saved to: {tokenstore}")

today = date.today()
print(f"Connected! Today's date: {today}")

In [None]:
# Cell 2: Get Recent Activities (Basic List)
print("üèÉ RECENT ACTIVITIES")
print("="*60)

# Get last 10 activities
activities = api.get_activities(0, 10)

print(f"\nFound {len(activities)} activities:\n")

for i, activity in enumerate(activities, 1):
    activity_id = activity.get('activityId')
    # `dict.get(key, default)` only covers a *missing* key; `or` also covers a
    # key that is present with a null value, which would otherwise raise a
    # TypeError in the arithmetic below or an AttributeError on `.get`.
    name = activity.get('activityName') or 'Unnamed'
    activity_type = (activity.get('activityType') or {}).get('typeKey', 'unknown')
    distance = (activity.get('distance') or 0) / 1000  # meters to km
    duration = (activity.get('duration') or 0) / 60  # seconds to minutes
    start_time = activity.get('startTimeLocal') or 'N/A'

    print(f"{i}. [{activity_id}] {name}")
    print(f"   Type: {activity_type}")
    print(f"   Distance: {distance:.2f} km")
    print(f"   Duration: {duration:.1f} minutes")
    print(f"   Start: {start_time}")
    print()

In [None]:
# Cell 3: View Raw Data Structure
print("üìã RAW ACTIVITY DATA STRUCTURE")
print("=" * 60)

# Inspect the first activity of the list fetched earlier; an empty list
# prints nothing beyond the banner above.
if activities:
    sample_activity = activities[0]

    print(f"\nActivity ID: {sample_activity.get('activityId')}")
    print(f"Activity Name: {sample_activity.get('activityName')}")
    print(f"\nAvailable fields ({len(sample_activity)} total):\n")

    # One line per field, alphabetically, with the Python type of its value
    for field_name in sorted(sample_activity):
        type_name = type(sample_activity[field_name]).__name__
        print(f"  {field_name}: {type_name}")

    # Banner followed by the complete record; `default=str` stringifies any
    # value the JSON encoder cannot serialise natively.
    print("\n" + "=" * 60, "Full JSON:", "=" * 60, sep="\n")
    print(json.dumps(sample_activity, indent=2, default=str))

In [None]:
# Cell 4: Get Detailed Activity Data
print("üîç DETAILED ACTIVITY DATA")
print("="*60)

if activities:
    # Get details for the first activity
    activity_id = activities[0].get('activityId')
    print(f"\nFetching details for activity: {activity_id}\n")

    # Get detailed activity data
    details = api.get_activity(activity_id)

    # NOTE(review): the single-activity response appears to nest its metrics
    # under 'summaryDTO' and its type under 'activityTypeDTO' rather than at
    # the top level like the list response - confirm against a real payload.
    # Both locations are consulted, so the original top-level keys still work.
    summary = details.get('summaryDTO') or {}
    type_info = details.get('activityTypeDTO') or details.get('activityType') or {}

    def _lookup(key):
        """Return the metric from the nested summary, else from the top level."""
        value = summary.get(key)
        return details.get(key) if value is None else value

    def _number(key):
        """Return the metric as a number, treating missing or null as 0."""
        return _lookup(key) or 0

    def _text(key):
        """Return the metric for display, showing 'N/A' when missing or null."""
        value = _lookup(key)
        return 'N/A' if value is None else value

    print(f"Activity: {details.get('activityName')}")
    print(f"Type: {type_info.get('typeKey')}")
    print(f"\nKey Metrics:")
    print(f"  Distance: {_number('distance')/1000:.2f} km")
    print(f"  Duration: {_number('duration')/60:.1f} minutes")
    print(f"  Avg Speed: {_number('averageSpeed')*3.6:.2f} km/h")
    print(f"  Avg HR: {_text('averageHR')} bpm")
    print(f"  Max HR: {_text('maxHR')} bpm")
    print(f"  Calories: {_text('calories')}")
    print(f"  Elevation Gain: {_text('elevationGain')} m")

    print(f"\n\nAvailable fields in detailed data: {len(details)}")
    print("\nField names:")
    for key in sorted(details.keys()):
        print(f"  - {key}")

In [None]:
# Cell 5: Check Activity Gear Connection
print("üëü ACTIVITY GEAR CONNECTION")
print("=" * 60)

if activities:
    first_activity = activities[0]
    activity_id = first_activity.get('activityId')

    # Ask the API for the gear attached to this activity. A failure is
    # reported and the rest of the cell still runs.
    try:
        gear = api.get_activity_gear(activity_id)
        print(f"\nGear used in activity {activity_id}:\n")
        print(json.dumps(gear, indent=2, default=str))
    except Exception as err:
        print(f"Could not fetch gear info: {err}")

    # Separately, look for gear information inside the activity record itself
    print("\n" + "=" * 60, "Checking for gear in main activity data:", "=" * 60, sep="\n")

    if 'gearUuid' not in first_activity:
        print("\nNo direct gear UUID in activity summary")
    else:
        print(f"\nGear UUID: {first_activity.get('gearUuid')}")

    # Every field whose name mentions "gear", case-insensitively
    gear_fields = [name for name in first_activity if 'gear' in name.lower()]
    if gear_fields:
        print("\nGear-related fields found:")
        for field in gear_fields:
            print(f"  {field}: {first_activity.get(field)}")

In [None]:
# Cell 6: Convert to DataFrame for Analysis
print("üìä ACTIVITIES DATAFRAME")
print("="*60)

# Convert activities to DataFrame
df = pd.DataFrame(activities)

print(f"\nShape: {df.shape[0]} activities, {df.shape[1]} columns")
print(f"\nColumn names:")
print(df.columns.tolist())

# Preview only the columns that are actually present: selecting a missing
# column raises KeyError, which happens for an empty activity list (the frame
# then has no columns at all) or when a field is absent from the data.
preview_columns = ['activityId', 'activityName', 'startTimeLocal', 'distance', 'duration', 'calories']
available_columns = [col for col in preview_columns if col in df.columns]
missing_columns = [col for col in preview_columns if col not in df.columns]

print(f"\n\nFirst few activities:")
if missing_columns:
    print(f"(Columns not present in the data: {missing_columns})")
display(df[available_columns].head())

print(f"\n\nData types:")
print(df.dtypes)

In [None]:
# Cell 7: Get More Activities (Last 100)
print("üì• FETCHING LARGER DATASET")
print("="*60)

# Get last 100 activities
print("\nFetching last 100 activities...")
all_activities = api.get_activities(0, 100)

print(f"‚úÖ Retrieved {len(all_activities)} activities")

# Convert to DataFrame
df_all = pd.DataFrame(all_activities)

# Show summary statistics
print("\n" + "="*60)
print("DATASET SUMMARY")
print("="*60)

print(f"\nTotal activities: {len(df_all)}")

# Each statistic is guarded by a column check: an empty result produces a
# frame without columns, and indexing a missing column raises KeyError.
if 'startTimeLocal' in df_all.columns:
    print(f"Date range: {df_all['startTimeLocal'].min()} to {df_all['startTimeLocal'].max()}")

if 'activityType' in df_all.columns:
    # Extract activity type key; non-dict values are labelled 'unknown'
    df_all['activity_type'] = df_all['activityType'].apply(lambda x: x.get('typeKey') if isinstance(x, dict) else 'unknown')
    print(f"\nActivity types:")
    print(df_all['activity_type'].value_counts())

if 'distance' in df_all.columns:
    print(f"\nTotal distance: {df_all['distance'].sum()/1000:.2f} km")
if 'duration' in df_all.columns:
    print(f"Total duration: {df_all['duration'].sum()/3600:.1f} hours")
if 'calories' in df_all.columns:
    print(f"Total calories: {df_all['calories'].sum():,.0f}")

In [None]:
# Cell 8: Save to Database
import json
import sqlite3
from contextlib import closing

print("üíæ SAVING TO DATABASE")
print("="*60)

# Database path (adjust if needed)
db_path = '../garmin.db'


def _to_sql_value(value):
    """Return dicts and lists as JSON text; pass every other value through."""
    if isinstance(value, (dict, list)):
        return json.dumps(value, default=str)
    return value


# sqlite3 can only bind None, int, float, str and bytes. The activity records
# contain nested values (e.g. the 'activityType' dicts), so those are stored
# as JSON text. A copy is written so that df_all keeps its original objects.
df_to_save = df_all.copy()
for column in df_to_save.columns:
    if df_to_save[column].map(lambda v: isinstance(v, (dict, list))).any():
        df_to_save[column] = df_to_save[column].map(_to_sql_value)

# `closing` guarantees the connection is closed even if the write or the
# verification query raises.
with closing(sqlite3.connect(db_path)) as conn:
    # Save activities to bronze table
    df_to_save.to_sql('bronze_activities', conn, if_exists='replace', index=False)

    print(f"‚úÖ Saved {len(df_to_save)} activities to 'bronze_activities' table")

    # Verify
    result = pd.read_sql("SELECT COUNT(*) as count FROM bronze_activities", conn)
    print(f"\nVerification: {result['count'][0]} records in database")

print("\n‚ú® Done!")