# API Strava

https://www.strava.com/oauth/authorize?client_id=167090&response_type=code&redirect_uri=http://www.strava.com/exchange_token&approval_prompt=force&scope=activity:read_all

In [2]:
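# Run this after clicking the link above and authorizing the app; copy the
# `code` query parameter from the redirect URL into the request below.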
import requests

# Exchange the one-time authorization code for an access/refresh token pair.
# NOTE(review): the client secret is hard-coded and the printed response
# contains live tokens -- rotate them and avoid committing this cell's output.
res = requests.post(
    "https://www.strava.com/oauth/token",
    data={
        'client_id': '167090',
        'client_secret': '6928a9b4c07b9cce9cb00a9fecba57d433ca0f8b',
        'code': 'af9d0b103fa18adccac91defaf37840b13e480da',  # replace with the real code
        'grant_type': 'authorization_code'
    },
    # Without a timeout, requests waits forever on a stalled connection.
    timeout=10,
)

print(res.json())


{'token_type': 'Bearer', 'expires_at': 1766581699, 'expires_in': 20872, 'refresh_token': 'a47269a067fc2945d964566a5918dde0bbc16f3d', 'access_token': '07ffe2ea8f2eb93c7e82761d79569fa90ce0abeb', 'athlete': {'id': 66163390, 'username': None, 'resource_state': 2, 'firstname': 'Ihsan', 'lastname': 'Ihsan', 'bio': 'Lari', 'city': '', 'state': '', 'country': None, 'sex': 'M', 'premium': False, 'summit': False, 'created_at': '2020-08-14T01:11:43Z', 'updated_at': '2024-09-01T06:55:52Z', 'badge_type_id': 0, 'weight': 68.0, 'profile_medium': 'https://dgalywyr863hv.cloudfront.net/pictures/athletes/66163390/23421588/4/medium.jpg', 'profile': 'https://dgalywyr863hv.cloudfront.net/pictures/athletes/66163390/23421588/4/large.jpg', 'friend': None, 'follower': None}}


In [3]:
import os
import json
import time
import pandas as pd
import requests
from requests.exceptions import ConnectionError, Timeout

# =================== CONFIG =================== #
# Strava API credentials. Each value can be overridden with an environment
# variable so secrets do not have to live in the notebook; the literals are
# kept only as fallbacks so existing runs behave exactly as before.
# NOTE(review): these literals are committed in source -- rotate them and
# remove the fallbacks once the environment variables are in place.
CLIENT_ID = os.environ.get("STRAVA_CLIENT_ID", "167090")
CLIENT_SECRET = os.environ.get(
    "STRAVA_CLIENT_SECRET", "6928a9b4c07b9cce9cb00a9fecba57d433ca0f8b"
)
REFRESH_TOKEN = os.environ.get(
    "STRAVA_REFRESH_TOKEN", "a47269a067fc2945d964566a5918dde0bbc16f3d"
)

# Output CSV with one row per stream sample, and the JSON file that records
# which activity ids have already been downloaded.
DATA_FILE = "all_activities_data.csv"
PROGRESS_FILE = "progress.json"

# Stream types requested from the activity streams endpoint.
STREAM_KEYS = [
    "latlng", "time", "heartrate", "cadence", "altitude", "distance",
    "grade_smooth", "power", "vertical_ratio", "vertical_oscillation", "speed"  # ✅ MODIFIED
]

# =================== TOKEN =================== #
def get_access_token(retries=3, delay=5):
    """Exchange the configured refresh token for an access token.

    Args:
        retries: Maximum number of attempts when the request fails with a
            connection error or timeout.
        delay: Seconds to wait between two attempts.

    Returns:
        The access token string, or None when the server answers with a
        non-200 status, the response carries no usable ``access_token``, or
        every attempt fails at the network level.
    """
    for attempt in range(retries):
        # Keep the try body to the network call only, so parsing problems are
        # not mistaken for (or hidden behind) connection failures.
        try:
            response = requests.post("https://www.strava.com/oauth/token", data={
                'client_id': CLIENT_ID,
                'client_secret': CLIENT_SECRET,
                'refresh_token': REFRESH_TOKEN,
                'grant_type': 'refresh_token'
            }, timeout=10)
        except (ConnectionError, Timeout) as e:
            print(f"⏳ Retry {attempt + 1}/{retries} failed. Reason: {e}")
            # Only wait when another attempt will actually follow.
            if attempt + 1 < retries:
                time.sleep(delay)
            continue

        if response.status_code != 200:
            # An HTTP error response is reported and not retried.
            print(f"⚠️ Error {response.status_code}: {response.text}")
            return None

        try:
            return response.json()['access_token']
        except (ValueError, KeyError) as e:
            # Body was not JSON or had no access_token; report instead of
            # letting the exception escape to the caller.
            print(f"⚠️ Unexpected token response: {e!r}")
            return None

    print("❌ Failed to get access token after retries.")
    return None

# =================== GET ACTIVITIES =================== #
def get_all_activities(token):
    """Fetch the athlete's activities, page by page.

    Pages of 200 activities are requested until an empty page is returned.
    If a page request returns a non-200 status, the failure is printed and
    the activities collected so far are returned.

    Args:
        token: Access token sent as a Bearer authorization header.

    Returns:
        A list of the activity objects decoded from the JSON responses.
    """
    activities = []
    headers = {"Authorization": f"Bearer {token}"}
    page = 1
    while True:
        response = requests.get(
            "https://www.strava.com/api/v3/athlete/activities",
            headers=headers,
            params={"page": page, "per_page": 200},
            # Without a timeout a stalled connection would block forever.
            timeout=30,
        )
        if response.status_code != 200:
            print("⚠️ Failed to fetch activities:", response.text)
            break
        data = response.json()
        if not data:
            # An empty page marks the end of the listing.
            break
        activities.extend(data)
        page += 1
    return activities

# =================== GET STREAMS =================== #
def get_activity_streams(activity_id, token):
    """Fetch the streams listed in STREAM_KEYS for one activity.

    Args:
        activity_id: Identifier of the activity whose streams are requested.
        token: Access token sent as a Bearer authorization header.

    Returns:
        The decoded JSON response (requested with ``key_by_type`` so it is
        keyed by stream type), or None when the status is not 200.
    """
    url = f"https://www.strava.com/api/v3/activities/{activity_id}/streams"
    params = {"keys": ",".join(STREAM_KEYS), "key_by_type": True}
    response = requests.get(
        url,
        headers={"Authorization": f"Bearer {token}"},
        params=params,
        # Without a timeout a stalled connection would block forever.
        timeout=30,
    )
    if response.status_code != 200:
        # Surface the status so failures such as rate limiting are not
        # mistaken for an activity that simply has no stream data.
        print(f"⚠️ Failed to fetch streams for {activity_id}: HTTP {response.status_code}")
        return None
    return response.json()

# =================== MAIN =================== #
def _load_progress():
    """Return (processed_ids, daily_count) from PROGRESS_FILE, or empty defaults."""
    if not os.path.exists(PROGRESS_FILE):
        return set(), 0
    with open(PROGRESS_FILE, "r") as f:
        progress = json.load(f)
    return set(progress.get("processed_ids", [])), progress.get("daily_count", 0)


def _load_saved_data(processed_ids):
    """Return (saved_rows, confirmed_ids) for a resumed run.

    Only rows whose activity id is recorded as processed are kept, and only
    ids that actually have rows in DATA_FILE stay marked as processed, so the
    CSV and the progress file agree before new activities are added.
    """
    if not processed_ids or not os.path.exists(DATA_FILE):
        return pd.DataFrame(), set()
    try:
        saved = pd.read_csv(DATA_FILE)
    except pd.errors.EmptyDataError:
        return pd.DataFrame(), set()
    if "activity_id" not in saved.columns:
        return pd.DataFrame(), set()

    saved_ids = set(saved["activity_id"].tolist())
    # Keep the original id objects so they remain JSON-serializable.
    confirmed_ids = {act_id for act_id in processed_ids if act_id in saved_ids}
    kept_rows = saved[saved["activity_id"].isin(confirmed_ids)]
    return kept_rows.reset_index(drop=True), confirmed_ids


def _build_activity_frame(act, streams):
    """Return a DataFrame with one row per stream sample of activity ``act``."""
    num_points = len(streams['time']['data'])
    latlng = streams['latlng']['data']

    def column(key):
        # Streams absent from the response become all-None columns.
        return streams.get(key, {}).get("data", [None] * num_points)

    df = pd.DataFrame({
        "timestamp": streams['time']['data'],
        "lat": [pt[0] for pt in latlng],
        "lon": [pt[1] for pt in latlng],
        "heartrate": column("heartrate"),
        "cadence": column("cadence"),
        "elevation": column("altitude"),
        "distance": column("distance"),
        "grade_smooth": column("grade_smooth"),
        "power": column("power"),
        "vertical_ratio": column("vertical_ratio"),
        "vertical_oscillation": column("vertical_oscillation"),
        "speed": column("speed"),
        "activity_id": act['id'],
        "activity_name": act.get("name", ""),
        "activity_type": act.get("type", ""),
        "activity_start": act.get("start_date"),
    })
    # Absolute time of each sample: activity start plus the per-sample offset,
    # with the "time" stream values interpreted as seconds.
    df["utc_time"] = pd.to_datetime(df["activity_start"]) + pd.to_timedelta(df["timestamp"], unit="s")
    return df


def _save_state(all_data, processed_ids, daily_count):
    """Write the collected rows to DATA_FILE and the progress to PROGRESS_FILE."""
    all_data.to_csv(DATA_FILE, index=False)
    with open(PROGRESS_FILE, "w") as f:
        json.dump({
            "processed_ids": list(processed_ids),
            "daily_count": daily_count,
        }, f)


def main():
    """Download stream data for all not-yet-processed activities.

    Obtains an access token, restores progress and previously saved rows,
    then fetches the streams of every activity whose id is not yet recorded
    as processed. After each activity the full data set and the progress file
    are rewritten, and once more in the ``finally`` block, so an interrupted
    run can be resumed. Activities without ``time`` or ``latlng`` streams are
    skipped and not marked as processed.
    """
    access_token = get_access_token()
    if not access_token:
        print("❌ Exiting: No access token.")
        return

    # Load progress if exists, together with the rows saved by earlier runs;
    # starting from an empty frame would overwrite them on the first save.
    processed_ids, daily_count = _load_progress()
    all_data, processed_ids = _load_saved_data(processed_ids)

    try:
        activities = get_all_activities(access_token)
        for i, act in enumerate(activities):
            act_id = act['id']
            if act_id in processed_ids:
                continue

            name = act.get("name", "")
            print(f"[{i+1}/{len(activities)}] Fetching: {name} (ID: {act_id})")

            streams = get_activity_streams(act_id, access_token)
            if not streams or 'time' not in streams or 'latlng' not in streams:
                print(f"❌ Skipping activity {act_id}: Missing essential stream data.")
                continue

            df = _build_activity_frame(act, streams)
            all_data = pd.concat([all_data, df], ignore_index=True)

            processed_ids.add(act_id)
            daily_count += 1

            # Save after each activity
            _save_state(all_data, processed_ids, daily_count)

    except Exception as e:
        # Top-level boundary: report the error, then fall through to the
        # final save so already downloaded data is not lost.
        print("❌ Exception occurred:", e)
    finally:
        print("✅ Final save...")
        _save_state(all_data, processed_ids, daily_count)
        print("✅ Done.")

# Start the download only when this code is executed directly (script or
# notebook cell), not when the file is imported from elsewhere.
if __name__ == "__main__":
    main()


[1/598] Fetching: Morning Run (ID: 16815483431)
[2/598] Fetching: Morning Run (ID: 16780655378)
[3/598] Fetching: Morning Run (ID: 16762803193)
[4/598] Fetching: Morning Run (ID: 16753283869)
[5/598] Fetching: Morning Run (ID: 16735626062)
[6/598] Fetching: Morning Run (ID: 16698786165)
[7/598] Fetching: Morning Run (ID: 16689137389)
[8/598] Fetching: Morning Run (ID: 16633620617)
[9/598] Fetching: Morning Run (ID: 16605916732)
[10/598] Fetching: Morning Run (ID: 16592098487)
[11/598] Fetching: Morning Run (ID: 16567202860)
[12/598] Fetching: Morning Run (ID: 16548259263)
[13/598] Fetching: Morning Run (ID: 16540245102)
[14/598] Fetching: Afternoon Run (ID: 16485806639)
❌ Skipping activity 16485806639: Missing essential stream data.
[15/598] Fetching: Morning Run (ID: 16470109268)
[16/598] Fetching: Morning Run (ID: 16450789639)
[17/598] Fetching: Morning Run (ID: 16440690530)
[18/598] Fetching: Afternoon Run (ID: 16434241299)
[19/598] Fetching: Afternoon Run (ID: 16434241259)
[20/598]

In [4]:
# Load the exported activity data from the CSV under /content
# (presumably the all_activities_data.csv written by the download cell -- confirm).
df = pd.read_csv('/content/all_activities_data.csv')

In [5]:
# Count the distinct activity ids present in the loaded data.
df['activity_id'].nunique()

67