<a href="https://colab.research.google.com/github/anw-g01/strava-data-analysis/blob/main/etl_pipeline.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Get Strava Client

- Manage the setup and [OAuth authentication](https://developers.strava.com/docs/authentication/) workflow for accessing the [Strava API](https://developers.strava.com/).

- Initialise an authenticated client capable of securely fetching athlete data.

In [1]:
!pip install stravalib --quiet
from stravalib import Client

from google.colab import userdata
import sys
import logging
import warnings
from itertools import cycle
import pandas as pd
import numpy as np

# ---- notebook-wide configuration ---- #

# keep stravalib's rate-limiter logger quiet (only ERROR and above are emitted):
logging.getLogger("stravalib.util.limiter").setLevel(level=logging.ERROR)

# hide DeprecationWarning messages originating from jupyter_client modules:
warnings.filterwarnings(
    action="ignore",
    category=DeprecationWarning,
    module="jupyter_client",
)

# display every column of a DataFrame instead of truncating wide frames:
pd.options.display.max_columns = None
# pd.reset_option("display.max_columns")      # to reset to default (if needed)

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/125.4 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m125.4/125.4 kB[0m [31m4.4 MB/s[0m eta [36m0:00:00[0m
[?25h[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/306.8 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m306.8/306.8 kB[0m [31m15.5 MB/s[0m eta [36m0:00:00[0m
[?25h

In [2]:
# GitHub access token for the repository, read through Colab's `userdata` helper
# so that it is not hard-coded in the notebook:
token = userdata.get("GITHUB_STRAVA_REPO_TOKEN")
username = "anw-g01"
repo = "strava-data-analysis"

# clone repo to utilise get_athlete function from oauth.py module (see GitHub):
# NOTE(review): the token is interpolated straight into the clone URL, so it is
# presumably kept in the cloned repo's remote URL - confirm this is acceptable.
# NOTE(review): re-running this cell attempts the clone again into the same folder.
!git clone https://{token}@github.com/{username}/{repo}

sys.path.append(f"/content/{repo}")    # add directory containing oauth.py
from oauth import get_athlete           # only importable after the sys.path update above

# refresh an access token, authenticate the athlete, and return an authorised client:
# (CLIENT is read as a global by extract() and transform() further down)
CLIENT = get_athlete(
    client_id=userdata.get("CLIENT_ID"),
    client_secret=userdata.get("CLIENT_SECRET"),
    refresh_token=userdata.get("REFRESH_TOKEN2"),
    verbose=False
)

Cloning into 'strava-data-analysis'...
remote: Enumerating objects: 27, done.[K
remote: Counting objects: 100% (27/27), done.[K
remote: Compressing objects: 100% (20/20), done.[K
remote: Total 27 (delta 9), reused 16 (delta 4), pack-reused 0 (from 0)[K
Receiving objects: 100% (27/27), 20.20 KiB | 3.37 MiB/s, done.
Resolving deltas: 100% (9/9), done.

Successfully authenticated athlete: ANW G


# Extract

- Retrieve all activity data from the authenticated Strava client.

- Organise raw activity records into a clean, structured dataset suitable for processing.

In [3]:
def extract(detailled: bool = False) -> list[dict]:
    """Fetch every activity of the authenticated athlete as a list of flat records.

    Uses the globally authenticated ``CLIENT`` object, so the client set-up cell
    must have been run first.

    Parameters
    ----------
    detailled : bool, default False
        When True, one additional ``CLIENT.get_activity`` request is made per
        activity to add the ``description``, ``calories`` and ``device_name``
        fields to each record.

    Returns
    -------
    list[dict]
        One dict per activity, in the order the API returns them. Fields that
        are absent on an activity are stored as ``None``.
    """

    # extract all activities (public + private):
    activities = CLIENT.get_activities(limit=None)
    print("extracting and storing all activites...\n")

    records = []    # store all activities as a list of dicts
    for i, a in enumerate(activities, start=1):

        # "\r" rewrites the same output line, giving a live counter:
        print(f"\rno. of activities extracted: {i}", end="")

        # these two fields are used arithmetically, so a missing value must be
        # passed through as None instead of raising a TypeError:
        distance_m = a.distance
        elev_gain_m = a.total_elevation_gain

        rec = {
            # general activity metrics:
            "activity_id": a.id,
            "name": a.name,
            "type": a.type,
            "date": a.start_date.date(),                                # UTC date
            "start_time": a.start_date.time(),                          # UTC time only
            "start_date_local": getattr(a, "start_date_local", None),
            "timezone": getattr(a, "timezone", None),
            "distance_km": distance_m / 1000 if distance_m is not None else None,   # m -> km
            "moving_time_s": a.moving_time,                             # s
            "elapsed_time_s": a.elapsed_time,                           # s
            "avg_speed_mps": getattr(a, "average_speed", None),         # m/s
            "max_speed_mps": getattr(a, "max_speed", None),             # m/s
            "total_elev_gain": float(elev_gain_m) if elev_gain_m is not None else None,  # m
            "highest_elev": getattr(a, "elev_high", None),              # m
            "lowest_elev": getattr(a, "elev_low", None),                # m
            "visibility": getattr(a, "visibility", None),
            "num_comments": getattr(a, "comment_count", None),
            "num_achievements": getattr(a, "achievement_count", None),
            "num_kudos": getattr(a, "kudos_count", None),
            "is_manual": getattr(a, "manual", None),    # auto-recorded vs manually entered activities.
            # running specific metrics:
            "avg_hr": getattr(a, "average_heartrate", None),            # bpm
            "max_hr": getattr(a, "max_heartrate", None),                # bpm
            "avg_cadence_spm": getattr(a, "average_cadence", None),     # spm (strides per minute)
            "gear_id": getattr(a, "gear_id", None),     # use shoe mapping from client.get_athlete().shoes for shoe names
            # note: average_pace not exposed directly by API - compute later as moving_time/distance
        }

        if detailled:
            # extra detailled metrics (costs one extra API request per activity):
            d = CLIENT.get_activity(a.id)
            rec["description"] = getattr(d, "description", None)  # activity description
            rec["calories"] = getattr(d, "calories", None)
            rec["device_name"] = getattr(d, "device_name", None)

        records.append(rec)

    # get statistics on no. of total public activities:
    # NOTE(review): only the run, ride and swim totals are summed, so other
    # activity types are presumably missing from this count - confirm that it
    # really represents "public" activities.
    athlete = CLIENT.get_athlete()
    stats = CLIENT.get_athlete_stats(athlete.id)
    runs, rides, swims = stats.all_run_totals, stats.all_ride_totals, stats.all_swim_totals
    public_activities = runs.count + rides.count + swims.count

    # an athlete without any activities must not trigger a ZeroDivisionError:
    total = len(records)
    public_pct = public_activities / total * 100 if total else 0.0

    print(f"\n\n{public_activities}/{total} ({public_pct:.1f}% are public)")

    return records

In [4]:
# pull every activity with the default detailled=False (no extra per-activity requests):
records = extract()

extracting and storing all activites...

no. of activities extracted: 411

402/411 (97.8% are public)


View the extracted data as a `DataFrame` object:

In [5]:
# one row per activity, one column per key of the extracted records:
df = pd.DataFrame(records)

# random preview - no random_state is passed, so the rows shown differ on every run:
df.sample(n=5)

Unnamed: 0,activity_id,name,type,date,start_time,start_date_local,timezone,distance_km,moving_time_s,elapsed_time_s,avg_speed_mps,max_speed_mps,total_elev_gain,highest_elev,lowest_elev,visibility,num_comments,num_achievements,num_kudos,is_manual,avg_hr,max_hr,avg_cadence_spm,gear_id
93,14544787510,8K (151 bpm),root='Run',2025-05-20,20:44:41,2025-05-20 21:44:41+00:00,(GMT+00:00) Europe/London,8.0319,3454,3454,2.325,3.277,24.6,22.7,12.7,followers_only,0,0,0,False,151.4,162.0,81.2,g18035503
238,12098687020,8K (163 bpm),root='Run',2024-08-08,21:10:19,2024-08-08 22:10:19+00:00,(GMT+00:00) Europe/London,8.0078,3104,3128,2.58,6.14,22.5,22.7,12.8,followers_only,0,1,1,False,162.8,179.0,,g18035503
217,13007712773,Lunch Run,root='Run',2024-09-08,10:10:36,2024-09-08 11:10:36+00:00,(GMT+00:00) Europe/London,3.2041,1534,1912,2.089,4.38,9.8,108.2,96.4,followers_only,0,2,0,False,162.8,185.0,81.0,g18035503
318,13007716642,Morning Run,root='Run',2024-02-04,05:51:53,2024-02-04 05:51:53+00:00,(GMT+00:00) Europe/London,14.8749,7150,7304,2.08,5.8,30.8,21.8,6.6,followers_only,0,0,0,False,160.9,184.0,78.0,g18035503
174,13130843253,10K (156 bpm),root='Run',2024-12-16,21:14:06,2024-12-16 21:14:06+00:00,(GMT+00:00) Europe/London,10.0186,4600,4608,2.178,3.633,244.0,24.8,-59.0,followers_only,0,0,0,False,155.7,169.0,79.3,g18035503


# Transform

- Perform unit conversions, derive additional metrics, and apply formatting for personalisation.

In [6]:
def transform(df: pd.DataFrame) -> pd.DataFrame:
    """Convert units, tidy labels and derive pace metrics for extracted activities.

    The input frame is copied first, so the caller's DataFrame is never
    modified; every added or overwritten column exists on the returned frame only.

    Requires the globally authenticated ``CLIENT`` (used to look up shoe names).

    Parameters
    ----------
    df : pd.DataFrame
        Raw activities built from the ``extract()`` records. Columns read here:
        ``avg_speed_mps``, ``max_speed_mps``, ``distance_km``, ``moving_time_s``,
        ``elapsed_time_s``, ``date``, ``start_date_local``, ``visibility``,
        ``gear_id``, ``type`` and ``avg_cadence_spm``.

    Returns
    -------
    pd.DataFrame
        A new frame containing the original and derived columns, with numeric
        values rounded to 2 d.p.

    Notes
    -----
    Feed this function raw data only: applying it to an already transformed
    frame doubles the run cadence again and turns the formatted visibility
    labels into NaN.
    """

    # work on a copy so the frame passed in by the caller stays untouched:
    df = df.copy()

    # ------------ UNIT CONVERSIONS ------------ #

    # speed (m/s to km/h and mph)
    df["avg_speed_km_h"] = (df["avg_speed_mps"] * 3.6).round(2)     # m/s -> km/h
    df["max_speed_km_h"] = (df["max_speed_mps"] * 3.6).round(2)
    df["avg_speed_mph"] = (df["avg_speed_mps"] * 2.23694).round(2)  # m/s -> mph
    df["max_speed_mph"] = (df["max_speed_mps"] * 2.23694).round(2)

    # distance (km to miles)
    df["distance_miles"] = (df["distance_km"] * 0.621371).round(2)  # km -> miles

    # time (timedelta objects)
    df["moving_time"] = pd.to_timedelta(df["moving_time_s"], unit="s")      # s -> timedelta
    df["elapsed_time"] = pd.to_timedelta(df["elapsed_time_s"], unit="s")    # s -> timedelta

    # date (datetime objects)
    # NOTE(review): "date" comes from the UTC start while the start/end times
    # below are local, so they may disagree around midnight - confirm intent.
    df["date"] = pd.to_datetime(df["date"])                                 # convert dates to datetime for ordering
    df["start_date_local"] = pd.to_datetime(df["start_date_local"])         # convert to datetime first
    df["end_time_local"] = df["start_date_local"] + df["elapsed_time"]      # compute end datetime

    # optional: extract just the time components (LOCAL);
    # this replaces the UTC "start_time" stored by extract():
    df["start_time"] = df["start_date_local"].dt.time
    df["end_time"] = df["end_time_local"].dt.time

    # ------------ FORMATTING + CLEANING ------------ #

    # values missing from this mapping become NaN:
    df["visibility"] = df["visibility"].map({
        "everyone": "Everyone",
        "followers_only": "Followers Only",
        "only_me": "Only Me",
    })

    # map the gear IDs to the shoe name:
    athlete = CLIENT.get_athlete()
    shoe_mapping = {gear.id: gear.name for gear in athlete.shoes}
    df["shoe_used"] = df["gear_id"].map(shoe_mapping)

    # clean the activity "type" column, RelaxedActivityType:
    raw_type = df["type"]
    type_text = raw_type.astype(str)
    # any character except ', match 1+; expand=False returns a Series (not a 1-column frame)
    type_root = type_text.str.extract(r"root='([^']+)'", expand=False)
    # values that are not in the root='...' form keep their plain text instead
    # of being lost as NaN; genuinely missing types stay missing:
    df["type"] = type_root.fillna(type_text.where(raw_type.notna()))

    # average running cadence (only runs are doubled as it's per foot initially):
    df.loc[df["type"] == "Run", "avg_cadence_spm"] *= 2

    # ------ DERIVED METRICS ------ #

    # pace (as time deltas), derived from the already rounded speed columns:
    for speed_col, pace_col in zip(
        ["avg_speed_km_h", "max_speed_km_h", "avg_speed_mph", "max_speed_mph"],
        ["avg_pace_km", "max_pace_km", "avg_pace_mile", "max_pace_mile"]
    ):
        # mask zeros to avoid zero-division errors (the speed column itself becomes NaN):
        df.loc[df[speed_col] <= 0, speed_col] = np.nan

        # create new pace column (converts to min/km and min/mile):
        # 1 / speed is hours per unit distance, * 60 turns it into minutes
        df[pace_col] = pd.to_timedelta(1 / df[speed_col] * 60, unit="min", errors="coerce")

    return df.round(2)  # all numerics to 2 d.p.

In [7]:
# Always transform a fresh frame built from the raw `records`, so this cell can be
# re-run safely: feeding transform() its own output would double the run cadence
# again and turn the already formatted "visibility"/"type" values into NaN.
df = transform(pd.DataFrame(records))    # overwrite existing DataFrame

# random preview (rows differ on every run):
df.sample(n=5)

Unnamed: 0,activity_id,name,type,date,start_time,start_date_local,timezone,distance_km,moving_time_s,elapsed_time_s,avg_speed_mps,max_speed_mps,total_elev_gain,highest_elev,lowest_elev,visibility,num_comments,num_achievements,num_kudos,is_manual,avg_hr,max_hr,avg_cadence_spm,gear_id,avg_speed_km_h,max_speed_km_h,avg_speed_mph,max_speed_mph,distance_miles,moving_time,elapsed_time,end_time_local,end_time,shoe_used,avg_pace_km,max_pace_km,avg_pace_mile,max_pace_mile
335,10407132176,60 mins (161 bpm),Run,2023-12-19,21:48:40,2023-12-19 21:48:40+00:00,(GMT+00:00) Europe/London,8.15,3559,3630,2.29,4.84,24.0,22.9,12.8,Followers Only,0,6,1,False,161.2,182.0,,g13493726,8.25,17.41,5.12,10.82,5.07,0 days 00:59:19,0 days 01:00:30,2023-12-19 22:49:10+00:00,22:49:10,Nike Air Zoom Pegasus 29,0 days 00:07:16.363636362,0 days 00:03:26.777713956,0 days 00:11:43.125000,0 days 00:05:32.717190390
218,12356895820,5 x 200m,Run,2024-09-08,11:10:12,2024-09-08 11:10:12+00:00,(GMT+00:00) Europe/London,5.24,1848,2086,2.84,6.52,13.2,109.5,96.4,Followers Only,0,0,0,False,162.3,188.0,,g13493726,10.21,23.46,6.34,14.58,3.26,0 days 00:30:48,0 days 00:34:46,2024-09-08 11:44:58+00:00,11:44:58,Nike Air Zoom Pegasus 29,0 days 00:05:52.595494614,0 days 00:02:33.452685419,0 days 00:09:27.823343850,0 days 00:04:06.913580244
118,13997509930,6 x 1K,Run,2025-03-26,21:10:24,2025-03-26 21:10:24+00:00,(GMT+00:00) Europe/London,17.01,6253,6426,2.72,3.8,44.9,39.0,12.0,Followers Only,0,5,0,False,160.5,185.0,166.8,g18035503,9.8,13.68,6.09,8.5,10.57,0 days 01:44:13,0 days 01:47:06,2025-03-26 22:57:30+00:00,22:57:30,Nike Invincible Run 3,0 days 00:06:07.346938776,0 days 00:04:23.157894738,0 days 00:09:51.133004928,0 days 00:07:03.529411764
270,11559634594,11K (173 bpm),Run,2024-06-02,22:11:44,2024-06-02 22:11:44+00:00,(GMT+00:00) Europe/London,11.61,4708,4869,2.46,6.38,11.3,14.3,5.9,Followers Only,0,3,0,False,172.9,189.0,,g18035503,8.87,22.97,5.51,14.27,7.21,0 days 01:18:28,0 days 01:21:09,2024-06-02 23:32:53+00:00,23:32:53,Nike Invincible Run 3,0 days 00:06:45.862457724,0 days 00:02:36.726164562,0 days 00:10:53.357531760,0 days 00:04:12.277505256
329,10554619513,2 hr (154 bpm),Run,2024-01-14,06:03:49,2024-01-14 06:03:49+00:00,(GMT+00:00) Europe/London,13.52,6872,7107,1.97,5.49,22.2,18.9,7.3,Followers Only,0,2,1,False,153.7,185.0,,,7.08,19.76,4.4,12.28,8.4,0 days 01:54:32,0 days 01:58:27,2024-01-14 08:02:16+00:00,08:02:16,,0 days 00:08:28.474576272,0 days 00:03:02.186234820,0 days 00:13:38.181818184,0 days 00:04:53.159609118


# Load

Export the processed DataFrame to a suitable file format such as CSV, pickle, or Parquet.

- CSV files will not store pandas-specific data types like `datetime64[ns]` or `timedelta64[ns]`.

- Parquet is a columnar storage format that is very efficient for large datasets and preserves data types.



In [8]:
# sort by most recent activities first:
# - sort_values() returns a new DataFrame, so the result has to be assigned back
# - ascending=False puts the latest date at the top
# - the stable sort keeps the existing relative order of same-day activities
df = df.sort_values(by="date", ascending=False, kind="stable").reset_index(drop=True)

# export as a CSV file:
df.to_csv("all_activities.csv", index=False)
print("DataFrame saved to 'all_activities.csv'")

DataFrame saved to 'all_activities.csv'


In [9]:
# export as a Parquet file as well (index=False: the row index is not written):
df.to_parquet('all_activities.parquet', index=False)
print("DataFrame saved to 'all_activities.parquet'")

DataFrame saved to 'all_activities.parquet'


Preview exported dataset:

In [10]:
# read the exported file back in; from here on `df` is the on-disk version:
df = pd.read_parquet('all_activities.parquet')

# column names, non-null counts and dtypes of the re-loaded frame:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 411 entries, 0 to 410
Data columns (total 38 columns):
 #   Column            Non-Null Count  Dtype              
---  ------            --------------  -----              
 0   activity_id       411 non-null    int64              
 1   name              411 non-null    object             
 2   type              411 non-null    object             
 3   date              411 non-null    datetime64[ns]     
 4   start_time        411 non-null    object             
 5   start_date_local  411 non-null    datetime64[ns, UTC]
 6   timezone          411 non-null    object             
 7   distance_km       411 non-null    float64            
 8   moving_time_s     411 non-null    int64              
 9   elapsed_time_s    411 non-null    int64              
 10  avg_speed_mps     411 non-null    float64            
 11  max_speed_mps     411 non-null    float64            
 12  total_elev_gain   411 non-null    float64            
 13  highe

In [11]:
# columns (and their order) shown in the preview below - display only, `df` itself is unchanged:
column_order = [
    # when
    "date",
    "start_time",
    "end_time",
    # what
    "name",
    "type",
    # how far / how long
    "distance_km",
    "distance_miles",
    "moving_time",
    "elapsed_time",
    # averages
    "avg_pace_km",
    "avg_pace_mile",
    "avg_speed_mph",
    "avg_cadence_spm",
    # heart rate
    "avg_hr",
    "max_hr",
    # maxima
    "max_pace_km",
    "max_speed_mph",
    # elevation
    "total_elev_gain",
    "lowest_elev",
    "highest_elev",
    # gear + identifier
    "shoe_used",
    "activity_id",
    # newly merged columns (not available from extract() function):
    # "desc",
    # "relative_effort",
    # "wind_speed",
    # "max_grade",
    # "calories",
    # "avg_temp",
    # "humidity",
]

# share of activities whose type is "Run":
num_runs = len(df.loc[df["type"] == "Run"])
pct_runs = num_runs / len(df) * 100

print(f"no. of run activities: {num_runs}/{len(df)} ({pct_runs:.1f}%)\n")

# random preview restricted to the selected columns:
df.loc[:, column_order].sample(n=5)

no. of run activities: 382/411 (92.9%)



Unnamed: 0,date,start_time,end_time,name,type,distance_km,distance_miles,moving_time,elapsed_time,avg_pace_km,avg_pace_mile,avg_speed_mph,avg_cadence_spm,avg_hr,max_hr,max_pace_km,max_speed_mph,total_elev_gain,lowest_elev,highest_elev,shoe_used,activity_id
173,2024-12-17,21:10:30,22:03:20,7K (155 bpm),Run,7.02,4.36,0 days 00:52:42,0 days 00:52:50,0 days 00:07:30.563204003,0 days 00:12:05.806451610,4.96,159.4,155.4,162.0,0 days 00:05:11.149524630,7.19,212.0,-15.6,28.2,Nike Invincible Run 3,13138115498
240,2024-08-07,22:06:39,23:01:50,7K (161 bpm),Run,7.51,4.67,0 days 00:55:07,0 days 00:55:11,0 days 00:07:20.636474910,0 days 00:11:48.661417320,5.08,,160.5,185.0,0 days 00:02:26.222583264,15.3,22.3,12.8,22.7,Nike Invincible Run 3,12090355708
7,2025-12-01,21:45:38,22:55:36,9K (Gone Wrong),Run,9.68,6.02,0 days 01:07:16,0 days 01:09:58,0 days 00:06:56.666666664,0 days 00:11:10.391061450,5.37,160.4,157.4,179.0,0 days 00:04:05.064669846,9.13,26.6,10.6,22.6,Nike Invincible Run 3,16623217785
75,2025-06-25,21:55:18,22:53:24,9K (160 bpm),Run,9.03,5.61,0 days 00:58:06,0 days 00:58:06,0 days 00:06:26.266094418,0 days 00:10:21.761658030,5.79,165.4,159.7,173.0,0 days 00:04:44.810126580,7.85,26.2,11.1,22.7,Nike Invincible Run 3,14917127810
47,2025-08-23,21:38:32,22:51:51,12K (154 bpm),Run,10.02,6.22,0 days 01:13:19,0 days 01:13:19,0 days 00:07:19.024390242,0 days 00:11:47.269155204,5.09,160.6,153.7,172.0,0 days 00:03:09.075630252,11.83,26.6,11.1,22.6,Nike Invincible Run 3,15562790858


# Optional Processing

## Merge Additional Data

Strava supports a manual data download that includes additional data and variables such as activity descriptions, weather conditions, and calorie estimates. Accessing these fields via the API typically requires cumbersome, non-paginated calls.

The downloaded files can be optionally merged with the extracted dataset to enable further analysis and produce a richer dataset.

To manually download your Strava data navigate to: `My Account` > `Download or Delete Your Account` > `Download Request` > `Request Your Archive`.

In [18]:
# lets this optional section start from the exported file rather than from the cells above:
df = pd.read_parquet("all_activities.parquet")    # read dataset if not loaded

In [21]:
# only keep columns that weren't extracted with the API
# ("Activity ID" is needed as the join key and is dropped again after merging):
columns = [
    "Activity ID",
    "Activity Description",
    "Relative Effort",
    "Max Grade",
    "Calories",
    "Average Temperature",
    "Humidity",
    "Wind Speed",
    "Media",
]

# activities CSV downloaded manually from Strava (add to notebook instance):
df2 = pd.read_csv("activities_from_strava.csv")[columns]

# left join on the activity ID (every API row is kept), then remove the
# duplicate key column and switch to the snake_case naming of the API columns:
x = (
    df.merge(df2, how="left", left_on="activity_id", right_on="Activity ID")
    .drop(columns="Activity ID")
    .rename(columns={
        "Activity Description": "desc",
        "Relative Effort": "relative_effort",
        "Max Grade": "max_grade",
        "Calories": "calories",
        "Average Temperature": "avg_temp",
        "Humidity": "humidity",
        "Wind Speed": "wind_speed",
        "Media": "media",   # requires cleaning later for counts
    })
)

x.sample(n=5)

Unnamed: 0,activity_id,name,type,date,start_time,start_date_local,timezone,distance_km,moving_time_s,elapsed_time_s,avg_speed_mps,max_speed_mps,total_elev_gain,highest_elev,lowest_elev,visibility,num_comments,num_achievements,num_kudos,is_manual,avg_hr,max_hr,avg_cadence_spm,gear_id,avg_speed_km_h,max_speed_km_h,avg_speed_mph,max_speed_mph,distance_miles,moving_time,elapsed_time,end_time_local,end_time,shoe_used,avg_pace_km,max_pace_km,avg_pace_mile,max_pace_mile,desc,relative_effort,max_grade,calories,avg_temp,humidity,wind_speed,media
101,14383912955,7K w/ Julia sis (141 bpm),Run,2025-05-05,11:45:18,2025-05-05 11:45:18+00:00,(GMT+00:00) Europe/London,7.05,3467,3504,2.04,3.85,21.4,22.7,12.7,Followers Only,0,0,0,False,141.1,162.0,161.8,g18035503,7.33,13.86,4.55,8.61,4.38,0 days 00:57:47,0 days 00:58:24,2025-05-05 12:43:42+00:00,12:43:42,Nike Invincible Run 3,0 days 00:08:11.132332878,0 days 00:04:19.740259740,0 days 00:13:11.208791208,0 days 00:06:58.118466900,Amazing low effort HR.,18.0,6.0,473.0,,,,media/99D7877E-A36B-420B-99E6-187564C8C0C6.jpg...
323,10644278282,2 hr (155 bpm),Run,2024-01-28,05:41:37,2024-01-28 05:41:37+00:00,(GMT+00:00) Europe/London,14.46,6673,7062,2.17,5.92,24.7,18.9,6.1,Followers Only,0,3,1,False,155.1,185.0,,g13493726,7.8,21.31,4.85,13.24,8.98,0 days 01:51:13,0 days 01:57:42,2024-01-28 07:39:19+00:00,07:39:19,Nike Air Zoom Pegasus 29,0 days 00:07:41.538461538,0 days 00:02:48.934772406,0 days 00:12:22.268041236,0 days 00:04:31.903323264,Remarkable turn of leg strength coming back in...,175.0,16.7,,,,,media/A8509BEC-F37A-4E84-A0C3-04E757241347.jpg
55,15369886229,9K (153 bpm),Run,2025-08-06,21:29:02,2025-08-06 21:29:02+00:00,(GMT+00:00) Europe/London,9.02,3801,3801,2.37,3.53,26.4,22.6,11.3,Followers Only,0,0,0,False,152.9,165.0,162.2,g18035503,8.54,12.72,5.31,7.9,5.6,0 days 01:03:21,0 days 01:03:21,2025-08-06 22:32:23+00:00,22:32:23,Nike Invincible Run 3,0 days 00:07:01.545667446,0 days 00:04:43.018867926,0 days 00:11:17.966101692,0 days 00:07:35.696202534,"Can’t be too mad tbf. Tired legs, shallow brea...",64.0,6.0,623.0,,,,
180,13041536520,7K (157 bpm),Run,2024-12-03,20:56:27,2024-12-03 20:56:27+00:00,(GMT+00:00) Europe/London,7.02,3247,3252,2.16,3.48,24.0,27.6,11.2,Followers Only,0,1,0,False,157.0,175.0,158.4,g18035503,7.78,12.51,4.83,7.77,4.36,0 days 00:54:07,0 days 00:54:12,2024-12-03 21:50:39+00:00,21:50:39,Nike Invincible Run 3,0 days 00:07:42.724935732,0 days 00:04:47.769784170,0 days 00:12:25.341614904,0 days 00:07:43.320463320,"Absolute flow state leading up to mid-way, but...",74.0,11.3,471.0,,,,
82,14791774359,9K (153 bpm),Run,2025-06-13,21:50:13,2025-06-13 21:50:13+00:00,(GMT+00:00) Europe/London,9.01,3950,3950,2.28,3.29,26.2,22.7,11.1,Followers Only,0,0,0,False,153.2,163.0,162.2,g18035503,8.22,11.84,5.1,7.36,5.6,0 days 01:05:50,0 days 01:05:50,2025-06-13 22:56:03+00:00,22:56:03,Nike Invincible Run 3,0 days 00:07:17.956204380,0 days 00:05:04.054054056,0 days 00:11:45.882352944,0 days 00:08:09.130434780,Very slow but better leg sensations. Heaviest ...,69.0,6.1,633.0,,,,


In [20]:
# overwrite if happy with merging:
# x.to_parquet('all_activities.parquet', index=False)
# print("DataFrame saved to 'all_activities.parquet'")