In [1]:
import os
import sys
import json
import time
import datetime
import pandas as pd
import tqdm
import fitparse
import numpy as np
from io import BytesIO
import concurrent.futures


from dotenv import load_dotenv

from intervals import Intervals

load_dotenv()


ATHLETE_ID = ""
API_KEY = ""

try:
    ATHLETE_ID = os.environ['ATHLETE_ID']
    API_KEY = os.environ['API_KEY']
except KeyError:
    print("Please set ATHLETE_ID and API_KEY in .env file")
    sys.exit(1)


print(ATHLETE_ID)
print(API_KEY)

i171109
7hqhcubmp6ikkulwi4zanoj88


In [2]:
icu = Intervals(ATHLETE_ID, API_KEY)

START_DATE = "02/12/2023"
WELLNESS_COLS = [
    "ctl_start",
    "atl_start",
    "atl",
    "ctl",
    "date",
    "weight",
    "watt_kg",
]

start_date = datetime.datetime.strptime(START_DATE, "%d/%m/%Y").date()
today_date = datetime.date.today()

In [3]:
def fetch_wellness(start_date, end_date):
    wellnessData = icu.wellness(start_date, today_date)
    df = pd.DataFrame(wellnessData)
    df["date"] = pd.to_datetime(df["id"])
    df = df.drop(columns=["id"])
    df = df.sort_values(by="date")
    df['eftp'] = df['sportInfo'].apply(lambda x: x[0]['eftp'] if len(x) > 0 else None)
    df = df.drop(columns=["sportInfo"])

    df['weight'] = df['weight'].ffill().bfill()
    df['eftp'] = df['eftp'].ffill().bfill()

    df['ctl_start'] = df['ctl'].shift(1)
    df['atl_start'] = df['atl'].shift(1)

    df['watt_kg'] = df['eftp'] / df['weight']

    return df[WELLNESS_COLS]

tic = time.time()
wellnessData = fetch_wellness(start_date, today_date)
toc = time.time()
print(f"Time taken: {toc-tic:.2f}s")

Time taken: 0.39s


In [4]:
activityList = icu.activities(start_date, today_date)
activityIdList = [activity['id'] for activity in activityList]

In [5]:
def retrieve_activity_data(activity_id):
    try:
        fit_bytes = icu.activity_fit_data(activity_id)
        fitfile = fitparse.FitFile(BytesIO(fit_bytes))

        records = []
        for record in fitfile.get_messages("record"):
            record_dict = {}
            for record_data in record:
                record_dict[record_data.name] = record_data.value
            records.append(record_dict)

        df = pd.DataFrame(records)
        return df
    except Exception as e:
        print(f"Error fetching activity {activity_id}: {e}")
        return pd.DataFrame()

def fetch_and_combine_activity_data(activity_ids, multi_threading=True):
    dfs = []
    if multi_threading:
        with concurrent.futures.ThreadPoolExecutor() as executor:
            results = list(tqdm.tqdm(executor.map(retrieve_activity_data, activity_ids), total=len(activity_ids)))
            for activity_id, result in zip(activity_ids, results):
                if not result.empty:
                    result['activity_id'] = activity_id
                    dfs.append(result)
    else:
        for activity_id in tqdm.tqdm(activity_ids):
            result = retrieve_activity_data(activity_id)
            if not result.empty:
                result['activity_id'] = activity_id
                dfs.append(result)

    return pd.concat(dfs, ignore_index=True)

tic = time.time()
fetched_activity_data = fetch_and_combine_activity_data(activityIdList)
toc = time.time()

print(f"Time taken: {toc-tic:.2f}s")

100%|██████████| 55/55 [00:49<00:00,  1.12it/s]

Time taken: 49.15s





In [6]:
fetched_activity_data

Unnamed: 0,altitude,cadence,distance,enhanced_altitude,enhanced_speed,grade,heart_rate,position_lat,position_long,power,speed,temperature,timestamp,activity_id
0,30.6,72.0,0.0,30.6,0.000,0.0,89.0,438134738.0,-52892950.0,118.0,0.000,20.0,2024-05-08 15:18:23,11362664941
1,30.6,72.0,0.4,30.6,0.380,0.0,90.0,438134784.0,-52892905.0,96.0,0.380,20.0,2024-05-08 15:18:24,11362664941
2,30.6,0.0,1.8,30.6,0.905,0.0,91.0,438134647.0,-52892785.0,0.0,0.905,20.0,2024-05-08 15:18:25,11362664941
3,30.6,0.0,4.3,30.6,1.450,0.0,92.0,438134556.0,-52892484.0,0.0,1.450,20.0,2024-05-08 15:18:26,11362664941
4,30.6,0.0,7.1,30.6,1.783,0.0,92.0,438134419.0,-52892165.0,0.0,1.783,20.0,2024-05-08 15:18:27,11362664941
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
316013,25.8,,20027.1,25.8,4.940,-1.2,118.0,438135284.0,-52894264.0,,4.940,11.0,2023-12-02 10:17:05,10315175524
316014,25.8,,20031.2,25.8,4.780,0.0,117.0,438135011.0,-52893860.0,,4.780,11.0,2023-12-02 10:17:06,10315175524
316015,25.8,,20034.0,25.8,4.400,-2.7,118.0,438134875.0,-52893513.0,,4.400,11.0,2023-12-02 10:17:07,10315175524
316016,25.8,,20034.6,25.8,3.500,-5.9,119.0,438134875.0,-52893440.0,,3.500,11.0,2023-12-02 10:17:08,10315175524


In [12]:


def summarize_activity_data(activity_df, wellness_df):
    activity_df['timestamp'] = pd.to_datetime(activity_df['timestamp'])
    activity_df.sort_values(by=['activity_id', 'timestamp'], inplace=True)

    activity_df['altitude_diff'] = activity_df['altitude'].diff().fillna(0)
    diffs = activity_df['distance'].diff()
    slopes = np.where(diffs != 0, activity_df['altitude_diff'] / diffs * 100, 0)
    activity_df['slope'] = slopes
    activity_df['slope'].fillna(0, inplace=True)

    initial_time = activity_df['timestamp'].iloc[0]
    activity_df['elapsed_time'] = (activity_df['timestamp'] - initial_time).dt.total_seconds()

    activity_df['time_of_day'] = pd.cut(activity_df['timestamp'].dt.hour, bins=TIME_OF_DAY_CUTS, labels=TIME_OF_DAY_LABELS, right=False)
    activity_df['slope_color'] = pd.cut(activity_df['slope'], bins=SLOPE_CUTS, labels=SLOPE_LABELS)

    activity_df['date'] = activity_df['timestamp'].dt.date.astype(str)
    wellness_df['date'] = wellness_df['date'].astype(str)
    agg_df = pd.merge(activity_df, wellness_df, on="date", how="left")

    agg_df = agg_df.groupby(["activity_id", "date"]).agg({
        'distance': 'max',
        'altitude_diff': lambda x: x[x > 0].sum(),
        'elapsed_time': 'max',
        'time_of_day': 'first',
        'atl_start': 'first',
        'atl': 'first',
        'ctl': 'first',
        'ctl_start': 'first',
        'weight': 'first',
        'watt_kg': 'first'
    }).reset_index()

    # Remove rows with any na
    agg_df = agg_df.dropna()

    df_slope_color = activity_df.groupby(
        ["activity_id", "slope_color"], observed=False).agg(
        distance=("distance", "sum")
    ).reset_index()
    df_slope_color = df_slope_color.pivot(index="activity_id", columns="slope_color", values="distance").fillna(0)
    df_slope_color = df_slope_color.div(df_slope_color.sum(axis=1), axis=0)
    df_slope_color.columns = [f"{col}_pct" for col in df_slope_color.columns]
    agg_df = pd.merge(agg_df, df_slope_color, on="activity_id", how="left")

    return agg_df

summarizedData = summarize_activity_data(fetched_activity_data, wellnessData)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  activity_df['slope'].fillna(0, inplace=True)


In [None]:
# Save to csv file

summarizedData.to_csv("summarizedData.csv", index=False)
