In [None]:
import os
import sys

from dotenv import load_dotenv
import pandas as pd
import requests


# Put the parent of the current working directory on the import path so the
# `src.*` imports below can be resolved; skipped when it is already listed.
PROJECT_ROOT = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
if PROJECT_ROOT not in sys.path:
    sys.path.append(PROJECT_ROOT)

from src.ingestion.auth import strava_auth  # noqa: E402
from src.ingestion.extractors.strava_extractor import StravaExtractor  # noqa: E402

In [None]:
# Load variables from a .env file into the process environment.
load_dotenv()

# Each value is None when the corresponding variable is not set.
# NOTE(review): presumably the Strava OAuth credentials; none of these three
# names is referenced by the cells shown here — confirm they are still needed.
CLIENT_ID = os.environ.get('CLIENT_ID')
CLIENT_SECRET = os.environ.get('CLIENT_SECRET')
REFRESH_TOKEN = os.environ.get('REFRESH_TOKEN')

In [None]:
# Obtain an access token through the project's strava_auth helper; it is used
# below both by StravaExtractor and by the direct gear requests.
# NOTE(review): presumably reads the credentials from the environment — confirm.
access_token = strava_auth.get_access_token()

In [None]:
# Build the extractor with the token and fetch activities.
# NOTE(review): `days=3` presumably limits the fetch to the last 3 days —
# confirm against StravaExtractor.fetch_all_activities.
extractor = StravaExtractor(access_token)
all_activities = extractor.fetch_all_activities(days=3)
# Bare last expression: the cell displays the raw fetch result.
all_activities

In [None]:
# One row per activity: call model_dump() on every fetched item and build the
# frame from the resulting records in a single constructor call.
df_all_activities = pd.DataFrame(
    [activity.model_dump() for activity in all_activities]
)

In [None]:
# Preview the first rows; the column display limit is lifted only inside this
# `with` block so every column of the frame is shown.
with pd.option_context('display.max_columns', None):
    display(df_all_activities.head())

In [None]:
# Number of activities returned by the fetch above.
len(all_activities)

In [None]:
# Empty accumulator for the gear records collected further down.
gears = []
# Distinct gear ids referenced by the activities (shown as the cell output).
df_all_activities['gear_id'].unique()

In [None]:
# Activities per gear id; value_counts() leaves out missing values by default.
df_all_activities['gear_id'].value_counts()

In [None]:
# Fetch the detail record of every distinct gear referenced by the activities.
# `gears` is rebuilt from scratch here so that re-running this cell never
# appends duplicate records to a list left over from a previous run.
gears = []
auth_headers = {'Authorization': f'Bearer {access_token}'}
# dropna() removes None/NaN ids (NaN is truthy and would otherwise be
# requested as ".../gear/nan"); the truthiness check below still skips
# empty-string ids, as the original condition did.
for gear_id in df_all_activities['gear_id'].dropna().unique():
    if not gear_id:
        continue
    gear_url = f'https://www.strava.com/api/v3/gear/{gear_id}'
    r = requests.get(gear_url, headers=auth_headers, timeout=10)
    # Fail loudly on HTTP errors instead of storing the error payload as if
    # it were a gear record.
    r.raise_for_status()
    gear = r.json()
    gears.append(gear)

In [None]:
# Tabulate the collected gear payloads and inspect the dtypes pandas inferred.
df_gears = pd.DataFrame(gears)
df_gears.dtypes

In [None]:
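# Scratch (disabled): look up a single gear by id.
# NOTE(review): `gear_2` and `headers` are not defined in the cells shown
# here — define them before re-enabling this cell.
# gear_url = f'https://www.strava.com/api/v3/gear/{gear_2}'
# r = requests.get(gear_url, headers=headers, timeout=10)
# gear = r.json()
# gear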

# Exploring columns

In [None]:
# Column labels available on the activities frame.
df_all_activities.columns

In [None]:
# dtype of each column of the activities frame.
df_all_activities.dtypes

In [None]:
# Same full-width preview as the earlier cell, repeated here for reference
# while exploring the columns.
with pd.option_context('display.max_columns', None):
    display(df_all_activities.head())

In [None]:
# Distinct values present in the elev_high column.
df_all_activities['elev_high'].unique()

In [None]:
# Distinct values present in the location_country column.
df_all_activities['location_country'].unique()

## Filter out unimportant columns

In [None]:
# Column names to remove from the activities frame; consumed by the drop()
# call in the following cell.
unimportant_columns = [
    'resource_state',
    'location_city',
    'location_state',
    'location_country',
    'commute',
    'flagged',
    'start_latlng',
    'end_latlng',
    'heartrate_opt_out',
    'external_id',
    'upload_id',
    'upload_id_str',
    'from_accepted_tag',
    'has_kudoed',
    'athlete_resource_state',
    'map_resource_state',
    'type',  # 'Deprecated. Prefer to use sport_type'
]

In [None]:
# Build a trimmed copy without the listed columns; errors='ignore' skips any
# listed name that is absent from the frame instead of raising.
df_trunc = df_all_activities.drop(
    labels=unimportant_columns, axis='columns', errors='ignore'
)
df_trunc