In [41]:
def extract_data_from_s3(s3, bucket_name, s3_folder, object_keys):

    data_dict = {}

    for key in object_keys:
        
        file_key = f"{s3_folder}{key}"
        key = key.replace('.json', '')

        print(key)
        print(file_key)
        try:
            response = s3.get_object(Bucket=bucket_name, Key=file_key)
            content = response['Body'].read().decode('utf-8')  # Decode the byte content
            json_data = json.loads(content)
            data_dict[key] = json_data  # Store the content by object key
        except Exception as e:
            print(f"Error fetching {key}: {e}")
            data_dict[key] = None  # Handle the error accordingly

    return data_dict

s3_folder = "raw/"
config = configparser.ConfigParser()
config.read(os.path.expanduser('~/etc/strava/config.conf'))

BUCKET_NAME = config['S3']['BUCKET_NAME']
OBJECT_KEYS = config['S3']['OBJECT_KEYS'].split(', ')

s3 = boto3.client('s3')
OBJECT_KEYS

['athlete.json', 'stats.json', 'activities.json', 'comments.json']

In [42]:
s3_data = extract_data_from_s3(s3, BUCKET_NAME, s3_folder, OBJECT_KEYS)

athlete
raw/athlete.json
stats
raw/stats.json
activities
raw/activities.json
Error fetching activities: An error occurred while reading from response stream: ("Connection broken: ConnectionResetError(10054, 'An existing connection was forcibly closed by the remote host', None, 10054, None)", ConnectionResetError(10054, 'An existing connection was forcibly closed by the remote host', None, 10054, None))
comments
raw/comments.json


In [43]:
STATS = s3_data['stats']
ACTIVITIES = s3_data['activities']

In [44]:
def convert_stats(stats):

    def seconds_to_hms(seconds):
        return str(datetime.timedelta(seconds=seconds))
    
    for key in stats.keys():

        if "distance" in stats[key]:         
            stats[key]["distance"] = round(stats[key]["distance"] / 1609.34, 2)  # Meters to miles
        if "moving_time" in stats[key]:
            stats[key]["moving_time"] = seconds_to_hms(stats[key]["moving_time"])  # Seconds to hh:mm:ss
        if "elapsed_time" in stats[key]:
            stats[key]["elapsed_time"] = seconds_to_hms(stats[key]["elapsed_time"])  # Seconds to hh:mm:ss
        if "elevation_gain" in stats[key]:
            stats[key]["elevation_gain"] = round(stats[key]["elevation_gain"] * 3.28084, 1)  # Meters to feet

    # stats = json.dumps(stats)

    return stats

stats = convert_stats(STATS)


In [45]:
processed_dict = {} 
processed_dict['stats'] = stats

In [46]:
processed_dict

{'stats': {'recent_ride_totals': {'count': 0,
   'distance': 0.0,
   'moving_time': '0:00:00',
   'elapsed_time': '0:00:00',
   'elevation_gain': 0.0,
   'achievement_count': 0},
  'all_ride_totals': {'count': 0,
   'distance': 0.0,
   'moving_time': '0:00:00',
   'elapsed_time': '0:00:00',
   'elevation_gain': 0.0},
  'recent_run_totals': {'count': 15,
   'distance': 116.12,
   'moving_time': '19:33:44',
   'elapsed_time': '22:08:19',
   'elevation_gain': 15078.7,
   'achievement_count': 30},
  'all_run_totals': {'count': 681,
   'distance': 5334.12,
   'moving_time': '33 days, 23:31:21',
   'elapsed_time': '38 days, 22:33:33',
   'elevation_gain': 532306.4},
  'recent_swim_totals': {'count': 0,
   'distance': 0.0,
   'moving_time': '0:00:00',
   'elapsed_time': '0:00:00',
   'elevation_gain': 0.0,
   'achievement_count': 0},
  'all_swim_totals': {'count': 0,
   'distance': 0.0,
   'moving_time': '0:00:00',
   'elapsed_time': '0:00:00',
   'elevation_gain': 0.0},
  'ytd_ride_totals': 

In [47]:
s3_folder = "processed/"
load_data_to_s3(s3, BUCKET_NAME, s3_folder, processed_dict)

Key:  processed/stats.json
{'recent_ride_totals': {'count': 0, 'distance': 0.0, 'moving_time': '0:00:00', 'elapsed_time': '0:00:00', 'elevation_gain': 0.0, 'achievement_count': 0}, 'all_ride_totals': {'count': 0, 'distance': 0.0, 'moving_time': '0:00:00', 'elapsed_time': '0:00:00', 'elevation_gain': 0.0}, 'recent_run_totals': {'count': 15, 'distance': 116.12, 'moving_time': '19:33:44', 'elapsed_time': '22:08:19', 'elevation_gain': 15078.7, 'achievement_count': 30}, 'all_run_totals': {'count': 681, 'distance': 5334.12, 'moving_time': '33 days, 23:31:21', 'elapsed_time': '38 days, 22:33:33', 'elevation_gain': 532306.4}, 'recent_swim_totals': {'count': 0, 'distance': 0.0, 'moving_time': '0:00:00', 'elapsed_time': '0:00:00', 'elevation_gain': 0.0, 'achievement_count': 0}, 'all_swim_totals': {'count': 0, 'distance': 0.0, 'moving_time': '0:00:00', 'elapsed_time': '0:00:00', 'elevation_gain': 0.0}, 'ytd_ride_totals': {'count': 0, 'distance': 0.0, 'moving_time': '0:00:00', 'elapsed_time': '0:0

In [35]:
def load_data_to_s3(s3, bucket_name, s3_folder, data_dict):
    
    for key, data in data_dict.items():

        key = f"{s3_folder}{key}.json"
        data = data
        print("Key: ",key)
        print(data)

load_data_to_s3(s3,BUCKET_NAME,'processed/',s3_data)

Key:  processed/athlete.json
{'id': 39181721, 'username': 'xc_trl_rnnr', 'resource_state': 3, 'firstname': 'Alan', 'lastname': 'Kim', 'bio': None, 'city': None, 'state': None, 'country': None, 'sex': 'M', 'premium': False, 'summit': False, 'created_at': '2019-02-11T17:33:32Z', 'updated_at': '2024-10-28T23:17:39Z', 'badge_type_id': 0, 'weight': 0.0, 'profile_medium': 'https://dgalywyr863hv.cloudfront.net/pictures/athletes/39181721/27331070/15/medium.jpg', 'profile': 'https://dgalywyr863hv.cloudfront.net/pictures/athletes/39181721/27331070/15/large.jpg', 'friend': None, 'follower': None, 'blocked': False, 'can_follow': True, 'follower_count': 54, 'friend_count': 47, 'mutual_friend_count': 0, 'athlete_type': 1, 'date_preference': '%m/%d/%Y', 'measurement_preference': 'feet', 'clubs': [{'id': 1107733, 'resource_state': 2, 'name': 'Newport Run Club', 'profile_medium': 'https://dgalywyr863hv.cloudfront.net/pictures/clubs/1107733/26365753/1/medium.jpg', 'profile': 'https://dgalywyr863hv.cloud

In [27]:
import os
import re
import json
import time
import boto3
import datetime
import requests
import webbrowser
import configparser
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys

config = configparser.ConfigParser()
config.read(os.path.expanduser('~/etc/strava/config.conf'))

STRAVA_USERNAME = config['CREDENTIALS']['STRAVA_USERNAME']
STRAVA_PASSWORD = config['CREDENTIALS']['STRAVA_PASSWORD']
ATHLETE_ID = int(config['CREDENTIALS']['ATHLETE_ID'])
CLIENT_ID = int(config['CREDENTIALS']['CLIENT_ID'])
CLIENT_SECRET = config['CREDENTIALS']['CLIENT_SECRET']
REDIRECT_URI = config['CREDENTIALS']['REDIRECT_URI']
SCOPE = config['CREDENTIALS']['SCOPE']

In [2]:
ATHLETE_ID

39181721

In [10]:
def request_access_token():

    driver = webdriver.Chrome() 

    auth_url = (
        f"https://www.strava.com/oauth/authorize"
        f"?client_id={CLIENT_ID}"
        f"&response_type=code"
        f"&redirect_uri={REDIRECT_URI}"
        f"&scope={SCOPE}"
        f"&approval_prompt=auto"
    )
    driver.get(auth_url)

    username_input = driver.find_element(By.NAME, 'email')
    password_input = driver.find_element(By.NAME, 'password')

    username_input.send_keys(STRAVA_USERNAME)
    password_input.send_keys(STRAVA_PASSWORD)
    password_input.send_keys(Keys.RETURN)

    time.sleep(3)

    authorize_button = driver.find_element(By.XPATH, '//button[contains(text(), "Authorize")]')
    authorize_button.click()

    url = driver.current_url

    match = re.search(r'code=([^&]+)', url)
    if match:
        CODE = match.group(1)
        print(f"Access Code: {CODE}")

    driver.quit()

    response = requests.post(
        'https://www.strava.com/oauth/token',
        data={
            'client_id': CLIENT_ID,
            'client_secret': CLIENT_SECRET,
            'code': CODE,
            'grant_type': 'authorization_code'
        }
    )

    if response.status_code == 200:
        access_token = response.json()['access_token']
        print(f'Access Token: {access_token}')
    else:
        print(f'Error: {response.json()}')

    return access_token

access_token = request_access_token()

Access Code: 5061ca581a78d16b6a80fbc5f8b573ed064b0a8b
Access Token: 787648871619763ab5a77d16c12f8de58c0dc01c


### WEBBROWSER METHOD ###
In case selenium failure...

In [3]:
auth_url = (
    f"https://www.strava.com/oauth/authorize"
    f"?client_id={CLIENT_ID}"
    f"&response_type=code"
    f"&redirect_uri={REDIRECT_URI}"
    f"&scope={SCOPE}"
    f"&approval_prompt=auto"
)

webbrowser.open(auth_url)

CODE = input("Paste code: ")

response = requests.post(
    'https://www.strava.com/oauth/token',
    data={
        'client_id': CLIENT_ID,
        'client_secret': CLIENT_SECRET,
        'code': CODE,
        'grant_type': 'authorization_code'
    }
)

if response.status_code == 200:
    access_token = response.json()['access_token']
    print(f'Access Token: {access_token}')
else:
    print(f'Error: {response.json()}')

Error: {'message': 'Bad Request', 'errors': [{'resource': 'AuthorizationCode', 'field': 'code', 'code': 'invalid'}]}


In [11]:
activities_url = 'https://www.strava.com/api/v3/athlete/activities'
athlete_url = 'https://www.strava.com/api/v3/athlete'
stats_url = f"https://www.strava.com/api/v3/athletes/{ATHLETE_ID}/stats"

headers = {
    'Authorization': f'Bearer {access_token}'
}

def request_data(url):

    response = requests.get(url, headers=headers)

    try:
        data = response.json()
    except:
        print(f'Error: {response.json()}')

    return data

def request_activities():

    activities = []
    page = 1
    per_page = 30

    while True:

        url = f"https://www.strava.com/api/v3/athlete/activities?page={page}&per_page={per_page}"
        response = requests.get(url, headers=headers)
        
        if response.status_code != 200:
            print(f"Error: {response.status_code} - {response.text}")
            break

        data = response.json()
        if not data:
            # No more activities to fetch
            break

        activities.extend(data)
        page += 1
        time.sleep(3)

    return activities

def request_comments(activities):

    def comments_per_activity(activity_id):

        url = f"https://www.strava.com/api/v3/activities/{activity_id}/comments"

        response = requests.get(url, headers=headers)

        if response.status_code == 200:
            return response.json()
        else:
            response.raise_for_status()
    # --------------------------------------

    all_comments = []
    limit = 0

    for activity in activities:
        if limit == 100:
            break
        else:
            activity_id = activity['id']
            comments = comments_per_activity(activity_id)
            if comments:
                all_comments.extend(comments)
            limit += 1   

    return all_comments

In [55]:
requests = 0

def request_athlete_profile(requests):

    requests += 1

    return requests
requests = request_athlete_profile(requests)

In [12]:
athlete = request_data(athlete_url)
# stats = request_data(stats_url)

In [16]:
activities = request_activities()

In [9]:
with open('data/raw/activities.json','r') as j:
    activities = json.load(j)

In [10]:
comments = request_comments(activities)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100


In [15]:
with open('data/raw/athlete.json','w') as j:
    json.dump(athlete, j, indent=4)

In [18]:
response = requests.post(
    'https://www.strava.com/oauth/token',
    data={
        'client_id': CLIENT_ID,
        'client_secret': CLIENT_SECRET,
        'code': CODE,
        'grant_type': 'authorization_code'
    }
)

if response.status_code == 200:
    access_token = response.json()['access_token']
    print(f'Access Token: {access_token}')
else:
    print(f'Error: {response.json()}')

Access Token: 01ba02fbf6fcfcc36eb88fb51c840adf37e29748


In [49]:
import boto3

config = configparser.ConfigParser()
config.read(os.path.expanduser('~/etc/strava/config.conf'))
STRAVA_BUCKET = config['S3']['BUCKET_NAME']

def load_data_to_s3(data):
    s3 = boto3.client('s3')
    
    # Load the data to S3 as a JSON file
    s3.put_object(
        Bucket='stava',
        Key="raw/stats.json",
        Body=json.dumps(data)
    )

load_data_to_s3 (stats)

In [67]:
with open('data/raw/athlete.json', 'r') as j:
    athlete = json.load(j)
with open('data/raw/comments.json', 'r') as j:
    comments = json.load(j)
with open('data/raw/stats.json', 'r') as j:
    stats = json.load(j)
with open('data/raw/activities.json','r') as j:
    activities = json.load(j)

In [44]:
import datetime

def convert_stats(stats):

    def seconds_to_hms(seconds):
        return str(datetime.timedelta(seconds=seconds))
    
    for key in stats.keys():

        if "distance" in stats[key]:         
            stats[key]["distance"] = round(stats[key]["distance"] / 1609.34, 2)  # Meters to miles
        if "moving_time" in stats[key]:
            stats[key]["moving_time"] = seconds_to_hms(stats[key]["moving_time"])  # Seconds to hh:mm:ss
        if "elapsed_time" in stats[key]:
            stats[key]["elapsed_time"] = seconds_to_hms(stats[key]["elapsed_time"])  # Seconds to hh:mm:ss
        if "elevation_gain" in stats[key]:
            stats[key]["elevation_gain"] = round(stats[key]["elevation_gain"] * 3.28084, 1)  # Meters to feet

    return stats

stats_convert = convert_stats(stats)

In [68]:
def convert_activities(activities):
    
    def seconds_to_minutes(seconds):
        return round(seconds / 60, 2)

    def mps_to_mph(mps):
        return round(mps * 2.23694, 1)

    # Filter out activities that are Weight Training.
    run_activities = [activity for activity in activities if activity.get('type') != 'WeightTraining']

    for activity in run_activities:
        activity['distance'] = round(activity['distance'] / 1609.34, 2)
        activity['total_elevation_gain'] = round(activity['total_elevation_gain'] * 3.28084, 1)
        activity['moving_time'] = seconds_to_minutes(activity['moving_time'])
        activity['elapsed_time'] = seconds_to_minutes(activity['elapsed_time'])
        activity['average_speed'] = mps_to_mph(activity['average_speed'])
        activity['max_speed'] = mps_to_mph(activity['max_speed'])
        # Garmin metrics. Some activities were recorded without it.
        if 'average_cadence' in activity:
            activity['steps_per_minute'] = round(activity['average_cadence'] * 2, 0)
        if 'elev_high' in activity:
            activity['elev_high'] = round(activity['elev_high'] * 3.28084, 1)
        if 'elev_low' in activity:    
            activity['elev_low'] = round(activity['elev_low'] * 3.28084, 1)

    return run_activities

activities_convert = convert_activities(activities)

In [75]:
config = configparser.ConfigParser()
config.read(os.path.expanduser('~/etc/strava/config.conf'))

OBJECT_KEYS = config['S3']['OBJECT_KEYS']
BUCKET_NAME = config['S3']['BUCKET_NAME']



In [81]:
data_dict = {}

for key in OBJECT_KEYS.split(", "):
    try:
        # response = s3.get_object(Bucket=bucket_name, Key=key)
        # content = response['Body'].read().decode('utf-8')  # Decode the byte content
        # json_data = json.loads(content)
        data_dict[key] = key  # Store the content by object key
    except Exception as e:
        print(f"Error fetching {key}: {e}")
        data_dict[key] = None  # Handle the error accordingly

data_dict

{'athlete.json': 'athlete.json',
 'stats.json': 'stats.json',
 'activities.json': 'activities.json',
 'comments.json': 'comments.json'}

In [82]:
data_dict['stats.json']

'stats.json'