In [33]:
# OPTIONAL: Load the "autoreload" extension so that code can change
%load_ext autoreload

# OPTIONAL: always reload modules so that as you change code in src, it gets loaded
%autoreload 2
from src.data import make_dataset
from dotenv import find_dotenv, load_dotenv
import os
from stravalib.client import Client
import time
import csv

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [53]:
"""
To get data we have to deal with OAuth
1. Run this cell
2. Click it's output
3. Hit authorize
4. Copy the "code" parameter in the redirect and paste it in the cell below
5. Run the cell below
6. To refresh, run the cell below that
"""
client = Client()
authorize_url = client.authorization_url(client_id=22120, redirect_uri='http://localhost:8282/authorized')
# Have the user click the authorization URL, a 'code' param will be added to the redirect_uri
# .....
print(authorize_url)


https://www.strava.com/oauth/authorize?client_id=22120&redirect_uri=http%3A%2F%2Flocalhost%3A8282%2Fauthorized&approval_prompt=auto&response_type=code&scope=read%2Cactivity%3Aread


In [56]:
# Extract the code from your webapp response
code = "4f9497b3487b523145a7d9664a7a9e8b50c8c1a0" # or whatever your framework does
token_response = client.exchange_code_for_token(client_id=os.getenv("client_id"), client_secret=os.getenv("client_secret"), code=code)
access_token = token_response['access_token']
refresh_token = token_response['refresh_token']
expires_at = token_response['expires_at']

# Now store that short-lived access token somewhere (a database?)
client.access_token = access_token
# You must also store the refresh token to be used later on to obtain another valid access token 
# in case the current is already expired
client.refresh_token = refresh_token

# An access_token is only valid for 6 hours, store expires_at somewhere and
# check it before making an API call.
client.token_expires_at = expires_at
 
athlete = client.get_athlete()
print("For {id}, I now have an access token {token}".format(id=athlete.id, token=access_token))



For 25983534, I now have an access token 963731c1306d483dd0a7ade62496aeb7b3e3868c


In [None]:
# ... time passes ...
if time.time() > client.token_expires_at:
    refresh_response = client.refresh_access_token(client_id=os.getenv("client_id"), client_secret=os.getenv("client_secret"),
        refresh_token=client.refresh_token)
    access_token = refresh_response['access_token']
    refresh_token = refresh_response['refresh_token']
    expires_at = refresh_response['expires_at']

In [82]:
# Get the data
activities = client.get_activities()
types = ['time', 'latlng', 'altitude', 'heartrate', 'temp']
headers_written = False
#stream_types = ['time', 'latlng', 'altitude', 'heartrate', 'temp']
stream_types = ['heartrate']
w = None
with open(os.path.join("/home/greg/repos/commute_analysis", "data", "raw", 'raw_strava_data.csv'), 'w') as f:
    for activity in activities:
        streams = client.get_activity_streams(activity.id, types=stream_types, resolution='medium')
        temp = activity.to_dict()
        for k in types:
            if streams and k in streams:
                temp[k] = streams[k].data
            elif k not in temp:
                temp[k] = None
        if not headers_written:
            w = csv.DictWriter(f, temp.keys())
            w.writeheader()
            headers_written = True
        w.writerow(temp)


In [89]:
make_dataset.convert_strava_data()

your performance may suffer as PyTables will pickle object types that it cannot
map directly to c-types [inferred_type->mixed,key->block4_values] [items->['external_id', 'athlete', 'name', 'type', 'start_date', 'timezone', 'start_latlng', 'end_latlng', 'map', 'device_watts', 'from_accepted_tag', 'heartrate']]

  """Entry point for launching an IPython kernel.


In [92]:
dataset = make_dataset.load_strava_data()

In [96]:
dataset.columns

Index(['guid', 'external_id', 'upload_id', 'athlete', 'name', 'distance',
       'moving_time', 'elapsed_time', 'total_elevation_gain', 'elev_high',
       'elev_low', 'type', 'start_date', 'start_date_local', 'timezone',
       'utc_offset', 'start_latlng', 'end_latlng', 'location_city',
       'location_state', 'location_country', 'start_latitude',
       'start_longitude', 'achievement_count', 'pr_count', 'kudos_count',
       'comment_count', 'athlete_count', 'photo_count', 'total_photo_count',
       'map', 'trainer', 'commute', 'manual', 'private', 'flagged', 'gear_id',
       'gear', 'average_speed', 'max_speed', 'device_watts', 'has_kudoed',
       'best_efforts', 'segment_efforts', 'splits_metric', 'splits_standard',
       'average_watts', 'weighted_average_watts', 'max_watts', 'suffer_score',
       'has_heartrate', 'average_heartrate', 'max_heartrate',
       'average_cadence', 'kilojoules', 'average_temp', 'device_name',
       'embed_token', 'calories', 'description', 'wo

In [97]:
dataset[["start_date_local", "start_longitude", "start_lat", "distance", "elapsed_time", "moving_time"]]

Unnamed: 0,start_date_local,start_longitude,start_lat,distance,elapsed_time,moving_time
0,2020-01-10 17:23:16,-84.39,33.77708,4.643133,00:25:35,00:24:33
1,2020-01-10 09:38:20,-84.35,33.75095,4.544210,00:21:58,00:21:33
2,2020-01-08 19:04:24,-84.35,33.75097,1.740212,00:15:50,00:15:43
3,2020-01-07 17:57:20,-84.39,33.77708,4.671902,00:31:00,00:28:27
4,2020-01-07 09:32:00,-84.35,33.75090,1.208069,00:23:58,00:09:12
...,...,...,...,...,...,...
306,2017-11-06 10:05:58,-84.40,33.77570,1.047072,00:06:03,00:06:03
307,2017-11-06 08:34:13,-84.37,33.77185,3.683052,00:24:26,00:22:55
308,2017-11-03 15:05:53,-84.40,33.77648,3.659440,00:22:04,00:20:47
309,2017-11-03 07:49:58,-84.36,33.77178,3.555112,00:25:00,00:23:03
