In [1]:
# import secrets and tokens from config.py
from config import client_id, client_secret, refresh_token

import requests
import urllib3

# import packages for data manipulation
import pandas as pd
from pandas.io.json import json_normalize
import numpy as np

urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

auth_url = "https://www.strava.com/oauth/token"
activites_url = "https://www.strava.com/api/v3/athlete/activities"

payload = {
    'client_id': client_id,
    'client_secret': client_secret,
    'refresh_token': refresh_token,
    'grant_type': "refresh_token",
    'f': 'json'
}

In [2]:
print("Requesting Token...\n")
res = requests.post(auth_url, data=payload, verify=False)
access_token = res.json()['access_token']
# print("Access Token = {}\n".format(access_token))
print("Success, token acquired!")

header = {'Authorization': 'Bearer ' + access_token}
param = {'per_page': 200, 'page': 1}
my_dataset = requests.get(activites_url, headers=header, params=param).json()

Requesting Token...

Access Token = d930cc7c18c09e1756bafa9a66eb922dca43ef43



In [3]:
activities = pd.json_normalize(my_dataset)

In [4]:
activities.columns 

Index(['resource_state', 'name', 'distance', 'moving_time', 'elapsed_time',
       'total_elevation_gain', 'type', 'sport_type', 'workout_type', 'id',
       'start_date', 'start_date_local', 'timezone', 'utc_offset',
       'location_city', 'location_state', 'location_country',
       'achievement_count', 'kudos_count', 'comment_count', 'athlete_count',
       'photo_count', 'trainer', 'commute', 'manual', 'private', 'visibility',
       'flagged', 'gear_id', 'start_latlng', 'end_latlng', 'average_speed',
       'max_speed', 'has_heartrate', 'heartrate_opt_out',
       'display_hide_heartrate_option', 'elev_high', 'elev_low', 'upload_id',
       'upload_id_str', 'external_id', 'from_accepted_tag', 'pr_count',
       'total_photo_count', 'has_kudoed', 'athlete.id',
       'athlete.resource_state', 'map.id', 'map.summary_polyline',
       'map.resource_state', 'average_heartrate', 'max_heartrate',
       'suffer_score', 'average_cadence'],
      dtype='object')

In [5]:
#Break date into start time and date
activities['start_date_local'] = pd.to_datetime(activities['start_date_local'])
activities['start_time'] = activities['start_date_local'].dt.time
activities['start_date_local'] = activities['start_date_local'].dt.date
activities.head(5)

Unnamed: 0,resource_state,name,distance,moving_time,elapsed_time,total_elevation_gain,type,sport_type,workout_type,id,...,athlete.id,athlete.resource_state,map.id,map.summary_polyline,map.resource_state,average_heartrate,max_heartrate,suffer_score,average_cadence,start_time
0,2,Afternoon ride,9010.8,1797,1901,5.1,Ride,Ride,10.0,8419675852,...,43537797,1,a8419675852,a|ijFntcdVk@nBQt@IRGl@}ArF_@zCCHEDOdAYlA[XSd@K...,2,,,,,13:40:49
1,2,First NFS team ride. Saw 10+ deer!,48331.4,8518,8962,84.6,Ride,Ride,10.0,8417951962,...,43537797,1,a8417951962,gtijFf{qdVCcAqNyGGk@zGmZhI}e@nA{DLmBm@kCxJ_m@K...,2,108.8,152.0,26.0,,05:49:02
2,2,Testing out the old Schwinn Paramount,6512.3,1287,1417,20.1,Ride,Ride,10.0,8414026073,...,43537797,1,a8414026073,ybijFdtadVCi@@OKo@g@SKAkCAqAB_@Cw@DaBBcA@a@CW?...,2,,,,,12:00:23
3,2,FF W3 - Hill Repeats,12103.5,4473,6386,85.7,Run,Run,0.0,8412436546,...,43537797,1,a8412436546,wqijFxdrdVRmC\gBNMLFd@f@|@XnCxAlCjAzAz@`EdBxBp...,2,151.6,178.0,106.0,,05:29:27
4,2,Taking the scenic route back,16468.4,3464,3861,31.1,Ride,Ride,10.0,8409775003,...,43537797,1,a8409775003,_dijFfpadVsOLg@Ca@DOCuCDuAPiAb@_At@qAhB_@b@iAf...,2,,,,,16:30:44


In [6]:
# import modules
import os
import time
import matplotlib.pyplot as plt
import folium
import polyline
import base64
from tqdm import tqdm

In [24]:
# add decoded summary polylines
activities['map.polyline'] = activities['map.summary_polyline'].apply(polyline.decode)
print("turkey")
#####################
# define function to get elevation data using the open-elevation API
def get_elevation(latitude, longitude):
    base_url = 'https://api.open-elevation.com/api/v1/lookup'
    payload = {'locations': f'{latitude},{longitude}'}
    r = requests.get(base_url, params=payload).json()['results'][0]
    return r['elevation']
# get elevation data
elevation_data = list()
for idx in tqdm(activities.index):
    activity = activities.loc[idx, :]
    elevation = [get_elevation(coord[0], coord[1]) for coord in activity['map.polyline']]
    elevation_data.append(elevation)

turkey


  1%|          | 1/94 [04:02<6:16:04, 242.63s/it]


KeyboardInterrupt: 

In [None]:
# add elevation data to dataframe
activities['map.elevation'] = elevation_data

In [7]:
# Make a copy of activities DataFrame for feature engineering
activities_copy = activities.copy()

In [8]:
# filter down to Ride, Run, and Swim activities
activities_copy = activities_copy.query("type == 'Ride' | type == 'Run' | type == 'Swim'")
print(len(activities_copy))

94


In [9]:
# convert data types
activities_copy.loc[:, 'start_date'] = pd.to_datetime(activities_copy['start_date']).dt.tz_localize(None)
activities_copy.loc[:, 'start_date_local'] = pd.to_datetime(activities_copy['start_date_local']).dt.tz_localize(None)
# convert values
activities_copy.loc[:, 'distance'] /= 1609.344 # convert from meters to miles
activities_copy.loc[:, 'average_speed'] *= 2.23693629 # convert from meters/second to miles/hour
activities_copy.loc[:, 'max_speed'] *= 2.23693629 # convert from meters/second to miles/hour
# set index
activities_copy.set_index('start_date_local', inplace=True)

  activities_copy.loc[:, 'start_date'] = pd.to_datetime(activities_copy['start_date']).dt.tz_localize(None)
  activities_copy.loc[:, 'start_date_local'] = pd.to_datetime(activities_copy['start_date_local']).dt.tz_localize(None)


In [14]:
activities_copy.head()

Unnamed: 0_level_0,resource_state,name,distance,moving_time,elapsed_time,total_elevation_gain,type,sport_type,workout_type,id,...,athlete.id,athlete.resource_state,map.id,map.summary_polyline,map.resource_state,average_heartrate,max_heartrate,suffer_score,average_cadence,start_time
start_date_local,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2023-01-19,2,Afternoon ride,5.599052,1797,1901,5.1,Ride,Ride,10.0,8419675852,...,43537797,1,a8419675852,a|ijFntcdVk@nBQt@IRGl@}ArF_@zCCHEDOdAYlA[XSd@K...,2,,,,,13:40:49
2023-01-19,2,First NFS team ride. Saw 10+ deer!,30.03174,8518,8962,84.6,Ride,Ride,10.0,8417951962,...,43537797,1,a8417951962,gtijFf{qdVCcAqNyGGk@zGmZhI}e@nA{DLmBm@kCxJ_m@K...,2,108.8,152.0,26.0,,05:49:02
2023-01-18,2,Testing out the old Schwinn Paramount,4.046556,1287,1417,20.1,Ride,Ride,10.0,8414026073,...,43537797,1,a8414026073,ybijFdtadVCi@@OKo@g@SKAkCAqAB_@Cw@DaBBcA@a@CW?...,2,,,,,12:00:23
2023-01-18,2,FF W3 - Hill Repeats,7.520766,4473,6386,85.7,Run,Run,0.0,8412436546,...,43537797,1,a8412436546,wqijFxdrdVRmC\gBNMLFd@f@|@XnCxAlCjAzAz@`EdBxBp...,2,151.6,178.0,106.0,,05:29:27
2023-01-17,2,Taking the scenic route back,10.232989,3464,3861,31.1,Ride,Ride,10.0,8409775003,...,43537797,1,a8409775003,_dijFfpadVsOLg@Ca@DOCuCDuAPiAb@_At@qAhB_@b@iAf...,2,,,,,16:30:44


In [10]:
miles_per_activity = pd.DataFrame(activities_copy.groupby(['type'])['distance'].sum().reset_index())
miles_per_activity

Unnamed: 0,type,distance
0,Ride,173.949075
1,Run,162.178254
2,Swim,0.994194


In [52]:
type(miles_per_activity)

pandas.core.frame.DataFrame

In [11]:
# Set monthly goals in miles per activity
practice_sessions = 8

# 20 mile ride per session
ride_goal = 20 * practice_sessions

# 4 mile run per session
run_goal = 4 * practice_sessions

# half-mile swim per session
swim_goal = 0.5 * practice_sessions
print(ride_goal, run_goal, swim_goal)

160 32 4.0


In [12]:
dif_ride_miles = ride_goal - miles_per_activity['distance'][0]
dif_ride_miles

-13.94907490256901

In [13]:
print("You need to ride " + str(dif_ride_miles) + " to meet your goal this month!")

You need to ride -13.94907490256901 to meet your goal this month!


In [None]:
# filter activities based on start time per month???

In [None]:
# next step: Refer to Pamela Fox's best practices video.
# not for linting, but for managing git

In [15]:
import plotly.express as px

In [16]:
fig1 = px.box(
    activities_copy, x = "distance", 
    title="1. Boxplot: Distance by Activity Type", 
    color="sport_type", 
    points="all"
)
fig1.show() 