In [None]:
import json
import requests
import time
import os
import pandas as pd

### making data a bit easier to see
pd.set_option('display.max_columns', 500)
pd.set_option('display.max_rows', 500)

### Credentials and data should live outside the repo so they are never committed.
### The STRAVA_CRED_LOCATION / STRAVA_DATA_LOCATION environment variables override the
### defaults below, so the notebook can run on another machine without editing this cell.
### os.path.join(..., '') guarantees the trailing separator that the rest of the notebook
### relies on when it builds paths with `cred_location + 'some_file.json'`.
cred_location = os.path.join(os.environ.get('STRAVA_CRED_LOCATION') or r'/Users/jj/code/strava_creds/', '')
data_location = os.path.join(os.environ.get('STRAVA_DATA_LOCATION') or r'/Users/jj/code/strava_data/', '')

## Authorisation
This section makes sure you have the tokens needed to authenticate with the Strava API.

Full documentation can be found at https://developers.strava.com/

In [None]:
### You need to have created an app on strava - this is much easier than it sounds.
### Go through the first couple of steps here if unsure: 
###https://medium.com/@annthurium/getting-started-with-the-strava-api-a-tutorial-f3909496cd2d

### These client tokens don't change, so you should only have to enter these once. 
def get_client_tokens():
    """Prompt for the Strava app credentials and save them to client_tokens.json.

    Asks interactively for the client id, client secret and redirect uri, then
    writes them as JSON to `cred_location + 'client_tokens.json'`, creating the
    credentials folder first if it does not exist yet.

    Returns:
        None
    """
    print('please go here https://www.strava.com/settings/api, and copy the token information as prompted')
    # Values are copy-pasted from a web page, so drop any stray surrounding whitespace.
    client_id = input('please enter your client id').strip()
    client_secret = input('please enter your client secret').strip()
    redirect_uri = input('please enter a redirect uri (use http://localhost/ if unsure)').strip()

    client_tokens = {
        'client_id': client_id,
        'client_secret': client_secret,
        'redirect_uri': redirect_uri
    }

    # On a first run the credentials folder may not exist; without this the open()
    # below raises FileNotFoundError after the user has already typed everything in.
    os.makedirs(cred_location, exist_ok=True)
    with open(cred_location+'client_tokens.json', 'w') as outfile:
        json.dump(client_tokens, outfile, indent=4)

    print("client tokens saved")
    return None

### This will just check if the client tokens exist, not if they are valid.
### client id, client secret, and redirect uri should all be held in a json file called client_tokens.json
def check_client_tokens():
    """Report whether a well-formed client_tokens.json exists.

    Only the presence of the file and its keys is checked, not whether the
    credentials are valid.

    Returns:
        bool: True when `cred_location + 'client_tokens.json'` can be read, parses
        as a JSON object, and has exactly the keys 'client_id', 'client_secret'
        and 'redirect_uri'; False otherwise.
    """
    try:
        # `with` closes the handle; the original left it open.
        with open(cred_location+'client_tokens.json') as infile:
            client_tokens = json.load(infile)
    except (OSError, ValueError):
        # OSError: file missing/unreadable. ValueError: not valid JSON
        # (json.JSONDecodeError is a ValueError subclass).
        # Anything else (e.g. a NameError from a typo) is a real bug and should surface.
        return False

    expected_keys = {'client_id', 'client_secret', 'redirect_uri'}
    if isinstance(client_tokens, dict) and set(client_tokens.keys()) == expected_keys:
        print('Client Tokens Found')
        return True
    return False


def prompt_authorization():
    """Run the interactive OAuth authorization step and save the resulting tokens.

    Reads the app credentials from client_tokens.json, prints the Strava
    authorization URL, asks the user to paste back the `code` from the redirect
    URL, and exchanges that code for tokens. On success the token response, plus
    the requested scopes under the key 'scopes', is written to
    `cred_location + 'access_tokens.json'`. On an HTTP error the response is
    printed and nothing is written, so an error payload can never replace a
    previously saved token file.

    Returns:
        None
    """
    # Local import so this cell does not depend on an edit to the notebook's import cell.
    from urllib.parse import urlencode

    with open(cred_location+'client_tokens.json') as infile:
        client_tokens = json.load(infile)

    ### necessary to get athlete activities
    scopes = ','.join(['profile:read_all', 'activity:read_all'])

    # Authorization URL. urlencode escapes the redirect uri (which itself contains
    # '/' and possibly '?' or '&') so it cannot corrupt the query string; ':' and ','
    # are left readable for the scope list.
    query = urlencode({
        'client_id': client_tokens["client_id"],
        'response_type': 'code',
        'redirect_uri': client_tokens["redirect_uri"],
        'approval_prompt': 'force',
        'scope': scopes,
    }, safe=':,')
    request_url = 'https://www.strava.com/oauth/authorize?' + query

    # This will prompt you and ask for the code in the url
    print('Click here:', request_url)
    print('Please authorize the app and copy&paste below the generated code!')
    print('P.S: you can find the code in the URL between "code=" and the nexrt "&"')
    code = input('Insert the code from the url: ').strip()

    # Get the access token
    token = requests.post(url='https://www.strava.com/api/v3/oauth/token',
                          data={'client_id': client_tokens["client_id"],
                                'client_secret': client_tokens["client_secret"],
                                'code': code,
                                'grant_type': 'authorization_code'})

    # Same success criterion and messages as refresh_authorization().
    if token.status_code >= 300:
        print('Authorisation Error')
        print(token.text)
        return

    access_tokens = token.json()

    # save the token with the applied for scope in the cred_location
    access_tokens.update({'scopes': scopes})

    with open(cred_location+'access_tokens.json', 'w') as outfile:
        json.dump(access_tokens, outfile, indent=4)
    print('Authorisation Complete')



### access tokens expire every 6 hours.
### the access token can be refreshed using this code.
### it will have the same scope as the original access code.
def refresh_authorization():
    """Exchange the saved refresh token for a new access token.

    Reads client_tokens.json and access_tokens.json from `cred_location`, posts a
    `refresh_token` grant to the Strava token endpoint and, on success, overwrites
    access_tokens.json with the response, carrying over the previously saved
    'scopes' entry. On an HTTP error the response is printed and the saved file
    is left untouched.

    Returns:
        bool: True when the token was refreshed and saved, False on an HTTP error.
    """
    # get previously saved tokens
    with open(cred_location+'client_tokens.json') as infile:
        client_tokens = json.load(infile)
    with open(cred_location+'access_tokens.json') as infile:
        access_tokens = json.load(infile)

    refresh_url = "https://www.strava.com/oauth/token"

    payload = {
        'client_secret': client_tokens['client_secret'],
        'client_id': client_tokens['client_id'],
        'refresh_token': access_tokens['refresh_token'],
        'grant_type': 'refresh_token',
        'f': 'json'
    }

    print("Requesting Token...\n")
    # TLS certificate verification is left at its default (on): this request carries
    # the client secret and refresh token, so it must not be sent with verify=False.
    res = requests.post(refresh_url, data=payload)

    if res.status_code >= 300:
        print('Authorisation Error')
        print(res.text)
        return False

    access_token = res.json()

    # save the new token and copy the scope of the old token
    # (guarded so a missing 'scopes' key cannot discard a freshly issued token)
    if 'scopes' in access_tokens:
        access_token.update({'scopes': access_tokens['scopes']})

    with open(cred_location+'access_tokens.json', 'w') as outfile:
        json.dump(access_token, outfile, indent=4)
    print('Authorisation Complete')

    return True





def authorization_flow():
    """Make sure a usable access token is saved, doing only the steps that are needed.

    Repeats these checks until a non-expired token is found:
      1. client_tokens.json missing or malformed -> get_client_tokens()
      2. access_tokens.json missing              -> prompt_authorization()
      3. access_tokens.json lacks required keys  -> prompt_authorization()
      4. token expired                           -> refresh_authorization(); if the
         token is still expired afterwards, fall back to prompt_authorization()
         instead of retrying the refresh forever.

    Returns:
        None
    """
    token_path = cred_location+'access_tokens.json'
    # 'expires_at' is read below, so it has to be part of the presence check too.
    required_keys = ['access_token', 'scopes', 'refresh_token', 'expires_at']
    refresh_attempted = False

    # A loop instead of self-recursion: a persistently failing step can no longer
    # end in RecursionError.
    while True:
        ### Check if we have client tokens
        if not check_client_tokens():
            print('client tokens not found')
            get_client_tokens()
            continue

        ### check we have an access token
        if not os.path.isfile(token_path):
            print('access_token.json file missing')
            prompt_authorization()
            continue

        try:
            with open(token_path) as infile:
                tokens = json.load(infile)
        except ValueError:
            # Unparseable file: treat it the same as a file with missing data.
            tokens = {}

        ### check if we have the right keys in the access token file
        if not (isinstance(tokens, dict) and all(x in tokens for x in required_keys)):
            print('data missing from access_tokens.json')
            prompt_authorization()
            continue

        ### check if the access token is still valid
        if time.time() < int(tokens['expires_at']):
            print((int(tokens['expires_at']) - int(time.time())) / 60, 'minutes until token expires')
            print('current scopes:', tokens['scopes'])
            return

        if refresh_attempted:
            # The refresh did not yield a valid token (e.g. revoked refresh token);
            # ask the user to authorize again rather than retrying in a tight loop.
            print('token refresh did not produce a valid token, requesting new authorization')
            prompt_authorization()
            refresh_attempted = False
            continue

        print('access_code_expired, requesting refresh')
        refresh_authorization()
        refresh_attempted = True

### authorization_flow() works out what is needed: it collects client tokens, prompts for
### authorization, or refreshes an expired token only when necessary.
### prompt_authorization() is deliberately NOT called first: doing so forced a manual
### re-authorization on every run and raised FileNotFoundError on a fresh setup where
### client_tokens.json does not exist yet.
authorization_flow()


## Get Data
This code calls the `/athlete/activities` endpoint, page by page, to get all activities for the authenticated user.

More here: https://developers.strava.com/docs/reference/#api-Activities-getLoggedInAthleteActivities

In [None]:
### Makes one request to the API for n number of activities
def get_activities(n, page_no=1):
    """Request one page of the authenticated athlete's activities.

    Args:
        n: number of activities per page (sent as `per_page`).
        page_no: 1-based page number to request (sent as `page`).

    Returns:
        requests.Response: the raw response; the caller is responsible for checking
        the status code and parsing the JSON.
    """
    activities_url = "https://www.strava.com/api/v3/athlete/activities"

    # `with` closes the token file; the original left the handle open on every call.
    with open(cred_location+'access_tokens.json') as infile:
        access_tokens = json.load(infile)

    header = {'Authorization': 'Bearer ' + access_tokens['access_token']}
    param = {'per_page': n, 'page': page_no}

    return requests.get(activities_url, headers=header, params=param)

### preview of what they look like
two_activities = get_activities(2)
### fail loudly on an HTTP error (e.g. an expired token) instead of building a
### DataFrame out of the error payload, which would look like real data
two_activities.raise_for_status()
trial_df = pd.DataFrame(two_activities.json())
trial_df


In [None]:
### this iterates through the get_activities() function to get all activities for an athlete
### You should check that the total number of activities matches what's on your strava profile
def get_all_activities():
    """Collect every activity of the authenticated athlete, 200 per request.

    Pages through get_activities() until an empty page is returned, which is the
    normal end of the data. If a request fails (status >= 300, or a body that is
    not a JSON list) the error is printed with a warning and the activities
    collected so far are returned, so the caller can tell a partial result from
    a complete one.

    Returns:
        list: the activity records (dicts) in the order the API returned them.
    """
    output = []
    page_no = 1
    while True:
        response = get_activities(200, page_no)

        # Parse the body once per page; a non-JSON body (e.g. an HTML error page)
        # raises ValueError and is handled as a failed request below.
        try:
            page = response.json()
        except ValueError:
            page = None

        if response.status_code >= 300 or not isinstance(page, list):
            print('request for page', page_no, 'failed with status', response.status_code)
            print(page if page is not None else response.text)
            print('WARNING: returning', len(output), 'activities collected so far - the result may be incomplete')
            break

        if len(page) == 0:
            # An empty page means we have gone past the last activity.
            break

        output.extend(page)
        print(len(page), 'rows added from page', page_no)
        page_no += 1

    return output

all_activities = get_all_activities()
df = pd.DataFrame(all_activities)

### save outside the repo
### create the folder first so a missing directory cannot fail the save after the
### (slow, rate-limited) download has already completed
os.makedirs(data_location, exist_ok=True)
df.to_csv(data_location+'all_activities_raw.csv')
df.head()

## Test Data
A few tests to check the data from the API is as expected

In [None]:
### Test 1: Check that all your activities have been collected

### Before running this, go to https://www.strava.com/dashboard and make a note of
### your total number of activities.

### The dataframe should have the same number of rows.
print(len(df))

### Test passed if the two numbers match (this is a manual check - nothing is asserted).

In [None]:
### Test 2: check that the columns are populated
columns_that_must_be_populated = ['id', 'name', 'type', 'start_date', 'private']
possible_empty_values = ['none', 'unknown', 'None', 'Unknown', '', None]

### For every (placeholder value, column) pair, count the rows holding that value.
### Notes on the checks used:
###   - `v in df[c]` would test the index labels, not the values, so the values
###     are compared explicitly with isin().
###   - Missing values (None / NaN) never compare equal to anything, so they are
###     counted with isna().
errors = []
for v in possible_empty_values:
    for c in columns_that_must_be_populated:
        if v is None:
            matches = df[c].isna()
        else:
            matches = df[c].isin([v])
        n_matches = int(matches.sum())
        if n_matches > 0:
            errors.append({c: f'{n_matches} instances of {v!r}'})
if errors == []:
    print('TEST PASSED')
else:
    print('TEST FAILED')
    print(errors)

In [None]:
### Test 3: Check that columns which should be unique are unique
columns_that_must_be_unique = ['id', 'start_date']

### Collect the offending columns directly. The loop variable is used for the lookup,
### so this cell no longer depends on a variable left over from an earlier cell.
non_unique_columns = [col for col in columns_that_must_be_unique if not df[col].is_unique]
if not non_unique_columns:
    print('TEST PASSED')
else:
    print('TEST_FAILED')
    print(non_unique_columns)

In [None]:
### Check that data formats are as expected
### Just an eyeball test: shows the dtype pandas assigned to each column of df
df.dtypes

Trying to get other data (not working)

In [None]:
### Fetch the profile of the authenticated athlete.
athlete_url = "https://www.strava.com/api/v3/athlete"
with open(cred_location+'access_tokens.json') as infile:
    access_tokens = json.load(infile)

header = {'Authorization': 'Bearer ' + access_tokens['access_token']}

### The second positional argument of requests.get() is `params` (the query string),
### so the header dict must be passed by keyword or the request is sent unauthenticated.
response = requests.get(athlete_url, headers=header)

response.json()

In [None]:
def get_gear_data():
    """Look up every distinct gear id found in `df['gear_id']`.

    Missing gear ids (activities with no gear) are skipped. The access token is
    read once and reused for all lookups.

    Returns:
        pandas.Series: one entry per distinct gear id, holding the parsed JSON
        response of the gear endpoint for that id.
    """
    with open(cred_location+'access_tokens.json') as infile:
        access_tokens = json.load(infile)
    header = {'Authorization': 'Bearer ' + access_tokens['access_token']}

    def lookup_id(gear_id):
        """Request a single gear record and return the parsed JSON body."""
        # f-string: without the prefix the literal text '{id}' was requested for every gear.
        gear_url = f"https://www.strava.com/api/v3/gear/{gear_id}"
        response = requests.get(gear_url, headers=header)
        return response.json()

    # dropna(): activities without gear have a missing gear_id, which is not a valid lookup.
    unique_gear_ids = pd.Series(df['gear_id'].dropna().unique())
    return unique_gear_ids.apply(lookup_id)

### show the first gear lookup result as a quick check of what the endpoint returned
get_gear_data()[0]
