# CONNECT GOOGLE DRIVE AND SAVE DATAFRAME & RESPONSES

In [1]:
import pandas as pd
import time

# Raise pandas' display limits so wide/long frames are shown without truncation.
for option_name, option_value in (
    ('display.max_rows', 500),
    ('display.max_columns', 500),
    ('display.width', 1000),
):
    pd.set_option(option_name, option_value)

In [2]:
# Attach Google Drive to the Colab runtime; once mounted it can be used like a
# local directory under /content/drive.
# To detach again call drive.flush_and_unmount(); if mounting fails, restart
# the runtime.
from google.colab import drive

drive.mount(mountpoint="/content/drive", force_remount=True)

#%ls
#%mkdir strava_outputs

Mounted at /content/drive


In [None]:
# Change the working directory to strava_outputs on the mounted Drive; this is
# where all results are meant to be saved in raw .json format.
# NOTE(review): a later cell runs `%cd /content/drive/My\ Drive/python_workbooks`,
# so relative paths used after that cell no longer resolve inside strava_outputs.
%cd /content/drive/My\ Drive/python_workbooks/strava_outputs


# KEYS READ

In [39]:
# Strava API credentials.
# Values are read from environment variables of the same name so that real
# secrets never have to be saved inside the notebook; the placeholder defaults
# below are only used when a variable is not set.
import os


def strava_setting(name, default):
    """Return the environment variable `name`, or `default` when it is unset."""
    return os.environ.get(name, default)


STRAVA_CLIENT_ID = int(strava_setting('STRAVA_CLIENT_ID', 123456))
STRAVA_CLIENT_SECRET = strava_setting('STRAVA_CLIENT_SECRET', 'secret_here')
STRAVA_ACCESS_TOKEN = strava_setting('STRAVA_ACCESS_TOKEN', 'access_token_here')
STRAVA_REFRESH_TOKEN = strava_setting('STRAVA_REFRESH_TOKEN', 'refresh_token_here')


# STRAVA ACTIVITY READ classic manual method

In [None]:
import gspread
from oauth2client.service_account import ServiceAccountCredentials

%cd /content/drive/My\ Drive/python_workbooks

# use creds to create a client to interact with the Google Drive API
#scope = ['https://spreadsheets.google.com/feeds']
scope = ['https://spreadsheets.google.com/feeds' + ' ' +'https://www.googleapis.com/auth/drive']
credentials = ServiceAccountCredentials.from_json_keyfile_name('client_secret.json', scope)
gc = gspread.authorize(credentials)



# Strava Authentication. Two steps:
> * Use Strava OAuth to obtain an authorization code
> * Run requests.post with the obtained code to get an access token & refresh token


What the code below does can also be done manually: launch the OAuth flow to obtain a code, then exchange that code for an access token using the links below:

1. First get a code: paste the address below into your browser and authorise the app. Replace `[your_client_id]` with your own client ID.
https://www.strava.com/oauth/authorize?client_id=[your_client_id]&redirect_uri=http://localhost&response_type=code&scope=activity:read_all

2. The step above should give you a code.

3. Use that code in a POST request to obtain an access token (a token that expires).
Fill in client_id and client_secret (from your Strava API settings page) and paste in the code from step 2.
https://www.strava.com/oauth/token?client_id=[your_client_id]&client_secret=[your_client_secret]&code=[your_code_here]&grant_type=authorization_code

4. The request above returns an access token:
access_token = [access_token]

5. With the access token you can now call the different Strava endpoints, for example:
https://www.strava.com/api/v3/athlete/activities?access_token=[access_token]


In [None]:
import requests
import json
import urllib
from requests_oauthlib import OAuth2Session
# Build the Strava authorisation URL that the user opens to approve access.
# No HTTP request is needed here: the URL is only constructed and printed.
base_url = "https://www.strava.com/oauth/authorize?"
redirect_uri = "http://localhost/exchange_token"

# Scopes requested from Strava, comma-separated. A broader alternative is
# 'read,read_all,activity:read,activity:read_all,profile:read_all'.
strava_scope = 'read,read_all,activity:read'

oauth2_session = OAuth2Session(STRAVA_CLIENT_ID, redirect_uri=redirect_uri, scope=strava_scope)
# authorization_url returns the full URL together with the `state` value it used.
authorization_request, state = oauth2_session.authorization_url(base_url)#,STATE)
print("Click on the following link to present the user with sign in form where they authenticate and approve access to your application.")
print(authorization_request)

In [7]:
# Authorisation code to exchange for an access token; replace the placeholder
# with the code received after approving access via the link printed above.
code = 'paste_here_received_code'

In [None]:
import requests
from datetime import datetime

import urllib3
urllib3.disable_warnings()


from datetime import timezone

# Exchange the authorisation `code` for an access token.
auth_url = "https://www.strava.com/oauth/token"
payload = {
    'client_id': STRAVA_CLIENT_ID,
    'client_secret': STRAVA_CLIENT_SECRET,
    'code': code,
    'grant_type': 'authorization_code',
    # Alternative (not used here): send 'refresh_token': STRAVA_REFRESH_TOKEN
    # together with 'grant_type': 'refresh_token' instead of 'code'.
}
print("Requesting the token...\n")
# Certificate verification is left at requests' default (enabled): this request
# carries the client secret and must not be sent over an unverified connection.
res = requests.post(auth_url, data=payload)
res.raise_for_status()  # fail here instead of with a KeyError on the lines below
token_response = res.json()

access_token = token_response['access_token']
expiry_ts = token_response['expires_at']
print("New token will expire at: ",end='\t')
print(datetime.fromtimestamp(expiry_ts, tz=timezone.utc).strftime('%Y-%m-%d %H:%M:%S'))
# The access token is intentionally not printed, so it does not end up in the
# saved notebook output.


############
# Sanity check: fetch the authenticated athlete's profile with the new token.
activites_url = "https://www.strava.com/api/v3/athlete"

header = {'Authorization': 'Bearer ' + access_token}
param = {'per_page': 200, 'page': 1}

athlete_response = requests.get(activites_url, headers=header, params=param)
athlete_response.raise_for_status()
my_dataset = athlete_response.json()
print("Authenticated Athleete: ",my_dataset['username'])

In [None]:
import requests
import json

import os

url = 'https://www.strava.com/api/v3/athlete/activities'

# Raw pages are written to an explicit directory rather than the current working
# directory, which other cells change with %cd.
output_dir = '/content/drive/My Drive/python_workbooks/strava_outputs'
os.makedirs(output_dir, exist_ok=True)

# With 100 activities per page, `limit` is the highest page number to request:
# divide the number of activities you have by 100 and round up.
starting_page = 1
limit = 35

output_list = []
params = {'per_page': 100, 'page': 1}
# Send the token as a Bearer header instead of a query parameter so it does not
# appear in request URLs.
auth_header = {'Authorization': 'Bearer ' + access_token}
for i in range(starting_page, limit + 1):  # inclusive: page `limit` is read too
  params['page'] = i

  r = requests.get(url, headers=auth_header, params=params)
  print("Reading page ",params['page']," out of ",limit,"finished with ",str(r)," code")
  r.raise_for_status()  # do not save or collect error payloads
  data = r.json()
  if not data:
    # An empty page means there are no more activities; stop requesting.
    break

  # save outputs to drive as json file
  filename = "activities_"+str(params['page'])+"_25"+".json"
  with open(os.path.join(output_dir, filename), 'w') as f:
    json.dump(data, f)

  output_list.append(data)

In [None]:
# Flatten the downloaded pages into one record per activity and build actDF.

# Fields copied only when present; a missing key becomes None instead of
# raising KeyError and aborting the whole cell.
optional_fields = (
    'gear_id',
    'total_elevation_gain',
    'kudos_count',
    'suffer_score',
    'pr_count',
    'start_latlng',
    'end_latlng',
)

actlist = []
for page in output_list:
  for item in page:
    actdict = {
        'type': item['type'],
        'moving_time': item['moving_time'],
        # moving_time rendered as HH:MM:SS; values of 24 hours or more wrap around.
        'TIME': time.strftime('%H:%M:%S', time.gmtime(item['moving_time'])),
        'start_date_local': item['start_date_local'],
        'name': item['name'],
        'distance': item['distance'],
    }
    for field in optional_fields:
      actdict[field] = item.get(field)
    # 'summary_polyline' is not collected; add it to optional_fields-style
    # handling here if it is needed later.
    actlist.append(actdict)

actDF = pd.DataFrame(actlist)
print(actDF.shape)
actDF.head(10)

# Visualisation

In [None]:
# Parse the activity start timestamps, store them on actDF together with the
# hour of day, then show how many rows each activity type has.
start_times = pd.to_datetime(actDF['start_date_local'], utc=True)
actDF['date_day'] = start_times
actDF['hour'] = start_times.dt.hour
actDF.groupby(['type']).count()



In [36]:
# Share of all loaded activities that are runs starting in each hour of the day.
# The denominator is taken from the data instead of a hard-coded total, so it
# stays correct when more (or fewer) activities are loaded.
num_activities = len(actDF)
actRUN = actDF[actDF['type']=='Run']
run_hours = actRUN.groupby(['hour']).count().reset_index()
# After count() the 'type' column holds the number of runs per start hour.
run_hours['count%'] = run_hours['type']/num_activities

In [None]:
# Preview the first five rows of the per-hour run counts.
run_hours.head(n=5)

In [25]:
# Show the first five rows of run_hours.
run_hours.head(n=5)

In [31]:
# Import pyplot and reset Matplotlib's rc settings to their defaults.
# NOTE(review): pyplot is imported twice in this cell; the second import is redundant.
import matplotlib.pyplot as plt; plt.rcdefaults()
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Bar chart of the 'count%' column per start hour.
run_hours.plot(kind='bar', x='hour', y='count%', rot=0, title="running activity start hour")

# Load data into Google Sheets
https://console.developers.google.com/?pli=1

In [None]:
#%pip install df2gspread

import gspread
from oauth2client.service_account import ServiceAccountCredentials

%cd /content/drive/My\ Drive/python_workbooks

# use creds to create a client to interact with the Google Drive API
#scope = ['https://spreadsheets.google.com/feeds']
scope = ['https://spreadsheets.google.com/feeds' + ' ' +'https://www.googleapis.com/auth/drive']
credentials = ServiceAccountCredentials.from_json_keyfile_name('client_secret.json', scope)
gc = gspread.authorize(credentials)


# Find a workbook by name and open the first sheet
# Make sure you use the right name here.
#sheet = gc.open("daily run results").sheet


In [42]:
spreadsheet = gc.open("daily run results")
# Use the STRAVA_TEST worksheet, looked up by title; create it only when it does
# not exist yet. Any other failure (auth, quota, network) is left to propagate
# instead of silently falling back to whichever sheet happens to be first.
try:
  worksheet = spreadsheet.worksheet("STRAVA_TEST")
except gspread.exceptions.WorksheetNotFound:
  worksheet = spreadsheet.add_worksheet(title="STRAVA_TEST", rows=10000, cols=20)

In [43]:
# Fetch the spreadsheet's metadata; the cell below lists its top-level keys
# ('spreadsheetId', 'properties', 'sheets', 'spreadsheetUrl').
sheet_meta = spreadsheet.fetch_sheet_metadata()

In [44]:
# Reference snippet (gspread library), kept disabled inside a string: it walks
# sheet_meta['sheets'] and copies each sheet's 'properties' into a dict,
# collecting the dicts in list_of_sheets.
"""
sheet_meta.keys()
list_of_sheets = []
for k in sheet_meta['sheets']:
  #print(k)
  sheetdict = {}
  for k,v in k['properties'].items():
    #print(sheet)
    sheetdict[k] = v
  list_of_sheets.append(sheetdict)
    #print(k)
"""

dict_keys(['spreadsheetId', 'properties', 'sheets', 'spreadsheetUrl'])

In [45]:

import gspread
from gspread_dataframe import set_with_dataframe

# Export actDF to the selected Google Sheet worksheet.
# NOTE(review): set_with_dataframe presumably writes from the sheet's top-left
# cell rather than appending below existing rows — confirm before relying on it.
#sheet = select_worksheet('worksheet key here',1)
set_with_dataframe(worksheet=worksheet, dataframe=actDF)

In [None]:
"""
# development - reading clubs data
starting_page = 1
limit = 10

output_list = []
params = {'per_page': 100, 'page': 1}
for i in range(starting_page, limit):
  params['page'] = i

  url = "https://www.strava.com/api/v3/clubs/554103/activities?access_token=826f7afa7a0db197aa3a82fb5632392a62dae807"
  r = requests.get(url,headers=header, params = params)
  data = r.json()
  output_list.append(data)

print(len(output_list))
output_list[0]
"""