# Notebook for configuring access to Google Drive, and looking at Polar H10 workout data!


## Set up Google Drive connection

We are using a special "Service Account" to access Google Drive. To access Google Drive, you need to do two things:
1. Store the service account credential (a JSON file - ask Aubrey for it) on your computer, then enter its path below as *credentials_path*.
2. Replace the *folder_id* with the end of the web URL for the folder on Google Drive you are interested in exploring.

The example below is for accessing Polar H10 workout data stored in Google Drive. Data was obtained from participant exports after study completion.

In [9]:
import io
import pandas as pd


# Import packages for connecting to Google Drive
from google.oauth2 import service_account
from googleapiclient.discovery import build
# from googleapiclient.errors import HttpError
# from googleapiclient.http import MediaIoBaseDownload

# STEP 1 - Point credentials_path at your local copy of the service account
# credential (the JSON file).
credentials_path = '/Users/imogengardiner/Documents/exercise_credential.json'

# STEP 2 - Set folder_id to the Google Drive folder you want to import data from.
#   How to find it:
#     1. Search online for the HIIT and Endurance Study shared Google Drive and
#        navigate to the folder holding the data you are interested in.
#     2. Look at the web URL of that folder; the folder_id is the last part of it.
#   Example (Polar data): HIIT and Endurance Study Drive > Data > data > workout folder.
#   That folder's URL is
#   https://drive.google.com/drive/folders/1a9Bmg89_9m9BaYLsLw52PS_m07FQ7CtZ,
#   so the folder_id is the trailing '1a9Bmg89_9m9BaYLsLw52PS_m07FQ7CtZ'.
folder_id = '1a9Bmg89_9m9BaYLsLw52PS_m07FQ7CtZ'

# Build an authenticated Drive v3 client from the service account credential.
credentials = service_account.Credentials.from_service_account_file(
    credentials_path,
    scopes=['https://www.googleapis.com/auth/drive'],
)
drive_service = build('drive', 'v3', credentials=credentials)

# Get files
def get_file_ids_from_dir(parent_id):
    """Map file names to Drive file IDs for the direct children of a folder.

    Uses the module-level ``drive_service`` client and searches the shared
    drive whose ID is hard-coded below.

    Args:
        parent_id: ID of the Google Drive folder to list.

    Returns:
        A dict mapping each child's name to its Drive file ID. If two
        children share a name, the one listed last wins.

    Raises:
        Exception: If the folder has no children.
    """
    id_name = {}
    page_token = None
    while True:
        # files().list returns one page of results per call (100 items by
        # default), so keep following nextPageToken until the listing is
        # exhausted; otherwise larger folders are silently truncated.
        results = drive_service.files().list(
            corpora='drive',
            # ID of the HIIT and Endurance Study shared drive, taken from the
            # end of its URL:
            # https://drive.google.com/drive/folders/0AB6End4Uf7P-Uk9PVA
            driveId='0AB6End4Uf7P-Uk9PVA',
            q=f"'{parent_id}' in parents",
            includeItemsFromAllDrives=True,
            supportsAllDrives=True,
            pageToken=page_token,
        ).execute()
        for item in results.get('files', []):
            id_name[item['name']] = item['id']
        page_token = results.get('nextPageToken')
        if not page_token:
            break
    if not id_name:
        raise Exception(f"Folder {parent_id} has no files!")
    return id_name

def load_data_from_drive(_drive_service, file_id):
    """Download one CSV file from Google Drive and parse it into a DataFrame.

    Args:
        _drive_service: Drive API client, e.g. the object returned by
            ``build('drive', 'v3', ...)``.
        file_id: Drive ID of the file to download.

    Returns:
        A pandas DataFrame read from the downloaded CSV, with the
        ``_realtime`` and ``_time`` columns parsed as dates.
    """
    # Imported locally so the function works on a fresh kernel even though the
    # notebook's top-level import of MediaIoBaseDownload is commented out
    # (calling this function would otherwise raise NameError).
    from googleapiclient.http import MediaIoBaseDownload

    request = _drive_service.files().get_media(fileId=file_id)
    io_buffer = io.BytesIO()
    downloader = MediaIoBaseDownload(io_buffer, request)
    done = False
    while not done:
        # next_chunk() returns (status, done); only the completion flag is used.
        _, done = downloader.next_chunk()
    # Rewind so pandas reads the downloaded bytes from the start.
    io_buffer.seek(0)
    df = pd.read_csv(io_buffer, parse_dates=['_realtime', '_time'])
    # Optional filter, currently disabled:
    # df = df[df['value'] >= 0.8 * df['target_hr_45']]
    return df

# List the folder once: fileids maps each file name to its Drive file ID.
fileids = get_file_ids_from_dir(folder_id)

# Uncomment to see which file names are available in the folder.
# print(fileids)

# Load one specifically named workout file on its own.
# Raises KeyError if that file name is not present in the folder listing.
df_sl = load_data_from_drive(drive_service, fileids['018_qtz1b19261322566215748_workout_allevents.csv'])

# Download every file in the folder into a list of DataFrames, in listing order.
# NOTE(review): this downloads the file already loaded into df_sl a second time,
# and assumes every item in the folder is a CSV with _realtime/_time columns -
# confirm against the folder contents.
dfsl_list = []
for fname, fid in fileids.items():
    dfsl = load_data_from_drive(drive_service, fid)
    dfsl_list.append(dfsl)


In [11]:
# Display the list of DataFrames loaded from the Drive folder.
dfsl_list

[        value               _realtime                    _time  ppt_id  wk_id  \
 0        75.0 2023-05-01 14:07:32.578  2023-05-01 14:07:32.578       6      1   
 1        75.0 2023-05-01 14:07:33.578  2023-05-01 14:07:33.578       6      1   
 2        76.0 2023-05-01 14:07:34.578  2023-05-01 14:07:34.578       6      1   
 3        76.0 2023-05-01 14:07:35.578  2023-05-01 14:07:35.578       6      1   
 4        78.0 2023-05-01 14:07:36.578  2023-05-01 14:07:36.578       6      1   
 ...       ...                     ...                      ...     ...    ...   
 704908  121.0 2023-09-09 21:52:50.246  2023-09-09 21:52:50.246      19     10   
 704909  120.0 2023-09-09 21:52:51.246  2023-09-09 21:52:51.246      19     10   
 704910  120.0 2023-09-09 21:52:52.246  2023-09-09 21:52:52.246      19     10   
 704911  119.0 2023-09-09 21:52:53.246  2023-09-09 21:52:53.246      19     10   
 704912  119.0 2023-09-09 21:52:54.246  2023-09-09 21:52:54.246      19     10   
 
         wo_id