In [503]:
import numpy as np
import pandas as pd
import requests
import time
import os
import shutil
from tqdm import tqdm

from __future__ import print_function
from apiclient.discovery import build
from apiclient.http import MediaFileUpload
from httplib2 import Http
from oauth2client import file as oauth_file, client, tools
# If modifying these scopes, delete the file token.json.
SCOPES = 'https://www.googleapis.com/auth/drive'

In [501]:
# Notebook-wide configuration.
# Id of the Google Drive folder passed as the parent when uploading files.
remote_folder_id = '1bxUFafpwtUZT2CMOOPjosFhN9FTUBN4l'
# Local directory that receives one CSV per API sample.
tmp_data_dir = 'tmp'
# Local directory that receives the condensed gzip files.
data_dir = 'data'
# Unit multipliers (in seconds) used to express wait times, e.g. 30*seconds.
minutes = 60
seconds = 1

In [509]:
def get_drive_service():
    """Return a Google Drive v3 service object authorised with stored credentials.

    Credentials are read from ``token.json``. When none are stored, or the
    stored ones are invalid, an OAuth flow is started from ``credentials.json``
    with the module-level ``SCOPES``; the token store is handed to the flow
    runner.
    """
    token_store = oauth_file.Storage('token.json')
    credentials = token_store.get()
    needs_login = (not credentials) or credentials.invalid
    if needs_login:
        credentials = tools.run_flow(
            client.flow_from_clientsecrets('credentials.json', SCOPES),
            token_store,
        )
    authorized_http = credentials.authorize(Http())
    return build('drive', 'v3', http=authorized_http)

def mkdir_GDrive(folder_name):
    """Create a folder named ``folder_name`` in Google Drive.

    Args:
        folder_name: name given to the new Drive folder.

    Returns:
        The ``id`` field of the created folder, as returned by the Drive API.
    """
    service = get_drive_service()
    folder_spec = dict(
        name=folder_name,
        mimeType='application/vnd.google-apps.folder',
    )
    # Only the id is requested back from the API.
    create_request = service.files().create(body=folder_spec, fields='id')
    created = create_request.execute()
    return created.get('id')

def upload_GDrive(local_filepath, remote_filename, remote_folder_id=None, mimetype='text/plain'):
    """Upload a local file to Google Drive.

    Args:
        local_filepath: path of the file on disk to upload.
        remote_filename: name the file is given in Google Drive.
        remote_folder_id: id of the Drive folder to place the file in; when
            None no parent is set in the request.
        mimetype: MIME type declared for the uploaded content.

    Returns:
        The ``id`` field of the created Drive file.
    """
    drive_service = get_drive_service()

    # The metadata must use the `remote_filename` parameter; the previous code
    # referenced an undefined name (`remote_filepath`) and raised NameError.
    file_metadata = {'name': remote_filename}
    if remote_folder_id is not None:
        file_metadata['parents'] = [remote_folder_id]
    media = MediaFileUpload(local_filepath, mimetype=mimetype, resumable=True)
    uploaded = drive_service.files().create(body=file_metadata,
                                            media_body=media,
                                            fields='id').execute()
    return uploaded.get('id')
    
def csv_to_gzip():
    """Condense the per-sample CSV files in ``tmp_data_dir`` into one gzipped CSV.

    Every ``<timestamp>.csv`` file in ``tmp_data_dir`` is read with its first
    column as the index. The frames are stacked under a two-level row index of
    (timestamp taken from the file name, original row label) and written to
    ``data_dir`` as ``<current time>.gz``. ``data_dir`` is created if missing.

    Returns:
        str: name (not the full path) of the gzip file written to ``data_dir``.

    Raises:
        ValueError: if ``tmp_data_dir`` contains no ``.csv`` files.
    """
    # Sort the names: os.listdir returns entries in arbitrary order, and the
    # file names are timestamps, so sorting yields a deterministic row order.
    csv_names = sorted(name for name in os.listdir(tmp_data_dir)
                       if name.endswith('.csv'))
    if not csv_names:
        raise ValueError('no .csv files found in {!r}'.format(tmp_data_dir))

    timestamps = [name[:-4] for name in csv_names]
    frames = [pd.read_csv(os.path.join(tmp_data_dir, name), index_col=0)
              for name in csv_names]

    # `keys` builds the (timestamp, row label) MultiIndex for any number of
    # rows per file, instead of assuming exactly three, and a single concat
    # avoids growing the frame one append at a time.
    df = pd.concat(frames, keys=timestamps)

    if not os.path.isdir(data_dir):
        os.makedirs(data_dir)

    filename = '{}.gz'.format(time.strftime("%Y-%m-%d_%H:%M:%S"))
    df.to_csv(os.path.join(data_dir, filename), compression='gzip')

    # NOTE(review): the tmp folder is not emptied, so every call re-reads all
    # CSVs written so far — confirm whether that is intended.
    # Empty tmp folder
    #shutil.rmtree(tmp_data_dir)

    return filename

In [270]:
# Make new folder in Google Drive
#mkdir_GDrive('data')

'1bxUFafpwtUZT2CMOOPjosFhN9FTUBN4l'

In [250]:
# TfL Unified API credentials
# The id/key can be supplied through the TFL_APP_ID / TFL_APP_KEY environment
# variables so they do not have to live in the notebook.
# NOTE(review): the literal fallbacks are kept only so existing runs keep
# working; credentials committed to source should be rotated and removed.
bikepoint_endpoint = 'https://api.tfl.gov.uk/BikePoint'
api_key = os.environ.get('TFL_APP_KEY', '92d100947363f402b8799976aea7fb43')
api_id = os.environ.get('TFL_APP_ID', 'fffcdfe2')
# Query parameters sent with every BikePoint request.
payload = {'app_id': api_id, 'app_key': api_key}

In [None]:
# How long samples are collected before a gzip is built and uploaded, and how
# long to wait between two consecutive samples.
upload_loop_wait_time = 30*seconds
write_loop_wait_time = 5*seconds
# Upper bound on a single API request so a stalled connection cannot hang the
# collector indefinitely.
request_timeout = 10*seconds

if not os.path.isdir(tmp_data_dir):
    os.mkdir(tmp_data_dir)

### Infinite Loop ###
while True:

    ### Upload Loop ###
    # Upload to Google Drive after X minute(s)
    upload_loop_start_time = time.time() # in secs
    # The context manager closes the progress bar at the end of every upload
    # cycle (and on error) instead of leaving one open bar per cycle.
    with tqdm(total=(upload_loop_wait_time//write_loop_wait_time)) as pbar:
        while (time.time() - upload_loop_start_time) <= upload_loop_wait_time:
            # Start time
            write_loop_start_time = time.time() # in secs

            # Request new bikepoint data; fail with a clear HTTP error rather
            # than an obscure one while parsing an error payload.
            response = requests.get(bikepoint_endpoint, params=payload,
                                    timeout=request_timeout)
            response.raise_for_status()
            bikepoints = response.json()

            # Extract just data relating to bike availability
            # (the additional properties whose key starts with 'Nb')
            bikepoint_dict = {}
            for bikepoint in bikepoints:
                bikepoint_dict[bikepoint['id']] = {item['key']:item['value']
                                                   for item in bikepoint['additionalProperties']
                                                   if item['key'][:2]=='Nb'}
            bikepoint_df = pd.DataFrame(bikepoint_dict)

            # Save to file, named after the time the sample was written
            bikepoint_df.to_csv(
                os.path.join(tmp_data_dir,
                '{}.csv'.format(time.strftime("%Y-%m-%d_%H:%M:%S")))
            )

            # Update progress bar
            pbar.update(1)

            ### Write Loop ###
            # Sleep once for whatever is left of the sampling interval
            # instead of polling the clock every 0.1 secs.
            remaining = write_loop_wait_time - (time.time() - write_loop_start_time)
            if remaining > 0:
                time.sleep(remaining)

    # Condense tmp csv files into gzip
    filename = csv_to_gzip()
    # Save to Google Drive
    local_filepath = os.path.join(data_dir, filename)
    remote_filename = filename
    # The file is gzip-compressed, so declare the gzip MIME type (not zip).
    upload_GDrive(local_filepath, remote_filename, remote_folder_id=remote_folder_id,
                  mimetype='application/gzip')

In [438]:
# Reading a MultiIndex DataFrame from a CSV
#tmp = pd.read_csv(os.path.join(data_dir, '2018-08-07_17:35:30.csv.gz'), 
#                  compression='gzip', index_col=[0,1])

# Accessing rows in a MultiIndex DataFrame
#df.loc['2018-08-07_15:31:58', 'NbBikes']
#df.loc['2018-08-07_15:31:58']