## Citibike Dataset



We have deployed the code below as a Cloud Function on Google Cloud (under the `nyu-datasets` project); it stores the data as Parquet files in a Google Cloud Storage bucket.

In [None]:
import functions_framework

import pandas as pd
from datetime import datetime
import requests
import gcsfs

def store_api_call_to_google_bucket(call_name):
    """Fetch one Citi Bike GBFS feed and write it to the parquet archive.

    Downloads ``https://gbfs.citibikenyc.com/gbfs/en/{call_name}.json``,
    loads the ``data.stations`` list into a DataFrame, tags every row with
    the calendar date of the feed's ``last_updated`` timestamp, and writes
    the frame under ``gs://citibike-archive/{call_name}/`` partitioned by
    that date.

    Args:
        call_name: Name of the GBFS feed to fetch, e.g.
            ``'station_information'`` or ``'station_status'``.

    Raises:
        requests.HTTPError: If the endpoint answers with a 4xx/5xx status.
        requests.RequestException: On connection failure or timeout.
        KeyError: If the payload lacks ``last_updated`` or
            ``data.stations``.
    """
    # Local import: the file only imports `datetime` from the datetime module.
    from datetime import timezone

    feed_url = f'https://gbfs.citibikenyc.com/gbfs/en/{call_name}.json'
    # Without a timeout a stalled endpoint would block the function until
    # the platform kills it.
    resp = requests.get(feed_url, timeout=30)
    # Fail loudly on an HTTP error instead of trying to parse an error page.
    resp.raise_for_status()
    data = resp.json()

    # Pin the partition date to UTC so it does not depend on the timezone
    # configured on whatever host runs this code.
    partition = datetime.fromtimestamp(
        data['last_updated'], tz=timezone.utc
    ).strftime("%Y-%m-%d")

    df = pd.DataFrame(data['data']['stations'])
    df['last_updated_date'] = partition

    df.to_parquet(
        path=f"gs://citibike-archive/{call_name}/",
        # NOTE(review): presumably a service-account key file deployed
        # next to the function source — confirm.
        storage_options={"token": "nyu-datasets-f47ad7afeb13.json"},
        engine="pyarrow",
        compression="snappy",
        partition_cols=["last_updated_date"],
    )


@functions_framework.http
def citibike(request):
    """HTTP entry point: archive the Citi Bike station feeds.

    Stores the ``station_information`` feed and then the ``station_status``
    feed via ``store_api_call_to_google_bucket``. The incoming ``request``
    is not inspected.

    Returns:
        The string ``"Success"`` once both feeds have been stored.
    """
    for feed_name in ('station_information', 'station_status'):
        store_api_call_to_google_bucket(feed_name)
    return "Success"

