Credit to [Carlos Torres](https://github.com/carlps/bikeshare) for a useful example of how to access this data. 


**CDC structure** (e.g. station_status)
* Get latest station_status data (latest_data) from data table: {\<station_id\>:\<data\>} where last_updated == max(last_updated)
* Get API data as JSON (new_data)
* Compare latest_data and new_data:
  *  For row in new_data:
     *  propagate "last_updated": row["last_updated"] = new_data["last_updated"] 
     *  if row["station_id"] not in latest_data.keys():
        *  append to output
        *  append to latest_data
        *  log as new row 
     * elif row != latest_data\[row["station_id"]\] (i.e. row has updates):
       * append to output 
       * update latest_data: latest_data\[row["station_id"]\] = row
* Load output to data table (if output non-empty)

In [7]:
import requests

In [9]:
import logging
import os

# File that receives the log records of this notebook.
LOG_PATH = './logs/bks_cdc.txt'

# FileHandler does not create missing parent directories, so make sure the
# log folder exists before the file is opened.
os.makedirs(os.path.dirname(LOG_PATH), exist_ok=True)

logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)

# getLogger returns the same logger object every time this code runs within
# one interpreter session. Drop any handler a previous run attached for the
# same file; otherwise each message would be written once per run.
for _stale in list(logger.handlers):
  if (isinstance(_stale, logging.FileHandler)
      and _stale.baseFilename == os.path.abspath(LOG_PATH)):
    logger.removeHandler(_stale)
    _stale.close()

handler = logging.FileHandler(LOG_PATH)
handler.setLevel(logging.INFO)

formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
handler.setFormatter(formatter)

# Add the handler to the logger
logger.addHandler(handler)

In [13]:
from google.cloud import bigquery
from google.oauth2 import service_account

# Coordinates of the BigQuery table used by this notebook.
PROJECT_ID = "dc-bike"
DATASET_ID = "bike_data"
TABLE_ID = "bks_station_status"
full_table_id = ".".join((PROJECT_ID, DATASET_ID, TABLE_ID))

# Load the service-account key from disk and request the cloud-platform scope.
key_path = "/home/alex/.creds/dc-bike-sa.json"
credentials = service_account.Credentials.from_service_account_file(
  key_path,
  scopes=["https://www.googleapis.com/auth/cloud-platform"],
)

# The client's project is taken from the credentials, not from PROJECT_ID.
client = bigquery.Client(
  credentials=credentials,
  project=credentials.project_id,
)

In [14]:
# Create table for streaming station_status data

# One column per key of a station record in the station_status API response,
# plus "last_updated", which the CDC outline propagates onto every row from
# the top level of the response.
schema = [
  bigquery.SchemaField("station_id", "STRING", mode="REQUIRED"),
  bigquery.SchemaField("legacy_id", "STRING", mode="NULLABLE"),
  bigquery.SchemaField("station_status", "STRING", mode="NULLABLE"),
  bigquery.SchemaField("num_docks_available", "INTEGER", mode="NULLABLE"),
  bigquery.SchemaField("num_docks_disabled", "INTEGER", mode="NULLABLE"),
  bigquery.SchemaField("num_bikes_available", "INTEGER", mode="NULLABLE"),
  bigquery.SchemaField("num_bikes_disabled", "INTEGER", mode="NULLABLE"),
  bigquery.SchemaField("num_scooters_available", "INTEGER", mode="NULLABLE"),
  bigquery.SchemaField("num_scooters_unavailable", "INTEGER", mode="NULLABLE"),
  bigquery.SchemaField("num_ebikes_available", "INTEGER", mode="NULLABLE"),
  # The API reports these three flags as 1/0, hence INTEGER rather than BOOL.
  bigquery.SchemaField("is_installed", "INTEGER", mode="NULLABLE"),
  bigquery.SchemaField("is_renting", "INTEGER", mode="NULLABLE"),
  bigquery.SchemaField("is_returning", "INTEGER", mode="NULLABLE"),
  bigquery.SchemaField("eightd_has_available_keys", "BOOL", mode="NULLABLE"),
  bigquery.SchemaField("last_reported", "INTEGER", mode="NULLABLE"),
  bigquery.SchemaField("last_updated", "INTEGER", mode="NULLABLE"),
]

table = bigquery.Table(full_table_id, schema=schema)
# exists_ok=True keeps this cell re-runnable: without it a second run raises
# a Conflict error because the table was already created by the first run.
client.create_table(table, exists_ok=True)

Table(TableReference(DatasetReference('dc-bike', 'bike_data'), 'bks_station_status'))

In [29]:
# Fetch the most recent snapshot: every row whose last_updated equals the
# table-wide maximum. The table name comes from TABLE_ID so it is spelled in
# one place only.
query = f"""SELECT * FROM {DATASET_ID}.{TABLE_ID}
WHERE last_updated = (
  SELECT MAX(last_updated) FROM {DATASET_ID}.{TABLE_ID}
)"""

query_job = client.query(query)
# result() blocks until the query job has finished.
result = query_job.result()

In [30]:
# Display the row iterator returned by the query.
result

<google.cloud.bigquery.table.RowIterator at 0x7fe24d354350>

In [None]:
def get_last_db_update():
  """Retrieve the latest snapshot from bks_station_status.

  Selects every row whose ``last_updated`` equals the largest
  ``last_updated`` value stored in the table.

  Returns:
    dict mapping each row's ``station_id`` to that row's columns as a plain
    dict. Empty when the query returns no rows.
  """
  # An aggregate such as MAX() cannot be used directly in a WHERE clause, so
  # the maximum is computed in a scalar subquery. The table is qualified with
  # its dataset because the client is created without a default dataset.
  query = f"""SELECT * FROM {DATASET_ID}.{TABLE_ID}
  WHERE last_updated = (
    SELECT MAX(last_updated) FROM {DATASET_ID}.{TABLE_ID}
  )"""

  query_job = client.query(query)
  result = query_job.result()

  # Key by station_id, the lookup shape the CDC outline describes.
  return {row["station_id"]: dict(row.items()) for row in result}

  
  



In [None]:
def get_api_data(table_name: str) -> dict:
  """Retrieve latest data from API.

  Args:
    table_name: Name of the feed to download, e.g. "station_status". It is
      interpolated into the feed URL.

  Returns:
    dict mapping a 0-based position to each record, in the order the API
    returned them.

  Raises:
    requests.HTTPError: If the API answers with an error status.
    ValueError: If the "data" section of the response does not hold exactly
      one entry.
  """
  url = f'https://gbfs.capitalbikeshare.com/gbfs/en/{table_name}.json'

  # requests applies no timeout by default; bound the wait so a stalled
  # connection cannot hang the job forever.
  response = requests.get(url, timeout=30)
  response.raise_for_status()
  response_json = response.json()

  payload = response_json['data']
  if len(payload) != 1:
    # Logged at ERROR because the logger is configured at INFO, which would
    # silently drop a DEBUG message right before the exception is raised.
    logger.error(f"Unusual response: response['data'] dictionary of length {len(payload)}")
    logger.debug(f'Response: {response_json}')
    raise ValueError("API response has unexpected format")

  # "data" holds a single entry (e.g. "stations") whose value is the list of
  # records. dict views cannot be indexed, so take the first value via iter.
  records = next(iter(payload.values()))

  # Number the records so that each one keeps its own key.
  results = dict(enumerate(records))

  logger.info(f"Extracted {len(results)} records from API ")

  return results

In [None]:
def compare_data() -> None:
    """Compare API data and latest data.

    NOTE: not implemented yet. The body consists of this docstring only, so
    calling the function does nothing and returns None.

    Planned behavior, per the CDC outline at the top of this notebook: walk
    the rows of the API data and collect each row whose station_id is missing
    from the latest stored data or whose content differs from the stored row.
    """

In [None]:
def update_db() -> None:
  """Update streaming table with any new or changed rows from compare_data.

  NOTE: not implemented yet. The body consists of this docstring only, so
  calling the function does nothing and returns None.

  Per the CDC outline at the top of this notebook, the load is meant to
  happen only when there is at least one new or changed row.
  """

In [1]:
import requests

url = "https://gbfs.capitalbikeshare.com/gbfs/en/station_status.json"

# requests applies no timeout by default; bound the wait so a stalled
# connection cannot block the notebook indefinitely.
response = requests.get(url, timeout=30)

# Turn HTTP error statuses into exceptions instead of parsing an error body.
response.raise_for_status()

response_json = response.json()

In [2]:
# Echo the parsed payload so its structure can be inspected.
response_json

{'data': {'stations': [{'num_scooters_available': 0,
    'station_id': '84a0159b-5f00-417a-8f06-6c7c8437049f',
    'eightd_has_available_keys': False,
    'is_installed': 1,
    'station_status': 'active',
    'num_docks_available': 5,
    'legacy_id': '84a0159b-5f00-417a-8f06-6c7c8437049f',
    'num_bikes_available': 10,
    'num_bikes_disabled': 0,
    'is_returning': 1,
    'is_renting': 1,
    'num_scooters_unavailable': 0,
    'last_reported': 1678805051,
    'num_docks_disabled': 0,
    'num_ebikes_available': 1},
   {'num_scooters_available': 0,
    'station_id': '08254284-1f3f-11e7-bf6b-3863bb334450',
    'eightd_has_available_keys': False,
    'is_installed': 1,
    'station_status': 'active',
    'num_docks_available': 19,
    'legacy_id': '204',
    'num_bikes_available': 5,
    'num_bikes_disabled': 1,
    'is_returning': 1,
    'is_renting': 1,
    'num_scooters_unavailable': 0,
    'last_reported': 1678805049,
    'num_docks_disabled': 0,
    'num_ebikes_available': 0},
 