In [None]:
import os
from datetime import datetime
from getpass import getpass

import pymysql as mdb
import requests

# Endpoint with the information for each station that remains stable over time
url_stations = 'https://gbfs.citibikenyc.com/gbfs/en/station_information.json'

# Endpoint with the live status of all the stations (e.g., bikes available etc)
url_status = 'https://gbfs.citibikenyc.com/gbfs/en/station_status.json'



In [None]:
# This code creates a connection to the database.
# The connection settings are read from environment variables so that the
# password is never stored in the notebook. MYSQL_HOST and MYSQL_USER fall
# back to 'localhost' and 'root'; when MYSQL_PASSWORD is not set the password
# is asked for interactively (the typed text is not echoed).
db_host = os.environ.get('MYSQL_HOST', 'localhost')
db_user = os.environ.get('MYSQL_USER', 'root')
db_password = os.environ.get('MYSQL_PASSWORD') or getpass('MySQL password: ')

con = mdb.connect(host=db_host,
                  user=db_user,
                  password=db_password,
                  charset='utf8', use_unicode=True)

In [None]:
# Run a query to create a database that will hold the data.
# IF NOT EXISTS makes the statement safe to run again on an existing database
db_name = 'citibike_new'
create_db_query = "CREATE DATABASE IF NOT EXISTS {db} DEFAULT CHARACTER SET 'utf8'".format(db=db_name)

# Create the database. The with-block closes the cursor even when the
# statement raises, so a failed run does not leave an open cursor behind
with con.cursor() as cursor:
    cursor.execute(create_db_query)

In [None]:
# Create the two tables. One for storing the time-invariant station data
# and another table to store the time-varying station status data.
# This cell creates the first one (Stations), with one row per station_id.
# NOTE(review): MySQL FLOAT is single precision; consider DOUBLE for lat/lon
# if the full coordinate precision of the feed matters downstream
table_name = 'Stations'
create_table_query = '''CREATE TABLE IF NOT EXISTS {db}.{table} 
                                (station_id int, 
                                name varchar(250), 
                                capacity int,
                                lat float,
                                lon float,
                                region_id int,
                                short_name varchar(250),
                                rental_url varchar(250),
                                eightd_has_key_dispenser bool,
                                PRIMARY KEY(station_id)
                                )'''.format(db=db_name, table=table_name)

# The with-block closes the cursor even when the statement raises
with con.cursor() as cursor:
    cursor.execute(create_table_query)

In [None]:
# Create the time-varying table (Status).
# The primary key is (station_id, last_reported), so the table keeps one row
# per station for every distinct report time
table_name = 'Status'
create_table_query = '''CREATE TABLE IF NOT EXISTS {db}.{table} 
                                (station_id int, 
                                last_reported datetime,
                                num_bikes_available int,
                                num_bikes_disabled int,
                                num_docks_available int,
                                num_docks_disabled int,
                                is_installed bool,
                                is_renting bool,
                                is_returning bool,
                                eightd_has_available_keys bool,
                                PRIMARY KEY(station_id, last_reported)
                                )'''.format(db=db_name, table=table_name)

# The with-block closes the cursor even when the statement raises
with con.cursor() as cursor:
    cursor.execute(create_table_query)

In [None]:
# We fetch for now just the time-invariant data and load it into Stations.
# Notice that we have the INSERT IGNORE so that even when we add the same entry
# again, we do not get an error that the line exists. We do get warnings
# but this is expected
response = requests.get(url_stations, timeout=30)  # seconds; never wait forever on a stalled feed
response.raise_for_status()  # stop on an HTTP error instead of parsing an error body
results = response.json()
data = results["data"]["stations"]

table_name = 'Stations'
query_template = '''INSERT IGNORE INTO {db}.{table}(station_id, name,
                                        capacity, lat, lon,
                                        region_id,
                                        short_name,
                                        rental_url,
                                        eightd_has_key_dispenser)
                    VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)'''.format(db=db_name, table=table_name)

# One parameter tuple per station, in the column order of query_template.
# Fields read with .get() are stored as NULL when the feed omits them, so a
# missing optional or vendor-specific field does not abort the whole load
# with a KeyError. station_id, name, lat and lon stay mandatory.
# NOTE(review): int() assumes station_id is a numeric string -- confirm
# against the live feed
station_rows = [
    (int(entry['station_id']),
     entry['name'],
     entry.get('capacity'),
     entry['lat'],
     entry['lon'],
     entry.get('region_id'),
     entry.get('short_name'),
     entry.get('rental_url'),
     entry.get('eightd_has_key_dispenser'))
    for entry in data
]

# executemany sends every row through a single call. It returns None for an
# empty list, hence the "or 0". The with-block closes the cursor even when
# the insert raises
with con.cursor() as cursor:
    inserted = cursor.executemany(query_template, station_rows) or 0
con.commit()

# One summary line instead of one line of output per station
print("Inserted", inserted, "new stations out of", len(station_rows), "in the feed")

In [None]:
# Now we fetch the data about the time varying elements of the citibike stations
# and load it into Status. INSERT IGNORE skips a row whose
# (station_id, last_reported) pair is already stored, so the cell can be run
# repeatedly to accumulate new reports
response = requests.get(url_status, timeout=30)  # seconds; never wait forever on a stalled feed
response.raise_for_status()  # stop on an HTTP error instead of parsing an error body
results = response.json()
data = results["data"]["stations"]

table_name = 'Status'
query_template = '''INSERT IGNORE INTO {db}.{table}(station_id,
                                            num_bikes_available,
                                            num_bikes_disabled,
                                            num_docks_available,
                                            num_docks_disabled,
                                            is_installed,
                                            is_renting,
                                            is_returning,
                                            last_reported,
                                            eightd_has_available_keys)
                    VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)'''.format(db=db_name, table=table_name)

# One parameter tuple per station, in the column order of query_template.
# Fields read with .get() are stored as NULL when the feed omits them, so a
# missing optional or vendor-specific field does not abort the whole load
# with a KeyError.
# datetime.fromtimestamp() without a tz argument converts the epoch seconds
# to the local time of the machine running the notebook.
# NOTE(review): local time repeats an hour when daylight saving ends, which
# can make two different reports share one primary key -- confirm whether
# last_reported should be stored in UTC instead.
# NOTE(review): int() assumes station_id is a numeric string -- confirm
# against the live feed
status_rows = [
    (int(entry['station_id']),
     entry['num_bikes_available'],
     entry.get('num_bikes_disabled'),
     entry['num_docks_available'],
     entry.get('num_docks_disabled'),
     entry['is_installed'],
     entry['is_renting'],
     entry['is_returning'],
     datetime.fromtimestamp(entry['last_reported']),
     entry.get('eightd_has_available_keys'))
    for entry in data
]

# executemany sends every row through a single call. It returns None for an
# empty list, hence the "or 0". The with-block closes the cursor even when
# the insert raises
with con.cursor() as cursor:
    inserted = cursor.executemany(query_template, status_rows) or 0
con.commit()

# One summary line instead of one line of output per station
print("Inserted", inserted, "new status rows out of", len(status_rows), "in the feed")

In [None]:
# Close the database connection that the cells above used
con.close()
