In [9]:
!sudo pip3 install -U -q PyMySQL sqlalchemy sql_magic

## Inserting data in MySQL using Python

First, let's start with a basic piece of code that fetches the data that we want to insert into the database. For our example, we will get the data about the Citibike stations, using the corresponding API call provided by the Citibike website:

In [2]:
import os
import uuid
from datetime import date, datetime, timedelta
from urllib.parse import quote_plus

import requests

In [3]:
# Let's get the data from the Citibike API
url = "https://gbfs.citibikenyc.com/gbfs/en/station_information.json"

# A timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status() turns an HTTP error into an immediate, readable exception
# instead of a confusing JSON/KeyError further down the notebook.
response = requests.get(url, timeout=30)
response.raise_for_status()
results = response.json()

In [4]:
# We only need a subset of the JSON returned by the Citibike API,
# so we keep only the entries stored under "data" -> "stations".
data = results["data"]["stations"]

In [5]:
# How many station entries did we keep?
len(data)

1393

In [6]:
from sqlalchemy import create_engine

# Connection settings. Each value can be overridden through an environment
# variable (MYSQL_HOST / MYSQL_USER / MYSQL_PASSWORD) so that other credentials
# never have to be typed into the notebook; the fallbacks are the values this
# notebook has always used.
db_host = os.environ.get("MYSQL_HOST", "db.ipeirotis.org")
db_user = os.environ.get("MYSQL_USER", "student")
db_password = os.environ.get("MYSQL_PASSWORD", "dwdstudent2015")

# User name and password are URL-escaped because characters such as "@" or "/"
# would otherwise be parsed as part of the URL structure.
conn_string = "mysql+pymysql://{user}:{password}@{host}/".format(
    host=db_host, user=quote_plus(db_user), password=quote_plus(db_password)
)

engine = create_engine(conn_string)

ModuleNotFoundError: No module named 'pymysql'

Once we have connected successfully, we need to create our database:

In [None]:
# Query to create a database.
# In this example, we will try to create the (existing) database "public",
# but in general we can give any name to the database.
db_name = "public"
create_db_query = (
    f"CREATE DATABASE IF NOT EXISTS {db_name} DEFAULT CHARACTER SET 'utf8'"
)

# Create the database.
# Engine.execute() is deprecated in SQLAlchemy 1.4 and removed in 2.0, so we
# run the statement on an explicit connection instead. engine.begin() commits
# when the block exits normally, rolls back if it raises, and always returns
# the connection to the pool.
with engine.begin() as conn:
    conn.exec_driver_sql(create_db_query)

Then we create the table where we will store our data. For our example, we will import just three fields into the database: `station_id`, `station_name`, and `capacity` (the number of docks at the station).

In [None]:
# Several people write to the same database, so every table name gets a random
# suffix to keep them from colliding.
# The suffix is generated only on the first run of this cell; re-running the
# cell keeps the value that already exists.
try:
    suffix
except NameError:
    suffix = uuid.uuid4().hex[:8]
print(suffix)

In [None]:
# Display one entry to see which fields each station record carries
data[1]

In [None]:
# The table name carries the random suffix generated earlier in the notebook.
table_name = f"Docks_{suffix}"

# Create a table with one row per station, keyed by station_id.
create_table_query = f"""CREATE TABLE IF NOT EXISTS {db_name}.{table_name} 
                                (station_id int, 
                                station_name varchar(250), 
                                capacity int,
                                PRIMARY KEY(station_id)
                                )"""

# Engine.execute() is deprecated in SQLAlchemy 1.4 and removed in 2.0, so the
# statement runs on an explicit connection. engine.begin() commits on success,
# rolls back on error, and always releases the connection.
with engine.begin() as conn:
    conn.exec_driver_sql(create_table_query)

Finally, we import the data into our table, using the INSERT command. 

In [None]:
# Parameterised INSERT: the %s markers are placeholders. The values are passed
# to the database driver separately from the SQL text.
query_template = f"""
                    INSERT INTO 
                    {db_name}.{table_name}(station_id,  station_name,  capacity) 
                    VALUES (%s, %s, %s)"""

# THIS IS PROHIBITED: never build the statement by gluing values into the SQL
# text. A quote inside a value breaks the query, and it opens the door to
# SQL injection.
# query = "INSERT INTO citibike.Docks(station_id, station_name, number_of_docks) " + \
#         "VALUES ("+entry["id"]+", "+entry["stationName"]+", "+entry["totalDocks"]+")"

# One (station_id, station_name, capacity) tuple per station, in the same
# order as the columns listed in the INSERT statement.
station_rows = [
    (entry["station_id"], entry["name"], entry["capacity"]) for entry in data
]

# Passing a list of tuples makes this an "executemany" call, so all stations
# are sent in one batch instead of one statement per station.
# engine.begin() wraps the batch in a single transaction: either every row is
# committed or, if any insert fails, none are. It also replaces
# Engine.execute(), which is deprecated in SQLAlchemy 1.4 and removed in 2.0.
# The guard skips the call when there is nothing to insert, because an empty
# parameter list would be treated as "no parameters".
if station_rows:
    with engine.begin() as conn:
        conn.exec_driver_sql(query_template, station_rows)

Now let's see how to query the database

In [None]:
# Run the SELECT on an explicit connection (Engine.execute() is deprecated in
# SQLAlchemy 1.4 and removed in 2.0). The `with` block returns the connection
# to the pool even if the query raises, so no manual close() is needed.
with engine.connect() as conn:
    results = conn.exec_driver_sql(f"SELECT * FROM {db_name}.{table_name}")
    # fetchall() copies every row into memory, so `rows` stays usable after
    # the connection has been released.
    rows = results.fetchall()

In [None]:
# Print every station we read back from the table.
# Columns are read as attributes (row.station_id) rather than with string
# indexing (row["station_id"]): SQLAlchemy 2.0 rows behave like named tuples
# and no longer accept string keys, while attribute access works on both the
# 1.x and 2.x series.
for row in rows:
    print("Station ID:", row.station_id)
    print("Station Name:", row.station_name)
    print("Number of Docks:", row.capacity)
    print("=============================================")

Finally, let's clean up and close our database connection.

In [None]:
# Remove the table we created for this walkthrough.
drop_table_query = f"DROP TABLE IF EXISTS {db_name}.{table_name}"

# Engine.execute() is deprecated in SQLAlchemy 1.4 and removed in 2.0, so the
# statement runs on an explicit connection that commits and is released when
# the block exits.
with engine.begin() as conn:
    conn.exec_driver_sql(drop_table_query)

# Close the pooled database connections held by the engine. The engine object
# itself stays usable: it opens fresh connections if it is used again.
engine.dispose()

## Exercise

At `https://gbfs.citibikenyc.com/gbfs/en/station_status.json` we can access the live status of all the stations (e.g., the number of bikes available). Using the approach outlined above, create a table in the database (using the same table suffix that we created above) and store the station status data in it.