## Primitive Solution for loading REST payloads

Let us look at the solution that learners typically write. There is nothing wrong with it for someone who is still learning, but it does not follow software engineering principles such as modularity and code reusability.

In [None]:
# Executes the shared setup notebook inside this kernel. The cells below use
# username, password, postgres_host and postgres_port without defining them,
# so they are presumably defined there -- TODO confirm.
%run 00_setup_database_variables.ipynb

In [None]:
# Load the sql extension that provides the %%sql cell magic used throughout this notebook.
%load_ext sql

In [None]:
# Publish the connection URL through the DATABASE_URL environment variable,
# which the sql extension uses as its default connection.
# Assigning via os.environ instead of the %env magic avoids echoing the
# expanded URL (which embeds the database password) into the cell output,
# where it would be saved with the notebook.
import os

os.environ['DATABASE_URL'] = (
    f'postgresql://{username}_sms_user:{password}'
    f'@{postgres_host}:{postgres_port}/{username}_sms_db'
)

In [None]:
%%sql
SELECT * FROM information_schema.tables 
WHERE table_catalog = '{username}_sms_db' AND table_schema = 'public'
LIMIT 10

In [None]:
%%sql

DROP TABLE IF EXISTS stations;
-- The drop above makes this cell re-runnable, but it also discards any rows loaded earlier.
CREATE TABLE stations (
    -- Surrogate key generated by the database.
    id SERIAL PRIMARY KEY,
    -- Station identifier supplied by the loader, at most one row per station.
    station_id INT UNIQUE ,
    station_type VARCHAR(200),
    name VARCHAR(200),
    short_name VARCHAR(200),
    -- The remaining attributes are all declared as text columns, including capacity and has_kiosk.
    capacity VARCHAR(100),
    external_id varchar(300),
    has_kiosk varchar(100),
    legacy_id varchar(100),
    region_id varchar(100),
    electric_bike_surcharge_waiver varchar(100),
    eightd_station_services varchar(300)
);

In [None]:
%%sql
SELECT * FROM stations LIMIT 5

In [None]:
import psycopg2
def get_pg_connection(host, port, database, user, password):
    """Open a new PostgreSQL connection using psycopg2.

    Args:
        host: Server host name or address.
        port: Server port.
        database: Name of the database to connect to.
        user: Role used to authenticate.
        password: Password for ``user``.

    Returns:
        The connection object returned by ``psycopg2.connect``. The caller
        owns it and is responsible for closing it.

    Raises:
        Any exception raised by ``psycopg2.connect`` propagates unchanged.
    """
    # No try/except here: catching an exception only to re-raise it adds
    # nothing, so connection errors are simply allowed to propagate.
    return psycopg2.connect(
        host=host,
        port=port,
        database=database,
        user=user,
        password=password,
    )

In [None]:
# Open the psycopg2 connection used by the insert cells further down.
# Database and role names are derived from username, following the same
# <username>_sms_db / <username>_sms_user pattern as DATABASE_URL.
connection = get_pg_connection(
    host=postgres_host,
    port=postgres_port,
    database=f'{username}_sms_db',
    user=f'{username}_sms_user',
    password=password
)

In [None]:
import requests
# Station information endpoint that this notebook loads into PostgreSQL.
url = "https://gbfs.citibikenyc.com/gbfs/en/station_information.json"
# A timeout keeps the cell from hanging indefinitely if the endpoint is unreachable.
response = requests.get(url, timeout=30)
# Fail here on an HTTP error status instead of later, when an error body
# would be parsed as if it were the expected JSON payload.
response.raise_for_status()

In [None]:
import json
# Decode the response body and keep only the list stored under data -> stations.
data = response.json()['data']['stations']

In [None]:
import pandas as pd
# Flatten the station records into a tabular DataFrame
# (presumably one row per station -- depends on the payload shape).
df = pd.json_normalize(data)

In [None]:
# Quick check of the type of object json_normalize produced.
type(df)

In [None]:
# Keep only the attributes stored in the stations table, listed in the same
# order as the column list of the INSERT statement below.
stations_df = df.loc[:, [
    'station_id',
    'station_type',
    'name',
    'short_name',
    'capacity',
    'external_id',
    'has_kiosk',
    'legacy_id',
    'region_id',
    'electric_bike_surcharge_waiver',
    'eightd_station_services',
]]

In [None]:
# Turn each DataFrame row into a tuple so it can be passed to executemany.
stations = list(map(tuple, stations_df.values))

In [None]:
# Parameterised insert; the column order must match the tuples in stations.
query = ("""
         INSERT INTO stations 
         (station_id, station_type, name, short_name, 
          capacity, external_id, has_kiosk, legacy_id, 
          region_id, electric_bike_surcharge_waiver, eightd_station_services
         )
         VALUES 
         (%s, %s, %s, %s, 
          %s, %s, %s, %s, 
          %s, %s, %s
         )
    """)
# The connection context manager commits on success and rolls back on error
# (it does not close the connection), so a failed insert -- for example a
# re-run that violates the UNIQUE constraint -- no longer leaves the
# connection stuck in an aborted transaction. The cursor context manager
# closes the cursor on both paths.
with connection:
    with connection.cursor() as cursor:
        cursor.executemany(query, stations)

In [None]:
%%sql

SELECT count(1) from stations

In [None]:
%%sql

DROP TABLE IF EXISTS station_rental_types;
-- Dropped and recreated on every run, so rows loaded earlier are discarded.
CREATE TABLE station_rental_types(
    station_rental_type_id SERIAL PRIMARY KEY,
    -- Holds the station identifier, no foreign key to stations is declared.
    station_id INT ,
    rental_type VARCHAR(100),
    -- Each rental type may appear at most once per station.
    UNIQUE(station_id, rental_type)
);

In [None]:
%%sql
SELECT * FROM station_rental_types limit 5

In [None]:
# explode emits one row per element of each list-like rental_methods value,
# repeating station_id, which yields (station_id, rental method) pairs.
station_rental_types_df = (
    df.loc[:, ['station_id', 'rental_methods']]
    .explode('rental_methods')
)

In [None]:
# Turn each (station_id, rental method) row into a tuple for executemany.
station_rental_types = list(map(tuple, station_rental_types_df.values))

In [None]:
# Parameterised insert; each tuple supplies (station_id, rental_type).
query = ("""
         INSERT INTO station_rental_types 
         (station_id,rental_type)
         VALUES 
         (%s, %s)
        """)
try:
    # The connection context manager commits on success and rolls back on
    # error; the cursor context manager closes the cursor on both paths.
    with connection:
        with connection.cursor() as cursor:
            cursor.executemany(query, station_rental_types)
finally:
    # This is the last use of the connection, so release it even when the
    # insert fails. Re-running this cell afterwards requires reopening it.
    connection.close()

In [None]:
%%sql

SELECT count(*) FROM station_rental_types