In [1]:
from sqlalchemy import create_engine
import psycopg2
import psycopg2.extras
import json

# Path of the JSON file that pgconnect() reads the database connection settings from.
credentials = 'Credentials.json'

def pgconnect(credential_filepath, db_schema="public"):
    """Create a SQLAlchemy engine and open a connection to PostgreSQL.

    Connection settings are read from a JSON file that must contain the keys
    'host', 'user' and 'password'. The database name is taken from the 'user'
    key as well.

    Parameters
    ----------
    credential_filepath : str
        Path to the JSON credentials file.
    db_schema : str, optional
        Kept for backward compatibility; it is not used by this function.

    Returns
    -------
    tuple
        (engine, connection) on success. If building the engine or connecting
        fails, the error is printed and (None, None) is returned.

    Raises
    ------
    OSError, json.JSONDecodeError, KeyError
        If the credentials file cannot be opened or parsed, or a required key
        is missing. These happen before the connection attempt and are not
        swallowed.
    """
    from urllib.parse import quote

    # Read the settings and release the file handle before touching the network.
    with open(credential_filepath) as f:
        db_conn_dict = json.load(f)

    host       = db_conn_dict['host']
    db_user    = db_conn_dict['user']
    db_pw      = db_conn_dict['password']
    default_db = db_conn_dict['user']

    try:
        # Percent-encode the credentials so reserved URL characters such as
        # '@', ':' or '/' in a password cannot corrupt the connection URL.
        url = ('postgresql+psycopg2://' + quote(db_user, safe='') + ':'
               + quote(db_pw, safe='') + '@' + host + '/' + default_db)
        db = create_engine(url, echo=False)
        conn = db.connect()
        print('Connected successfully.')
    except Exception as e:
        print("Unable to connect to the database.")
        print(e)
        db, conn = None, None
    return db, conn

def query(conn, sqlcmd, args=None, df=True):
    """Run a SQL statement and return its result.

    Parameters
    ----------
    conn
        Open database connection (as returned by pgconnect).
    sqlcmd : str
        SQL statement to execute.
    args : optional
        Bind parameters for the statement; None means "no parameters".
    df : bool, optional
        If True (default) the result is returned as a pandas DataFrame via
        pandas.read_sql_query. If False the rows are fetched directly.

    Returns
    -------
    pandas.DataFrame, list, row or None
        With df=True: the query result, or an empty DataFrame on error.
        With df=False: the single row if exactly one row was returned,
        otherwise the list of rows; None on error.

    Notes
    -----
    Errors are printed rather than raised so the calling cell keeps running.
    """
    result = pd.DataFrame() if df else None
    try:
        if df:
            result = pd.read_sql_query(sqlcmd, conn, params=args)
        else:
            # Only forward parameters when some were given, so a literal None
            # is never handed to the driver as if it were a bind parameter.
            if args is None:
                cursor = conn.execute(sqlcmd)
            else:
                cursor = conn.execute(sqlcmd, args)
            rows = cursor.fetchall()
            # A single-row result is unwrapped for caller convenience.
            result = rows[0] if len(rows) == 1 else rows
    except Exception as e:
        print("Error encountered: ", e, sep='\n')
    return result

Connect to the SQL database

In [2]:
db, conn = pgconnect(credentials)
# pgconnect() reports a failed connection by printing the error and returning
# (None, None). Stop here in that case so later cells do not fail with an
# unrelated AttributeError on None.
if conn is None:
    raise RuntimeError("Database connection failed; see the message printed above.")

Connected successfully.


In [13]:
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point, Polygon, MultiPolygon
from geoalchemy2 import Geometry, WKTElement
import matplotlib.pyplot as plt

Open datasets

In [27]:
# Business counts per SA2 area and industry
businesses = pd.read_csv("Businesses.csv")
# 2019 polling places
polls = pd.read_csv("PollingPlaces2019.csv")
# Population counts per SA2 area
population = pd.read_csv("Population.csv")
# Income statistics per SA2 area
income = pd.read_csv("Income.csv")

Rename columns, because some of the column names start with a number or are uppercase

In [49]:
# Business bands: replace the '_businesses' suffix with a 'b_' prefix so that
# no column name begins with a digit.
businesses = businesses.rename(columns={
    '0_to_50k_businesses': 'b_0_to_50k',
    '50k_to_200k_businesses': 'b_50k_to_200k',
    '200k_to_2m_businesses': 'b_200k_to_2m',
    '2m_to_5m_businesses': 'b_2m_to_5m',
    '5m_to_10m_businesses': 'b_5m_to_10m',
    '10m_or_more_businesses': 'b_10m_or_more',
})

# Age bands: '<lo>-<hi>_people' becomes 'p_<lo>_to_<hi>'.
population = population.rename(columns={
    '0-4_people': 'p_0_to_4',
    '5-9_people': 'p_5_to_9',
    '10-14_people': 'p_10_to_14',
    '15-19_people': 'p_15_to_19',
    '20-24_people': 'p_20_to_24',
    '25-29_people': 'p_25_to_29',
    '30-34_people': 'p_30_to_34',
    '35-39_people': 'p_35_to_39',
    '40-44_people': 'p_40_to_44',
    '45-49_people': 'p_45_to_49',
    '50-54_people': 'p_50_to_54',
    '55-59_people': 'p_55_to_59',
    '60-64_people': 'p_60_to_64',
    '65-69_people': 'p_65_to_69',
    '70-74_people': 'p_70_to_74',
    '75-79_people': 'p_75_to_79',
    '80-84_people': 'p_80_to_84',
    '85-and-over_people': 'p_85_and_over',
})

# Lower-case the only upper-case column name in the polling data.
polls = polls.rename(columns={'FID': 'fid'})

Replace the 'np' placeholder entries in the income data with missing values

In [70]:
# Turn the 'np' placeholder into a real missing value (None).
# The mapping form is deliberate: replace('np', None) with a scalar and
# value=None is treated by some pandas versions as "no replacement value given"
# and falls back to pad/forward-fill, silently copying the previous row's value.
income = income.replace({'np': None})

Create Schema

In [54]:
# Recreate the businesses table from scratch: drop any existing copy, then
# create it empty. Both statements are sent in a single execute() call.
# The b_* column names match the renamed DataFrame columns.
conn.execute("""
DROP TABLE IF EXISTS businesses;
CREATE TABLE businesses (
    industry_code CHAR, 
    industry_name VARCHAR(100),
    sa2_code VARCHAR(9),
    sa2_name VARCHAR(100),
    b_0_to_50k INTEGER,
    b_50k_to_200k INTEGER,
    b_200k_to_2m INTEGER,
    b_2m_to_5m INTEGER,
    b_5m_to_10m INTEGER,
    b_10m_or_more INTEGER,
    total_businesses INTEGER
);
""")
# b_0_to_50k is the number of businesses with income below 50k; the remaining
# b_* columns presumably follow the same band naming (confirm against the data source).

# Recreate the polls table from scratch: drop any existing copy, then create it empty.
# The the_geom column uses a GEOMETRY(POINT,4326) type, so the database must
# provide that type (presumably via the PostGIS extension - confirm it is enabled).
conn.execute("""
DROP TABLE IF EXISTS polls;
CREATE TABLE polls (
    fid VARCHAR(100),
    state VARCHAR(3),
    division_id INTEGER,
    division_name VARCHAR(100),
    polling_place_id INTEGER,
    polling_place_type_id INTEGER,
    polling_place_name VARCHAR(100),
    premises_name VARCHAR(100),
    premises_address_1 VARCHAR(100),
    premises_address_2 VARCHAR(100),
    premises_address_3 VARCHAR(100),
    premises_suburb VARCHAR(100),
    premises_state_abbreviation VARCHAR(3),
    premises_post_code INTEGER,
    latitude FLOAT,
    longitude FLOAT,
    the_geom GEOMETRY(POINT,4326)
);
""")

# Recreate the population table from scratch: drop any existing copy, then
# create it empty. The p_* column names match the renamed DataFrame columns.
conn.execute("""
DROP TABLE IF EXISTS population;
CREATE TABLE population (
    sa2_code VARCHAR(9),
    sa2_name VARCHAR(100),
    p_0_to_4 INTEGER,
    p_5_to_9 INTEGER,
    p_10_to_14 INTEGER,
    p_15_to_19 INTEGER,
    p_20_to_24 INTEGER,
    p_25_to_29 INTEGER,
    p_30_to_34 INTEGER,
    p_35_to_39 INTEGER,
    p_40_to_44 INTEGER,
    p_45_to_49 INTEGER,
    p_50_to_54 INTEGER,
    p_55_to_59 INTEGER,
    p_60_to_64 INTEGER,
    p_65_to_69 INTEGER,
    p_70_to_74 INTEGER,
    p_75_to_79 INTEGER,
    p_80_to_84 INTEGER,
    p_85_and_over INTEGER,
    total_people INTEGER
);
"""
)
# p_0_to_4 is the number of people aged 0 to 4; the other p_* columns follow
# the same '<low>_to_<high>' age-band naming.

# Recreate the income table from scratch: drop any existing copy, then create it empty.
# As the last expression of the cell, the returned result object is what the
# notebook displays as the cell output.
conn.execute("""
DROP TABLE IF EXISTS income;
CREATE TABLE income (
    sa2_code VARCHAR(9),
    sa2_name VARCHAR(100),
    earners INTEGER,
    median_age INTEGER,
    median_income INTEGER,
    mean_income INTEGER
);
"""
)

<sqlalchemy.engine.cursor.LegacyCursorResult at 0x24d11e9c520>

Import the DataFrames into the SQL tables

In [72]:
# Append every DataFrame to the table of the same name. if_exists='append'
# adds rows to an existing table instead of replacing it, so re-running this
# cell without recreating the tables first will insert the rows again.
for table_name, frame in (
    ('businesses', businesses),
    ('polls', polls),
    ('population', population),
    ('income', income),
):
    frame.to_sql(table_name, conn, if_exists='append', index=False)