# Task 1

In [1]:
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point, Polygon, MultiPolygon
from geoalchemy2 import Geometry, WKTElement
import matplotlib.pyplot as plt
import json

In [None]:
# Import the feature-geometry shapefiles as GeoDataFrames.
# All paths are relative to the notebook's working directory.
# Catchment layers: primary, secondary and future.
catchments_primary = gpd.read_file("Catchments/catchments_primary.shp")
catchments_secondary = gpd.read_file("Catchments/catchments_secondary.shp")
catchments_future = gpd.read_file("Catchments/catchments_future.shp")
# SA2 boundary layer (2021 edition, GDA2020 according to the file name).
sa2_boundaries = gpd.read_file("SA2_2021_AUST_SHP_GDA2020/SA2_2021_AUST_GDA2020.shp")
# SA2 unemployment layer.
unemployment = gpd.read_file("Data/SA2_unemployment.shp")

In [None]:
# Import the tabular CSV / TXT sources as pandas DataFrames.
# All paths are relative to the notebook's working directory.
businesses=pd.read_csv("Businesses.csv")
income=pd.read_csv("Income.csv")
polling=pd.read_csv("PollingPlaces2019.csv")
population=pd.read_csv("Population.csv")
toilets = pd.read_csv('Australia Public Toilet Map.csv')
# Stops.txt is read as comma-separated; sep="," is also the read_csv default.
stops=pd.read_csv("Stops.txt", sep=",")

In [None]:
from sqlalchemy import create_engine
import psycopg2
import psycopg2.extras
import json

# Path of the JSON file holding the database connection details read by pgconnect().
credentials = "Credentials.json"

def pgconnect(credential_filepath, db_schema="public"):
    """Open a SQLAlchemy engine and connection to PostgreSQL from a JSON credentials file.

    The JSON document must provide the keys ``host``, ``user`` and ``password``.
    The database name is taken from ``user`` as well.

    Parameters
    ----------
    credential_filepath : str
        Path of the JSON credentials file.
    db_schema : str, optional
        Accepted for backward compatibility; it is not used by this function.

    Returns
    -------
    tuple
        ``(engine, connection)`` on success, or ``(None, None)`` when the
        engine could not be created or the connection attempt failed (the
        error is printed rather than raised).

    Raises
    ------
    OSError, json.JSONDecodeError, KeyError
        If the credentials file cannot be read, is not valid JSON, or lacks
        one of the required keys.
    """
    from urllib.parse import quote

    # Read the credentials and release the file before any network activity.
    with open(credential_filepath, encoding="utf-8") as f:
        db_conn_dict = json.load(f)

    host       = db_conn_dict['host']
    db_user    = db_conn_dict['user']
    db_pw      = db_conn_dict['password']
    default_db = db_conn_dict['user']

    # Percent-encode user and password so characters such as '@', ':' or '/'
    # cannot be mistaken for URL delimiters; SQLAlchemy decodes them again.
    url = ('postgresql+psycopg2://' + quote(db_user, safe='') + ':' + quote(db_pw, safe='')
           + '@' + host + '/' + default_db)
    try:
        db = create_engine(url, echo=False)
        conn = db.connect()
        print('Connected successfully.')
    except Exception as e:
        # Best effort: report the problem and hand back a null pair.
        print("Unable to connect to the database.")
        print(e)
        db, conn = None, None
    return db, conn

def query(conn, sqlcmd, args=None, df=True):
    """Run a SQL statement and return its rows.

    Parameters
    ----------
    conn
        Open database connection.
    sqlcmd : str
        SQL text to execute.
    args : optional
        Bind parameters forwarded to the driver.
    df : bool, optional
        When True (default) the rows come back as a pandas DataFrame.
        When False the raw rows are fetched: a single row is returned on its
        own, any other row count is returned as a list.

    Returns
    -------
    DataFrame, row, list of rows, or None
        On failure the error is printed and the fallback is returned: an
        empty DataFrame when ``df`` is True, otherwise None.
    """
    # Fallback value handed back if the statement fails.
    result = pd.DataFrame() if df else None
    try:
        if not df:
            rows = conn.execute(sqlcmd, args).fetchall()
            # Unwrap a lone row for convenience.
            if len(rows) == 1:
                result = rows[0]
            else:
                result = rows
        else:
            result = pd.read_sql_query(sqlcmd, conn, params=args)
    except Exception as e:
        print("Error encountered: ", e, sep='\n')
    return result

In [None]:
# Open the engine/connection pair used by the cells below; both are None if connecting failed.
db, conn = pgconnect(credentials)

### Traffic

In [2]:
# Load the live traffic camera records.
# JSON is read explicitly as UTF-8 instead of relying on the platform default encoding.
# The handle gets its own name so it is not confused with the `traffic` DataFrame built next.
with open("live-traffic-cameras.json", encoding="utf-8") as traffic_file:
    data = json.load(traffic_file)


In [3]:
# Fields copied from every camera record; a record missing one of them raises KeyError.
traffic_columns = ["geo_point_2d", "region", "title", "view", "direction", "href", "photo"]

# Collect all rows first and build the frame once, instead of growing it
# row by row with .loc (which re-allocates on every insertion).
traffic = pd.DataFrame(
    [[camera[column] for column in traffic_columns] for camera in data],
    columns=traffic_columns,
)


In [5]:
# Expand each geo_point_2d entry into two positional columns and build a point from them.
traffic[['lng', 'lat']] = traffic["geo_point_2d"].apply(pd.Series)
traffic['geom'] = gpd.points_from_xy(traffic['lng'], traffic['lat'])

# Keep only the descriptive attributes plus the geometry.
traffic = traffic.drop(['geo_point_2d', 'lng', 'lat', 'photo', 'href'], axis=1)

# 1-based running number used as the table's primary key.
traffic['id'] = range(1, len(traffic) + 1)

# Wrap every point as WKT tagged with SRID 4326 for insertion through GeoAlchemy2.
traffic['geom'] = traffic['geom'].apply(lambda point: WKTElement(point.wkt, srid=4326))
traffic

Unnamed: 0,region,title,view,direction,geom
0,SYD_MET,Anzac Bridge (Eastbound),Intersection of Victoria Road and Anzac Bridge...,E,POINT (151.18022 -33.86789)
1,SYD_WEST,Victoria Road (Parramatta),Victoria Road at Church Street looking east to...,E,POINT (151.00533 -33.80866)
2,SYD_NORTH,Cumberland Highway (Carlingford),Cumberland Highway at Marsden Road looking nor...,N,POINT (151.05237 -33.77967)
3,REG_NORTH,M1 Pacific Motorway (Mount White),M1 Pacific Motorway at the Mount White heavy v...,N,POINT (151.20595 -33.45451)
4,SYD_NORTH,Falcon Street (Crows Nest),Corner of Falcon Street and the Pacific Highwa...,E,POINT (151.20084 -33.82768)
...,...,...,...,...,...
159,SYD_NORTH,Webbs Creek ferry (West),St Albans Road at Webbs Creek ferry looking we...,W,POINT (150.978459 -33.386823)
160,SYD_WEST,Sackville ferry (South),Sackville Road at Sackville ferry looking nort...,N,POINT (150.876052 -33.502874)
161,SYD_WEST,Great Western Highway (Bathurst),Great Western Highway at Havannah Street looki...,E,POINT (149.591139 -33.418282)
162,SYD_WEST,Lower Portland Ferry (West),West Portland Road at Lower Portland Ferry loo...,W,POINT (150.885556 -33.438139)


In [None]:
# Recreate the traffic table so the append in the next cell always starts from
# an empty table with the intended column types and primary key.
sql = """
DROP TABLE IF EXISTS traffic;
CREATE TABLE traffic (
    region VARCHAR(100), 
    title VARCHAR(100), 
    view TEXT,
    direction VARCHAR(3),
    geom GEOMETRY(POINT,4326),
    id INTEGER PRIMARY KEY
);"""

# Execute the DDL, as every other table section of this notebook does; without
# this call the statement above was only defined and never run.
query(conn, sql)


In [None]:
# Append the rows to the traffic table (DataFrame index omitted), binding geom
# as a PostGIS POINT with SRID 4326, then read the table back for display.
traffic.to_sql('traffic', conn, if_exists='append', index=False, dtype={'geom': Geometry('POINT', 4326)})
query(conn, "select * from traffic")

### Catchments

In [None]:
# Discard the column slice ADD_DATE..PRIORITY and lower-case the remaining attribute names.
catchments_primary = (
    catchments_primary
    .drop(columns=catchments_primary.loc[:, 'ADD_DATE':'PRIORITY'].columns)
    .rename(columns={'USE_ID': 'use_id', 'CATCH_TYPE': 'catch_type', 'USE_DESC': 'use_desc'})
)

In [None]:
def create_wkt_element(geom, srid):
    """Convert a shapely geometry into a GeoAlchemy2 ``WKTElement``.

    A ``Polygon`` is wrapped in a single-member ``MultiPolygon`` first, so
    polygon layers can be stored in a MULTIPOLYGON column; any other geometry
    type is passed through unchanged.

    Parameters
    ----------
    geom : shapely geometry or None
        Geometry to convert.
    srid : int
        Spatial reference identifier attached to the element.

    Returns
    -------
    WKTElement or None
        ``None`` when ``geom`` is ``None`` (a feature without geometry), so
        such rows are stored as NULL instead of raising AttributeError.
    """
    # Features without a geometry have nothing to convert.
    if geom is None:
        return None
    if geom.geom_type == 'Polygon':
        geom = MultiPolygon([geom])
    return WKTElement(geom.wkt, srid)

# Store every shape as a MULTIPOLYGON WKT element (SRID 4326) in `geom`,
# then drop the original shapely column.
catchments_primary['geom'] = catchments_primary['geometry'].apply(create_wkt_element, srid=4326)
catchments_primary = catchments_primary.drop(columns=["geometry"])

In [None]:
# Recreate the catchments_primary table: drop any previous version, then
# create it with use_id as primary key and a MULTIPOLYGON geometry (SRID 4326).
# The unquoted upper-case column names are folded to lower case by PostgreSQL,
# so they line up with the lower-case DataFrame columns.
sql = """
DROP TABLE IF EXISTS catchments_primary;
CREATE TABLE catchments_primary (
    USE_ID INTEGER PRIMARY KEY,
    CATCH_TYPE TEXT,
    USE_DESC TEXT, 
    geom GEOMETRY(MULTIPOLYGON,4326)

);"""

# NOTE(review): query() defaults to df=True (pd.read_sql_query) although this
# statement returns no rows — confirm it does not report a spurious error.
query(conn, sql)

In [None]:
# Append the rows to catchments_primary (DataFrame index omitted), binding geom
# as MULTIPOLYGON with SRID 4326, then read the table back for display.
catchments_primary.to_sql('catchments_primary', conn, if_exists='append', index=False, dtype={'geom': Geometry('MULTIPOLYGON', srid=4326)})
query(conn, "select * from catchments_primary")

In [None]:
# Discard the column slice ADD_DATE..PRIORITY and lower-case the remaining attribute names.
catchments_secondary = (
    catchments_secondary
    .drop(columns=catchments_secondary.loc[:, 'ADD_DATE':'PRIORITY'].columns)
    .rename(columns={'USE_ID': 'use_id', 'CATCH_TYPE': 'catch_type', 'USE_DESC': 'use_desc'})
)

In [None]:
# Store every shape as a MULTIPOLYGON WKT element (SRID 4326) in `geom`,
# then drop the original shapely column.
catchments_secondary['geom'] = catchments_secondary['geometry'].apply(create_wkt_element, srid=4326)
catchments_secondary = catchments_secondary.drop(columns=["geometry"])

In [None]:
# Recreate the catchments_secondary table: drop any previous version, then
# create it with use_id as primary key and a MULTIPOLYGON geometry (SRID 4326).
sql = """
DROP TABLE IF EXISTS catchments_secondary;
CREATE TABLE catchments_secondary (
    USE_ID INTEGER PRIMARY KEY,
    CATCH_TYPE TEXT,
    USE_DESC TEXT,
    geom GEOMETRY(MULTIPOLYGON,4326)
);"""


# Execute the DDL on the open connection.
query(conn, sql)

In [None]:
# Append the rows to catchments_secondary (DataFrame index omitted), binding geom
# as MULTIPOLYGON with SRID 4326, then read the table back for display.
catchments_secondary.to_sql('catchments_secondary', conn, if_exists='append', index=False, dtype={'geom': Geometry('MULTIPOLYGON', srid=4326)})
query(conn, "select * from catchments_secondary")

In [None]:
# Discard the column slice ADD_DATE..YEAR12 and lower-case the remaining attribute names.
catchments_future = (
    catchments_future
    .drop(columns=catchments_future.loc[:, 'ADD_DATE':'YEAR12'].columns)
    .rename(columns={'USE_ID': 'use_id', 'CATCH_TYPE': 'catch_type', 'USE_DESC': 'use_desc'})
)

In [None]:
# Store every shape as a MULTIPOLYGON WKT element (SRID 4326) in `geom`,
# then drop the original shapely column.
catchments_future['geom'] = catchments_future['geometry'].apply(create_wkt_element, srid=4326)
catchments_future = catchments_future.drop(columns=["geometry"])

In [None]:
# Recreate the catchments_future table: drop any previous version, then
# create it with use_id as primary key and a MULTIPOLYGON geometry (SRID 4326).
sql = """
DROP TABLE IF EXISTS catchments_future;
CREATE TABLE catchments_future (
    USE_ID INTEGER PRIMARY KEY,
    CATCH_TYPE TEXT,
    USE_DESC TEXT,
    geom GEOMETRY(MULTIPOLYGON,4326)
);"""

# Execute the DDL on the open connection.
query(conn, sql)

In [None]:
# Append the rows to catchments_future (DataFrame index omitted), binding geom
# as MULTIPOLYGON with SRID 4326, then read the table back for display.
catchments_future.to_sql('catchments_future', conn, if_exists='append', index=False, dtype={'geom': Geometry('MULTIPOLYGON', srid=4326)})
query(conn, "select * from catchments_future")

### SA2 Boundaries

In [None]:
# Remove the unused attribute columns, keep only the Greater Sydney rows, and
# finally drop the GCC name column that was needed only for the filter.
sa2_boundaries = (
    sa2_boundaries
    .drop(columns=[
        'CHG_FLAG21', 'CHG_LBL21',
        'SA3_CODE21', 'SA3_NAME21',
        'SA4_CODE21', 'SA4_NAME21',
        'GCC_CODE21',
        'STE_CODE21', 'STE_NAME21',
        'AUS_CODE21', 'AUS_NAME21',
        'LOCI_URI21',
    ])
    .loc[lambda frame: frame['GCC_NAME21'] == 'Greater Sydney']
    .drop(columns=['GCC_NAME21'])
)

In [None]:
# Store every boundary as a MULTIPOLYGON WKT element tagged with SRID 4326.
sa2_boundaries['geom'] = sa2_boundaries['geometry'].apply(create_wkt_element, srid=4326)

In [None]:
# Drop the original shapely column and switch the attribute names to lower case.
sa2_boundaries = (
    sa2_boundaries
    .drop(columns=['geometry'])
    .rename(columns={
        'SA2_CODE21': 'sa2_code21',
        'SA2_NAME21': 'sa2_name21',
        'AREASQKM21': 'areasqkm21',
    })
)

In [None]:
# Recreate the sa2_boundaries table: drop any previous version, then create it
# with sa2_code21 as primary key and a MULTIPOLYGON geometry (SRID 4326).
sql = """
DROP TABLE IF EXISTS sa2_boundaries;
CREATE TABLE sa2_boundaries (
    sa2_code21 INTEGER PRIMARY KEY, 
    sa2_name21 TEXT, 
    areasqkm21 FLOAT,
    geom GEOMETRY(MULTIPOLYGON,4326)
);"""

# Execute the DDL on the open connection.
query(conn, sql)

In [None]:
# Append the rows to sa2_boundaries (DataFrame index omitted), binding geom
# as MULTIPOLYGON with SRID 4326, then read the table back for display.
sa2_boundaries.to_sql('sa2_boundaries', conn, if_exists='append', index=False, dtype={'geom': Geometry('MULTIPOLYGON', srid=4326)})
query(conn, "select * from sa2_boundaries")

### Income

In [None]:
# Turn the 'np' placeholder into a missing value and discard every row that
# contains one: those rows did not hold integer values.
income = income.replace({'np': None}).dropna()

In [None]:
# Recreate the income table: drop any previous version, then create it with
# sa2_code as primary key and integer measures.
sql = """
DROP TABLE IF EXISTS income;
CREATE TABLE income(
    sa2_code INTEGER PRIMARY KEY,
    sa2_name VARCHAR(100), 
    earners INTEGER, 
    median_age INTEGER,
    median_income INTEGER,
    mean_income INTEGER
);"""


# Execute the DDL on the open connection.
query(conn, sql)

In [None]:
# Append the cleaned rows to the income table (DataFrame index omitted), then
# read the table back for display.
income.to_sql('income', conn, if_exists='append', index=False)
query(conn, "select * from income")

### Businesses

In [None]:
# Drop the first column (industry code) to remove repeated information.
# No single column identifies a record uniquely, so the table's primary key is
# the pair (industry_name, sa2_code) declared in the CREATE TABLE statement.
businesses = businesses.drop(columns=businesses.columns[0])

In [None]:
# Recreate the businesses table: drop any previous version, then create it with
# the composite primary key (industry_name, sa2_code). The turnover-band columns
# are double-quoted because their names start with a digit.
sql = """
DROP TABLE IF EXISTS businesses;
CREATE TABLE businesses(
    industry_name VARCHAR(100),
    sa2_code INTEGER, 
    sa2_name VARCHAR(100),
    "0_to_50k_businesses" INTEGER,
    "50k_to_200k_businesses" INTEGER,
    "200k_to_2m_businesses" INTEGER,
    "2m_to_5m_businesses" INTEGER,
    "5m_to_10m_businesses" INTEGER,
    "10m_or_more_businesses" INTEGER,
    total_businesses INTEGER,
    PRIMARY KEY (industry_name, sa2_code) 
);"""

# Execute the DDL on the open connection.
query(conn, sql)

In [None]:
# Append the rows to the businesses table (DataFrame index omitted), then read
# the table back for display.
businesses.to_sql('businesses', conn, if_exists='append', index=False)
query(conn, "select * from businesses")

### Population

In [None]:
# Recreate the population table: drop any previous version, then create it with
# sa2_code as primary key and one integer column per age band. The age-band
# columns are double-quoted because their names start with a digit and contain hyphens.
sql = """
DROP TABLE IF EXISTS population;
CREATE TABLE population (
    sa2_code INTEGER PRIMARY KEY, 
    sa2_name VARCHAR(100), 
    "0-4_people" INTEGER, 
    "5-9_people" INTEGER, 
    "10-14_people" INTEGER,
    "15-19_people" INTEGER, 
    "20-24_people" INTEGER, 
    "25-29_people" INTEGER, 
    "30-34_people" INTEGER, 
    "35-39_people" INTEGER, 
    "40-44_people" INTEGER, 
    "45-49_people" INTEGER, 
    "50-54_people" INTEGER, 
    "55-59_people" INTEGER, 
    "60-64_people" INTEGER, 
    "65-69_people" INTEGER, 
    "70-74_people" INTEGER, 
    "75-79_people" INTEGER, 
    "80-84_people" INTEGER, 
    "85-and-over_people" INTEGER, 
    total_people INTEGER
);"""

# Execute the DDL on the open connection.
query(conn, sql)

In [None]:
# Append the rows to the population table (DataFrame index omitted), then read
# the table back for display.
population.to_sql('population', conn, if_exists='append', index=False)
query(conn, "select * from population")

### Toilets

In [None]:
# Remove every facility attribute that is not loaded into the database.
toilets = toilets.drop(columns=[
    'URL', 'FacilityType', 'AddressNote',
    'Parking', 'ParkingNote',
    'KeyRequired', 'MLAK24', 'MLAKAfterHours', 'PaymentRequired', 'AccessNote',
    'AdultChange', 'ChangingPlaces', 'BYOSling', 'ACShower', 'ACMLAK', 'AdultChangeNote',
    'BabyChange', 'BabyCareRoom', 'BabyChangeNote',
    'DumpPoint', 'DPWashout', 'DPAfterHours', 'DumpPointNote',
    'OpeningHours', 'OpeningHoursNote',
    'Male', 'Female', 'Unisex', 'AllGender',
    'Ambulant', 'Accessible', 'LHTransfer', 'RHTransfer', 'ToiletNote',
    'SharpsDisposal', 'DrinkingWater', 'SanitaryDisposal', 'MensPadDisposal',
    'Shower', 'ParkingAccessible',
])

In [None]:
# Build a point from each longitude/latitude pair, discard the raw coordinate
# columns, and wrap every point as WKT tagged with SRID 4326.
toilets['geom'] = gpd.points_from_xy(toilets['Longitude'], toilets['Latitude'])
toilets = toilets.drop(columns=['Latitude', 'Longitude'])
toilets['geom'] = toilets['geom'].apply(lambda point: WKTElement(point.wkt, srid=4326))

# Keep only the NSW facilities; the State column is redundant afterwards.
in_nsw = toilets['State'] == 'NSW'
toilets = toilets[in_nsw].drop(columns=['State'])

In [None]:
# Switch the remaining attribute names to the lower-case names used by the table.
toilets = toilets.rename(columns={
    'FacilityID': 'facilityid',
    'Name': 'name',
    'Address1': 'address1',
    'Town': 'town',
})

In [None]:
# Recreate the toilets table: drop any previous version, then create it with
# facilityid as primary key and a POINT geometry (SRID 4326).
sql = """
DROP TABLE IF EXISTS toilets;
CREATE TABLE toilets (
    facilityid INTEGER PRIMARY KEY, 
    name VARCHAR(100), 
    address1 TEXT,
    town TEXT,
    geom GEOMETRY(POINT,4326)
);"""

# Execute the DDL on the open connection.
query(conn, sql)

In [None]:
# Append the rows to the toilets table (DataFrame index omitted), binding geom
# as a PostGIS POINT with SRID 4326, then read the table back for display.
toilets.to_sql('toilets', conn, if_exists='append', index=False, dtype={'geom': Geometry('POINT', 4326)})
query(conn, "select * from toilets")

### Polling

In [None]:
# Remove the coordinate, identifier and address columns that are not loaded into the database.
polling = polling.drop(columns=[
    'latitude', 'longitude',
    'FID', 'state', 'division_id', 'polling_place_type_id',
    'premises_address_1', 'premises_address_2', 'premises_address_3',
    'premises_state_abbreviation', 'premises_post_code',
])

In [None]:
# Discard polling places without a geometry and rename the column to `geom`.
# NOTE(review): unlike the toilets and stops sections, the values are passed on
# exactly as read from the CSV, not wrapped in WKTElement with srid=4326 —
# confirm the insert produces SRID 4326 points with the intended axis order.
polling = (
    polling
    .dropna(subset=['the_geom'])
    .rename(columns={"the_geom": "geom"})
)


In [None]:
# Recreate the polling table: drop any previous version, then create it with
# polling_place_id as primary key and a POINT geometry (SRID 4326).
sql = """
DROP TABLE IF EXISTS polling;
CREATE TABLE polling (
    polling_place_id INTEGER PRIMARY KEY, 
    division_name VARCHAR(100), 
    polling_place_name TEXT,
    premises_name TEXT,
    premises_suburb TEXT,
    geom GEOMETRY(POINT,4326)
);"""


# Execute the DDL on the open connection.
query(conn, sql)

In [None]:
# Append the rows to the polling table (DataFrame index omitted), declaring geom
# as a PostGIS POINT with SRID 4326, then read the table back for display.
polling.to_sql('polling', conn, if_exists='append', index=False, dtype={'geom': Geometry('POINT', 4326)})
query(conn, "select * from polling")

### Stops

In [None]:
# Remove the stop attributes that are not loaded into the database.
stops = stops.drop(columns=[
    'stop_code',
    'location_type',
    'parent_station',
    'wheelchair_boarding',
    'platform_code',
])

In [None]:
# Build a point from each stop_lon/stop_lat pair, discard the raw coordinate
# columns, and wrap every point as WKT tagged with SRID 4326.
stops['geom'] = gpd.points_from_xy(stops['stop_lon'], stops['stop_lat'])
stops = stops.drop(columns=['stop_lon', 'stop_lat'])
stops['geom'] = stops['geom'].apply(lambda point: WKTElement(point.wkt, srid=4326))


In [None]:
# Recreate the stops table: drop any previous version, then create it with a
# textual stop_id as primary key and a POINT geometry (SRID 4326).
sql = """
DROP TABLE IF EXISTS stops;
CREATE TABLE stops (
    stop_id TEXT PRIMARY KEY, 
    stop_name VARCHAR(100), 
    geom GEOMETRY(POINT,4326)
);"""

# Execute the DDL on the open connection.
query(conn, sql)

In [None]:
# Append the rows to the stops table (DataFrame index omitted), binding geom
# as a PostGIS POINT with SRID 4326, then read the table back for display.
stops.to_sql('stops', conn, if_exists='append', index=False, dtype={'geom': Geometry('POINT', 4326)})
query(conn, "select * from stops")

### Unemployment

In [None]:
#Dropping SA2_MAIN16 & STATE_CODE columns
unemployment=unemployment.drop(unemployment.columns[[1]], axis=1)
unemployment=unemployment.drop(unemployment.columns[[2]], axis=1)

In [None]:
#Renaming columns
unemployment=unemployment.rename(columns={'SA2_MAIN':'sa2_code'})
unemployment=unemployment.rename(columns={'SA2_NAME':'sa2_name'})
unemployment=unemployment.rename(columns={'STATE_NAME':'state_name'})
unemployment=unemployment.rename(columns={'AREA_SQKM':'area_sqkm'})
unemployment=unemployment.rename(columns={'PER_UNEMPL':'per_unempl'})
unemployment

In [None]:
#Converting polygons to multipolygons
unemployment['geom'] = unemployment['geometry'].apply(lambda x: create_wkt_element(geom=x,srid=4326))
unemployment = unemployment.drop(['geometry'], axis = 1)

In [None]:
# Recreate the unemployment table: drop any previous version, then create it
# with sa2_code as primary key and a MULTIPOLYGON geometry (SRID 4326).
sql = """

DROP TABLE IF EXISTS unemployment;
CREATE TABLE unemployment (
    sa2_code INTEGER PRIMARY KEY,
    sa2_name TEXT,
    state_name TEXT,
    area_sqkm FLOAT,
    per_unempl FLOAT,
    geom GEOMETRY(MULTIPOLYGON,4326)

);"""

# Execute the DDL on the open connection.
query(conn, sql)

In [None]:
# Append the rows to the unemployment table (DataFrame index omitted), binding
# geom as MULTIPOLYGON with SRID 4326, then read the table back for display.
unemployment.to_sql('unemployment', conn, if_exists='append', index=False, dtype={'geom': Geometry('MULTIPOLYGON', srid=4326)})
query(conn, "select * from unemployment")