# SQL Package

Provides simple functionality to interact with a PostgreSQL server using Python classes.

**Overview of functionality:**
* Database(self, user, password, host, dbname, port)
    * properties
        * user
        * password
        * host
        * dbname
        * port
    * methods
        * create(name) x
        * connect()
        * drop(name)
* Table(self, dbname, table, schema)
    * accepts db properties
    * methods
        * connect() --> inherited
        * fetch_data(sql, con, parse_dates)
        * get_names()
        * format_names(char_dict)
        * update_names(names_dict)
        * add_columns(columns_list, type=None)
        * compare_column_order(dataframe)
        * match_columns(dataframe)
        * save_csv(data, local_path, match_column_order=True)
        * update_values(local_path, container_path)
        * update_types(types_dict)
        * close()

## Setup

In [1]:
import os
import sys
from pathlib import Path
#sys.path[0] = str(Path(__file__).resolve().parents[2]) # Set path for custom modules
import warnings
from io import StringIO

# Make the project root importable so the `src.*` imports below resolve.
# Prepend rather than overwrite sys.path[0]: overwriting discards whatever
# search entry the interpreter/kernel placed first. An absolute path keeps the
# entry valid regardless of later working-directory changes, and the membership
# check keeps re-runs of this cell from piling up duplicate entries.
# Assumes the notebook runs from a folder directly under the project root.
_project_root = str(Path.cwd().parent)
if _project_root not in sys.path:
    sys.path.insert(0, _project_root)

from dotenv import load_dotenv, find_dotenv
import numpy as np
import pandas as pd

# SQL libraries
import psycopg2
from src.pipeline.dictionaries import types_dict, replace_map
from src.pipeline.transform_data import create_full_address, split_lat_long
from src.toolkits.geospatial import geocode_from_address
from src.toolkits.postgresql import Database, Table

# Notebook display options: show up to 2000 rows; max_columns and width are
# set to None so no fixed value is imposed on either.
pd.options.display.max_rows = 2000
pd.options.display.max_columns = None
pd.options.display.width = None

# Get project root directory
# NOTE(review): the assignment below is disabled here; an active `root_dir`
# assignment lives in the next cell.
#root_dir = os.path.dirname(os.getcwd())

# TODO(review): leftover note, purpose unclear — presumably a planned check
# that required modules were imported; confirm or remove.
# if modulename not in sys.modules: print...

# Locate a .env file and load its variables into the process environment.
# The trailing semicolon suppresses the cell's displayed return value.
load_dotenv(find_dotenv());

In [2]:
# Project root, taken as the parent of the current working directory
# (assumes the notebook is launched from a folder directly under the root).
root_dir = str(Path.cwd().parent)

In [3]:
# PostgreSQL column types for the raw permits table, keyed by column name.
# Passed as `types_dict` when `permits_raw` is (re)created further down.
types_dict_abbrev = {
    # Parcel / legal description
    'assessor_book': 'SMALLINT',
    'assessor_page': 'SMALLINT',
    'assessor_parcel': 'CHAR(3)',
    'tract': 'VARCHAR(200)',
    'block': 'VARCHAR(50)',
    'lot': 'VARCHAR(50)',
    # Permit identity and status
    'reference_no_old_permit_no': 'VARCHAR(50)',
    'pcis_permit_no': 'VARCHAR(50)',
    'status': 'VARCHAR(50)',
    'status_date': 'DATE',
    'permit_type': 'VARCHAR(50)',
    'permit_sub_type': 'VARCHAR(50)',
    'permit_category': 'VARCHAR(50)',
    'project_number': 'SMALLINT',
    'event_code': 'VARCHAR(50)',
    'initiating_office': 'VARCHAR(50)',
    'issue_date': 'DATE',
    # Site address
    'address_start': 'INTEGER',
    'address_fraction_start': 'CHAR(3)',
    'address_end': 'INTEGER',
    'address_fraction_end': 'CHAR(3)',
    'street_direction': 'CHAR(1)',
    'street_name': 'VARCHAR(50)',
    'street_suffix': 'VARCHAR(10)',
    'suffix_direction': 'VARCHAR(10)',
    'unit_range_start': 'VARCHAR(50)',
    'unit_range_end': 'VARCHAR(50)',
    'zip_code': 'INTEGER',
    # Work details
    'work_description': 'TEXT',
    'valuation': 'NUMERIC',
    'floor_area_la_zoning_code_definition': 'VARCHAR(10)',
    'no_of_residential_dwelling_units': 'SMALLINT',
    'no_of_accessory_dwelling_units': 'SMALLINT',
    'no_of_stories': 'SMALLINT',
    # Contractor
    'contractors_business_name': 'VARCHAR(100)',
    'contractor_address': 'VARCHAR(100)',
    'contractor_city': 'VARCHAR(50)',
    'contractor_state': 'CHAR(2)',
    'license_type': 'VARCHAR(10)',
    'license_no': 'INTEGER',
    'principal_first_name': 'VARCHAR(50)',
    'principal_middle_name': 'VARCHAR(50)',
    'principal_last_name': 'VARCHAR(50)',
    'license_expiration_date': 'DATE',
    # Applicant
    'applicant_first_name': 'VARCHAR(50)',
    'applicant_last_name': 'VARCHAR(50)',
    'applicant_business_name': 'VARCHAR(100)',
    'applicant_address_1': 'VARCHAR(50)',
    'applicant_address_2': 'VARCHAR(50)',
    'applicant_address_3': 'VARCHAR(50)',
    # Zoning / location
    'zone': 'VARCHAR(50)',
    'occupancy': 'VARCHAR(50)',
    'floor_area_la_building_code_definition': 'VARCHAR(10)',
    'census_tract': 'VARCHAR(10)',
    'council_district': 'SMALLINT',
    'latitude_longitude': 'VARCHAR(50)',
    'applicant_relationship': 'VARCHAR(50)',
    'existing_code': 'SMALLINT',
    'proposed_code': 'SMALLINT',
}

In [4]:
# Create the Database helper. No connection settings are passed here;
# presumably they come from the environment loaded above — TODO confirm.
permits = Database()

In [5]:
params = {"table_name":"permits_raw", "types_dict":types_dict_abbrev, "id_col":"pcis_permit_no"}
permits.drop_table('permits_raw').create_table(**params)
!cd ../ && bash scripts/load_db.sh

Query successful on database "permits".
Query successful on database "permits".
Copying 500 rows into table...
COPY 500


In [6]:
# Sanity check: list the tables present in the database after the load.
permits.list_tables()

['permits_raw', 'tmp_permits_raw']

### Run Pipeline

1. Standardize table names.
2. Fetch raw data and transform:
    - Concatenate address columns
    - Geocode missing coordinates using street address
    - Extract latitude and longitude from coordinates into their own columns
3. Update the table with the transformed data:
    - Add new columns to PostgreSQL table
    - Update values

In [7]:
# Table helper for the `permits_raw` table loaded above.
permits_raw = Table(name="permits_raw")

#### Standardize table names

In [8]:
# Standardize names using the shared `replace_map`; `update=True` presumably
# applies the change in the database — TODO confirm in Table.format_table_names.
# NOTE(review): the recorded run printed 'column "assessor_book" of relation
# "permits_raw" already exists', so verify this step is still needed once the
# table has been created with the names in the type map.
# The trailing semicolon suppresses the returned Table repr, in line with the
# other side-effect-only cells in this notebook.
permits_raw.format_table_names(replace_map=replace_map, update=True);

Error: column "assessor_book" of relation "permits_raw" already exists



<src.toolkits.postgresql.Table at 0x102a086d0>

#### Fetch and Transform

In [9]:
# Pull the table contents into `data` for the transformations below
# (presumably a pandas DataFrame, given the .head() previews — confirm).
data = permits_raw.fetch_data()

In [10]:
# Preview the first rows of the fetched data.
data.head()

Unnamed: 0,assessor_book,assessor_page,assessor_parcel,tract,block,lot,reference_no_old_permit_no,pcis_permit_no,status,status_date,permit_type,permit_sub_type,permit_category,project_number,event_code,initiating_office,issue_date,address_start,address_fraction_start,address_end,address_fraction_end,street_direction,street_name,street_suffix,suffix_direction,unit_range_start,unit_range_end,zip_code,work_description,valuation,floor_area_la_zoning_code_definition,no_of_residential_dwelling_units,no_of_accessory_dwelling_units,no_of_stories,contractors_business_name,contractor_address,contractor_city,contractor_state,license_type,license_no,principal_first_name,principal_middle_name,principal_last_name,license_expiration_date,applicant_first_name,applicant_last_name,applicant_business_name,applicant_address_1,applicant_address_2,applicant_address_3,zone,occupancy,floor_area_la_building_code_definition,census_tract,council_district,latitude_longitude,applicant_relationship,existing_code,proposed_code
0,4317,3,***,TR 30210-C,,LT 1,,15044-90000-08405,Permit Finaled,2015-09-10,HVAC,1 or 2 Family Dwelling,No Plan Check,,,INTERNET,2015-08-18,1823,1/2,1823,1/2,S,THAYER,AVE,,,,90025,,,,,,,CONDITIONED AIRE MECHANICAL & ENGINEERING INC,18650 PARTHENIA STREET,NORTHRIDGE,CA,C20,532440,BRETT,MOORE,HOFFER,2016-06-30,BRETT,HOFFER,,18650 PARTHENIA ST,,"NORTHRIDGE, CA",R3-1-O,,0.0,2671.0,5,"(34.05474, -118.42628)",Net Applicant,,
1,5005,10,017,CHESTERFIELD SQUARE,,465,16SL57806,16016-70000-02464,Permit Finaled,2017-08-01,Bldg-Alter/Repair,1 or 2 Family Dwelling,No Plan Check,,,SOUTH LA,2016-02-04,2122,,2122,,W,54TH,ST,,,,90062,General rehabilitation for single family dwell...,40000.0,,,,,OWNER-BUILDER,,,,,0,JAVIER,,TALAMANTES,,JAVIER,TALAMANTES,OWNER-BUILDER,,,,C2-1VL,,,2325.0,8,"(33.99307, -118.31668)",Owner-Bldr,1.0,
2,5154,23,022,SUN-SET TRACT,D,13,14VN81535,14016-20000-13092,Issued,2014-08-13,Bldg-Alter/Repair,Apartment,Plan Check,,,VAN NUYS,2014-08-13,415,,415,,S,BURLINGTON,AVE,,1-30,1-30,90057,PHOTOVOLTAIC SOLAR PANELS ON ROOF OF (E) APT BLDG,37000.0,,,,,PERMACITY CONSTRUCTION CORP,5570 W WASHINGTON BLVD,LOS ANGELES,CA,B,827864,JONATHAN,SAUL,PORT,2015-11-30,LINDA,MARTON,,710 WILSHIRE BLVD,,"SANTA MONICA, CA",R4-1,,,2089.04,1,"(34.06012, -118.26997)",Agent for Owner,5.0,
3,4404,30,010,TR 12086,,2,,16044-30000-09658,Permit Finaled,2016-08-29,HVAC,1 or 2 Family Dwelling,No Plan Check,,,WEST LA,2016-08-22,315,,315,,S,OCEANO,DR,,,,90049,,,,,,,E/C HEATING AND AIR CONDITION,26888 CUATRO MILPAS ST,VALENCIA,CA,C20,651051,EDY,RUDOLFO,CORDON,2018-07-31,,,,,,,RS-1,,0.0,2640.0,11,"(34.05707, -118.4732)",Contractor,,
4,2646,19,011,TR 7158,,11,,17042-90000-31792,Permit Finaled,2017-12-28,Plumbing,1 or 2 Family Dwelling,No Plan Check,,,INTERNET,2017-12-26,13640,,13640,,W,PIERCE,ST,,,,91331,,,,,,,TITANIUM POWER INC,1545 S LA CIENEGA BLVD,LOS ANGELES,CA,B,989217,DENNIS,HARUO,MIYAHIRA,2017-12-31,YONI,GHERMEZI,,1545 S LA CIENEGA BLVD,,"LOS ANGELES, CA",R1-1-O,,0.0,1044.03,7,"(34.25487, -118.43002)",Net Applicant,,


In [11]:
# Concatenate the address columns; the next preview shows a new
# `full_address` column (e.g. "1823 S THAYER AVE 90025").
# NOTE(review): `data` is rebound here, so this cell depends on run order.
data = create_full_address(data)

In [12]:
# Spot-check a single row to confirm the new `full_address` column.
data.head(1)

Unnamed: 0,assessor_book,assessor_page,assessor_parcel,tract,block,lot,reference_no_old_permit_no,pcis_permit_no,status,status_date,permit_type,permit_sub_type,permit_category,project_number,event_code,initiating_office,issue_date,address_start,address_fraction_start,address_end,address_fraction_end,street_direction,street_name,street_suffix,suffix_direction,unit_range_start,unit_range_end,zip_code,work_description,valuation,floor_area_la_zoning_code_definition,no_of_residential_dwelling_units,no_of_accessory_dwelling_units,no_of_stories,contractors_business_name,contractor_address,contractor_city,contractor_state,license_type,license_no,principal_first_name,principal_middle_name,principal_last_name,license_expiration_date,applicant_first_name,applicant_last_name,applicant_business_name,applicant_address_1,applicant_address_2,applicant_address_3,zone,occupancy,floor_area_la_building_code_definition,census_tract,council_district,latitude_longitude,applicant_relationship,existing_code,proposed_code,full_address
0,4317,3,***,TR 30210-C,,LT 1,,15044-90000-08405,Permit Finaled,2015-09-10,HVAC,1 or 2 Family Dwelling,No Plan Check,,,INTERNET,2015-08-18,1823,1/2,1823,1/2,S,THAYER,AVE,,,,90025,,,,,,,CONDITIONED AIRE MECHANICAL & ENGINEERING INC,18650 PARTHENIA STREET,NORTHRIDGE,CA,C20,532440,BRETT,MOORE,HOFFER,2016-06-30,BRETT,HOFFER,,18650 PARTHENIA ST,,"NORTHRIDGE, CA",R3-1-O,,0,2671.0,5,"(34.05474, -118.42628)",Net Applicant,,,1823 S THAYER AVE 90025


In [13]:
# Geocode missing coordinates from the street address. The return value is
# discarded, so `data` is presumably updated in place — TODO confirm in
# geocode_from_address.
# NOTE(review): the recorded run reports a cost ($0.10 for 19 addresses), so
# re-running this cell may incur charges.
geocode_from_address(data);

Cost for geocoding 19 addresses is $0.10.
Geocoding...
19 locations were assigned coordinates.


In [14]:
# Extract latitude and longitude from the coordinates into their own columns;
# the next preview shows new `latitude` and `longitude` columns.
data = split_lat_long(data)

In [15]:
# Preview the transformed data.
# NOTE(review): in the recorded output, integer-looking columns such as
# assessor_book now display as floats (4317.0 instead of 4317) — verify dtypes
# before writing back to SMALLINT/INTEGER columns.
data.head()

Unnamed: 0,assessor_book,assessor_page,assessor_parcel,tract,block,lot,reference_no_old_permit_no,pcis_permit_no,status,status_date,permit_type,permit_sub_type,permit_category,project_number,event_code,initiating_office,issue_date,address_start,address_fraction_start,address_end,address_fraction_end,street_direction,street_name,street_suffix,suffix_direction,unit_range_start,unit_range_end,zip_code,work_description,valuation,floor_area_la_zoning_code_definition,no_of_residential_dwelling_units,no_of_accessory_dwelling_units,no_of_stories,contractors_business_name,contractor_address,contractor_city,contractor_state,license_type,license_no,principal_first_name,principal_middle_name,principal_last_name,license_expiration_date,applicant_first_name,applicant_last_name,applicant_business_name,applicant_address_1,applicant_address_2,applicant_address_3,zone,occupancy,floor_area_la_building_code_definition,census_tract,council_district,latitude_longitude,applicant_relationship,existing_code,proposed_code,full_address,latitude,longitude
0,4317.0,3.0,***,TR 30210-C,,LT 1,,15044-90000-08405,Permit Finaled,2015-09-10,HVAC,1 or 2 Family Dwelling,No Plan Check,,,INTERNET,2015-08-18,1823.0,1/2,1823.0,1/2,S,THAYER,AVE,,,,90025,,,,,,,CONDITIONED AIRE MECHANICAL & ENGINEERING INC,18650 PARTHENIA STREET,NORTHRIDGE,CA,C20,532440.0,BRETT,MOORE,HOFFER,2016-06-30,BRETT,HOFFER,,18650 PARTHENIA ST,,"NORTHRIDGE, CA",R3-1-O,,0.0,2671.0,5.0,"(34.05474, -118.42628)",Net Applicant,,,1823 S THAYER AVE 90025,34.05474,-118.42628
1,5005.0,10.0,017,CHESTERFIELD SQUARE,,465,16SL57806,16016-70000-02464,Permit Finaled,2017-08-01,Bldg-Alter/Repair,1 or 2 Family Dwelling,No Plan Check,,,SOUTH LA,2016-02-04,2122.0,,2122.0,,W,54TH,ST,,,,90062,General rehabilitation for single family dwell...,40000.0,,,,,OWNER-BUILDER,,,,,0.0,JAVIER,,TALAMANTES,,JAVIER,TALAMANTES,OWNER-BUILDER,,,,C2-1VL,,,2325.0,8.0,"(33.99307, -118.31668)",Owner-Bldr,1.0,,2122 W 54TH ST 90062,33.99307,-118.31668
2,5154.0,23.0,022,SUN-SET TRACT,D,13,14VN81535,14016-20000-13092,Issued,2014-08-13,Bldg-Alter/Repair,Apartment,Plan Check,,,VAN NUYS,2014-08-13,415.0,,415.0,,S,BURLINGTON,AVE,,1-30,1-30,90057,PHOTOVOLTAIC SOLAR PANELS ON ROOF OF (E) APT BLDG,37000.0,,,,,PERMACITY CONSTRUCTION CORP,5570 W WASHINGTON BLVD,LOS ANGELES,CA,B,827864.0,JONATHAN,SAUL,PORT,2015-11-30,LINDA,MARTON,,710 WILSHIRE BLVD,,"SANTA MONICA, CA",R4-1,,,2089.04,1.0,"(34.06012, -118.26997)",Agent for Owner,5.0,,415 S BURLINGTON AVE 90057,34.06012,-118.26997
3,4404.0,30.0,010,TR 12086,,2,,16044-30000-09658,Permit Finaled,2016-08-29,HVAC,1 or 2 Family Dwelling,No Plan Check,,,WEST LA,2016-08-22,315.0,,315.0,,S,OCEANO,DR,,,,90049,,,,,,,E/C HEATING AND AIR CONDITION,26888 CUATRO MILPAS ST,VALENCIA,CA,C20,651051.0,EDY,RUDOLFO,CORDON,2018-07-31,,,,,,,RS-1,,0.0,2640.0,11.0,"(34.05707, -118.4732)",Contractor,,,315 S OCEANO DR 90049,34.05707,-118.4732
4,2646.0,19.0,011,TR 7158,,11,,17042-90000-31792,Permit Finaled,2017-12-28,Plumbing,1 or 2 Family Dwelling,No Plan Check,,,INTERNET,2017-12-26,13640.0,,13640.0,,W,PIERCE,ST,,,,91331,,,,,,,TITANIUM POWER INC,1545 S LA CIENEGA BLVD,LOS ANGELES,CA,B,989217.0,DENNIS,HARUO,MIYAHIRA,2017-12-31,YONI,GHERMEZI,,1545 S LA CIENEGA BLVD,,"LOS ANGELES, CA",R1-1-O,,0.0,1044.03,7.0,"(34.25487, -118.43002)",Net Applicant,,,13640 W PIERCE ST 91331,34.25487,-118.43002


#### Update Table

In [16]:
# Display the type map imported from src.pipeline.dictionaries; it is passed
# to update_values in the next cell.
types_dict

{'assessor_book': 'SMALLINT',
 'assessor_page': 'SMALLINT',
 'assessor_parcel': 'CHAR(3)',
 'tract': 'VARCHAR(200)',
 'block': 'VARCHAR(50)',
 'lot': 'VARCHAR(50)',
 'reference_no_old_permit_no': 'VARCHAR(50)',
 'pcis_permit_no': 'VARCHAR(50)',
 'status': 'VARCHAR(50)',
 'status_date': 'DATE',
 'permit_type': 'VARCHAR(50)',
 'permit_sub_type': 'VARCHAR(50)',
 'permit_category': 'VARCHAR(50)',
 'project_number': 'SMALLINT',
 'event_code': 'VARCHAR(50)',
 'initiating_office': 'VARCHAR(50)',
 'issue_date': 'DATE',
 'address_start': 'INTEGER',
 'address_fraction_start': 'CHAR(3)',
 'address_end': 'INTEGER',
 'address_fraction_end': 'CHAR(3)',
 'street_direction': 'CHAR(1)',
 'street_name': 'VARCHAR(50)',
 'street_suffix': 'VARCHAR(10)',
 'suffix_direction': 'VARCHAR(10)',
 'unit_range_start': 'VARCHAR(50)',
 'unit_range_end': 'VARCHAR(50)',
 'zip_code': 'INTEGER',
 'work_description': 'TEXT',
 'valuation': 'NUMERIC',
 'floor_area_la_zoning_code_definition': 'VARCHAR(10)',
 'no_of_residenti

In [17]:
# Write the transformed data back to `permits_raw`. `id_col` presumably names
# the key used to match rows — TODO confirm in Table.update_values.
# The recorded output shows an `UPDATE permits_raw SET ...` statement whose
# values are read from `tmp_permits_raw`.
permits_raw.update_values(data=data, id_col="pcis_permit_no", types_dict=types_dict)

Query successful on database "permits".


update_values:

 {'assessor_book': 'SMALLINT', 'assessor_page': 'SMALLINT', 'assessor_parcel': 'CHAR(3)', 'tract': 'VARCHAR(200)', 'block': 'VARCHAR(50)', 'lot': 'VARCHAR(50)', 'reference_no_old_permit_no': 'VARCHAR(50)', 'pcis_permit_no': 'VARCHAR(50)', 'status': 'VARCHAR(50)', 'status_date': 'DATE', 'permit_type': 'VARCHAR(50)', 'permit_sub_type': 'VARCHAR(50)', 'permit_category': 'VARCHAR(50)', 'project_number': 'SMALLINT', 'event_code': 'VARCHAR(50)', 'initiating_office': 'VARCHAR(50)', 'issue_date': 'DATE', 'address_start': 'INTEGER', 'address_fraction_start': 'CHAR(3)', 'address_end': 'INTEGER', 'address_fraction_end': 'CHAR(3)', 'street_direction': 'CHAR(1)', 'street_name': 'VARCHAR(50)', 'street_suffix': 'VARCHAR(10)', 'suffix_direction': 'VARCHAR(10)', 'unit_range_start': 'VARCHAR(50)', 'unit_range_end': 'VARCHAR(50)', 'zip_code': 'INTEGER', 'work_description': 'TEXT', 'valuation': 'NUMERIC', 'floor_area_la_zoning_code_definition': 'VA



Copying...


UPDATE permits_raw
SET assessor_book = tmp_permits_raw.assessor_book,
	assessor_page = tmp_permits_raw.assessor_page,
	assessor_parcel = tmp_permits_raw.assessor_parcel,
	tract = tmp_permits_raw.tract,
	block = tmp_permits_raw.block,
	lot = tmp_permits_raw.lot,
	reference_no_old_permit_no = tmp_permits_raw.reference_no_old_permit_no,
	pcis_permit_no = tmp_permits_raw.pcis_permit_no,
	status = tmp_permits_raw.status,
	status_date = tmp_permits_raw.status_date,
	permit_type = tmp_permits_raw.permit_type,
	permit_sub_type = tmp_permits_raw.permit_sub_type,
	permit_category = tmp_permits_raw.permit_category,
	project_number = tmp_permits_raw.project_number,
	event_code = tmp_permits_raw.event_code,
	initiating_office = tmp_permits_raw.initiating_office,
	issue_date = tmp_permits_raw.issue_date,
	address_start = tmp_permits_raw.address_start,
	address_fraction_start = tmp_permits_raw.address_fraction_start,
	address_end = tmp_permits_raw.address_end,
	address_fraction_end = t