# permits-data / Visualization

ETL pipeline for construction permits data in Los Angeles, California, USA.

For more information:
https://data.lacity.org/A-Prosperous-City/Building-and-Safety-Permit-Information/yv23-pmwf

In [6]:
import os
import sys

# Set path for modules: point the first sys.path entry at the parent
# directory, presumably so the `src` package imported below resolves.
# NOTE(review): this overwrites the existing first entry rather than
# inserting a new one -- confirm that is intended.
sys.path[0] = '../'

from dotenv import load_dotenv, find_dotenv
import numpy as np
import pandas as pd
import psycopg2
import matplotlib.pyplot as plt
import seaborn as sns

# Import custom eda and sql functions
from src.toolkits.eda import get_snapshot
from src.toolkits.sql import connect_db, fetch_data

# Notebook display options for pandas output: rows are capped at 2000,
# while the column limit and the display width are both set to None.
for _option, _value in (
    ('display.max_rows', 2000),
    ('display.max_columns', None),
    ('display.width', None),
):
    pd.set_option(_option, _value)

# Apply seaborn's "paper" plotting context to the figures in this notebook
sns.set_context("paper")

In [7]:
# Project root is taken to be the parent of the current working directory
root_dir = os.path.dirname(os.getcwd())

# Load the .env file located by find_dotenv() into the process environment,
# then read the database / data-source settings (each is None when unset)
load_dotenv(find_dotenv())
POSTGRES_USER = os.environ.get("POSTGRES_USER")
POSTGRES_PASSWORD = os.environ.get("POSTGRES_PASSWORD")
POSTGRES_DB = os.environ.get("POSTGRES_DB")
DB_PORT = os.environ.get("DB_PORT")
DB_HOST = os.environ.get("DB_HOST")
DATA_URL = os.environ.get("DATA_URL")

# Settings used only by this notebook
# NOTE(review): root_dir is already the parent of the working directory, so
# DATA_DIR resolves to a "data" folder two levels above the working
# directory -- confirm this is intended and not meant to be root_dir + '/data'.
DATA_DIR = f"{os.path.dirname(root_dir)}/data"
DB_TABLE = "permits_raw"

In [8]:
# Available graph styles: print the names of the matplotlib style sheets
# that can be selected in this environment
print(plt.style.available)

['Solarize_Light2', '_classic_test_patch', 'bmh', 'classic', 'dark_background', 'fast', 'fivethirtyeight', 'ggplot', 'grayscale', 'seaborn', 'seaborn-bright', 'seaborn-colorblind', 'seaborn-dark', 'seaborn-dark-palette', 'seaborn-darkgrid', 'seaborn-deep', 'seaborn-muted', 'seaborn-notebook', 'seaborn-paper', 'seaborn-pastel', 'seaborn-poster', 'seaborn-talk', 'seaborn-ticks', 'seaborn-white', 'seaborn-whitegrid', 'tableau-colorblind10']


In [9]:
# Query text: a 500-row sample of every column from the DB_TABLE table
sql = f'SELECT * FROM {DB_TABLE} LIMIT 500;'

# Columns handed to fetch_data as parse_dates
date_columns = ['status_date', 'issue_date', 'license_expiration_date']

# Open the database connection, then run the query and keep the result
# in `data`
conn = connect_db()
data = fetch_data(sql, conn, parse_dates=date_columns)

Connected as user "postgres" to database "permits" on http://localhost:5432.


In [11]:
# Show the dtype pandas assigned to each column of the fetched sample
data.dtypes

assessor_book                                     object
assessor_page                                     object
assessor_parcel                                   object
tract                                             object
block                                             object
lot                                               object
reference_no_old_permit_no                        object
pcis_permit_no                                    object
status                                            object
status_date                               datetime64[ns]
permit_type                                       object
permit_sub_type                                   object
permit_category                                   object
project_number                                    object
event_code                                       float64
initiating_office                                 object
issue_date                                datetime64[ns]
address_start                  