# permits-data

Data analysis pipeline for construction permits in Los Angeles.

For more information:
https://data.lacity.org/A-Prosperous-City/Building-and-Safety-Permit-Information/yv23-pmwf

In [57]:
import os
import sys
import pandas as pd
import psycopg2

In [58]:
# Notebook display settings: show up to 500 rows, and put no limit on the number
# of columns or on the display width.
for option, value in (
    ('display.max_rows', 500),
    ('display.max_columns', None),
    ('display.width', None),
):
    pd.set_option(option, value)

In [59]:
# Make the project root (one level above the notebook) the first entry of the
# import search path. Later cells also read sys.path[0] as the root prefix for
# data files, so it must end up equal to '../'.
# Insert rather than overwrite so the interpreter's original first entry is
# kept, and guard the insert so re-running the cell does not add duplicates.
if sys.path[0] != '../':
    sys.path.insert(0, '../')

## 1. Import Data

In [60]:
# Download URL of the permits CSV and the file name used for the local raw copy.
URL = "https://data.lacity.org/api/views/yv23-pmwf/rows.csv?accessType=DOWNLOAD"
raw_data = 'permits_raw.csv'

# Path of the raw file: <sys.path[0]>data/raw/<raw_data>, where sys.path[0] was
# set to '../' earlier in the notebook.
DATA_PATH = ''.join((sys.path[0], 'data/raw/', raw_data))

In [61]:
#data = pd.read_csv(DATA_PATH)

In [None]:
# Open the connection to the `permits` Postgres database used by the cells below.
# Connection settings are taken from the standard libpq environment variables
# (PGDATABASE, PGUSER, PGPASSWORD, PGHOST, PGPORT) so real credentials do not have
# to be stored in the notebook. The fallbacks are the previous local-development
# values, so the cell behaves as before when the variables are unset.
conn = psycopg2.connect(
    dbname=os.environ.get('PGDATABASE', 'permits'),
    user=os.environ.get('PGUSER', 'postgres'),
    password=os.environ.get('PGPASSWORD', 'password'),
    host=os.environ.get('PGHOST', 'localhost'),
    port=int(os.environ.get('PGPORT', 5432)),
)

#sql = 'SELECT * FROM permits_raw WHERE "Zip Code" IN (90012, 90013, 90014, 90015, 90017, 90071) LIMIT 1000;'
#sql = 'SELECT * FROM permits_raw;'

# Extract full dataset
#data = pd.read_sql_query(sql, conn)

### 1.1 Update Column Names in Postgres Database

In [199]:
# Get raw data column names
def get_table_names(table, connection=None):
    """Return the column names of a database table.

    Parameters
    ----------
    table : str
        Name of the table to look up in ``information_schema.columns``.
    connection : DB-API connection, optional
        Connection to query. Defaults to the notebook-level ``conn``.

    Returns
    -------
    pandas.Series
        The ``column_name`` values for ``table``, in the table's column order.
    """
    # The table name is passed as a bound parameter (psycopg2 ``%s`` style)
    # instead of being formatted into the SQL text, so quotes in the name cannot
    # break or alter the query. ORDER BY makes the returned order deterministic.
    sql = (
        "SELECT column_name "
        "FROM information_schema.columns "
        "WHERE table_name = %s "
        "ORDER BY ordinal_position"
    )
    db = conn if connection is None else connection
    etl = pd.read_sql_query(sql, db, params=(table,))

    return etl['column_name']

In [200]:
# Retrieve the current column names of the permits_raw table as a pandas Series.
# These are the "old" names that the rename statements below are built from.
old_columns = get_table_names("permits_raw")

In [201]:
# Rename columns, will update table later
def format_names(series):
    """Convert raw column names into lowercase, underscore-separated identifiers.

    Parameters
    ----------
    series : pandas.Series
        String values to normalise (in this notebook, the column names of
        ``permits_raw``).

    Returns
    -------
    pandas.Series
        A new Series with the same index in which spaces, hyphens and forward
        slashes are replaced by ``_``, ``#`` is replaced by ``No`` (short for
        "number"), periods, parentheses and apostrophes are removed, and the
        result is lower-cased. The input Series is not modified.
    """
    # (character, replacement) pairs, applied in this order.
    replacements = (
        (' ', '_'),   # whitespace -> underscore
        ('-', '_'),   # hyphen -> underscore
        ('#', 'No'),  # hashtag -> "No" (number)
        ('/', '_'),   # forward slash -> underscore
        ('.', ''),    # drop periods
        ('(', ''),    # drop opening parentheses
        (')', ''),    # drop closing parentheses
        ("'", ''),    # drop apostrophes
    )

    for old, new in replacements:
        # regex=False: '.', '(' and ')' are regex metacharacters, so request a
        # literal match explicitly instead of relying on the pandas default.
        series = series.str.replace(old, new, regex=False)

    return series.str.lower()

In [202]:
# Build the cleaned-up column names for permits_raw from the current ones.
new_columns = format_names(old_columns)

In [221]:
# Creates a SQL query to update table columns and writes to text file
def create_query(old_columns, new_columns, update_db=False, table='permits_raw',
                 sql_path='../postgres/scripts/update_names.sql', connection=None):
    """Write a column-rename SQL script and optionally apply it to the database.

    One ``ALTER TABLE ... RENAME`` statement is generated for every column whose
    new name differs from its old name. The statements are joined with newlines
    and written to ``sql_path``, replacing any previous content.

    Parameters
    ----------
    old_columns : pandas.Series
        Current column names.
    new_columns : pandas.Series
        Replacement names, looked up by the index labels of ``old_columns``.
    update_db : bool, default False
        When true, execute the generated script and commit it.
    table : str, default 'permits_raw'
        Table whose columns are renamed.
    sql_path : str, default '../postgres/scripts/update_names.sql'
        File the script is written to.
    connection : DB-API connection, optional
        Connection used when ``update_db`` is true. Defaults to the
        notebook-level ``conn``.

    Returns
    -------
    None

    Raises
    ------
    Exception
        Any error raised while executing the script is re-raised after the
        transaction has been rolled back.
    """
    template = 'ALTER TABLE {table} RENAME "{old_name}" to {new_name};'

    statements = []
    # Series.items() replaces iteritems(), which was removed in pandas 2.0.
    for idx, name in old_columns.items():
        new_name = new_columns[idx]
        if name == new_name:
            # Renaming a column to its current name is rejected by Postgres;
            # skipping it also makes a re-run after a successful rename safe.
            continue
        statements.append(template.format(
            table=table,
            # Double any embedded double quote so the quoted identifier stays valid.
            old_name=str(name).replace('"', '""'),
            new_name=new_name,
        ))

    update_names = '\n'.join(statements)
    with open(sql_path, 'w') as text:
        text.write(update_names)

    # Nothing to execute when every name already matches.
    if update_db and statements:
        db = conn if connection is None else connection
        cur = db.cursor()
        try:
            cur.execute(update_names)
            # Without a commit the renames are lost when the connection closes.
            db.commit()
        except Exception:
            # Leave the connection usable instead of stuck in an aborted transaction.
            db.rollback()
            raise
        finally:
            cur.close()

In [222]:
# Create the rename script for permits_raw and apply it to the database.
# NOTE(review): the saved output shows this run failing with
# `column "Assessor Book" does not exist`. That is consistent with old_columns
# being stale, i.e. fetched before the table had already been renamed.
# Re-fetch old_columns before re-running this cell.
create_query(old_columns, new_columns, update_db=True)

ProgrammingError: column "Assessor Book" does not exist


In [219]:
# Check that the column names were updated by re-reading them from the database
# and showing the first few.
get_table_names("permits_raw").head()

0      assessor_book
1      assessor_page
2    assessor_parcel
3              tract
4              block
Name: column_name, dtype: object

In [85]:
# Stores smaller version of dataset
#data[:10000].to_csv(sys.path[0] + 'data/interim/permits_small.csv', index=False)

In [94]:
# Load the interim sample from data/interim/permits_small.csv, relative to the
# project root held in sys.path[0].
# NOTE(review): presumably the 10,000-row file written by the commented-out export
# in the previous cell. Confirm.
data = pd.read_csv(sys.path[0] + 'data/interim/permits_small.csv')

In [95]:
# Preview the first rows of the loaded sample.
data.head()

Unnamed: 0,Assessor Book,Assessor Page,Assessor Parcel,Tract,Block,Lot,Reference # (Old Permit #),PCIS Permit #,Status,Status Date,Permit Type,Permit Sub-Type,Permit Category,Project Number,Event Code,Initiating Office,Issue Date,Address Start,Address Fraction Start,Address End,Address Fraction End,Street Direction,Street Name,Street Suffix,Suffix Direction,Unit Range Start,Unit Range End,Zip Code,Work Description,Valuation,Floor Area-L.A. Zoning Code Definition,# of Residential Dwelling Units,# of Accessory Dwelling Units,# of Stories,Contractor's Business Name,Contractor Address,Contractor City,Contractor State,License Type,License #,Principal First Name,Principal Middle Name,Principal Last Name,License Expiration Date,Applicant First Name,Applicant Last Name,Applicant Business Name,Applicant Address 1,Applicant Address 2,Applicant Address 3,Zone,Occupancy,Floor Area-L.A. Building Code Definition,Census Tract,Council District,Latitude/Longitude,Applicant Relationship,Existing Code,Proposed Code
0,4127.0,15.0,19,TR 16160,,237,,19041-90000-19146,Issued,05/21/2019,Electrical,1 or 2 Family Dwelling,No Plan Check,,,INTERNET,05/21/2019,5424.0,,5424.0,,W,77TH,ST,,,,90045.0,,,,,,,JOE ELECTRIC,P O BOX 3132,CYPRESS,CA,C10,869457,JOSEPH,JOHN,ALAIMO,08/31/2019,JOSEPH,ALAIMO,,PO BOX 3132,,"CYPRESS, CA",R1-1,,0.0,2771.0,11.0,"(33.96762, -118.37549)",Net Applicant,,
1,4252.0,25.0,30,TR 5848,,13,,19041-90000-10671,Issued,03/26/2019,Electrical,Apartment,No Plan Check,,,INTERNET,03/26/2019,3726.0,,3726.0,,S,KELTON,AVE,,,,90034.0,,,,,,,A - CO TEMPORARY POWER,P O BOX 16843,NORTH HOLLYWOOD,CA,C10,777790,BRIAN,WAYNE,BURNS,04/30/2020,BRIAN,BURNS,,13244 RAYMER ST,,"NORTH HOLLYWOOD, CA",R3-1,,0.0,2718.02,5.0,"(34.01739, -118.4127)",Net Applicant,,
2,4109.0,22.0,11,TR 13840,,300,,15042-90000-15370,Permit Finaled,08/07/2015,Plumbing,1 or 2 Family Dwelling,No Plan Check,,,INTERNET,07/30/2015,7806.0,,7806.0,,S,BELAND,AVE,,,,90045.0,,,,,,,M & D PLUMBING INC,675 S GLENWOOD PLACE,BURBANK,CA,C36,900919,GLEN,ROY,CHRISTENSEN,07/31/2015,MICHAEL,CHRISTENSEN,,675 S GLENWOOD PL,,"BURBANK, CA",R1-1,,0.0,2770.0,11.0,"(33.97107, -118.40483)",Net Applicant,,
3,5474.0,24.0,13,TR 3631,,139,16VN26376,16016-20000-22031,Issued,09/16/2016,Bldg-Alter/Repair,1 or 2 Family Dwelling,No Plan Check,,,VAN NUYS,09/16/2016,4418.0,,4418.0,,E,LINCOLN,AVE,,,,90041.0,Window (2) change-out (same size & type) for r...,1023.0,,,,,HOME DEPOT AT-HOME SERVICES THE,2455 PACES FERRY RD,ATLANTA,GA,B,836021,GUS,ANTHONY,AVALOS,04/30/2018,RACHEL,,,,,,RD1.5-1,,,1862.01,1.0,"(34.12313, -118.22084)",Agent for Contractor,1.0,
4,2707.0,21.0,13,P M 1162,,A,,16042-90000-13603,Permit Finaled,07/08/2016,Plumbing,1 or 2 Family Dwelling,No Plan Check,,,INTERNET,07/05/2016,10520.0,,10520.0,,N,LERIDA,PL,,,,91311.0,,,,,,,DRAIN BUSTER ROOTER AND PLUMBING,345 W FOOTHILL BLVD STE 1,GLENDORA,CA,C36,961882,JOSE,AMILCAR,PALMA,06/30/2017,JOSE,PALMA,,345 W FOOTHILL BLVD,1.0,"GLENDORA, CA",RA-1,,0.0,1131.02,12.0,"(34.26165, -118.57186)",Net Applicant,,


In [93]:
# Dimensions of the loaded sample as (rows, columns).
data.shape

(10000, 59)

In [106]:
#data[(data['PCIS Permit #'].duplicated(keep=False)==1)].sort_values(by='PCIS Permit #')

## 2. Clean Data

In [66]:
#data = format_names(data)

# Convert the Valuation column to floats, treating missing values as 0.
# The result is assigned back to the frame rather than calling
# fillna(inplace=True) on the selected column. With pandas Copy-on-Write that
# chained in-place call acts on a temporary object and leaves `data` unchanged.
data['Valuation'] = data['Valuation'].fillna(0).astype(float)

In [67]:
# Preview the frame after the Valuation conversion.
# NOTE(review): the saved output below has an earlier execution count (In [67])
# than the cell that loads `data` (In [94]) and shows different rows.
# Re-run the notebook top to bottom to refresh it.
data.head()

Unnamed: 0,Assessor Book,Assessor Page,Assessor Parcel,Tract,Block,Lot,Reference # (Old Permit #),PCIS Permit #,Status,Status Date,Permit Type,Permit Sub-Type,Permit Category,Project Number,Event Code,Initiating Office,Issue Date,Address Start,Address Fraction Start,Address End,Address Fraction End,Street Direction,Street Name,Street Suffix,Suffix Direction,Unit Range Start,Unit Range End,Zip Code,Work Description,Valuation,Floor Area-L.A. Zoning Code Definition,# of Residential Dwelling Units,# of Accessory Dwelling Units,# of Stories,Contractor's Business Name,Contractor Address,Contractor City,Contractor State,License Type,License #,Principal First Name,Principal Middle Name,Principal Last Name,License Expiration Date,Applicant First Name,Applicant Last Name,Applicant Business Name,Applicant Address 1,Applicant Address 2,Applicant Address 3,Zone,Occupancy,Floor Area-L.A. Building Code Definition,Census Tract,Council District,Latitude/Longitude,Applicant Relationship,Existing Code,Proposed Code
0,5408,11,907,TR 10418,,LT 2,,15041-10000-21893,Permit Finaled,09/20/2016,Electrical,Commercial,Plan Check,,,METRO,11/06/2015,650,,650,,N,MAIN,ST,,,,90012,,0.0,,,,,M A ELECTRIC,P O BOX 1783,COVINA,CA,C10,569475,MAURICIO,ALONSO,AVELAR,05/31/2017,ABEL,MARIN,ECCO ENGINEERING FIRM,726 W BROADWAY,A,GLENDALE CA,C4-1VL,,0.0,2071.0,14,,Engineer,,
1,5151,1,24,TR 21949,,LT 1,,17041-90000-32378,Issued,09/11/2017,Electrical,Commercial,No Plan Check,,,INTERNET,09/11/2017,261,,261,,S,FIGUEROA,ST,,260,,90012,,0.0,,,,,CANDUIT ELECTRIC INC,8218 HILLROSE ST,SUNLAND,CA,C10,1013159,CHRISTOPHER,MICHAEL,TRUELOVE,09/30/2018,CHRIS,TRUELOVE,,8218 HILLROSE,,"SUNLAND, CA",C4-4D,,0.0,2075.01,14,"(34.05546, -118.25446)",Net Applicant,,
2,5151,14,31,TR 21409,,LT 1,,16042-10000-28033,Permit Finaled,02/15/2017,Plumbing,Commercial,No Plan Check,,,METRO,12/28/2016,333,,333,,S,HOPE,ST,,23RD FL,,90071,,0.0,,,,,MUIR-CHASE PLUMBING CO INC,4530 BRAZIL STREET,LOS ANGELES,CA,C36,539835,GRANT,DRAKE,MUIR,08/31/2018,,,,,,,C4-4D,,0.0,2075.02,14,"(34.05326, -118.25292)",Contractor,,
3,5144,21,31,H. F. SPENCER SUBDIVISION OF NORTH 1/2 BLOCK 5...,,LT NO 3,,19041-10000-02555,Issued,01/22/2019,Electrical,Commercial,No Plan Check,,,METRO,01/22/2019,813,,813,,S,FLOWER,ST,,,,90017,,0.0,,,,,VELRIO CONSTRUCTION INC,31805 TEMECULA PARKWAY STE 132,TEMECULA,CA,C-7,1017086,FRANCISCO,,DE JESUS VELEZ,08/31/2020,FRANCISCO,VELEZ,VELRIO CONSTRUCTION INC,,,,C2-4D,,0.0,2077.1,14,"(34.04705, -118.26051)",Contractor,,
4,5163,18,6,MILLS AND WICKS EXTENSION OF SECOND ST. AND AD...,,233,18LA03217,16016-10004-17710,Permit Finaled,06/13/2018,Bldg-Alter/Repair,Commercial,Plan Check,,,METRO,05/17/2018,1019,,1019,,E,4TH,PL,,,,90013,SUPPLEMENTAL TO 16016-10000-17710 WINDOW WASHI...,0.0,,,,,SHANGRI - LA CONSTRUCTION L P,550 S HOPE ST STE 700,LOS ANGELES,CA,B,926436,BENJAMIN,SALEM,WAHAB,12/31/2018,SHANGRI-LA,CONSTR.,,550 S. HOPE ST.,700,"LOS ANGELES, CA",M3-1-RIO,,,2060.31,14,,Agent for Contractor,13.0,


In [69]:
#data['Valuation'].sort_values().value_counts(sort=False).sort_index()
#data.groupby(['Permit_Type', 'Contractors_Business_Name'])['Valuation'].sum().sort_values(ascending=False).sort_index()

