# Building Permit Data

## Documentation

[United States Census Bureau Building Permits Survey](https://www.census.gov/construction/bps/)

[ASCII files by State, Metropolitan Statistical Area (MSA), County or Place](https://www2.census.gov/econ/bps/)

[MSA Folder](https://www2.census.gov/econ/bps/Metro/)

[ASCII MSA Documentation](https://www2.census.gov/econ/bps/Documentation/msaasc.pdf)

In [1]:
import numpy as np
import pandas as pd

import re

import os.path
from os import path

from datetime import datetime

%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns

from scipy import stats

from sklearn.preprocessing import MinMaxScaler, StandardScaler, PowerTransformer
from sklearn.cluster import KMeans

import wrangle as wr
import preprocessing_permits as pr
import explore as ex
import model as mo

import warnings
# Install a blanket "ignore" filter: from this cell on, every warning raised in
# the kernel is suppressed, whatever its category or originating module.
# NOTE(review): this also hides deprecation and other library warnings from the
# packages imported above — consider narrowing the filter to the specific
# warning that motivated it; confirm intent.
warnings.filterwarnings("ignore")

In [2]:
# Load the starting table from the project-local preprocessing_permits module
# (imported as `pr`). The recorded output of the following cell reports
# 8269 rows x 6 columns.
# NOTE(review): where the helper reads from (local file, cache, download) is
# not visible in this notebook — confirm before relying on a fresh-kernel run.
df = pr.get_permits_model_df()

In [3]:
# Sanity check: (rows, columns) of the table immediately after loading.
df.shape

(8269, 6)

In [4]:
# Rebind `df` to the result of pr.add_new_features. Per the recorded shapes
# around this cell, the row count stays at 8269 while the column count goes
# from 6 to 15.
# NOTE(review): `df` is overwritten, so the pre-feature table is no longer
# reachable; re-running this cell alone feeds the helper its own output.
df = pr.add_new_features(df)

In [5]:
# Sanity check: (rows, columns) after the feature-engineering step.
df.shape

(8269, 15)

In [6]:
# Rebind `df` to the result of pr.filter_top_cities_building_permits. Per the
# recorded shapes around this cell, rows drop from 8269 to 2990 and the column
# count grows from 15 to 16.
# NOTE(review): the name says "filter", yet one column is added as well —
# which column, and the criterion for "top cities", are not visible here; confirm.
df = pr.filter_top_cities_building_permits(df)

In [7]:
# Sanity check: (rows, columns) after restricting to the top cities.
df.shape

(2990, 16)

In [8]:
# NOTE(review): this repeats the shape check of the preceding cell with no
# statement in between that touches `df`, and reports the same (2990, 16).
# Candidate for removal, or for replacement by an explicit assertion on the
# expected shape.
df.shape

(2990, 16)

In [10]:
# Run the clustering step from preprocessing_permits and unpack its six return
# values. `df` is rebound to the first element; the other five names are not
# referenced again in the visible remainder of this notebook.
# NOTE(review): judging by the names only, these look like the fitted KMeans
# model, its centroids, the feature scaler, a scaled threshold value and the
# feature matrix — confirm against pr.create_clusters.
# NOTE(review): no random seed is set anywhere in this notebook; verify that
# pr.create_clusters pins one, otherwise cluster ids may differ between runs.
# NOTE(review): the execution counters around this cell skip 9 and 11, so the
# notebook was not produced by a clean Restart & Run All — re-verify.
(
    df,
    kmeans,
    centroids,
    scaler,
    scaled_ei_threshold_value,
    X,
) = pr.create_clusters(df)

In [12]:
# Rebind `df` to the result of pr.create_cluster_labels.
# NOTE(review): the frame displayed in the next cell carries `_x` / `_y`
# suffixed pairs (avg_units_per_bldg_x / avg_units_per_bldg_y, ei_x / ei_y),
# which looks like a merge on frames with overlapping column names — confirm
# whether that happens in this helper or in create_clusters, and whether both
# copies are meant to be kept.
df = pr.create_cluster_labels(df)

In [13]:
# Display the clustered frame (the notebook renders a truncated head/tail view).
# NOTE(review): prefer df.head() / df.sample(5) for a compact preview.
# NOTE(review): the last index label rendered is 2858 while the last recorded
# shape was (2990, 16); an index label is not a row count, but it is worth
# confirming how many rows survive the two clustering steps.
df

Unnamed: 0,city,state,year,total_high_density_bldgs,total_high_density_units,total_high_density_value,avg_units_per_bldg_x,value_per_bldg,value_per_unit,city_state_high_density_bldgs_delta_pct,...,city_state_high_density_value_delta_pct,market_volume,market_volume_delta_pct,ei_x,city_state,cluster,test_future_cluster,should_enter,avg_units_per_bldg_y,ei_y
0,Albany,NY,1998,47.0,1038.0,54232000.0,0.499727,1.153872e+06,52246.628131,0.566667,...,2.034637,2.529787e+10,0.231085,1.200626,Albany_NY,3,5.0,False,0.024977,1.710169
1,Albany,NY,1999,39.0,515.0,24484000.0,-0.493115,6.277949e+05,47541.747573,-0.170213,...,-0.548532,2.609590e+10,0.031545,-1.118050,Albany_NY,5,1.0,False,0.136955,-0.881078
2,Albany,NY,2000,25.0,346.0,16130000.0,-0.398138,6.452000e+05,46618.497110,-0.358974,...,-0.341202,2.742204e+10,0.050818,-0.655825,Albany_NY,5,1.0,False,0.136955,-0.881078
3,Albany,NY,2001,56.0,502.0,24536000.0,-1.307457,4.381429e+05,48876.494024,1.240000,...,0.521141,2.913103e+10,0.062322,0.503915,Albany_NY,1,1.0,False,-0.996359,0.128136
4,Albany,NY,2002,47.0,475.0,29634000.0,-1.049695,6.305106e+05,62387.368421,-0.160714,...,0.207776,3.169504e+10,0.088016,0.146820,Albany_NY,1,1.0,False,-0.996359,0.128136
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2855,York,PA,2015,3.0,53.0,3185000.0,0.081753,1.061667e+06,60094.339623,-0.666667,...,-0.449724,5.200240e+10,0.313639,-1.170629,York_PA,5,1.0,False,0.136955,-0.881078
2856,York,PA,2016,3.0,34.0,1566000.0,-0.808043,5.220000e+05,46058.823529,0.000000,...,-0.508320,4.928300e+10,-0.052294,-0.905714,York_PA,2,2.0,False,-1.407995,-1.633774
2857,York,PA,2017,15.0,83.0,10204000.0,-2.387242,6.802667e+05,122939.759036,4.000000,...,5.515964,5.158824e+10,0.046775,2.050569,York_PA,1,3.0,False,-0.996359,0.128136
2858,York,PA,2018,2.0,26.0,2160000.0,-0.524968,1.080000e+06,83076.923077,-0.866667,...,-0.788318,5.336251e+10,0.034393,-1.887526,York_PA,2,,False,-1.407995,-1.633774


In [14]:
# Persist the clustered frame to 'cluster_maps.csv'; the path is relative, so
# the file lands in the kernel's current working directory.
# NOTE(review): with pandas' default index=True the index is written as an
# unnamed first column; whoever reads this file back needs index_col=0 (or this
# call needs index=False) — confirm against the consumer.
df.to_csv('cluster_maps.csv')