# HTTP Request to SQL Tables
This notebook contains exploratory code that puts together the full "pipeline": making an HTTP request to the King County website, then loading all of the downloaded data into SQL tables.

In [1]:
import psycopg2
import pandas as pd

In [2]:
%load_ext autoreload

In [3]:
%autoreload 2

In [4]:
from src.data import sql_utils
from src.data import data_collection

We assume that the user has PostgreSQL installed, but not that a database with the right name already exists, so we create the database and its tables first.

In [5]:
# Set up the SQL side before any data is collected: database first, then tables.
# NOTE(review): presumably the tables are created inside the new database, so the
# call order matters — confirm in the sql_utils module.
sql_utils.create_database()
sql_utils.create_tables()

In [6]:
# Collect the sales, buildings and parcels data in one call. The result unpacks
# into three values, one per dataset; each is split into a zip handle and a CSV
# handle in the next cell.
sales_files, buildings_files, parcels_files = data_collection.collect_all_data_files()

In [7]:
# Each *_files value is a (zip_file, csv_file) pair; destructure all three pairs
# in a single assignment so every handle gets its own name.
(
    (sales_zip_file, sales_csv_file),
    (buildings_zip_file, buildings_csv_file),
    (parcels_zip_file, parcels_csv_file),
) = (sales_files, buildings_files, parcels_files)

In [8]:
# Hand the three CSV handles (sales, buildings, parcels — in that order) to
# sql_utils for loading into SQL.
# NOTE(review): presumably relies on the tables created by create_tables() —
# confirm in sql_utils.copy_csv_files.
sql_utils.copy_csv_files(sales_csv_file, buildings_csv_file, parcels_csv_file)

In [9]:
# The data has been handed to SQL, so release every handle from the collection
# step. Order is unchanged: zip then CSV, for sales, buildings and parcels.
for handle in (
    sales_zip_file,
    sales_csv_file,
    buildings_zip_file,
    buildings_csv_file,
    parcels_zip_file,
    parcels_csv_file,
):
    handle.close()

Now everything should be saved in the `housing_data` database running locally. To confirm, we connect to it and preview a few rows of the `buildings` table.

In [10]:
# Open a psycopg2 connection to the "housing_data" database. Only the database
# name is given; every other connection setting is left at its default.
# The connection is closed explicitly in a later cell.
conn = psycopg2.connect(dbname="housing_data")

In [11]:
# Sanity check: read five rows of the buildings table into a DataFrame, which the
# notebook renders as this cell's output (no ORDER BY, so which five is unspecified).
pd.read_sql_query("SELECT * FROM buildings LIMIT 5;", conn)

Unnamed: 0,major,minor,bldgnbr,nbrlivingunits,address,buildingnumber,fraction,directionprefix,streetname,streettype,...,fpmultistory,fpfreestanding,fpadditional,yrbuilt,yrrenovated,pcntcomplete,obsolescence,pcntnetcondition,condition,addnlcost
0,12000,32,1,1,830 8TH AVE S 98033,830,,,8TH,AVE,...,0,0,0,1981,0,0,0,0,3,0
1,12000,260,1,1,341 8TH ST S 98033,341,,,8TH,ST,...,0,0,0,1928,2007,0,0,0,3,0
2,12000,262,1,1,712 5TH AVE S 98033,712,,,5TH,AVE,...,0,0,0,2010,0,0,0,0,3,0
3,12005,9035,1,1,39525 AUBURN-ENUMCLAW RD SE 98092,39525,,,AUBURN-ENUMCLAW,RD,...,0,0,0,1917,0,0,0,0,1,0
4,12006,9079,1,1,39007 272ND AVE SE 98022,39007,,,272ND,AVE,...,0,0,0,1990,0,0,0,0,4,0


In [12]:
# Done with the preview; release the database connection.
conn.close()

In [13]:
# One more refactor: the whole pipeline is wrapped in a single driver function.
# NOTE(review): presumably this repeats the create/collect/copy/close steps from
# the cells above — confirm in data_collection, and check that running it against
# an already-created database is safe.
data_collection.download_data_and_load_into_sql()

In [14]:
# Verify the driver function's result by previewing five rows of the buildings table.
# The connection is closed in `finally` so it is released even if the query raises;
# psycopg2's `with conn:` only scopes a transaction and would not close it.
conn = psycopg2.connect(dbname="housing_data")
try:
    buildings_preview = pd.read_sql_query("SELECT * FROM buildings LIMIT 5;", conn)
finally:
    conn.close()

# Last expression of the cell, so the DataFrame is still rendered as the output.
buildings_preview

Unnamed: 0,major,minor,bldgnbr,nbrlivingunits,address,buildingnumber,fraction,directionprefix,streetname,streettype,...,fpmultistory,fpfreestanding,fpadditional,yrbuilt,yrrenovated,pcntcomplete,obsolescence,pcntnetcondition,condition,addnlcost
0,12000,32,1,1,830 8TH AVE S 98033,830,,,8TH,AVE,...,0,0,0,1981,0,0,0,0,3,0
1,12000,260,1,1,341 8TH ST S 98033,341,,,8TH,ST,...,0,0,0,1928,2007,0,0,0,3,0
2,12000,262,1,1,712 5TH AVE S 98033,712,,,5TH,AVE,...,0,0,0,2010,0,0,0,0,3,0
3,12005,9035,1,1,39525 AUBURN-ENUMCLAW RD SE 98092,39525,,,AUBURN-ENUMCLAW,RD,...,0,0,0,1917,0,0,0,0,1,0
4,12006,9079,1,1,39007 272ND AVE SE 98022,39007,,,272ND,AVE,...,0,0,0,1990,0,0,0,0,4,0
