In [1]:
# how many building footprints can we retrieve for each state?
# are we missing any compared to github documentation?

In [2]:
import geopandas as gpd
import json
import numpy as np
import pandas as pd
import psycopg2
import time
from shapely.wkt import loads as wkt_loads

from keys import pg_user, pg_pass, pg_host, pg_port, pg_db, pg_table

In [3]:
connection = psycopg2.connect(database=pg_db,
                              user=pg_user,
                              password=pg_pass,
                              host=pg_host,
                              port=pg_port)
connection.set_session(autocommit=True)
cursor = connection.cursor()

## Show random rows in table

In [4]:
# fast way to randomly sample n rows
n = 5
cursor.execute('SELECT MAX(id) FROM {table};'.format(table=pg_table))
max_id = cursor.fetchall()[0][0]
ids = tuple(np.random.randint(low=1, high=max_id, size=n))

query = """
        SELECT id, state, ST_AsText(geom)
        FROM {table}
        WHERE id IN {ids};
        """
cursor.execute(query.format(table=pg_table, ids=ids))
rows = cursor.fetchall()
gdf = gpd.GeoDataFrame(rows, columns=['id', 'state', 'geometry'])
gdf['geometry'] = gdf['geometry'].map(lambda x: wkt_loads(x))
gdf

Unnamed: 0,id,state,geometry
0,13115081,CA,"POLYGON ((-117.737544178963 34.0839276944483, ..."
1,28497456,GA,"POLYGON ((-83.87249297006549 33.869927847541, ..."
2,54916719,MI,"POLYGON ((-85.5521347750001 42.8655668508731, ..."
3,95246650,RI,"POLYGON ((-71.5374141248932 41.6696543323569, ..."
4,119201695,WV,"POLYGON ((-79.1211187440627 38.845070732438, -..."


## How many building footprints for each state?

In [5]:
%%time
# get all states in the footprints table
query = """
        SELECT DISTINCT {table}.state
        FROM {table};
        """
cursor.execute(query.format(table=pg_table))
rows = cursor.fetchall()
states = [item[0] for item in rows]
states = sorted(states)
print(states)

['AK', 'AL', 'AR', 'AZ', 'CA', 'CO', 'CT', 'DC', 'DE', 'FL', 'GA', 'HI', 'IA', 'ID', 'IL', 'IN', 'KS', 'KY', 'LA', 'MA', 'MD', 'ME', 'MI', 'MN', 'MO', 'MS', 'MT', 'NC', 'ND', 'NE', 'NH', 'NJ', 'NM', 'NV', 'NY', 'OH', 'OK', 'OR', 'PA', 'RI', 'SC', 'SD', 'TN', 'TX', 'UT', 'VA', 'VT', 'WA', 'WI', 'WV', 'WY']
Wall time: 2min 14s


In [6]:
# get all footprints for each state
counts = {}
query = """
        SELECT ST_AsText({table}.geom)
        FROM {table}
        WHERE {table}.state='{state}';
        """

for state in states:
    
    start_time = time.time()
    
    cursor.execute(query.format(state=state, table=pg_table))
    rows = cursor.fetchall()
    counts[state] = len(rows)

    end_time = time.time() - start_time
    print(state, len(rows), '\t{:.1f}'.format(end_time))

AK 232159 	4.4
AL 2392171 	22.5
AR 1499025 	14.2
AZ 2492999 	32.7
CA 10556550 	107.5
CO 2043866 	22.2
CT 1156638 	11.6
DC 58330 	0.7
DE 331654 	3.2
FL 6532545 	62.0
GA 3801461 	36.1
HI 252894 	3.0
IA 2013085 	17.8
ID 883618 	8.7
IL 4783021 	43.1
IN 3224996 	30.3
KS 1564845 	14.4
KY 2363324 	22.0
LA 2005341 	19.3
MA 1982583 	20.0
MD 1590655 	15.4
ME 736346 	7.1
MI 4854138 	44.2
MN 2792296 	27.0
MO 3096410 	28.3
MS 1470285 	14.2
MT 762428 	7.5
NC 4504348 	41.5
ND 557809 	5.9
NE 1135526 	10.3
NH 558850 	5.5
NJ 2370475 	22.1
NM 985820 	10.2
NV 847575 	8.4
NY 4788312 	45.0
OH 5343670 	49.9
OK 2056402 	19.9
OR 1781820 	17.5
PA 4801561 	45.2
RI 348566 	4.0
SC 2134688 	20.0
SD 649233 	6.2
TN 2964339 	27.5
TX 9638970 	91.8
UT 980745 	11.0
VA 3020994 	28.7
VT 346038 	3.9
WA 2910981 	28.3
WI 3010755 	28.4
WV 1020048 	9.7
WY 376912 	3.7


In [7]:
pd.DataFrame(counts, index=['count']).T.to_csv('data/state_counts.csv', index=True, encoding='utf-8')

## All done

In [8]:
cursor.close()
connection.close()