In [1]:
import psycopg2
import pandas as pd
import geopandas as gpd

from sqlalchemy import create_engine
from sqlalchemy_utils import database_exists, create_database
from geoalchemy2 import Geometry, WKTElement

In [2]:
dbname = 'map_the_vote'
username = 'codyschank'

engine = create_engine('postgres://%s@localhost/%s'%(username,dbname))
print(engine.url)

if not database_exists(engine.url):
    create_database(engine.url)
print(database_exists(engine.url))

postgres://codyschank@localhost/map_the_vote2
True


In [None]:
#if database is newly created, will have to run this the first time
#sql_query = """
#CREATE EXTENSION postgis;
#"""
#engine.execute(sql_query)

## Zip Codes (National File)

In [4]:
zip_shapefile = gpd.read_file("/Users/codyschank/Dropbox/Insight/Geography/Zip_Codes/tl_2017_us_zcta510.shp", encoding = 'utf-8')
#project to geographic coordinates to match openaddresses
zip_shapefile = zip_shapefile.to_crs({'init': 'epsg:4326'})
zip_shapefile.columns = map(str.lower, zip_shapefile.columns)

In [5]:
# I don't know why this is necessary, but it is 
zip_shapefile['geom'] = zip_shapefile['geometry'].apply(lambda x: WKTElement(x.wkt, srid=4326))
zip_shapefile.drop('geometry', 1, inplace=True)

<sqlalchemy.engine.result.ResultProxy at 0x122ac8d68>

In [8]:
# issue with multipolygons requires upload the schema first, alter the geom column, then upload the data
table_name = "zip5_us"
zip_shapefile.head(0).to_sql(table_name, engine, if_exists='replace', index=False, 
                                dtype={'geom': Geometry('Polygon', srid= 4326)})

In [9]:
sql_query = """
ALTER TABLE zip5_us ALTER COLUMN geom SET DATA TYPE geometry;
"""
engine.execute(sql_query)

<sqlalchemy.engine.result.ResultProxy at 0x122afd128>

In [10]:
table_name = "zip5_us"
#had to run this from command ALTER TABLE zip5_us ALTER COLUMN geom SET DATA TYPE geometry;
zip_shapefile.head(1000).to_sql(table_name, engine, if_exists='append', index=False, 
                                dtype={'geom': Geometry('Polygon', srid= 4326)})

In [11]:
chunk_size = 1000
for i in range(1000, zip_shapefile.shape[0]+chunk_size, chunk_size):
    zip_shapefile[i:(i+chunk_size)].to_sql(table_name, engine, if_exists='append', index=False, 
                                            dtype={'geom': Geometry('Polygon', srid= 4326)})
    print(i)

1000
2000
3000
4000
5000
6000
7000
8000
9000
10000
11000
12000
13000
14000
15000
16000
17000
18000
19000
20000
21000
22000
23000
24000
25000
26000
27000
28000
29000
30000
31000
32000
33000
34000


## Congressional Districts (National File)

In [12]:
tl_2017_us_cd115 = gpd.read_file("/Users/codyschank/Dropbox/Insight/Geography/Congressional_Districts/tl_2017_us_cd115.shp", encoding = 'utf-8')
#project to geographic coordinates to match openaddresses
tl_2017_us_cd115 = tl_2017_us_cd115.to_crs({'init': 'epsg:4326'})
tl_2017_us_cd115.columns = map(str.lower, tl_2017_us_cd115.columns)

In [13]:
# I don't know why this is necessary, but it is 
tl_2017_us_cd115['geom'] = tl_2017_us_cd115['geometry'].apply(lambda x: WKTElement(x.wkt, srid=4326))
tl_2017_us_cd115.drop('geometry', 1, inplace=True)

In [14]:
# issue with multipolygons requires upload the schema first, alter the geom column, then upload the data
table_name = "us_congressional_districts"
tl_2017_us_cd115.head(0).to_sql(table_name, engine, if_exists='replace', index=False, 
                                dtype={'geom': Geometry('Polygon', srid= 4326)})

In [15]:
sql_query = """
ALTER TABLE us_congressional_districts ALTER COLUMN geom SET DATA TYPE geometry;
"""
engine.execute(sql_query)

<sqlalchemy.engine.result.ResultProxy at 0x12291d1d0>

In [16]:
tl_2017_us_cd115.to_sql(table_name, engine, if_exists='append', index=False, 
                                dtype={'geom': Geometry('POLYGON', srid= 4326)})

## VTDs (Texas)

In [17]:
precinct_shapefile = gpd.read_file("/Users/codyschank/Dropbox/Insight/Geography/VTDs/VTDs.shp", encoding = 'utf-8')
#SRID is 3081, checked in QGIS
#project to geographic coordinates to match openaddresses
precinct_shapefile = precinct_shapefile.to_crs({'init': 'epsg:4326'})
precinct_shapefile.columns = map(str.lower, precinct_shapefile.columns)

In [18]:
# I don't know why this is necessary, but it is 
precinct_shapefile['geom'] = precinct_shapefile['geometry'].apply(lambda x: WKTElement(x.wkt, srid=4326))
precinct_shapefile.drop('geometry', 1, inplace=True)

In [19]:
table_name = "vtds_tx"
precinct_shapefile.to_sql(table_name, engine, if_exists='replace', index=False, 
                                dtype={'geom': Geometry('POLYGON', srid= 4326)})