# Download Script

In [None]:
import requests, zipfile, io, csv, os
from bs4 import BeautifulSoup
from zipfile import BadZipfile

# Root folder that receives the downloads; one sub-folder per census layer is
# created underneath it by the download loop.
theShapeFileDirectory = r"C:\Users\and04671\Documents\Data_Files"

# The census website. NOTE: this URL will have to be updated to '.../TIGER2019/' and so on, each year
theUrl = "https://www2.census.gov/geo/tiger/TIGER2018/"

# Census download site folder names below.
# NOTE: if the folder names change on the census download directory for a new year,
# change the names in this list.
# Every entry must end with '/' (the download loop derives the local folder name
# from it) and must appear only once, otherwise the layer is downloaded twice.
folders = ['AIANNH/','AITSN/','ANRC/','BG/','CONCITY/','COUSUB/','CNECTA/','CBSA/','CD/',
           'COUNTY/','CSA/','METDIV/','NECTA/','NECTADIV/',
           'PLACE/','PUMA/','ELSD/','SCSD/','UNSD/','STATE/','SLDL/','SLDU/',
           'SUBMCD/','TBG/','TRACT/','TTRACT/','UAC/','ZCTA5/',

           #these don't seem to be in 2017 list but want to keep
           'AREAWATER/','LINEARWATER/','COASTLINE/','FACES/',
           'FACESAH/','FACESAL/','FACESMIL/','FEATNAMES/','PRIMARYROADS/',
           'PRISECROADS/','RAILS/','ROADS/']

# Walk every census folder, collect the .zip links from its directory listing,
# then download each archive into memory and extract it into
# <theShapeFileDirectory>/<folder name>.
for folder in folders:
    # Normalise the entry so a missing (or doubled) slash can corrupt neither
    # the URL nor the local folder name.
    folder_name = folder.strip("/")
    fullUrl = theUrl + folder_name + "/"
    destination = os.path.join(theShapeFileDirectory, folder_name)

    r = requests.get(fullUrl)
    if r.status_code != 200:
        # e.g. the folder does not exist for this TIGER year
        print("Skipping %s (HTTP %s)" % (fullUrl, r.status_code))
        continue

    webpage = BeautifulSoup(r.text, "html.parser")
    table = webpage.find("table")
    if table is None:
        print("Skipping %s (no directory listing table found)" % fullUrl)
        continue

    # Keep only links to zip archives; header, sort and parent-directory links
    # in the listing are not downloads.
    hrefs = [a["href"] for a in table.find_all("a", href=True)
             if a["href"].lower().endswith(".zip")]
    print("Downloading and Extracting files")

    for c, h in enumerate(hrefs):
        print("Downloading %s of %s" % (c+1, len(hrefs)) )
        urlZip = fullUrl + h
        theZip = requests.get(urlZip)
        if theZip.status_code != 200:
            print("Skipping %s (HTTP %s)" % (urlZip, theZip.status_code))
            continue
        try:
            with zipfile.ZipFile(io.BytesIO(theZip.content)) as z:
                # Creates a file folder to put each type of data into, inside of
                # theShapeFileDirectory; an existing folder is simply reused.
                if not os.path.isdir(destination):
                    os.makedirs(destination)
                z.extractall(destination)
        except BadZipfile:
            # Best effort: report the bad archive and carry on with the rest.
            print("Skipping %s (not a valid zip archive)" % urlZip)


# Copy shapes to geodatabase

In [None]:
import os
import sys
import zipfile
import string
import arcpy

#constants
# Raw strings: Windows paths contain backslash sequences (\U, \a, ...) that
# Python would otherwise treat as escapes.
working_directory = r"C:\Users\and04671\Documents\Data_Files"
# TIGER vintage used when naming merged feature classes.
# NOTE(review): the original hard-coded "tl_2017_us_" although the data and the
# geodatabase are TIGER2018 -- confirm nothing downstream expects the 2017 name.
tiger_year = "2018"
#this dirs list includes all directories defined in the download script, without
#additional water/road directories
folders = ['AIANNH','AITSN','ANRC','BG','CBSA','CD','CNECTA','CONCITY','COUNTY',
        'COUSUB','CSA','ELSD','METDIV','NECTA','NECTADIV','PLACE','PUMA',
        'SCSD','SLDL','SLDU','STATE','SUBMCD','TBG','TRACT','TTRACT','UAC',
        'UNSD','ZCTA5']
out_db = r"C:\Users\and04671\Documents\Data_Files\TIGER2018.gdb"

#0. get the shapefiles into the geodb, merging shapefiles that come as state-wide extents
for folder in folders:
    shp_dir = os.path.join(working_directory, folder)

    # list all shapefiles in directory
    arcpy.env.workspace = shp_dir
    shps = arcpy.ListFeatureClasses("*") or []
    if not shps:
        # Nothing was downloaded for this layer (or the folder is missing).
        print("No shapefiles found in %s, skipping" % shp_dir)
        continue

    if len(shps) == 1:
        # A single nation-wide shapefile: copy it under its own name.
        shp_file = os.path.join(shp_dir, shps[0])
        # splitext drops only the ".shp" extension; str.strip("shp") would strip
        # the characters s/h/p from both ends and leave a trailing dot.
        shp_name = os.path.splitext(shps[0])[0]
        out_fc = os.path.join(out_db, shp_name)
        arcpy.CopyFeatures_management(shp_file, out_fc)
    else:
        # Several shapefiles in the folder: merge them into one feature class
        # named after the folder.
        out_fc = os.path.join(out_db, "tl_%s_us_%s" % (tiger_year, folder.lower()))
        arcpy.Merge_management(shps, out_fc)
        
        

# Add GISJOIN field

In [None]:
import os
import sys
import zipfile
import string
import arcpy
#G-"State Code (2)"-0-"County Code(3)"-0
wd = "D:/workspace/TIGER2017/"
out_db = "D:/workspace/TIGER2017/TIGER2017_proj.gdb/"
# Feature-class name suffix -> length of the text field added to that feature class.
# NOTE(review): 'AIANHH' is spelled differently from the 'AIANNH' download
# folder -- confirm it matches the feature class name in this geodatabase.
dir_dict = {
    'AIANHH': 5, 'AITSN': 8, 'ANRC': 6, 'BG': 15, 'CBSA': 6, 'CD115': 6,
    'CNECTA': 4, 'CONCITY': 9, 'COUNTY': 8, 'COUSUB': 13, 'CSA': 4,
    'DIVISION': 2, 'ELSD': 9, 'METDIV': 11, 'NATION': 2, 'NECTA': 6,
    'NECTADIV': 11, 'PLACE': 9, 'PUMA': 9, 'SCSD': 9, 'SLDL': 7, 'SLDU': 7,
    'STATE': 4, 'SUBMCD': 18, 'TBG': 12, 'TRACT': 14, 'TTRACT': 11,
    'UAC10': 6, 'UNSD': 9, 'ZCTA510': 6, 'TRUST': 5, 'RES_ONLY': 5,
    'REGION': 2,
}
new_field = "GISJOIN"
arcpy.env.workspace = out_db

for j, i in dir_dict.items():
    j_lower = j.lower()
    matches = arcpy.ListFeatureClasses("*" + j_lower) or []
    # The bare wildcard is ambiguous ("*bg" also matches "..._tbg", "*tract"
    # matches "..._ttract"), so prefer names where the suffix is a whole token
    # and only fall back to the loose matches when there is no such name.
    whole_token = [name for name in matches
                   if name.lower() == j_lower or name.lower().endswith("_" + j_lower)]
    candidates = whole_token or matches
    if not candidates:
        print("No feature class matching *%s in %s, skipping" % (j_lower, out_db))
        continue
    fc_path = out_db + candidates[0]
    arcpy.AddField_management(fc_path, new_field, "TEXT", field_length=i)
    


# Water merge

In [None]:
import os
import sys
import zipfile
import string
import arcpy

#constants
wd = "D:/workspace/TIGER2017/"
out_db = "D:/workspace/TIGER2017/TIGER2017_water.gdb/"
# Named water_dir rather than `dir` so the builtin dir() is not shadowed for
# every other cell that shares this kernel.
water_dir = 'AREAWATER'

# Merge every shapefile found in the AREAWATER folder into one feature class.
shp_dir = wd + water_dir + "/"
arcpy.env.workspace = shp_dir
shps = arcpy.ListFeatureClasses("*") or []
out_fc = out_db + "tl_2017_us_" + water_dir.lower()
if shps:
    arcpy.Merge_management(shps, out_fc)
else:
    # Without this guard the merge is attempted with an empty input list.
    print("No shapefiles found in %s, nothing to merge" % shp_dir)


# Rename statewide block group feature classes

In [None]:
##########################################################################################
# 4_rename_statewide_bg_fc.py
# 
# David Van Riper
#
# 12/14/2017
# 
# This script renames the state-specific block group feature classes by
# postal abbreviations
#
# Local processing works great!!!
##########################################################################################

import os
import sys
import zipfile
import string
import arcpy

wd = "D:/workspace/TIGER2017/"
geodb = wd + "TIGER2017_proj_watererase.gdb"
# Suffix appended to the postal abbreviation to form the new feature class name.
out_fc_stub = "_blck_grp_2017"

# Two-digit state FIPS code -> postal abbreviation (50 states, DC and PR).
# Listed in FIPS order, one entry per code, so gaps and duplicates are easy to spot.
state_codes = {
    '01': 'AL', '02': 'AK', '04': 'AZ', '05': 'AR', '06': 'CA', '08': 'CO',
    '09': 'CT', '10': 'DE', '11': 'DC', '12': 'FL', '13': 'GA', '15': 'HI',
    '16': 'ID', '17': 'IL', '18': 'IN', '19': 'IA', '20': 'KS', '21': 'KY',
    '22': 'LA', '23': 'ME', '24': 'MD', '25': 'MA', '26': 'MI', '27': 'MN',
    '28': 'MS', '29': 'MO', '30': 'MT', '31': 'NE', '32': 'NV', '33': 'NH',
    '34': 'NJ', '35': 'NM', '36': 'NY', '37': 'NC', '38': 'ND', '39': 'OH',
    '40': 'OK', '41': 'OR', '42': 'PA', '44': 'RI', '45': 'SC', '46': 'SD',
    '47': 'TN', '48': 'TX', '49': 'UT', '50': 'VT', '51': 'VA', '53': 'WA',
    '54': 'WV', '55': 'WI', '56': 'WY', '72': 'PR',
}

arcpy.env.workspace = geodb
fcs = arcpy.ListFeatureClasses("bg_*") or []

# Rename each "bg_<state FIPS>..." feature class to "<postal abbr>_blck_grp_2017".
for fc in fcs:
    fc_name = fc.split("_")
    if len(fc_name) < 2:
        print("Unexpected feature class name %s, skipping" % fc)
        continue
    fc_state = fc_name[1]
    state_abbr = state_codes.get(fc_state)
    if state_abbr is None:
        # FIPS code not in the lookup (e.g. an island territory): leave the
        # feature class untouched instead of failing on None + str.
        print("No postal abbreviation for state code %s (%s), skipping" % (fc_state, fc))
        continue
    out_fc = state_abbr + out_fc_stub
    arcpy.Rename_management(fc, out_fc)
    



# Export to SHP zip

In [None]:
##########################################################################################
# 5_export_to_shp_and_zip.py
# 
# David Van Riper
#
# 12/15/2017
#
# This script exports all fcs in a geodb to shapefiles and ZIPS them using the
# NHGIS naming convention
#
# Local processing works great!!!
##########################################################################################

import os
import sys
import glob
import zipfile
import string
import arcpy

wd = "D:/workspace/TIGER2017/"
#geodb = wd + "TIGER2016_proj_watererase.gdb"
out_folder = wd + "shps/"

# Export step, currently disabled: the shapefiles are expected to already be
# in out_folder.
#arcpy.env.workspace = geodb
#fcs = arcpy.ListFeatureClasses("*")

#for fc in fcs:
#    out_shp = fc + ".shp"
#    arcpy.FeatureClassToFeatureClass_conversion(fc,out_folder,out_shp)

# Work inside out_folder so archive members are stored under their bare file
# names, without any directory prefix.
os.chdir(out_folder)

shps = glob.glob(out_folder + "*.shp")

# Base name (no extension) of every shapefile; one zip is written per base name.
ziplist = [os.path.splitext(os.path.basename(name))[0] for name in shps]

for f in ziplist:
    file_name = os.path.join(out_folder, f + ".zip")
    # All sidecar files of the shapefile (.shp, .shx, .dbf, .prj, ...), but not
    # a zip left behind by an earlier run, which would otherwise end up nested
    # inside the new archive.
    files = [s for s in glob.glob(str(f) + ".*") if not s.lower().endswith(".zip")]
    # The with-block closes the archive even if writing a member fails.
    with zipfile.ZipFile(file_name, 'w') as zips:
        for s in files:
            zips.write(s, compress_type = zipfile.ZIP_DEFLATED)



# Rename ZIPS

In [None]:
##########################################################################################
# 6_rename_zipfiles.py
# 
# David Van Riper
#
# 12/15/2017
#
# This script renames shapefiles to lower case 'us' and three character
# fips codes. 
#
# Local processing works great!!!
##########################################################################################

import os
import sys
import glob
import zipfile
import string
#import arcpy

# Postal abbreviations whose zip files get renamed (50 states, DC and PR).
states_upper = ["AL", "AK", "AZ", "AR", "CA", "CO", "CT", "DC", "DE", "FL", "GA",
          "HI", "ID", "IL", "IN", "IA", "KS", "KY", "LA", "ME", "MD",
          "MA", "MI", "MN", "MS", "MO", "MT", "NE", "NV", "NH", "NJ",
          "NM", "NY", "NC", "ND", "OH", "OK", "OR", "PA", "RI", "SC",
          "SD", "TN", "TX", "UT", "VT", "VA", "WA", "WV", "WI", "WY", "PR"]

# Postal abbreviation -> three-character code used in the renamed zip file
# (the two-digit state FIPS code followed by '0').
# Listed in code order, one entry per state, so gaps and duplicates are easy to spot.
state_codes = {
    'AL': '010', 'AK': '020', 'AZ': '040', 'AR': '050', 'CA': '060', 'CO': '080',
    'CT': '090', 'DE': '100', 'DC': '110', 'FL': '120', 'GA': '130', 'HI': '150',
    'ID': '160', 'IL': '170', 'IN': '180', 'IA': '190', 'KS': '200', 'KY': '210',
    'LA': '220', 'ME': '230', 'MD': '240', 'MA': '250', 'MI': '260', 'MN': '270',
    'MS': '280', 'MO': '290', 'MT': '300', 'NE': '310', 'NV': '320', 'NH': '330',
    'NJ': '340', 'NM': '350', 'NY': '360', 'NC': '370', 'ND': '380', 'OH': '390',
    'OK': '400', 'OR': '410', 'PA': '420', 'RI': '440', 'SC': '450', 'SD': '460',
    'TN': '470', 'TX': '480', 'UT': '490', 'VT': '500', 'VA': '510', 'WA': '530',
    'WV': '540', 'WI': '550', 'WY': '560', 'PR': '720',
}

wd = "D:/workspace/TIGER2017/shps/"

# All renames below use bare file names, so work inside the zip folder.
os.chdir(wd)

# 1. Lower-case the names of the national ("US...") zip files.
zips = glob.glob(wd + "US*.zip")
ziplist = [os.path.basename(name) for name in zips]

for f in ziplist:
    new_zip = f.lower()
    if new_zip != f:
        os.rename(f, new_zip)

# 2. Rename each state's zip from its postal-abbreviation prefix to the
#    three-character code.
for state in states_upper:
    # sorted() makes the choice deterministic when several files match.
    in_zip = sorted(glob.glob(wd + state + "*.zip"))
    if not in_zip:
        print("No zip file found for %s, skipping" % state)
        continue
    if len(in_zip) > 1:
        print("Several zip files match %s; renaming only %s" % (state, in_zip[0]))
    in_zip_file = os.path.basename(in_zip[0])

    state_code = state_codes.get(state)
    if state_code is None:
        print("No code defined for %s, skipping" % state)
        continue
    out_zip_file = state_code + "_blck_grp_2017.zip"
    if os.path.exists(out_zip_file):
        # os.rename fails on Windows and silently overwrites elsewhere when the
        # target exists; keep the existing file and report it instead.
        print("%s already exists, leaving %s untouched" % (out_zip_file, in_zip_file))
        continue
    os.rename(in_zip_file, out_zip_file)
    
                 