# Census Data Download Script
This script is for downloading the specified file directories from the US Census Geo-Tiger FTP Site. Please note that this script must be run in Anaconda's Jupyter Lab, not the ArcPro Jupyter Lab, in order to successfully import all modules.

Users must specify the local directory in which to store the downloaded data. This script will automatically create folders within the specified directory, into which the data will be downloaded and unzipped.

Users must also specify the URL they want to download from. This script assumes 2018 TIGER, but the URL can be changed to any year available from the Census FTP site: https://www2.census.gov/geo/tiger/



In [None]:
import csv
import io
import os
import zipfile
from urllib.parse import urljoin
from zipfile import BadZipfile

import requests
from bs4 import BeautifulSoup


# Local workspace that receives the downloads. The script creates one
# sub-folder per Census dataset folder inside this directory.
theShapeFileDirectory = r"C:\Users\allow001\work\BlogMaps"

# Root of the TIGER release to download from on the Census website.
# NOTE: to fetch a different vintage, change the final path segment
# (for example 'TIGER2019/').
downloadUrl = "https://www2.census.gov/geo/tiger/TIGER2018/"

# Dataset folder names, spelled exactly as they appear on the Census site.
# NOTE: if the folder names change for a new year, update them here.
#
# Full list -- replace the short list below with this one to download
# every data folder:
#
# folders = ['AIANNH', 'AITSN', 'ANRC', 'BG', 'CONCITY', 'COUSUB', 'CNECTA',
#            'CBSA', 'CD', 'COUNTY', 'CSA', 'METDIV', 'NECTA', 'NECTADIV',
#            'PLACE', 'PUMA', 'ELSD', 'SCSD', 'UNSD', 'STATE', 'SLDL', 'SLDU',
#            'SUBMCD', 'TBG', 'TRACT', 'TTRACT', 'UAC', 'ZCTA5',
#            'AREAWATER', 'LINEARWATER', 'COASTLINE',
#            'PRIMARYROADS', 'PRISECROADS', 'RAILS', 'ROADS']

# Short list used for a quick test run against a single folder.
folders = ["AREAWATER"]

# For every folder listed in `folders`: read the Census directory listing,
# collect the links to the .zip archives it contains, then download each
# archive and unpack it into <theShapeFileDirectory>/<folder>.
#
# Progress and warning messages are printed while the script runs so a long
# download can be monitored and any skipped file can be fetched again later.

# Seconds to wait on the server for a single request. Without a timeout,
# requests.get() can block forever on a stalled connection.
requestTimeout = 120

for folder in folders:
    # The trailing slash matters: urljoin() below only treats the last path
    # segment as a directory when the base URL ends with "/".
    fullUrl = downloadUrl + folder + "/"

    # Fetch the directory-listing page for this folder.
    try:
        r = requests.get(fullUrl, timeout=requestTimeout)
        r.raise_for_status()
    except requests.RequestException as err:
        print("Skipping folder %s: could not read %s (%s)" % (folder, fullUrl, err))
        continue

    webpage = BeautifulSoup(r.text, "html.parser")
    table = webpage.find("table")
    if table is None:
        # A page without a listing table has nothing to download.
        print("Skipping folder %s: no file listing found at %s" % (folder, fullUrl))
        continue

    # Keep only links to zip archives. Filtering on the extension avoids
    # depending on how many header rows the listing has, and avoids
    # downloading anything that could not be unzipped anyway.
    hrefs = [
        link["href"]
        for link in table.find_all("a", href=True)
        if link["href"].lower().endswith(".zip")
    ]
    print("Downloading and Extracting files")

    # All archives of one dataset type are unpacked into the same folder.
    destination = os.path.join(theShapeFileDirectory, folder)

    for c, h in enumerate(hrefs):
        print("Downloading %s of %s" % (c + 1, len(hrefs)))
        # urljoin resolves the (usually relative) href against the folder URL
        # without producing a doubled slash.
        urlZip = urljoin(fullUrl, h)
        try:
            theZip = requests.get(urlZip, timeout=requestTimeout)
            theZip.raise_for_status()
            # The context manager closes the archive once it is extracted.
            with zipfile.ZipFile(io.BytesIO(theZip.content)) as z:
                # exist_ok makes this a no-op when the folder already exists.
                os.makedirs(destination, exist_ok=True)
                z.extractall(destination)
        except requests.RequestException as err:
            # One failed download should not abort the whole batch; report it
            # and carry on with the next file.
            print("Skipping %s: download failed (%s)" % (urlZip, err))
        except zipfile.BadZipFile:
            # A corrupted archive is skipped, but reported rather than
            # silently ignored.
            print("Skipping %s: not a valid zip file" % urlZip)
            
        
