In [1]:
import pandas as pd
from urllib.request import urlopen
import json
import time
import geopandas as gpd
from shapely.geometry import shape

In [None]:
# Lookup from two-letter USPS abbreviation to full name.
# Insertion order is kept: the 50 states first, then DC and the territories.
state_abbreviations = dict(
    AL='Alabama',
    AK='Alaska',
    AZ='Arizona',
    AR='Arkansas',
    CA='California',
    CO='Colorado',
    CT='Connecticut',
    DE='Delaware',
    FL='Florida',
    GA='Georgia',
    HI='Hawaii',
    ID='Idaho',
    IL='Illinois',
    IN='Indiana',
    IA='Iowa',
    KS='Kansas',
    KY='Kentucky',
    LA='Louisiana',
    ME='Maine',
    MD='Maryland',
    MA='Massachusetts',
    MI='Michigan',
    MN='Minnesota',
    MS='Mississippi',
    MO='Missouri',
    MT='Montana',
    NE='Nebraska',
    NV='Nevada',
    NH='New Hampshire',
    NJ='New Jersey',
    NM='New Mexico',
    NY='New York',
    NC='North Carolina',
    ND='North Dakota',
    OH='Ohio',
    OK='Oklahoma',
    OR='Oregon',
    PA='Pennsylvania',
    RI='Rhode Island',
    SC='South Carolina',
    SD='South Dakota',
    TN='Tennessee',
    TX='Texas',
    UT='Utah',
    VT='Vermont',
    VA='Virginia',
    WA='Washington',
    WV='West Virginia',
    WI='Wisconsin',
    WY='Wyoming',
    # Federal district and territories
    DC='District of Columbia',
    PR='Puerto Rico',
    GU='Guam',
    VI='Virgin Islands',
)

In [None]:
# Build a lookup from 3-character ZIP prefix to the value in the `state` column.
# `prefix` is read as text so existing leading zeros are not lost to numeric parsing.
zip_prefixes = pd.read_csv('Data/zip_prefixes_wiki.csv', dtype={'prefix': str})

# Rows with a blank prefix cannot match any ZIP code; drop them so they do not
# end up as a NaN key in the lookup.
zip_prefixes = zip_prefixes.dropna(subset=['prefix'])

# Left-pad every prefix to three characters (e.g. '6' -> '006') and pair it with
# its state. If a prefix appears more than once, the last row wins.
zip2state = dict(zip(zip_prefixes['prefix'].str.zfill(3), zip_prefixes['state']))

In [None]:
# Read the main ZCTA GeoJSON file from the data folder.
# GeoJSON is UTF-8 by specification, so decode it explicitly instead of relying
# on the platform's default text encoding.
with open('Data/all_zips_2020.geojson', encoding='utf-8') as f:
    data = json.load(f)

# Container holding one GeoJSON FeatureCollection per state, keyed by the full
# state name. Each entry starts with the boilerplate structure and no features.
collection = {
    state: {"type": "FeatureCollection",
            "name": f"{state}_ZCTAs_2020",
            "crs": {"type": "name", "properties": {"name": "urn:ogc:def:crs:OGC:1.3:CRS84"}},
            "features": []}
    for state in state_abbreviations.values()
}

# Loop over each ZCTA in the main census file and add it to the matching state.
# The state is found from the first three characters of the ZCTA code:
# prefix -> abbreviation (zip2state) -> full name (state_abbreviations).
for feature in data['features']:
    zcta = feature['properties']['ZCTA5CE20']
    state_name = state_abbreviations.get(zip2state.get(zcta[:3]))

    # `collection` is keyed by exactly the names in state_abbreviations, so a
    # direct lookup replaces a linear scan of the values for every feature.
    state_collection = collection.get(state_name)
    if state_collection is not None:
        state_collection['features'].append(feature)
    else:
        # No state could be resolved for this prefix; report the full code.
        print(zcta)

# Save each state's FeatureCollection as its own file, using compact separators
# to keep the output small.
# NOTE(review): the destination below looks like a placeholder path with no
# file extension — confirm the intended output location before running.
for state_name, state_collection in collection.items():
    with open(f"file-path-to-destination-{state_name}", "w", encoding='utf-8') as json_file:
        json.dump(state_collection, json_file, separators=(',', ':'))