# Generate GeoJSON Data
This notebook selects California zip codes, specified on the [CA.gov](https://data.ca.gov/dataset/county-and-zip-code-references) website, and generates a GeoJSON file with zip code perimeters. Some zip codes are points and are not contained in the source GeoJSON file, which comes from [Census.gov](https://www.census.gov/geographies/mapping-files/time-series/geo/kml-cartographic-boundary-files.html).

In [None]:
import io
import json

import pandas as pd

In [None]:
# Load the ZCTA boundary features into `data` (a parsed GeoJSON document).
# The path is a raw string so the Windows backslashes are taken literally
# (the previous literal contained the invalid escape sequence `\d`).
# NOTE(review): this is an absolute, machine-specific path, unlike the
# relative paths used elsewhere in this notebook -- adjust when running on
# another machine.
# GeoJSON is UTF-8 by specification, so decode explicitly instead of relying
# on the platform default encoding.
with open(r'C:\Users\benjohn\Documents\Github\CensusJSON\data\cb_2018_us_zcta510_500k.geojson',
          encoding='utf-8') as f:
    data = json.load(f)

# Load the list of California zip codes from the `zip_code` column of the CSV.
ca_zips_df = pd.read_csv("../data/zip-code-list.csv")
ca_zips = ca_zips_df['zip_code'].tolist()

In [None]:
# Build `lkupKML`: a dictionary mapping each feature's zip code to the index
# of that feature in data['features'].
# Each feature's 'description' property holds an HTML table of attributes
# (presumably carried over from the source KML -- confirm against the data).
lkupKML = {}

for i, item in enumerate(data['features']):
    tbl = item['properties']['description']
    # Wrap the markup in StringIO: passing literal HTML text straight to
    # read_html is deprecated in recent pandas releases.
    df = pd.read_html(io.StringIO(tbl))[0]
    # The first row of the 'Attributes.1' column is used as the zip code.
    zipCode = df['Attributes.1'][0]

    lkupKML[zipCode] = i


# Select the boundary feature for every California zip code.
# Zip codes with no entry in `lkupKML` are reported and collected in
# `missingZipCodes`; every other zip code contributes one feature to
# `outputFeatures`.
outputFeatures = []
missingZipCodes = []

for x in ca_zips:
    # Only the lookup is guarded, and only against KeyError, so unrelated
    # failures (or a keyboard interrupt) are not misreported as a missing
    # zip code.
    try:
        ix = lkupKML[str(x)]
    except KeyError:
        print(f"Zip code not found : {x}")
        missingZipCodes.append(x)
        continue

    feature = data['features'][ix]

    # Keep the type and geometry; replace the source properties with the
    # single "color" property.
    outputFeatures.append({
        'type': feature['type'],
        'properties': {"color": "#fff"},
        'geometry': feature['geometry'],
    })


# Wrap the selected features in a GeoJSON FeatureCollection.
newData = {'type': 'FeatureCollection',
           'features': outputFeatures}

In [None]:
# Serialize the assembled FeatureCollection and write it to the output file.
with open('../data/ca-zips.json', 'w') as fh:
    serialized = json.dumps(newData)
    fh.write(serialized)