# Generate GeoJSON files from [US Census Bureau data](https://www.census.gov/geographies/mapping-files/time-series/geo/carto-boundary-file.html).

License: Apache License 2.0

In [0]:
!pip install --quiet kml2geojson

In [0]:
import io
import copy
import json
import urllib.request
import xml.dom.minidom
import zipfile

import kml2geojson
import lxml.etree

In [0]:
def parse_broken_kml(contents):
    """Repair a possibly malformed KML document and build GeoJSON layers from it.

    The document is parsed with lxml using a recovering parser, serialized
    back out, and re-parsed with ``xml.dom.minidom`` so that the resulting DOM
    can be passed to ``kml2geojson.build_layers``.

    Args:
      contents: The KML document, in a form accepted by
        ``lxml.etree.fromstring``.

    Returns:
      The result of ``kml2geojson.build_layers`` for the repaired document.
      Callers in this notebook index it with ``[0]`` and read ``"features"``
      from that element.
    """
    lenient_parser = lxml.etree.XMLParser(recover=True)
    repaired_root = lxml.etree.fromstring(contents, parser=lenient_parser)
    repaired_xml = lxml.etree.tostring(repaired_root)
    dom = xml.dom.minidom.parseString(repaired_xml)
    return kml2geojson.build_layers(dom)

In [0]:
def read_2018_census_kml(filename):
  """Download a 2018 Census KML archive and return the KML file's bytes.

  Args:
    filename: Base name without extension (e.g. "cb_2018_us_state_20m"). It is
      used both for the "<filename>.zip" download and for the
      "<filename>.kml" member read from that archive.

  Returns:
    The raw bytes of the "<filename>.kml" archive member.

  Raises:
    urllib.error.URLError: If the download fails.
    zipfile.BadZipFile: If the downloaded payload is not a zip archive.
    KeyError: If the archive has no "<filename>.kml" member.
  """
  url = "https://www2.census.gov/geo/tiger/GENZ2018/kml/" + filename + ".zip"
  # Read the whole response into memory: ZipFile needs a seekable file object.
  with urllib.request.urlopen(url) as infile:
    buffer = io.BytesIO(infile.read())
  # Use a context manager so the archive is closed deterministically instead
  # of relying on garbage collection.
  with zipfile.ZipFile(buffer) as archive:
    return archive.read(filename + ".kml")

In [0]:
# Download the "cb_2018_us_state_20m" KML, convert it, and keep the first
# element of what parse_broken_kml returns as the untouched state data.
raw_states = parse_broken_kml(read_2018_census_kml("cb_2018_us_state_20m"))[0]

In [0]:
# Build the cleaned state features from a deep copy, so raw_states stays
# untouched and this cell can be re-run.
states = copy.deepcopy(raw_states)
for feature in states["features"]:
  attrs = feature["properties"]
  # Drop the area attributes first (same order as before).
  for unused_key in ("ALAND", "AWATER"):
    del attrs[unused_key]
  # Mark the feature as a state and expose GEOID / NAME under friendlier keys;
  # the state id is converted to an int.
  attrs["is_a_state"] = True
  attrs["state_id"] = int(attrs.pop("GEOID"))
  attrs["name"] = attrs.pop("NAME")
  # Remove the remaining Census / KML attributes that are not exported.
  for unused_key in ("STATEFP", "STATENS", "LSAD", "description", "styleUrl"):
    del attrs[unused_key]

In [0]:
# Download the "cb_2018_us_county_20m" KML, convert it, and keep the first
# element of what parse_broken_kml returns as the untouched county data.
raw_counties = parse_broken_kml(read_2018_census_kml("cb_2018_us_county_20m"))[0]

In [0]:
# Build the cleaned county features from a deep copy, so raw_counties stays
# untouched and this cell can be re-run.
counties = copy.deepcopy(raw_counties)
for feature in counties["features"]:
  attrs = feature["properties"]
  # Drop attributes that are not exported.
  for unused_key in ("ALAND", "AWATER", "COUNTYFP", "COUNTYNS"):
    del attrs[unused_key]
  attrs["fips_id"] = attrs.pop("GEOID")
  del attrs["LSAD"]
  attrs["name"] = attrs.pop("NAME")
  # NOTE(review): state_id keeps the raw STATEFP value here, whereas the states
  # cell converts its GEOID-derived state_id with int() - confirm that
  # consumers expect the two to differ.
  attrs["state_id"] = attrs.pop("STATEFP")
  for unused_key in ("description", "styleUrl"):
    del attrs[unused_key]
  attrs["is_a_state"] = False

In [0]:
# Lookup table from integer state id to state name, built from the cleaned
# state features.
state_names = dict(
    (int(feature["properties"]["state_id"]), feature["properties"]["name"])
    for feature in states["features"])

In [0]:
# Write the cleaned county features to counties.json in the working directory.
with open("counties.json", mode="w") as counties_file:
  json.dump(counties, fp=counties_file)

In [0]:
# Write the cleaned state features to states.json in the working directory.
with open("states.json", mode="w") as states_file:
  json.dump(states, fp=states_file)

In [0]:
# Write the id -> name table to state_names.json. JSON object keys are always
# strings, so the integer state ids are stored as strings in the file.
with open("state_names.json", mode="w") as state_names_file:
  json.dump(state_names, fp=state_names_file)

In [0]:
# Bundle the three generated JSON files into one gzip-compressed tarball;
# -v lists each file as it is added.
!tar -czvf json_us_geography.tar.gz counties.json states.json state_names.json

counties.json
states.json
state_names.json
