In [1]:
import json
import time
from pathlib import Path
from urllib.parse import quote
from urllib.request import urlopen

import geopandas as gpd
import pandas as pd
from shapely.geometry import shape

In [2]:
# Lookup table: two-letter postal code -> full name.
# Lists the 50 states alphabetically by name, then DC and three territories.
# Insertion order matters because the processing loop iterates over .values().
state_abbreviations = {
    'AL': 'Alabama', 'AK': 'Alaska', 'AZ': 'Arizona', 'AR': 'Arkansas',
    'CA': 'California', 'CO': 'Colorado', 'CT': 'Connecticut', 'DE': 'Delaware',
    'FL': 'Florida', 'GA': 'Georgia', 'HI': 'Hawaii', 'ID': 'Idaho',
    'IL': 'Illinois', 'IN': 'Indiana', 'IA': 'Iowa', 'KS': 'Kansas',
    'KY': 'Kentucky', 'LA': 'Louisiana', 'ME': 'Maine', 'MD': 'Maryland',
    'MA': 'Massachusetts', 'MI': 'Michigan', 'MN': 'Minnesota', 'MS': 'Mississippi',
    'MO': 'Missouri', 'MT': 'Montana', 'NE': 'Nebraska', 'NV': 'Nevada',
    'NH': 'New Hampshire', 'NJ': 'New Jersey', 'NM': 'New Mexico', 'NY': 'New York',
    'NC': 'North Carolina', 'ND': 'North Dakota', 'OH': 'Ohio', 'OK': 'Oklahoma',
    'OR': 'Oregon', 'PA': 'Pennsylvania', 'RI': 'Rhode Island', 'SC': 'South Carolina',
    'SD': 'South Dakota', 'TN': 'Tennessee', 'TX': 'Texas', 'UT': 'Utah',
    'VT': 'Vermont', 'VA': 'Virginia', 'WA': 'Washington', 'WV': 'West Virginia',
    'WI': 'Wisconsin', 'WY': 'Wyoming',
    # Non-state entries
    'DC': 'District of Columbia', 'PR': 'Puerto Rico', 'GU': 'Guam', 'VI': 'Virgin Islands',
}

In [None]:
def json_to_geojson(input_json_file, output_geojson_file, tolerance=0.0001, timeout=60):
    """Download a GeoJSON-like document, simplify its geometries, and save it.

    Parameters
    ----------
    input_json_file : str
        URL of the JSON document to fetch (despite the name, this is handed
        to ``urlopen``, not opened as a local file). The document is read
        through its top-level ``'features'`` list; each feature must carry a
        ``'geometry'`` member and may carry a ``'properties'`` member.
    output_geojson_file : str or path-like
        Destination path of the simplified GeoJSON file. Missing parent
        directories are created before writing.
    tolerance : float, default 0.0001
        Tolerance forwarded to ``GeoSeries.simplify``. The frame is tagged
        EPSG:4326, so this is presumably in degrees.
    timeout : float, default 60
        Timeout in seconds forwarded to ``urlopen`` so a stalled connection
        raises instead of blocking indefinitely.

    Returns
    -------
    None
        The result is written to ``output_geojson_file``.

    Notes
    -----
    Errors from ``urlopen``, ``json.load``, ``shape`` and ``to_file``
    propagate unchanged. A feature without a ``'geometry'`` key still raises
    ``KeyError``; an explicit null geometry is kept as a missing geometry.
    """
    # Step 1: Download and parse the JSON document
    with urlopen(input_json_file, timeout=timeout) as response:
        state_zip_json = json.load(response)

    # Step 2: Convert JSON to GeoDataFrame
    # Assuming JSON has features in GeoJSON-like structure
    features = state_zip_json.get('features', [])
    geometries = []
    properties = []
    for feature in features:
        geometry = feature['geometry']
        # GeoJSON permits "geometry": null; shape() cannot handle None, so
        # carry it through as a missing geometry instead of crashing.
        geometries.append(shape(geometry) if geometry is not None else None)
        # "properties": null is also legal; normalise it to an empty record
        # so the DataFrame constructor receives only dicts.
        properties.append(feature.get('properties') or {})

    # Create a GeoDataFrame
    gdf = gpd.GeoDataFrame(properties, geometry=geometries, crs='EPSG:4326')

    # Step 3: Simplify the geometries by specified tolerance
    gdf['geometry'] = gdf['geometry'].simplify(tolerance)

    # Step 4: Save as GeoJSON
    # Create the destination folder first so a fresh checkout does not fail
    # at the very end of the download/simplify work.
    Path(output_geojson_file).parent.mkdir(parents=True, exist_ok=True)
    gdf.to_file(output_geojson_file, driver='GeoJSON')

In [None]:
# Remote folder holding the full-detail 2020 ZCTA files (one JSON per name
# in state_abbreviations), and the local folder for the simplified copies.
SOURCE_BASE_URL = "https://raw.githubusercontent.com/aha1994/zcta2020/main/2020%20Census%20Full%20Detail"
OUTPUT_DIR = "census2020/ZCTAs2020"
# Simplification tolerance used for this batch (coarser than the function default).
SIMPLIFY_TOLERANCE = 0.0005

for name in state_abbreviations.values():
    # Percent-encode the name for use in the URL path (spaces become %20);
    # the local output filename keeps the plain name.
    name_url = quote(name)
    print(f"Working to simplify file for {name}!")
    input_file = f"{SOURCE_BASE_URL}/{name_url}_ZCTAs_2020.json"
    output_file = f"{OUTPUT_DIR}/{name}_ZCTAs_simplified_2020.json"
    json_to_geojson(input_file, output_file, tolerance=SIMPLIFY_TOLERANCE)

print('Done! Your files are ready.')