In [1]:
!pip install wget



In [2]:
import wget

url = "https://www2.census.gov/geo/tiger/GENZ2018/shp/cb_2018_us_state_500k.zip"
destination = "cb_2018_us_state_500k.zip"

wget.download(url, destination)

'cb_2018_us_state_500k (5).zip'

In [3]:
import zipfile
import os

# Path to the downloaded ZIP file
zip_file = "cb_2018_us_state_500k.zip"
# Directory to extract the contents to
extract_dir = "data"

# Create the directory if it doesn't exist
os.makedirs(extract_dir, exist_ok=True)

# Extract the contents of the ZIP file
with zipfile.ZipFile(zip_file, "r") as zip_ref:
    zip_ref.extractall(extract_dir)

In [4]:
import geopandas as gpd

# Load the shapefile
shapefile_path = os.path.join("data", "cb_2018_us_state_500k.shp")
gdf = gpd.read_file(shapefile_path)

# Now you can work with the GeoDataFrame 'gdf' as needed
print(gdf.head())

  STATEFP   STATENS     AFFGEOID GEOID STUSPS            NAME LSAD  \
0      28  01779790  0400000US28    28     MS     Mississippi   00   
1      37  01027616  0400000US37    37     NC  North Carolina   00   
2      40  01102857  0400000US40    40     OK        Oklahoma   00   
3      51  01779803  0400000US51    51     VA        Virginia   00   
4      54  01779805  0400000US54    54     WV   West Virginia   00   

          ALAND       AWATER  \
0  121533519481   3926919758   
1  125923656064  13466071395   
2  177662925723   3374587997   
3  102257717110   8528531774   
4   62266474513    489028543   

                                            geometry  
0  MULTIPOLYGON (((-88.50297 30.21523, -88.49176 ...  
1  MULTIPOLYGON (((-75.72681 35.93584, -75.71827 ...  
2  POLYGON ((-103.00257 36.52659, -103.00219 36.6...  
3  MULTIPOLYGON (((-75.74241 37.80835, -75.74151 ...  
4  POLYGON ((-82.64320 38.16909, -82.64300 38.169...  


In [5]:
from shapely.geometry import mapping
import numpy as np

# Define a dictionary to store state coordinates
state_coords = {}

# Iterate over each state in the GeoDataFrame
for idx, row in gdf.iterrows():
    # Extract the geometry of the state
    geometry = row["geometry"]
    # Convert the geometry to a GeoJSON object
    geojson_obj = mapping(geometry)
    # Extract the coordinates from the GeoJSON object
    geo_coords = geojson_obj["coordinates"]
    # Determine the index of the largest polygon (if there are multiple)
    idx = np.argmax([len(coords[0]) for coords in geo_coords])
    # Store the coordinates of the largest polygon in the dictionary
    state_coords[row["NAME"]] = geo_coords[idx]

In [6]:
# Define a dictionary to store simplified state coordinates
simplified_state_coords = {}

# Iterate over each state's coordinates
for state_name, state_coord in state_coords.items():
    # If the state has multiple polygons, simplify to a single polygon
    if len(state_coord) > 1:
        state_coord = state_coord[0]
    # Store the simplified coordinates in the dictionary
    simplified_state_coords[state_name] = state_coord
    
print(simplified_state_coords["Oklahoma"])

(-103.002565, 36.526588)


In [7]:
print("Current directory:", os.getcwd())

Current directory: C:\Users\Ivan\Documents


In [8]:
import json

with open('data/state_coordinates_1.json', 'w') as f:
    json.dump(simplified_state_coords, f)

In [9]:
import requests
import json

# Define the API endpoint with the scheme
api_url = "https://public.opendatasoft.com/api/explore/v2.1/catalog/datasets/us-state-boundaries/records?limit=50"

# Make the API request
response = requests.get(api_url)

# Check if the request was successful
if response.status_code == 200:
    # Parse the JSON response
    data = response.json()
    
    # Check if 'results' key exists in the response
    if 'results' in data:
        # Initialize a dictionary to store state coordinates
        state_coords = {}

        # Iterate over each record in the 'results'
        for record in data['results']:
            # Extract the name of the state
            state_name = record['name']

            # Extract the coordinates of the state boundary
            coordinates = record['st_asgeojson']['geometry']['coordinates']

            # Add the state name and coordinates to the dictionary
            state_coords[state_name] = coordinates

        # Remove certain states from the dictionary
        states_to_remove = ["Alaska", "Hawaii"]  # Add the names of states you want to remove
        for state in states_to_remove:
            if state in state_coords:
                del state_coords[state]

        # Save the state coordinates to a JSON file
        with open('data/state_coordinates_filtered.json', 'w') as f:
            json.dump(state_coords, f)

        print("Filtered state coordinates saved successfully.")
    else:
        print("No 'results' found in the API response.")
else:
    print("Failed to fetch data from the API.")

Filtered state coordinates saved successfully.
