In [8]:
import fiona
from fiona.transform import transform_geom
!pip install geopandas
import geopandas as gpd
import os

# Clean every GeoJSON file found directly inside `input_dir`.
# Each feature's geometry is passed through fiona's transform_geom and the
# feature is copied into "<name>_clean.geojson" under `output_dir`. Features
# that raise during the transform or the write are reported and skipped, and a
# per-file summary of total / written / skipped counts is printed.

# Define input and output directories
input_dir = "."  # Current folder or change as needed
output_dir = "./cleaned_geojson"
os.makedirs(output_dir, exist_ok=True)

# Iterate over all .geojson files in the input directory
for filename in os.listdir(input_dir):
    if not filename.endswith(".geojson"):
        continue

    input_path = os.path.join(input_dir, filename)
    # Split off only the final extension. str.replace() would rewrite every
    # ".geojson" occurrence, mangling names such as "a.geojson.v2.geojson".
    stem, ext = os.path.splitext(filename)
    output_path = os.path.join(output_dir, f"{stem}_clean{ext}")

    print(f"\nProcessing file: {filename}")
    total = 0
    written = 0

    with fiona.open(input_path) as src:
        # Reuse the source metadata so the output is opened with the same settings.
        meta = src.meta
        # The CRS does not change while iterating, so read it once per file.
        src_crs = src.crs
        with fiona.open(output_path, 'w', **meta) as dst:
            for feature in src:
                total += 1
                try:
                    # NOTE(review): source and target CRS are identical, so this
                    # presumably serves as a geometry round-trip/validity check
                    # rather than a reprojection -- confirm intent.
                    feature["geometry"] = transform_geom(src_crs, src_crs, feature["geometry"])
                    dst.write(feature)
                    written += 1
                except Exception as e:
                    # Best effort: report the failing feature and keep going.
                    print(f"  Skipped feature due to error: {e}")

    print(f"  Total features: {total}, Successfully written: {written}, Skipped: {total - written}")

print("\nCleaning complete. Cleaned files saved in:", output_dir)

# This script processes all GeoJSON files in the specified directory and writes a cleaned copy of each one to the output directory.

Collecting geopandas
  Downloading geopandas-1.0.1-py3-none-any.whl.metadata (2.2 kB)
Collecting numpy>=1.22 (from geopandas)
  Using cached numpy-2.0.2-cp39-cp39-macosx_14_0_arm64.whl.metadata (60 kB)
Collecting pyogrio>=0.7.2 (from geopandas)
  Downloading pyogrio-0.11.0-cp39-cp39-macosx_12_0_arm64.whl.metadata (5.3 kB)
Collecting pandas>=1.4.0 (from geopandas)
  Using cached pandas-2.2.3-cp39-cp39-macosx_11_0_arm64.whl.metadata (89 kB)
Collecting pyproj>=3.3.0 (from geopandas)
  Downloading pyproj-3.6.1-cp39-cp39-macosx_11_0_arm64.whl.metadata (31 kB)
Collecting shapely>=2.0.0 (from geopandas)
  Downloading shapely-2.0.7-cp39-cp39-macosx_11_0_arm64.whl.metadata (6.8 kB)
Collecting pytz>=2020.1 (from pandas>=1.4.0->geopandas)
  Using cached pytz-2025.2-py2.py3-none-any.whl.metadata (22 kB)
Collecting tzdata>=2022.7 (from pandas>=1.4.0->geopandas)
  Using cached tzdata-2025.2-py2.py3-none-any.whl.metadata (1.4 kB)
Downloading geopandas-1.0.1-py3-none-any.whl (323 kB)
Using cached nump

In [9]:
# Directory that holds the cleaned GeoJSON outputs
cleaned_dir = "./cleaned_geojson"

# Pick out the GeoJSON entries up front, then summarise each one in turn
geojson_names = [entry for entry in os.listdir(cleaned_dir) if entry.endswith(".geojson")]

for filename in geojson_names:
    path = os.path.join(cleaned_dir, filename)
    print(f"\nLoading {filename}...")

    # Load the file into a GeoDataFrame for inspection
    gdf = gpd.read_file(path)

    # Report size, geometry types, CRS and column names
    print("  Features:", len(gdf))
    print("  Geometry types:", gdf.geom_type.unique().tolist())
    print("  CRS:", gdf.crs)
    print("  Columns:", gdf.columns.tolist())

    # Show the first two rows with the notebook's rich display
    display(gdf.head(2))
