In [None]:
!pip install geopandas


In [4]:
import geopandas as gpd
import os
import warnings
import sys
from io import StringIO

# Suppress fiona's JSON-parsing warning.
# ``message`` is a regular expression matched (case-insensitively) against the
# START of the warning text, so the literal parentheses in "(char 1)" have to
# be escaped; unescaped they form a regex group and the filter never matches.
FIONA_JSON_WARNING_PATTERN = (
    r"Expecting property name enclosed in double quotes: "
    r"line 1 column 2 \(char 1\)"
)
# ``module`` is a regular expression as well, hence the escaped dot.
warnings.filterwarnings("ignore", message=FIONA_JSON_WARNING_PATTERN, module=r"fiona\.ogrext")

# Create a custom stream class to filter out specific messages
class FilteredStream:
    """Text-stream wrapper that hides fiona's JSON-parsing warning lines.

    Each ``write`` call whose payload contains ``SUPPRESSED_MARKER`` is
    diverted into ``self.buffer`` instead of reaching the wrapped stream;
    every other payload is forwarded unchanged.

    Attributes:
        stream: The wrapped, real output stream.
        buffer: ``StringIO`` collecting the suppressed payloads.
    """

    # Substring identifying the log lines that should not be shown.
    SUPPRESSED_MARKER = "WARNING:fiona.ogrext:Expecting property name enclosed in double quotes"

    def __init__(self, stream):
        self.stream = stream
        self.buffer = StringIO()

    def write(self, data):
        """Forward *data* to the wrapped stream unless it is a suppressed line.

        Returns:
            The number of characters consumed, as text streams are expected
            to do. Suppressed data counts as consumed so callers do not retry.
        """
        if self.SUPPRESSED_MARKER in data:
            self.buffer.write(data)  # Capture discarded data if needed
        else:
            self.stream.write(data)
        return len(data)

    def flush(self):
        """Flush the wrapped stream."""
        self.stream.flush()

    def __getattr__(self, name):
        """Delegate everything else (``isatty``, ``encoding``, ...) to the wrapped stream.

        Only called when normal attribute lookup fails, so ``stream`` and
        ``buffer`` set in ``__init__`` are never routed through here.
        """
        if name == "stream":
            # ``stream`` is missing only on a half-built instance; bail out
            # instead of recursing through ``self.stream`` forever.
            raise AttributeError(name)
        return getattr(self.stream, name)

# Replace sys.stderr with our filtered stream.
# Re-running this cell must not stack another wrapper on top of the previous
# one, so stderr is wrapped only when it is not already a FilteredStream. The
# class is compared by name because re-running the cell also re-creates the
# class object, which would make an isinstance() check miss the old wrapper.
if type(sys.stderr).__name__ != "FilteredStream":
    sys.stderr = FilteredStream(sys.stderr)


# Source datasets to convert. Commented-out entries are skipped; only the
# uncommented paths end up in the list.
input_paths = [
    # "/geoJSON/cleaned/Agricultural_Minerals_Operations_Cleaned.geojson",
    # "/geoJSON/cleaned/Aquifers_Cleaned.geojson",
    # "/geoJSON/cleaned/Aviation_Facilities_Cleaned.geojson",
    # "/geoJSON/cleaned/Biodiesel_Plants_Cleaned.geojson",
    # "/geoJSON/cleaned/bridge/Bridges_Cleaned.geojson",
    # "/geoJSON/cleaned/CBP_2021_Cleaned.geojson",
    # "/geoJSON/cleaned/Cellular_Towers_Cleaned.geojson",
    # "/geoJSON/cleaned/Colleges_and_Universities_Cleaned.geojson",
    # "/geoJSON/cleaned/Construction_Minerals_Operations_Cleaned.geojson",
    # "/geoJSON/cleaned/Courthouses_Cleaned.geojson",
    # "/geoJSON/cleaned/DOE_Petroleum_Reserves_Cleaned.geojson",
    # "/geoJSON/cleaned/Dams_Cleaned.geojson",
    # "/geoJSON/cleaned/Dialysis_Centers_Cleaned.geojson",
    # "/geoJSON/cleaned/EMS_Stations_Cleaned.geojson",
    # "/geoJSON/cleaned/Ethanol_Plants_Cleaned.geojson",
    # "/geoJSON/cleaned/FDIC_Banks_Cleaned.geojson",
    # "/geoJSON/cleaned/Federal_Reserve_Cleaned.geojson",
    # "/geoJSON/cleaned/Ferrous_Metal_Mines_Cleaned.geojson",
    # "/geoJSON/cleaned/Ferrous_Metal_Processing_Plants_Cleaned.geojson",
    # "/geoJSON/cleaned/Fortune_500_HQ_Cleaned.geojson",
    # "/geoJSON/cleaned/Gas_Compressor_Stations_Cleaned.geojson",
    # "/geoJSON/cleaned/Gold_Repositories_Cleaned.geojson",
    # "/geoJSON/cleaned/Hospitals_Cleaned.geojson",
    # "/geoJSON/cleaned/Hydrocarbon_Pipelines_Cleaned.geojson",
    # "/geoJSON/cleaned/Industrial_Mineral_Operations_Cleaned.geojson",
    # "/geoJSON/cleaned/LNG_Storage_Facilities_Cleaned.geojson",
    # "/geoJSON/cleaned/LNG_Terminals_Cleaned.geojson",
    # "/geoJSON/cleaned/Local_Law_Enforcement_Cleaned.geojson",
    # "/geoJSON/cleaned/Manufacturing_Facilities_Cleaned.geojson",
    # "/geoJSON/cleaned/Microwave_Service_Towers_Cleaned.geojson",
    # "/geoJSON/cleaned/Military_Installations_Cleaned.geojson",
    # "/geoJSON/cleaned/Mines_and_Mineral_Resources_Cleaned.geojson",
    # "/geoJSON/cleaned/Natural_Gas_Wells_Cleaned.geojson",
    # "/geoJSON/cleaned/NGL_Pipelines_Cleaned.geojson",
    # "/geoJSON/cleaned/NG_Processing_Plants_Cleaned.geojson",
    # "/geoJSON/cleaned/Nonferrous_Metal_Mines_Cleaned.geojson",
    # "/geoJSON/cleaned/Oil_Refineries_Cleaned.geojson",
    # "/geoJSON/cleaned/Oil_Wells_Cleaned.geojson",
    # "/geoJSON/cleaned/Peak_Shaving_Facilities_Cleaned.geojson",
    # "/geoJSON/cleaned/Petroleum_Terminals_Cleaned.geojson",
    # "/geoJSON/cleaned/Pharmacies_Cleaned.geojson",
    # "/geoJSON/cleaned/Power_Transmission_Lines_Cleaned.geojson",
    # "/geoJSON/cleaned/Primary_Roads_Cleaned.geojson",
    # "/geoJSON/cleaned/Principal_Ports_Cleaned.geojson",
    # "/geoJSON/cleaned/Private_Schools_Cleaned.geojson",
    # "/geoJSON/cleaned/Public_Health_Departments_Cleaned.geojson",
    # "/geoJSON/cleaned/Rail_Lines_Cleaned.geojson",
    # "/geoJSON/cleaned/Refrigerated_Warehouses_Cleaned.geojson",
    # "/geoJSON/cleaned/Sand_and_Gravel_Operations_Cleaned.geojson",
    # "/geoJSON/cleaned/Solid_Waste_Facilities_Cleaned.geojson",
    # "/geoJSON/cleaned/Spaceports_Cleaned.geojson",
    # "/geoJSON/cleaned/State_Capitol_Buildings_Cleaned.geojson",
    # "/geoJSON/cleaned/State_Government_Buildings_Cleaned.geojson",
    # "/geoJSON/cleaned/USACE_Offices_Cleaned.geojson",
    # "/geoJSON/cleaned/USACE_Reservoirs_Cleaned.geojson",
    # "/geoJSON/cleaned/Uranium_and_Vanadium_Deposits_Cleaned.geojson",
    # "/geoJSON/cleaned/Urgent_Care_Facilities_Cleaned.geojson",
    # "/geoJSON/cleaned/VA_Medical_Facilities_Cleaned.geojson",
    "/old/datasets/raw/geographic/gadm41_USA_2.json",
]

# Destination folder for the generated GPKG files; it is created up front
# (including missing parents) and an already existing folder is accepted.
output_directory = "/geoJSON/geopackages"
os.makedirs(name=output_directory, exist_ok=True)


In [None]:
import os
import time
import warnings
from datetime import timedelta

# Suppress fiona's JSON-parsing warning.
# ``message`` is a regular expression matched (case-insensitively) against the
# START of the warning text, so the literal parentheses in "(char 1)" have to
# be escaped; unescaped they form a regex group and the filter never matches.
FIONA_JSON_WARNING_PATTERN = (
    r"Expecting property name enclosed in double quotes: "
    r"line 1 column 2 \(char 1\)"
)
# ``module`` is a regular expression as well, hence the escaped dot.
warnings.filterwarnings("ignore", message=FIONA_JSON_WARNING_PATTERN, module=r"fiona\.ogrext")



# Convert every input dataset to GeoPackage, printing per-file size figures
# and a running estimate of the remaining time.
def _gpkg_output_path(source_path, target_dir):
    """Return the ``.gpkg`` path inside *target_dir* for *source_path*.

    The extension is swapped with ``os.path.splitext`` rather than a textual
    ``.geojson`` replacement, so inputs with any suffix (``.json`` included)
    get a ``.gpkg`` name instead of keeping their original extension.
    """
    stem = os.path.splitext(os.path.basename(source_path))[0]
    return os.path.join(target_dir, stem + ".gpkg")


def _to_mb(num_bytes):
    """Convert a byte count to mebibytes for display."""
    return num_bytes / (1024 * 1024)


total_files = len(input_paths)
failed_paths = []  # inputs whose conversion raised, reported at the end
start_time = time.time()

for i, input_path in enumerate(input_paths):
    file_start_time = time.time()
    try:
        gdf = gpd.read_file(input_path)
        output_path = _gpkg_output_path(input_path, output_directory)
        gdf.to_file(output_path, driver='GPKG')

        # Get file sizes
        original_size = os.path.getsize(input_path)
        new_size = os.path.getsize(output_path)
    except Exception as e:
        # Best-effort batch: report the failure and carry on with the next file.
        failed_paths.append(input_path)
        print(f"Error converting {input_path}: {e}")
        continue

    size_reduction = original_size - new_size
    # Guard the percentage against an empty source file.
    reduction_pct = (size_reduction / original_size) * 100 if original_size else 0.0

    # Print file information
    print(f"\nProcessed file {i+1}/{total_files}: {os.path.basename(input_path)}")
    print(f"Original size: {_to_mb(original_size):.2f} MB")
    print(f"New size: {_to_mb(new_size):.2f} MB")
    print(f"Size reduction: {_to_mb(size_reduction):.2f} MB ({reduction_pct:.2f}%)")

    # Estimate remaining time from the average time per file handled so far
    file_end_time = time.time()
    elapsed_time = file_end_time - start_time
    average_time_per_file = elapsed_time / (i + 1)
    remaining_time = average_time_per_file * (total_files - (i + 1))

    print(f"Time taken for this file: {timedelta(seconds=(file_end_time - file_start_time))}")
    print(f"Estimated remaining time: {timedelta(seconds=remaining_time)}")

total_end_time = time.time()
total_elapsed_time = timedelta(seconds=(total_end_time - start_time))
print(f"\nAll files processed. Total time taken: {total_elapsed_time}")

# Make failures visible in the summary instead of only in the scrollback.
if failed_paths:
    print(f"{len(failed_paths)} of {total_files} file(s) failed to convert:")
    for failed_path in failed_paths:
        print(f"  {failed_path}")
