In [None]:
!pip install geopandas pandas fiona shapely pyproj rtree



In [None]:
# google.colab is provided by the Colab runtime; this cell is meant to run there.
from google.colab import drive
# Make Google Drive available under /content/drive so later cells can use it as a file path.
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import os

# CHANGE input_folder: Path to the folder in your Google Drive containing your vector files
# Example: 'My Drive/Shapefiles'
input_folder = '/content/drive/My Drive/Shapefiles' # @param {type:"string"}

# isdir (not exists): a path that points at a regular file is rejected here,
# instead of failing further down inside os.makedirs with a less helpful error.
if not os.path.isdir(input_folder):
    raise SystemExit("Folder not found. Check your folder path.")
print(f"Processing vector files in: {input_folder}")

# Column names are typed interactively; surrounding whitespace is stripped.
area_field = input("Enter the numeric field (numeric): ").strip()
class_field = input("Enter the class field (string): ").strip()

# An empty name would not match a real column, so every file would later be
# skipped with a misleading "fields not found" message. Stop early instead.
if not area_field or not class_field:
    raise SystemExit("Both the numeric field and the class field are required.")

# Results are written to a sub-folder of the input folder (created if missing).
output_folder = os.path.join(input_folder, "Shapefile_Stats_Excel") # @param {type:"string"}
os.makedirs(output_folder, exist_ok=True)
print(f"Results will be saved in: {output_folder}")

Processing vector files in: /content/drive/My Drive/Shapefiles
Enter the numeric field for area: area_m2
Enter the class field: fclass
Results will be saved in: /content/drive/My Drive/Shapefiles/Average_Area_Outputs


In [None]:
import glob
import geopandas as gpd
import pandas as pd
import os
from concurrent.futures import ThreadPoolExecutor

# Glob patterns for every vector format this notebook picks up.
extensions = ['*.shp', '*.gpkg', '*.geojson', '*.json', '*.kml', '*.gml']

# Gather the matches pattern by pattern, in the order the patterns are listed above.
vector_files = [
    matched_path
    for pattern in extensions
    for matched_path in glob.glob(os.path.join(input_folder, pattern))
]

# Nothing to process: stop the cell with a short message.
if len(vector_files) == 0:
    raise SystemExit("No vector files found.")

def process_file(file_path, output_folder, area_field, class_field):
    """Compute per-class statistics for one vector file and save them to Excel.

    The file is read with GeoPandas, rows are grouped by ``class_field`` and
    ``area_field`` is aggregated (sum, count, min, max, and average computed
    as sum / count). The table is printed and written to
    ``<output_folder>/<file stem>_area_stats.xlsx``.

    Args:
        file_path: Path of the vector file to read.
        output_folder: Folder that receives the Excel file.
        area_field: Name of the numeric column to aggregate.
        class_field: Name of the column to group by.

    Returns:
        None. A file that cannot be read, lacks either field, or whose
        ``area_field`` is not numeric is reported with a message and skipped.
    """
    try:
        gdf = gpd.read_file(file_path)
    except Exception as exc:
        # Not a bare ``except``: KeyboardInterrupt / SystemExit must still
        # propagate, and the reason for the failure is worth showing.
        print(f"Failed to read {file_path}: {exc}")
        return

    if area_field not in gdf.columns or class_field not in gdf.columns:
        print(f"Skipping {file_path} (fields not found)")
        return

    # sum / count only makes sense for numeric data; on a text column the
    # division below would raise a TypeError inside the worker thread.
    if not pd.api.types.is_numeric_dtype(gdf[area_field]):
        print(f"Skipping {file_path} (field '{area_field}' is not numeric)")
        return

    # Summary stats
    grouped = gdf.groupby(class_field)[area_field].agg(['sum', 'count', 'min', 'max'])
    grouped['average'] = grouped['sum'] / grouped['count']

    # Header and table go out in ONE print call: this function runs in several
    # threads at once, and separate prints let other workers' output land
    # between a file's name and its table.
    table = grouped[['count', 'sum', 'min', 'max', 'average']]
    print(f"\nFile: {os.path.basename(file_path)}\n{table}")

    # Save results
    stem = os.path.splitext(os.path.basename(file_path))[0]
    output_file = os.path.join(output_folder, f"{stem}_area_stats.xlsx")

    grouped.to_excel(output_file)
    print(f"Saved: {output_file}")

# Threaded batch execution
# At most 8 workers, never fewer than 1 (ThreadPoolExecutor rejects max_workers=0).
max_threads = max(1, min(8, len(vector_files)))
with ThreadPoolExecutor(max_workers=max_threads) as executor:
    # Keep every Future: an exception raised inside a worker is stored on its
    # Future and is silently lost if the Future is thrown away.
    futures = {
        executor.submit(process_file, file_path, output_folder, area_field, class_field): file_path
        for file_path in vector_files
    }

# Leaving the with-block waits for all workers; now surface any stored errors.
failed_files = []
for future, file_path in futures.items():
    error = future.exception()
    if error is not None:
        failed_files.append(file_path)
        print(f"Error while processing {file_path}: {error!r}")

if failed_files:
    print(f"Batch statistic processing finished with {len(failed_files)} failed file(s).")
else:
    print("Batch statistic processing complete!")

Skipping /content/drive/My Drive/Shapefiles/NL_coastal_marine_infrastructure.gpkg (fields not found)
File: NL_agriculture.gpkg


File: NL_airports.gpkg

File: NL_dams.gpkg

File: NL_parking.gpkg
Skipping /content/drive/My Drive/Shapefiles/NL_roads.gpkg (fields not found)

File: NL_parks.gpkg
                                 min           max       average
fclass                                                          
farmland                  269.791110  1.933057e+06  75637.069847
farmyard                 1682.750653  1.257321e+05  22927.782899
greenhouse_horticulture    10.088567  1.008857e+01     10.088567
meadow                    237.013580  3.866589e+05  27120.553218
                                   min           max       average
fclass                                                            
burial_site                 376.899098  8.964853e+04  5.818399e+03
garden                        0.420042  5.235040e+03  1.274380e+02
leisure_park                 22.847384  1.027092e