In [None]:
!pip install geopandas pandas fiona shapely pyproj rtree openpyxl



In [None]:
# Mount Google Drive at /content/drive; the folder paths configured in the
# following cells live under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import os

# CHANGE input_folder: full path to the Google Drive folder containing your vector files.
# Example: '/content/drive/My Drive/Shapefiles'
input_folder = '/content/drive/My Drive/Shapefiles' # @param {type:"string"}
output_folder = os.path.join(input_folder, "Average_CI_Outputs") # @param {type:"string"}

# Validate the input folder BEFORE creating anything. output_folder lives inside
# input_folder, so calling os.makedirs() first would silently create a missing
# input folder and this check could never fail.
if not os.path.isdir(input_folder):
    raise SystemExit("Folder not found. Check your folder path.")
print(f"Processing vector files in: {input_folder}")

os.makedirs(output_folder, exist_ok=True)
print(f"Average compactness index outputs for each attribute class will be saved as an excel in: {output_folder}")

# Both fields must already exist in each vector file: files lacking either one
# are skipped by the processing cell, nothing is created here.
CI_field = input("Enter the name of the existing field that holds the Compactness Index (CI): ").strip()
building_class_field = input("Enter the field name for the polygons you want to find the CI for based on category: ").strip()
print(f"Results will be saved in: {output_folder}")

Processing vector files in: /content/drive/My Drive/Shapefiles
Enter the field name for Compactness Index (CI): area_m2
Enter the field name for building class: fclass
Results will be saved in: /content/drive/My Drive/Shapefiles/Average_CI_Outputs


In [None]:
import glob
import geopandas as gpd
import pandas as pd
from concurrent.futures import ThreadPoolExecutor

# Glob patterns, one per vector format that will be searched for in input_folder.
extensions = ['*.shp', '*.gpkg', '*.geojson', '*.json', '*.kml', '*.gml']

# Collect the matches pattern by pattern, keeping the order of `extensions`.
vector_files = [
    match
    for pattern in extensions
    for match in glob.glob(os.path.join(input_folder, pattern))
]

# Stop the cell early when there is nothing to process.
if len(vector_files) == 0:
    raise SystemExit("No vector files found.")

def process_file(file_path, output_folder, CI_field, building_class_field):
    """Average ``CI_field`` per ``building_class_field`` category for one vector file.

    The file is read with geopandas, the values of ``CI_field`` are averaged
    within each distinct value of ``building_class_field``, and the resulting
    table is printed and written to ``<file stem>_average_CI.xlsx`` inside
    ``output_folder``.

    Args:
        file_path: Path of the vector file to read.
        output_folder: Existing folder that receives the Excel output.
        CI_field: Name of the column whose values are averaged.
        building_class_field: Name of the column used to group the rows.

    Returns:
        None. A file that cannot be read, or that lacks either column, is
        reported on stdout and skipped. Errors raised while aggregating or
        writing the Excel file propagate to the caller.
    """
    try:
        gdf = gpd.read_file(file_path)
    except Exception as exc:
        # Catch Exception (not a bare except) so KeyboardInterrupt/SystemExit
        # still propagate, and report why the read failed.
        print(f"Failed to read {file_path}: {exc}")
        return

    if CI_field not in gdf.columns or building_class_field not in gdf.columns:
        print(f"Skipping {file_path} (fields not found)")
        return

    # Per-category mean (missing values are ignored), kept as a one-column frame.
    averages = gdf.groupby(building_class_field)[CI_field].mean().to_frame('average_CI')

    file_name = os.path.basename(file_path)
    # This function runs in several threads at once: emit the header and its
    # table in a single write so they stay together in the cell output.
    print(f"\nFile: {file_name}\n{averages}\n", end="")

    output_file = os.path.join(output_folder, f"{os.path.splitext(file_name)[0]}_average_CI.xlsx")
    averages.to_excel(output_file)
    print(f"Saved: {output_file}")

# Use at most 8 worker threads, and never more threads than there are files.
max_threads = min(8, len(vector_files))
with ThreadPoolExecutor(max_workers=max_threads) as executor:
    # Keep every future: an exception raised inside a worker is stored on its
    # future and would be lost silently if the future were discarded.
    submitted = [
        (file_path, executor.submit(process_file, file_path, output_folder, CI_field, building_class_field))
        for file_path in vector_files
    ]

# Leaving the `with` block waits for all tasks, so every future is finished here.
failed_files = []
for file_path, future in submitted:
    error = future.exception()
    if error is not None:
        failed_files.append(file_path)
        print(f"Error while processing {file_path}: {error!r}")

print("Batch average compactness index calculations complete!")
if failed_files:
    print(f"{len(failed_files)} file(s) could not be processed; see the errors above.")


File: NL_airports.gpkg

File: NL_dams.gpkg

File: NL_agriculture.gpkg

File: NL_parks.gpkg

File: NL_parking.gpkg
Skipping /content/drive/My Drive/Shapefiles/NL_coastal_marine_infrastructure.gpkg (fields not found)
          average_CI
fclass              
dam     19474.019467
                          average_CI
fclass                              
airport_international   10271.733712
airport_regional       404722.100445
apron                  172707.454814
runway                 535947.972371
                             average_CI
fclass                                 
burial_site                5.818399e+03
garden                     1.274380e+02
leisure_park               3.483240e+04
protected_provincial_park  1.362123e+08
          average_CI
fclass              
parking  1842.568347
Skipping /content/drive/My Drive/Shapefiles/NL_roads.gpkg (fields not found)
                           average_CI
fclass                               
farmland                 75637.069847
farmy