In [15]:
import yaml
import geopandas as gpd
from pathlib import Path
import sys

In [16]:
# Load configuration from YAML.
# The file is expected to hold a mapping with a `shapefile` section that
# contains a `path` entry; that value becomes `shapefile_path`.
try:
    with open('config/secrets.yml', 'r') as f:
        config = yaml.safe_load(f)
except (OSError, UnicodeDecodeError, yaml.YAMLError) as e:
    # Raise instead of print + sys.exit(1): inside a notebook kernel sys.exit
    # only surfaces as a bare SystemExit and hides the original traceback.
    raise RuntimeError(f"Error reading config file: {e}") from e

try:
    shapefile_path = config['shapefile']['path']
except (KeyError, TypeError) as e:
    # KeyError: the section or key is missing.
    # TypeError: the file is empty (safe_load returns None) or one of the
    # levels is not a mapping.
    raise RuntimeError(
        f"Error reading config file: expected a 'shapefile' -> 'path' entry ({e!r})"
    ) from e

In [18]:
# Read the shapefile at `shapefile_path` into `gdf`.
try:
    gdf = gpd.read_file(shapefile_path)
except Exception as e:
    # The broad catch is kept because the concrete exception type raised by
    # read_file is not visible from here (presumably it depends on the I/O
    # backend -- TODO confirm). The error is re-raised with chaining rather
    # than print + sys.exit(1), so the notebook shows the full traceback
    # instead of a bare SystemExit.
    raise RuntimeError(f"Error reading shapefile: {e}") from e

In [19]:
# Check that the 'LINKED_AGG' column exists before it is used for the
# duplicate check.
if 'LINKED_AGG' not in gdf.columns:
    # Raise instead of print + sys.exit(1): in a notebook sys.exit surfaces as
    # a bare SystemExit; an exception stops "Run All" with a readable message.
    # Listing the available columns makes typos / renamed fields easy to spot.
    raise ValueError(
        "Error: 'LINKED_AGG' column not found in the shapefile "
        f"(available columns: {list(gdf.columns)})"
    )

In [20]:
# Check for duplicates in the 'LINKED_AGG' column.
# keep=False flags every row of a repeated value (not only the 2nd+ occurrence),
# so `duplicates.sum()` is the total number of rows involved in a duplication.
linked_agg = gdf['LINKED_AGG']
duplicates = linked_agg.duplicated(keep=False)

if duplicates.any():
    print("\n⚠️  We have a problem of duplicated OSCs! ⚠️")
    print(f"Number of duplicated entries: {duplicates.sum()}")
    print("Duplicate values:")
    # dropna=False: duplicated() treats repeated missing values as duplicates
    # of each other, while value_counts() drops them by default. Without this
    # the per-value breakdown could fail to add up to the total printed above.
    print(linked_agg[duplicates].value_counts(dropna=False))
else:
    print("\n✅ Everything is okay - no duplicated OSCs found!")


⚠️  We have a problem of duplicated OSCs! ⚠️
Number of duplicated entries: 27
Duplicate values:
LINKED_AGG
64    9
63    8
21    5
50    3
54    2
Name: count, dtype: int64


LINKED_AGG
64    9
63    8
21    5
50    3
54    2
Name: count, dtype: int64