## Setup

In [None]:
# Standard library
import math
import sys
from pathlib import Path

# Third-party: geometry handling and visualization
import geopandas as gpd
import matplotlib.pyplot as plt
import pandas as pd
from matplotlib.patches import Patch
from shapely.affinity import scale
from shapely.geometry import Point

# Add scripts directory to path
sys.path.insert(0, str(Path.cwd().parent / 'scripts'))

# Import geometry functions
from geometry import (
    download_country_shapes,
    download_nuts3_shapes,
    join_shapes,
    point_in_shape,
    mask_shape,
    buffer_shape,
    get_shape_area,
    get_european_union_shape,
)

print("✓ Setup complete")

## 1. Download Country Shapes

Download boundaries for European countries. Data is cached locally after first download.

In [None]:
# Fetch the boundaries for Germany (DE) and Poland (PL)
country_codes = ['DE', 'PL']
countries = download_country_shapes(country_codes)

print(f"Downloaded {len(countries)} countries")
print("\nCountry data:")

# Last expression of the cell: shown with the notebook's rich display
countries.loc[:, ['country', 'geometry']]

In [None]:
# Visualize
fig, ax = plt.subplots(figsize=(10, 8))

# Tie each fill colour to a country code instead of to the row order of
# `countries`, so the colouring does not depend on how the rows are sorted.
fill_colors = countries['country'].map({'DE': 'blue', 'PL': 'red'}).tolist()
countries.plot(ax=ax, edgecolor='black', color=fill_colors, alpha=0.5)

# Label every country at its centroid. A plain loop is used because
# `annotate` is called purely for its side effect on the axes.
for _, row in countries.iterrows():
    ax.annotate(
        text=row['country'],
        xy=row.geometry.centroid.coords[0],
        ha='center',
        fontsize=14,
        fontweight='bold',
    )

ax.set_title('Germany and Poland', fontsize=16)
ax.set_xlabel('Longitude')
ax.set_ylabel('Latitude')
ax.grid(True, alpha=0.3)
plt.show()

## 2. Join Shapes

Combine multiple geometries into a single unified boundary.

In [None]:
# Merge the two country geometries into a single shape
combined = join_shapes(countries)

combined_type = type(combined).__name__
print(f"Combined shape type: {combined_type}")
print(f"Combined area: {get_shape_area(combined):,.0f} km²")

# Per-country areas, for comparison with the merged shape
is_germany = countries['country'] == 'DE'
de_area = get_shape_area(countries[is_germany])
is_poland = countries['country'] == 'PL'
pl_area = get_shape_area(countries[is_poland])

print(f"\nGermany area:  {de_area:,.0f} km²")
print(f"Poland area:   {pl_area:,.0f} km²")
print(f"Sum of areas:  {de_area + pl_area:,.0f} km²")

In [None]:
# Visualize combined shape
fig, ax = plt.subplots(figsize=(10, 8))

# Wrap the bare geometry in a GeoDataFrame for plotting. The CRS of the
# source frame is carried over so the wrapper is not left without one
# (assumes `join_shapes` keeps the coordinates of its input - TODO confirm).
combined_gdf = gpd.GeoDataFrame(geometry=[combined], crs=countries.crs)
combined_gdf.plot(ax=ax, color='green', alpha=0.5, edgecolor='black', linewidth=2)

ax.set_title('Combined Germany + Poland', fontsize=16)
ax.set_xlabel('Longitude')
ax.set_ylabel('Latitude')
ax.grid(True, alpha=0.3)
plt.show()

## 3. Point-in-Shape Checks

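Test whether coordinate points fall within boundaries. Note that `point_in_shape` is called with latitude first and longitude second, whereas Shapely's `Point` takes `(x, y)`, i.e. `(longitude, latitude)`.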

In [None]:
# City coordinates, stored as (latitude, longitude)
cities = {
    'Berlin': (52.5200, 13.4050),
    'Munich': (48.1351, 11.5820),
    'Warsaw': (52.2297, 21.0122),
    'Krakow': (50.0647, 19.9450),
    'Paris': (48.8566, 2.3522),
    'Prague': (50.0755, 14.4378),
}

# One record per city, flagging whether it lies inside the merged DE+PL shape
results = [
    {
        'City': name,
        'Latitude': latitude,
        'Longitude': longitude,
        'In DE+PL': point_in_shape(latitude, longitude, combined),
    }
    for name, (latitude, longitude) in cities.items()
]

results_df = pd.DataFrame(results)
print("\nCity location checks:")
print(results_df.to_string(index=False))

In [None]:
# Draw the merged region and overlay the cities on top of it
fig, ax = plt.subplots(figsize=(12, 10))

# Region outline
region_gdf = gpd.GeoDataFrame(geometry=[combined])
region_gdf.plot(ax=ax, color='lightblue', alpha=0.5, edgecolor='black', linewidth=2)

# City markers: green circle when inside the region, red cross otherwise
for city, (lat, lon) in cities.items():
    if point_in_shape(lat, lon, combined):
        color, marker = 'green', 'o'
    else:
        color, marker = 'red', 'x'
    ax.plot(lon, lat, marker=marker, color=color, markersize=10,
            markeredgecolor='black', markeredgewidth=1)
    ax.annotate(city, (lon, lat), xytext=(5, 5), textcoords='offset points',
                fontsize=9, fontweight='bold')

ax.set_title('Cities in/out of DE+PL (Green=In, Red=Out)', fontsize=16)
ax.set_xlabel('Longitude')
ax.set_ylabel('Latitude')
ax.grid(True, alpha=0.3)
plt.show()

## 4. NUTS-3 Regions

Download and work with NUTS-3 statistical regions.

In [None]:
# Fetch the NUTS-3 regions that belong to Germany
nuts3_de = download_nuts3_shapes(['DE'])

region_count = len(nuts3_de)
print(f"Downloaded {region_count} NUTS-3 regions for Germany")
print("\nSample regions:")

# First ten regions, identifier and Latin-script name only
nuts3_de.head(10)[['NUTS_ID', 'NAME_LATN']]

In [None]:
# Map of all German NUTS-3 regions, coloured with a qualitative palette
fig, ax = plt.subplots(figsize=(12, 10))
nuts3_de.plot(ax=ax, edgecolor='black', linewidth=0.5, cmap='tab20', alpha=0.7)

ax.set_title(f'German NUTS-3 Regions (n={len(nuts3_de)})', fontsize=16)
ax.set_xlabel('Longitude')
ax.set_ylabel('Latitude')
ax.grid(True, alpha=0.3)
plt.show()

In [None]:
# Find which NUTS-3 region contains Berlin
berlin = (52.5200, 13.4050)  # (latitude, longitude)
berlin_lat, berlin_lon = berlin

# Scan the regions and stop at the first one containing the point
for _, row in nuts3_de.iterrows():
    if point_in_shape(berlin_lat, berlin_lon, row['geometry']):
        print("Berlin is in NUTS-3 region:")
        print(f"  ID: {row['NUTS_ID']}")
        print(f"  Name: {row['NAME_LATN']}")
        print(f"  Country: {row['CNTR_CODE']}")
        break
else:
    # Runs only when the loop finished without `break`, i.e. no region matched.
    # Without this branch the cell would silently print nothing.
    print("Berlin was not found in any of the downloaded NUTS-3 regions")

## 5. Intersection (Masking)

Compute the overlap between two boundaries.

In [None]:
# Create a circular region of roughly 100 km radius around Berlin
BERLIN_LAT, BERLIN_LON = 52.5200, 13.4050
RADIUS_KM = 100
KM_PER_DEG_LAT = 111.32  # approximate length of one degree of latitude

# Shapely points are (x, y), i.e. (longitude, latitude)
berlin_point = Point(BERLIN_LON, BERLIN_LAT)

# A buffer on lon/lat coordinates is measured in degrees, and a degree of
# longitude is shorter than a degree of latitude by a factor cos(latitude).
# Buffer by the radius expressed in degrees of latitude, then stretch the
# result east-west by 1/cos(latitude) so it covers ~100 km in every direction.
# (Assumes the country shapes are in lon/lat degrees - TODO confirm.)
radius_deg = RADIUS_KM / KM_PER_DEG_LAT
berlin_circle = scale(
    berlin_point.buffer(radius_deg),
    xfact=1 / math.cos(math.radians(BERLIN_LAT)),
    yfact=1.0,
    origin=berlin_point,
)

# Find intersection with Germany
germany = countries[countries['country'] == 'DE']
intersection = mask_shape(germany, berlin_circle, return_gdf=True)

print(f"Intersection area: {get_shape_area(intersection):,.0f} km²")
print(f"Original Germany area: {get_shape_area(germany):,.0f} km²")

In [None]:
# Visualize intersection
fig, ax = plt.subplots(figsize=(12, 10))

# Plot Germany
germany.plot(ax=ax, color='lightblue', alpha=0.3, edgecolor='blue', linewidth=2)

# Plot circle
gpd.GeoDataFrame(geometry=[berlin_circle]).plot(
    ax=ax, color='yellow', alpha=0.3, edgecolor='orange', linewidth=2
)

# Plot intersection
intersection.plot(ax=ax, color='red', alpha=0.5, edgecolor='darkred', linewidth=2)

# Plot Berlin; keep the Line2D handle so it can be listed in the legend
(berlin_marker,) = ax.plot(13.4050, 52.5200, 'ko', markersize=10, label='Berlin')

# Passing `label=` to the polygon plots above does not produce legend entries
# (GeoPandas draws polygons as a patch collection that matplotlib's legend
# cannot turn into a handle), so the legend is built from proxy patches that
# mirror each layer's styling.
legend_handles = [
    Patch(facecolor='lightblue', edgecolor='blue', alpha=0.3, label='Germany'),
    Patch(facecolor='yellow', edgecolor='orange', alpha=0.3, label='100km around Berlin'),
    Patch(facecolor='red', edgecolor='darkred', alpha=0.5, label='Intersection'),
    berlin_marker,
]

ax.set_title('Intersection: Germany ∩ (100km radius around Berlin)', fontsize=16)
ax.set_xlabel('Longitude')
ax.set_ylabel('Latitude')
ax.legend(handles=legend_handles, loc='best')
ax.grid(True, alpha=0.3)
plt.show()

## 6. Buffer Operations

Expand or contract boundaries by a distance.

In [None]:
# Luxembourg is small, which makes the effect of a buffer easy to see
luxembourg = download_country_shapes(['LU'])

# Grow the boundary outward by 50 km and by 100 km
buffer_50km = buffer_shape(luxembourg, distance_km=50, return_gdf=True)
buffer_100km = buffer_shape(luxembourg, distance_km=100, return_gdf=True)

# Areas of the original shape and of both buffered shapes, in that order
orig_area, area_50, area_100 = (
    get_shape_area(shape) for shape in (luxembourg, buffer_50km, buffer_100km)
)

print(f"Luxembourg area:        {orig_area:,.0f} km²")
print(f"+ 50km buffer:          {area_50:,.0f} km²  (+{area_50-orig_area:,.0f} km²)")
print(f"+ 100km buffer:         {area_100:,.0f} km²  (+{area_100-orig_area:,.0f} km²)")

In [None]:
# Visualize buffers
fig, ax = plt.subplots(figsize=(10, 10))

# Draw the largest shape first so the smaller ones stay visible on top
buffer_100km.plot(ax=ax, color='lightblue', alpha=0.3, edgecolor='blue', linewidth=2)
buffer_50km.plot(ax=ax, color='lightgreen', alpha=0.3, edgecolor='green', linewidth=2)
luxembourg.plot(ax=ax, color='red', alpha=0.7, edgecolor='darkred', linewidth=2)

# Passing `label=` to the polygon plots above does not produce legend entries
# (GeoPandas draws polygons as a patch collection that matplotlib's legend
# cannot turn into a handle), so the legend is built from proxy patches that
# mirror each layer's styling.
legend_handles = [
    Patch(facecolor='lightblue', edgecolor='blue', alpha=0.3, label='100km buffer'),
    Patch(facecolor='lightgreen', edgecolor='green', alpha=0.3, label='50km buffer'),
    Patch(facecolor='red', edgecolor='darkred', alpha=0.7, label='Luxembourg'),
]

ax.set_title('Luxembourg with Buffers', fontsize=16)
ax.set_xlabel('Longitude')
ax.set_ylabel('Latitude')
ax.legend(handles=legend_handles, loc='best')
ax.grid(True, alpha=0.3)
plt.show()

## 7. European Union Boundary

Get the unified boundary of all EU member states.

In [None]:
# Unified outline of the EU member states
eu = get_european_union_shape()

eu_type_name = type(eu).__name__
print(f"EU shape type: {eu_type_name}")

eu_area_km2 = get_shape_area(eu)
print(f"EU total area: {eu_area_km2:,.0f} km²")

In [None]:
# Capital cities as (latitude, longitude); two of them lie outside the EU
capitals = {
    'Brussels (Belgium)': (50.8503, 4.3517),
    'Berlin (Germany)': (52.5200, 13.4050),
    'Paris (France)': (48.8566, 2.3522),
    'London (UK - not EU)': (51.5074, -0.1278),
    'Oslo (Norway - not EU)': (59.9139, 10.7522),
}

print("\nCapital cities in EU:")
for city, coords in capitals.items():
    lat, lon = coords
    if point_in_shape(lat, lon, eu):
        status = "✓ In EU"
    else:
        status = "✗ Not in EU"
    print(f"  {city:30s}: {status}")

## 8. Practical Application: Filter Power Network Buses

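Use the geometry module to filter buses by country, and compare the point-in-shape result with the dataset's own `country` column for the first 100 buses.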

In [None]:
# Locate the buses table relative to the notebook's parent directory
data_root = Path.cwd().parent / 'data' / 'raw'
buses_path = data_root / 'OSM Prebuilt Electricity Network' / 'buses.csv'

# `buses` stays None when the file is missing so later cells can skip cleanly
buses = None
if not buses_path.exists():
    print(f"Buses file not found at {buses_path}")
else:
    buses = pd.read_csv(buses_path)
    print(f"Loaded {len(buses)} buses")
    print("\nSample data:")
    preview_columns = ['bus_id', 'voltage', 'x', 'y', 'country']
    print(buses[preview_columns].head())

In [None]:
if buses is not None:
    # Filter buses in Germany using geometry
    germany_shape = download_country_shapes(['DE'])

    # Check first 100 buses (for speed). `.copy()` gives an independent frame,
    # so the columns added below are not written onto a slice of `buses`
    # (which would raise pandas' SettingWithCopyWarning).
    sample_buses = buses.head(100).copy()

    # `y` is passed as latitude and `x` as longitude.
    # NOTE(review): other cells pass a bare geometry to point_in_shape; here it
    # receives the frame returned by download_country_shapes - confirm that the
    # helper accepts both.
    sample_buses['in_germany_geom'] = sample_buses.apply(
        lambda row: point_in_shape(row['y'], row['x'], germany_shape),
        axis=1
    )

    # Compare with country column
    sample_buses['in_germany_col'] = sample_buses['country'] == 'DE'

    # Check agreement: number of rows where both methods give the same answer
    agreement = (sample_buses['in_germany_geom'] == sample_buses['in_germany_col']).sum()
    print(f"\nGeometry vs column agreement: {agreement}/{len(sample_buses)} ({100*agreement/len(sample_buses):.1f}%)")

    print(f"\nBuses in Germany (by geometry): {sample_buses['in_germany_geom'].sum()}")
    print(f"Buses in Germany (by column):   {sample_buses['in_germany_col'].sum()}")

## Summary

This notebook demonstrated:

1. ✅ **Downloading shapes** - Countries and NUTS-3 regions from Eurostat GISCO
2. ✅ **Joining shapes** - Union multiple geometries into one
3. ✅ **Point checks** - Test if coordinates fall within boundaries
4. ✅ **NUTS-3 regions** - Compatible with all geometry operations
5. ✅ **Intersection** - Compute overlap between boundaries
6. ✅ **Buffers** - Expand/contract boundaries by distance
7. ✅ **EU boundary** - Unified shape of all EU member states
8. ✅ **Practical use** - Filter power network elements by location

All data is automatically cached for fast subsequent access!