In [None]:
import requests
import json
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from typing import Dict, List, Tuple

# Notebook-wide plotting defaults: seaborn "whitegrid" theme, then a 12x6 default figure size
sns.set_theme(style="whitegrid")
plt.rcParams.update({'figure.figsize': (12, 6)})

In [None]:
# MCP Server endpoint
# URL that query_mcp() POSTs SQL text to. The default points at a server on
# localhost port 8000; change it here if the server runs elsewhere.
MCP_ENDPOINT = "http://localhost:8000/query"  # Adjust if needed

def query_mcp(sql_query: str, timeout: Tuple[float, float] = (10.0, 600.0)) -> pd.DataFrame:
    """
    Execute a SQL query via the MCP server and return results as a DataFrame.

    The query text is POSTed as JSON (``{"query": ...}``) to ``MCP_ENDPOINT``.
    The rows found under the ``'data'`` key of the JSON response become the
    DataFrame.

    Args:
        sql_query: DuckDB SQL query string
        timeout: ``(connect, read)`` timeout in seconds handed to ``requests``.
            Without a timeout an unresponsive server would block the kernel
            indefinitely. Pass ``None`` to restore the old wait-forever
            behavior for exceptionally long queries.

    Returns:
        pandas DataFrame with query results; an empty DataFrame when the
        response carries no ``'data'`` key

    Raises:
        requests.HTTPError: if the server answers with a 4xx/5xx status
        requests.Timeout: if connecting or reading exceeds ``timeout``
    """
    response = requests.post(
        MCP_ENDPOINT,
        json={"query": sql_query},
        headers={"Content-Type": "application/json"},
        timeout=timeout,
    )
    # Surface HTTP-level failures instead of trying to parse an error page
    response.raise_for_status()

    result = response.json()

    # Convert to DataFrame
    # NOTE(review): a payload without 'data' (for example a server-side error
    # report) is indistinguishable from "no rows" here - confirm the server's
    # error schema and raise on it if one exists.
    if 'data' in result:
        return pd.DataFrame(result['data'])
    return pd.DataFrame()

def test_connection():
    """
    Smoke-test the MCP server.

    Sends a short script that configures DuckDB (thread count, httpfs, S3
    secret) and selects a constant status row, then prints the returned frame.

    Returns:
        True when the round trip succeeds; False when any exception is raised
        (the exception message is printed, not re-raised).
    """
    probe_sql = """
        SET THREADS=100;
        INSTALL httpfs; LOAD httpfs;
        CREATE OR REPLACE SECRET s3 (
            TYPE S3,
            ENDPOINT 'rook-ceph-rgw-nautiluss3.rook',
            URL_STYLE 'path',
            USE_SSL 'false'
        );
        SELECT 'Connection successful!' as status;
        """
    try:
        status_frame = query_mcp(probe_sql)
        print(status_frame)
    except Exception as err:
        # Best-effort check: report the problem and let the notebook continue
        print(f"Connection failed: {err}")
        return False
    else:
        return True

# Test connection
# Run the smoke test once; as the last expression of the cell, the returned
# True/False is also shown as the cell output.
test_connection()

In [None]:
def analyze_hydrobasins_for_country(country_code: str, country_name: str, top_n: int = 3) -> pd.DataFrame:
    """
    Rank the Level 6 HydroBASINS of one country by a composite wetland score.

    For every basin containing wetland hexes (``Z > 0``) inside the country,
    the query computes:

    * wetland hex count and area (hex count * 73.7327598 hectares per hex)
    * summed vulnerable carbon over those hexes
    * fraction of wetland hexes that also appear in the WDPA hex table
    * mean NCP score

    The composite score is the equally weighted (0.25 each) sum of area and
    carbon (each divided by the maximum among this country's basins), the
    protected fraction, and the mean NCP score. Missing components count as 0.

    Args:
        country_code: ISO 2-letter country code (e.g., 'US', 'CN')
        country_name: Full country name for display
        top_n: Number of top basins to return (positive integer)

    Returns:
        DataFrame with the top basins, their metrics and composite score, plus
        'country' and 'country_code' columns. When the query yields no rows,
        a frame with zero rows is returned instead of raising.

    Raises:
        ValueError: if country_code is not exactly two ASCII letters, or
            top_n is not a positive integer
    """
    # Both values are spliced into the SQL text below, so reject anything that
    # is not a plain two-letter code / positive integer before building it.
    if not (
        isinstance(country_code, str)
        and len(country_code) == 2
        and country_code.isascii()
        and country_code.isalpha()
    ):
        raise ValueError(
            f"country_code must be a 2-letter ISO code, got {country_code!r}"
        )
    try:
        limit = int(top_n)
    except (TypeError, ValueError):
        raise ValueError(f"top_n must be a positive integer, got {top_n!r}") from None
    if limit < 1:
        raise ValueError(f"top_n must be a positive integer, got {top_n!r}")

    query = f"""
    SET THREADS=100;
    INSTALL httpfs; LOAD httpfs;
    CREATE OR REPLACE SECRET s3 (
        TYPE S3,
        ENDPOINT 'rook-ceph-rgw-nautiluss3.rook',
        URL_STYLE 'path',
        USE_SSL 'false'
    );
    
    WITH basin_wetlands AS (
        -- One row per (basin, wetland hex) for this country. DISTINCT collapses
        -- repeated hex rows so the SUM/AVG below are not weighted by duplicates.
        SELECT DISTINCT
            hb.id as basin_id,
            hb.PFAF_ID,
            hb.UP_AREA,
            hb.SUB_AREA,
            w.h8,
            w.h0
        FROM read_parquet('s3://public-overturemaps/hex/countries.parquet') c
        JOIN read_parquet('s3://public-wetlands/glwd/hex/**') w 
            ON c.h8 = w.h8 AND c.h0 = w.h0
        JOIN read_parquet('s3://public-hydrobasins/level_06/hexes/**') hb 
            ON w.h8 = hb.h8 AND w.h0 = hb.h0
        WHERE c.country = '{country_code}' AND w.Z > 0
    ),
    protected_hexes AS (
        -- Wetland hexes present in the WDPA table, one row each, so a hex that
        -- matches several WDPA rows cannot multiply the carbon/NCP rows.
        SELECT DISTINCT bw.h8, bw.h0
        FROM basin_wetlands bw
        JOIN read_parquet('s3://public-wdpa/hex/**') wdpa 
            ON bw.h8 = wdpa.h8 AND bw.h0 = wdpa.h0
    ),
    basin_metrics AS (
        -- NOTE(review): the carbon and NCP tables are assumed to hold at most
        -- one row per hex - confirm, otherwise pre-aggregate them as well.
        SELECT 
            bw.basin_id,
            bw.PFAF_ID,
            bw.UP_AREA,
            bw.SUB_AREA,
            -- A. Total wetland area
            COUNT(DISTINCT bw.h8) as wetland_hex_count,
            ROUND(COUNT(DISTINCT bw.h8) * 73.7327598, 2) as wetland_area_hectares,
            -- B. Total carbon in wetlands
            ROUND(COALESCE(SUM(carb.carbon), 0), 2) as total_carbon,
            -- C. Protected wetland fraction
            ROUND(
                COUNT(DISTINCT CASE WHEN ph.h8 IS NOT NULL THEN bw.h8 END)::FLOAT / 
                NULLIF(COUNT(DISTINCT bw.h8), 0),
                3
            ) as protected_fraction,
            -- D. Average NCP score
            ROUND(AVG(ncp.ncp), 3) as avg_ncp_score
        FROM basin_wetlands bw
        LEFT JOIN read_parquet('s3://public-carbon/hex/vulnerable-carbon/**') carb 
            ON bw.h8 = carb.h8 AND bw.h0 = carb.h0
        LEFT JOIN protected_hexes ph 
            ON bw.h8 = ph.h8 AND bw.h0 = ph.h0
        LEFT JOIN read_parquet('s3://public-ncp/hex/ncp_biod_nathab/**') ncp 
            ON bw.h8 = ncp.h8 AND bw.h0 = ncp.h0
        GROUP BY bw.basin_id, bw.PFAF_ID, bw.UP_AREA, bw.SUB_AREA
    )
    SELECT 
        basin_id,
        PFAF_ID,
        UP_AREA as upstream_area_km2,
        SUB_AREA as basin_area_km2,
        wetland_hex_count,
        wetland_area_hectares,
        total_carbon,
        protected_fraction,
        avg_ncp_score,
        -- Composite score: area and carbon are scaled by the country maximum,
        -- then the four components are averaged. COALESCE keeps one missing
        -- component (no carbon anywhere, no NCP coverage) from turning the
        -- whole score into NULL.
        ROUND(
            (wetland_area_hectares / MAX(wetland_area_hectares) OVER () * 0.25 +
             COALESCE(total_carbon / NULLIF(MAX(total_carbon) OVER (), 0), 0) * 0.25 +
             COALESCE(protected_fraction, 0) * 0.25 +
             COALESCE(avg_ncp_score, 0) * 0.25),
            3
        ) as composite_score
    FROM basin_metrics
    WHERE wetland_hex_count > 0
    ORDER BY composite_score DESC
    LIMIT {limit};
    """
    
    print(f"\n{'='*80}")
    print(f"Analyzing: {country_name} ({country_code})")
    print(f"{'='*80}")
    
    result = query_mcp(query)
    result['country'] = country_name
    result['country_code'] = country_code
    
    # query_mcp returns a column-less frame when nothing comes back; selecting
    # the metric columns from it would raise KeyError, so report and return.
    if result.empty:
        print(f"\nNo hydrobasins with wetland data found for {country_name}.")
        return result
    
    # Display results
    print(f"\nTop {top_n} Priority Hydrobasins in {country_name}:")
    print(result[[
        'basin_id', 'PFAF_ID', 'wetland_area_hectares', 'total_carbon', 
        'protected_fraction', 'avg_ncp_score', 'composite_score'
    ]].to_string(index=False))
    
    return result

## North America: United States, Canada, and Mexico

In [None]:
# United States (code 'US'): keep the 3 Level 6 hydrobasins with the highest composite score
us_results = analyze_hydrobasins_for_country('US', 'United States', top_n=3)

In [None]:
# Canada (code 'CA'): keep the 3 Level 6 hydrobasins with the highest composite score
canada_results = analyze_hydrobasins_for_country('CA', 'Canada', top_n=3)

In [None]:
# Mexico (code 'MX'): keep the 3 Level 6 hydrobasins with the highest composite score
mexico_results = analyze_hydrobasins_for_country('MX', 'Mexico', top_n=3)

## Asia: China, South Korea, and Thailand

In [None]:
# China (code 'CN'): keep the 3 Level 6 hydrobasins with the highest composite score
china_results = analyze_hydrobasins_for_country('CN', 'China', top_n=3)

In [None]:
# South Korea (code 'KR'): keep the 3 Level 6 hydrobasins with the highest composite score
korea_results = analyze_hydrobasins_for_country('KR', 'South Korea', top_n=3)

In [None]:
# Thailand (code 'TH'): keep the 3 Level 6 hydrobasins with the highest composite score
thailand_results = analyze_hydrobasins_for_country('TH', 'Thailand', top_n=3)

## Europe: United Kingdom, France, and Spain

In [None]:
# United Kingdom (code 'GB'): keep the 3 Level 6 hydrobasins with the highest composite score
uk_results = analyze_hydrobasins_for_country('GB', 'United Kingdom', top_n=3)

In [None]:
# France (code 'FR'): keep the 3 Level 6 hydrobasins with the highest composite score
france_results = analyze_hydrobasins_for_country('FR', 'France', top_n=3)

In [None]:
# Spain (code 'ES'): keep the 3 Level 6 hydrobasins with the highest composite score
spain_results = analyze_hydrobasins_for_country('ES', 'Spain', top_n=3)

## South America: Brazil and Chile

In [None]:
# Brazil (code 'BR'): keep the 3 Level 6 hydrobasins with the highest composite score
brazil_results = analyze_hydrobasins_for_country('BR', 'Brazil', top_n=3)

In [None]:
# Chile (code 'CL'): keep the 3 Level 6 hydrobasins with the highest composite score
chile_results = analyze_hydrobasins_for_country('CL', 'Chile', top_n=3)

## Australia

In [None]:
# Australia (code 'AU'): keep the 3 Level 6 hydrobasins with the highest composite score
australia_results = analyze_hydrobasins_for_country('AU', 'Australia', top_n=3)

## India

In [None]:
# India (code 'IN'): keep the 3 Level 6 hydrobasins with the highest composite score
india_results = analyze_hydrobasins_for_country('IN', 'India', top_n=3)

## Summary: Combined Results

In [None]:
# Stack the per-country tables into a single frame with a fresh 0..n-1 index
all_results = pd.concat(
    [
        us_results, canada_results, mexico_results,        # North America
        china_results, korea_results, thailand_results,    # Asia
        uk_results, france_results, spain_results,         # Europe
        brazil_results, chile_results,                     # South America
        australia_results, india_results,
    ],
    ignore_index=True,
)

# Banner, then the key metric columns for every selected basin as plain text
print(f"\n{'=' * 80}")
print("SUMMARY: Top Priority Hydrobasins Across All Countries")
print('=' * 80)
print(
    all_results.loc[:, [
        'country', 'basin_id', 'PFAF_ID', 'wetland_area_hectares',
        'total_carbon', 'protected_fraction', 'avg_ncp_score', 'composite_score',
    ]].to_string(index=False)
)

# Write the combined table (without the index column) to the working directory
all_results.to_csv('priority_hydrobasins_results.csv', index=False)
print("\nResults saved to: priority_hydrobasins_results.csv")

## Visualization: Comparative Analysis

In [None]:
# Comparison figure: a 2x2 grid of horizontal bar charts, one metric per panel
fig, axes = plt.subplots(2, 2, figsize=(16, 12))

# Per-country aggregates over the selected basins, largest value first.
# Area and carbon are summed; protected fraction and NCP score are averaged.
by_country = all_results.groupby('country')
country_totals = by_country['wetland_area_hectares'].sum().sort_values(ascending=False)
carbon_totals = by_country['total_carbon'].sum().sort_values(ascending=False)
protected_avg = by_country['protected_fraction'].mean().sort_values(ascending=False)
ncp_avg = by_country['avg_ncp_score'].mean().sort_values(ascending=False)

# One entry per panel:
# (axes, series, bar color, x-axis label, title, pin the x-axis to [0, 1]?)
panels = [
    (axes[0, 0], country_totals, 'steelblue',
     'Total Wetland Area (hectares)',
     'A. Wetland Area in Top Hydrobasins by Country', False),
    (axes[0, 1], carbon_totals, 'darkgreen',
     'Total Vulnerable Carbon',
     'B. Carbon Storage in Top Hydrobasins by Country', False),
    (axes[1, 0], protected_avg, 'orange',
     'Average Protected Fraction',
     'C. Protection Coverage in Top Hydrobasins by Country', True),
    (axes[1, 1], ncp_avg, 'purple',
     'Average NCP Score',
     'D. Nature Contributions in Top Hydrobasins by Country', True),
]

for panel_ax, series, bar_color, x_label, panel_title, unit_range in panels:
    panel_ax.barh(series.index, series.values, color=bar_color)
    panel_ax.set_xlabel(x_label)
    panel_ax.set_title(panel_title)
    if unit_range:
        panel_ax.set_xlim(0, 1)
    panel_ax.grid(axis='x', alpha=0.3)

# Save before show() so the written PNG contains the rendered figure
plt.tight_layout()
plt.savefig('priority_hydrobasins_comparison.png', dpi=300, bbox_inches='tight')
plt.show()

print("Visualization saved to: priority_hydrobasins_comparison.png")

In [None]:
# Composite score comparison: one box per country over its selected basins
plt.figure(figsize=(14, 8))
# `hue` mirrors `x` so the palette colors each country's box. Passing `palette`
# without `hue` is deprecated in seaborn 0.13+. dodge=False keeps every box
# centered on its own tick rather than offset within a hue group.
score_ax = sns.boxplot(
    data=all_results,
    x='country',
    y='composite_score',
    hue='country',
    palette='Set2',
    dodge=False,
)
# A hue legend would only repeat the x tick labels; drop it if one was drawn
score_legend = score_ax.get_legend()
if score_legend is not None:
    score_legend.remove()
plt.xticks(rotation=45, ha='right')
plt.xlabel('Country')
plt.ylabel('Composite Score')
plt.title('Distribution of Composite Scores Across Countries (Top 3 Hydrobasins Each)')
plt.grid(axis='y', alpha=0.3)
plt.tight_layout()
# Save before show() so the written PNG contains the rendered figure
plt.savefig('composite_score_distribution.png', dpi=300, bbox_inches='tight')
plt.show()

print("Composite score distribution saved to: composite_score_distribution.png")

## Key Findings

### Methodology

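For each country, we identified the top 3 Level 6 HydroBASINS based on a composite score that equally weights four key metrics. Wetland area and carbon are scaled to 0–1 by dividing by the maximum value among that country's basins, so composite scores rank basins *within* a country and are not directly comparable across countries: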

1. **Wetland Area (25%)**: Total hectares of wetlands from GLWD
2. **Carbon Storage (25%)**: Vulnerable carbon in wetlands
3. **Protection Status (25%)**: Fraction of wetlands within WDPA protected areas
4. **Nature's Contributions (25%)**: Average NCP biodiversity score

### Interpretation

The composite score helps identify hydrobasins that balance multiple conservation priorities:
- High wetland area indicates ecological significance
- High carbon storage suggests climate mitigation importance
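- Low protection fraction highlights conservation gaps (note: the composite score as computed adds the protected fraction as a positive term, so better-protected basins score *higher*; to rank protection gaps first, score `1 - protected_fraction` instead)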
- High NCP scores indicate biodiversity value and ecosystem services

### Next Steps

The results can be used to:
- Prioritize watersheds for conservation investment
- Identify protection gaps in high-value wetlands
- Support climate and biodiversity policy decisions
- Guide restoration and protection efforts