In [99]:
import os

import geopandas as gpd
import pandas as pd
import numpy as np

Compare perimeter qualities from three different sources:

1. **HYSETS**: polygons sourced from WSC and USGS; where no official polygon is available, the boundary is derived from the EarthEnv DEM90, or approximated as a square centred at the officially reported station location with area equal to the reported area.
2. **Updated WSC** Water Survey of Canada (WSC) updated over 8000 basin polygons in July 2022.  These can be downloaded [here](https://collaboration.cmc.ec.gc.ca/cmc/hydrometrics/www/HydrometricNetworkBasinPolygons/).
3. **BCUB**: the BCUB polygons were filtered for the best match, where a match is measured by the polygon overlap accuracy.  Accuracy is the intersected area expressed as a percentage of the reported station drainage area.

In [109]:
# The project root is the parent of the current working directory; the
# input datasets live in its 'input_data/' sub-folder.
BASE_DIR = os.path.split(os.getcwd())[0]
DATA_DIR = os.path.join(BASE_DIR, 'input_data/')

In [113]:
# Folder, under DATA_DIR, holding the updated (2022-07-21) WSC basin polygon
# files for station 07FD001. The path deliberately ends with a separator.
# Source: https://collaboration.cmc.ec.gc.ca/cmc/hydrometrics/www/HydrometricNetworkBasinPolygons/
wsc_geom_folder = os.path.join(DATA_DIR, 'WSC_basins/07/07FD001/')

In [118]:
# Load the updated WSC drainage-basin polygon for station 07FD001 and project
# it to EPSG:3005 so it shares a CRS with the BCUB polygons loaded further down.
wsc_basin_path = os.path.join(wsc_geom_folder, '07FD001_DrainageBasin_BassinDeDrainage.shp')
current_wsc_basin = gpd.read_file(wsc_basin_path).to_crs(3005)

# The cells below refer to the WSC basin as `new_polygon`, a name that was
# never assigned anywhere in the notebook (leftover kernel state). Bind it
# here so the notebook survives Restart Kernel -> Run All.
new_polygon = current_wsc_basin

In [116]:
# Read every HYSETS watershed boundary (2020-07-30 release) into one frame
# and display it as the cell output.
hysets_boundaries_path = os.path.join(
    DATA_DIR,
    'HYSETS_data/HYSETS_watershed_boundaries/HYSETS_watershed_boundaries_20200730.shp',
)
hs_df = gpd.read_file(hysets_boundaries_path)
hs_df

Unnamed: 0,features,Name,OfficialID,FlagPAVICS,Source,Area,geometry
0,1,SAINT JOHN RIVER AT FORT KENT,01AD002,0,HYDAT,14703.921076,"POLYGON ((-69.27594 47.81781, -69.27511 47.817..."
1,1,ST. FRANCIS RIVER AT OUTLET OF GLASIER LAKE,01AD003,0,HYDAT,1358.643465,"POLYGON ((-68.95533 47.20243, -68.95589 47.202..."
2,1,MADAWASKA (RIVIERE) A 6 KM EN AVAL DU BARRAGE ...,01AD015,1,HYDAT,2712.000000,"POLYGON ((-68.30417 47.98750, -68.29805 47.970..."
3,1,FISH RIVER NEAR FORT KENT,01AE001,0,HYDAT,2245.763823,"POLYGON ((-68.59050 47.23913, -68.58887 47.238..."
4,1,BLACK BROOK NEAR ST-ANDRE-DE-MADAWASKA,01AF006,0,HYDAT,14.200000,"POLYGON ((-67.75627 47.07445, -67.75627 47.108..."
...,...,...,...,...,...,...,...
14420,1,,27038,0,Mexico,628.700000,"POLYGON ((-97.71909 20.02060, -97.71909 20.020..."
14421,1,,28108,0,Mexico,2579.040000,"POLYGON ((-96.77729 19.75277, -96.77728 19.752..."
14422,1,,10037,0,Mexico,29583.500000,"POLYGON ((-108.49167 26.94667, -108.49250 26.9..."
14423,1,,12520,0,Mexico,19315.830000,"POLYGON ((-104.05167 21.41083, -104.05250 21.4..."


In [119]:
# Pick out the HYSETS polygon for station 07FD001, declare its coordinates as
# EPSG:4326, then project to EPSG:3005 to match the other two sources.
is_target_station = hs_df['OfficialID'] == '07FD001'
hs_basin = (
    hs_df[is_target_station]
    .copy()
    .set_crs(4326)
    .to_crs(3005)
)

In [120]:
# Open the candidate BCUB polygons; the file is read from the current working
# directory. The last line displays the layer's CRS as the cell output.
bc_df = gpd.read_file('intersecting_polygons.geojson')
bc_df.crs

<Derived Projected CRS: EPSG:3005>
Name: NAD83 / BC Albers
Axis Info [cartesian]:
- E[east]: Easting (metre)
- N[north]: Northing (metre)
Area of Use:
- name: Canada - British Columbia.
- bounds: (-139.04, 48.25, -114.08, 60.01)
Coordinate Operation:
- name: British Columbia Albers
- method: Albers Equal Area
Datum: North American Datum 1983
- Ellipsoid: GRS 1980
- Prime Meridian: Greenwich

In [121]:
# Guard: the spatial join and the area calculations that follow are only
# meaningful when both layers share the same coordinate reference system.
assert bc_df.crs == new_polygon.crs

In [122]:
# Keep only the BCUB polygons that intersect the WSC basin. The join adds an
# `index_right` column holding the label of the matched row in `new_polygon`.
intersections = gpd.sjoin(bc_df, new_polygon, how='inner', predicate='intersects')


def _matched_wsc_geometry(row):
    """Return the `new_polygon` geometry that this joined row was matched to."""
    return new_polygon.loc[row['index_right']].geometry


def _overlap_area(row):
    """Area shared by the row's BCUB polygon and its matched WSC polygon."""
    return row['geometry'].intersection(_matched_wsc_geometry(row)).area


def _overlap_percentage(row):
    """Shared area expressed as a percentage of the matched WSC polygon's area."""
    return (row['intersection_area'] / _matched_wsc_geometry(row).area) * 100


# Areas are in the squared units of the layers' shared CRS.
intersections['intersection_area'] = intersections.apply(_overlap_area, axis=1)
intersections['percentage'] = intersections.apply(_overlap_percentage, axis=1)


In [123]:
# Inspect which attribute columns are available on the joined frame.
intersections.columns

Index(['id', 'centroid', 'pour_pt', 'area', 'drainage_area_km2', 'acc_polygon',
       'ppt_acc', 'ppt_lon_m_3005', 'ppt_lat_m_3005', 'elevation_m',
       'aspect_deg', 'slope_deg', 'region_code', 'land_use_forest_frac_2010',
       'land_use_forest_frac_2015', 'land_use_forest_frac_2020',
       'land_use_shrubs_frac_2010', 'land_use_shrubs_frac_2015',
       'land_use_shrubs_frac_2020', 'land_use_grass_frac_2010',
       'land_use_grass_frac_2015', 'land_use_grass_frac_2020',
       'land_use_wetland_frac_2010', 'land_use_wetland_frac_2015',
       'land_use_wetland_frac_2020', 'land_use_crops_frac_2010',
       'land_use_crops_frac_2015', 'land_use_crops_frac_2020',
       'land_use_urban_frac_2010', 'land_use_urban_frac_2015',
       'land_use_urban_frac_2020', 'land_use_water_frac_2010',
       'land_use_water_frac_2015', 'land_use_water_frac_2020',
       'land_use_snow_ice_frac_2010', 'land_use_snow_ice_frac_2015',
       'land_use_snow_ice_frac_2020', 'logk_ice_x100', 'k_stdev

In [126]:
# Rank the intersecting BCUB polygons by overlap percentage, highest first,
# keeping only the columns needed downstream.
ranked = intersections[['intersection_area', 'percentage', 'region_code', 'geometry']]
ranked = ranked.sort_values(by='percentage', ascending=False)

# NOTE(review): the row label 11 is hard-coded; it was presumably chosen by
# inspecting the ranked table - confirm it is still the intended polygon
# whenever 'intersecting_polygons.geojson' changes.
out_df = ranked.loc[ranked.index == 11]

# Save the selected polygon and print it for the record.
out_df.to_file('BCUB_polygon_best_overlap.geojson')
print(out_df)

    intersection_area  percentage region_code  \
11       3.559850e+09   98.710565         PCR   

                                             geometry  
11  POLYGON ((1306830.742 1212985.886, 1306831.703...  


In [46]:
# Bokeh is used for the interactive outline comparison plotted below.
from bokeh.plotting import figure, show
# NOTE(review): ColumnDataSource is not used by any cell visible here.
from bokeh.models import ColumnDataSource
from bokeh.io import output_notebook
# Render Bokeh figures inline in the notebook.
output_notebook()

In [94]:
def plot_all_geometries(gdf):
    """Draw the outline of every (multi)polygon in ``gdf`` on one Bokeh figure.

    Each row is drawn in black with its own dash pattern and appears in the
    legend under the row's ``name`` value. All parts of a MultiPolygon share
    the dash pattern, colour, width and legend entry of their row.

    Parameters
    ----------
    gdf : GeoDataFrame-like
        Must provide ``iterrows()``; each row needs a ``'name'`` entry (used
        as the legend label) and a ``'geometry'`` entry. Only ``Polygon`` and
        ``MultiPolygon`` geometries are drawn; other types are skipped. The
        axes are labelled Longitude/Latitude and no reprojection is done
        here, so pass geographic coordinates.

    Returns
    -------
    The Bokeh figure. It is not shown here; the caller decides when to
    display it.
    """
    p = figure(title="", x_axis_label='Longitude', y_axis_label='Latitude',
               width=600, height=400)

    dash_styles = ['solid', 'dashed', 'dotted']

    for n, (_, row) in enumerate(gdf.iterrows()):
        name = row['name']
        geometry = row['geometry']
        # Wrap around the dash patterns so more than three rows do not raise
        # an IndexError.
        dash = dash_styles[n % len(dash_styles)]

        if geometry.geom_type == 'Polygon':
            parts = [geometry]
        elif geometry.geom_type == 'MultiPolygon':
            # Shapely >= 2.0 multi-part geometries are not iterable; the
            # individual polygons are exposed through `.geoms`.
            parts = list(geometry.geoms)
        else:
            # Add more conditions for other geometry types if necessary
            parts = []

        for poly in parts:
            x, y = poly.exterior.xy
            p.line(x, y, line_width=3, color='black',
                   legend_label=name, line_dash=dash)

    p.legend.location = 'bottom_right'
    p.toolbar.autohide = True

    return p

In [95]:
# Stack the three basin outlines in a fixed order (HYSETS, WSC, BCUB) so the
# labels assigned below line up with their rows. `new_polygon` must already
# hold the WSC basin polygon, and the label list requires exactly three rows.
stacked = pd.concat([hs_basin, new_polygon, out_df])
labelled = gpd.GeoDataFrame(stacked, crs='EPSG:3005')
labelled['name'] = ['HYSETS', 'WSC', 'BCUB']

# Keep only what the plot needs and convert to EPSG:4326 for the
# Longitude/Latitude axes.
comb_df = labelled[['name', 'geometry']].to_crs(4326)

In [96]:
# Build the comparison figure with one outline per row of comb_df.
p1 = plot_all_geometries(comb_df)

In [97]:
# Display the comparison figure.
show(p1)