In [1]:
from os import listdir
from os.path import isfile, join
# from shapely.geometry import *
# from shapely.wkt import loads
# from shapely import LineString, MultiPoint
# from shapely.ops import nearest_points, split, snap, unary_union

import contextily as cx
import earthaccess
import geopandas as gpd
import matplotlib.pyplot as plt
from matplotlib import colors
import numpy as np
import os
import pandas as pd
import xarray as xr
import shapely

  from .autonotebook import tqdm as notebook_tqdm


**To find where to change the variables to run, ctrl + f "SET"**

### Read in and prep NHD

In [2]:
def readNHD(index):
    ## Set-up
    mdata_path = '/nas/cee-water/cjgleason/fiona/narrow_rivers_PIXC/data/'
# NEED WITH WATERBODY FOR CUSHMAN!!!!
    prep_path = '/nas/cee-water/cjgleason/fiona/narrow_rivers_PIXC_data/NHD_prepped/' # _with_waterbody

    # Define dtypes for lookup tables to preserve leading zeros
    dtype_dic= {'HUC4': str, 'HUC2': str, 'toBasin': str, 'level': str}
    # Read in HUC lookup table
    lookup = pd.read_csv(os.path.join(mdata_path, 'HUC4_lookup_no_great_lakes.csv'), dtype=dtype_dic)

    # Get current HUC2 and HUC4 IDs
    hu2 = 'HUC2_' + lookup.loc[index,'HUC4'][0:2]
    hu4 = 'NHDPLUS_H_' + lookup.loc[index,'HUC4'] + '_HU4_GDB'
    
    # Set data filepath
# NEED WITH WATERBODY FOR CUSHMAN!!!!!
    file_path = os.path.join(prep_path, hu2, hu4 + '_prepped.gpkg') # _with_waterbody

    ## Read in prepped NHD flowlines
    features = ['NHDPlusID', 'GNIS_Name', 'LengthKM', 'WidthM', 'Bin', 'geometry']
    basin = gpd.read_file(filename=file_path, columns=features, engine='pyogrio')

    # Drop reaches that are shorter than their width
    basin = basin[basin['LengthKM']*1000 > basin['WidthM']]
    
    # Make geometry 2D LineStrings
    basin['geometry'] = basin.geometry.explode().force_2d()
    return basin

In [1]:
# Set slurm job index
# (see: nas/cee-water/cjgleason/fiona/narrow_rivers_PIXC/data/HUC4_lookup_no_great_lakes.csv)
i = 4 # HUC4 0108, Connecticut

In [4]:
basin = readNHD(index=i)

In [None]:
# basin

In [None]:
# names = list(basin.GNIS_Name.unique())
# names

In [None]:
# basin[(basin['GNIS_Name'] == 'Swift River') & (basin['WidthM'] < 25)]

In [None]:
# # Are any of the Swift River reaches under 20 m wide?

# fig, ax = plt.subplots()
# basin.plot(ax=ax)
# basin[(basin['GNIS_Name'] == 'Ware River') 
#       & (basin['WidthM'] < 25)
#      ].plot(ax=ax, color='r')

# plt.xlim(-8100000, -8000000)
# plt.ylim(5200000,5300000)

#### Finding tiles with geolocation_qual < 4096 (CAN SKIP)

##### Download (one-time)

In [5]:
granules = ['*_298_080L_*', '*_341_229R_*']

**Leaf off**

In [None]:
for granule in granules:
    pixc_results = earthaccess.search_data(short_name = 'SWOT_L2_HR_PIXC_2.0',
                                           temporal = ('2023-11-15 00:00:00', '2024-04-30 23:59:59'),
                                           granule_name = granule) # Fort River
    # pixc_results
    # download
earthaccess.download(pixc_results, '../data/small_rivers/mar_2024_ver_c/leaf_off/')

**Leaf on**

In [None]:
for granule in granules:
    
    pixc_results = earthaccess.search_data(short_name = 'SWOT_L2_HR_PIXC_2.0',
                                           temporal = ('2024-05-01 00:00:00', '2024-08-05 23:59:59'),
                                           # granule_name = '*_341_229R_*'
                                           granule_name = granule) # Fort River
    # pixc_results
    # download
    earthaccess.download(pixc_results, '../data/small_rivers/mar_2024_ver_c/leaf_on/')

**#########
Need to automate only downloading the latest version of each granule**
    For now, deleting earlier versions (e.g. 02 when 03 is present) and
    original when reprocessed is present:
            "Where PIC0 and PGC0 products both exist for a particular time,
            users are advised to use the PGC0 products. This is expected
            when reprocessed LR products are generated for cycles 7-9."

In [None]:
# Paths to PIXC files
leaf_on = ['../data/small_rivers/mar_2024_ver_c/SWOT_L2_HR_PIXC_018_298_080L_20240720T131306_20240720T131317_PIC0_03.nc',
           '../data/small_rivers/mar_2024_ver_c/SWOT_L2_HR_PIXC_016_341_229R_20240610T090004_20240610T090015_PIC0_01.nc',
           '../data/small_rivers/mar_2024_ver_c/SWOT_L2_HR_PIXC_017_341_229R_20240701T054507_20240701T054518_PIC0_01.nc',
           '../data/small_rivers/mar_2024_ver_c/SWOT_L2_HR_PIXC_018_341_229R_20240722T023012_20240722T023023_PIC0_01.nc']
# I checked, and there were still no leaves in late April 2024
leaf_off = ['../data/small_rivers/mar_2024_ver_c/SWOT_L2_HR_PIXC_012_298_080L_20240317T084237_20240317T084248_PIC0_01.nc',
            '../data/small_rivers/mar_2024_ver_c/SWOT_L2_HR_PIXC_013_298_080L_20240407T052741_20240407T052752_PIC0_01.nc',
            '../data/small_rivers/mar_2024_ver_c/SWOT_L2_HR_PIXC_014_298_080L_20240428T021247_20240428T021258_PIC0_01.nc',
            '../data/small_rivers/mar_2024_ver_c/SWOT_L2_HR_PIXC_013_341_229R_20240408T184449_20240408T184500_PIC0_01.nc',
            '../data/small_rivers/mar_2024_ver_c/SWOT_L2_HR_PIXC_014_341_229R_20240429T152954_20240429T153005_PIC0_01.nc']

##### Read in and look at flags

In [None]:
# Read in xarray
ds_PIXC = xr.open_mfdataset(paths=leaf_off[4], group = 'pixel_cloud', engine='h5netcdf')

In [None]:
ds_PIXC

In [None]:
counts = pd.DataFrame(ds_PIXC.geolocation_qual).value_counts().sort_index()
counts

In [None]:
counts.sum()

#### Read in Pixel Cloud

In [21]:
def bitwiseMask(ds):
    # Fow now, eliminate the really bad stuff
    mask = np.where(np.logical_and(ds.classification > 1, ds.geolocation_qual < 2**12))[0]
    
    # # If flags < 4 exist, use those to construct mask, discard land class
    # if np.where(gdf.geolocation_qual < 4)[0].size != 0:
    #     mask = np.where(np.logical_and(gdf.classification > 1, gdf.geolocation_qual < 4))[0]
    # # If flags < 4 do not exist, construct mask with xovercal_suspect, discards land class
    # else:
    #     flags = np.logical_or.reduce([gdf.geolocation_qual == 64,
    #                                   gdf.geolocation_qual == 65,
    #                                   gdf.geolocation_qual == 66,
    #                                   gdf.geolocation_qual == 67])
    #     mask = np.where(np.logical_and(gdf.classification > 1, flags))[0]
    print(mask.shape)
    return mask

In [22]:
def makeGDF(pixc):
    # This function takes the pixel cloud, makes a GeoDataFrame,
    # renames columns as needed, projects the CRS, and returns
    # the GDF 
    
    # Create GDF
    gdf_PIXC = gpd.GeoDataFrame(ds_PIXC.classification[mask],
                        geometry=gpd.points_from_xy(ds_PIXC.longitude[mask],
                                                    ds_PIXC.latitude[mask]),
                        crs="EPSG:4326") # PIXC has no native CRS, setting same as River_SP

    gdf_PIXC.rename(columns={gdf_PIXC.columns[0]: 'klass'}, inplace=True)
    
    # Convert the crs to WGS 84 / Pseudo-Mercator
    gdf_PIXC = gdf_PIXC.to_crs(epsg=3857)
    return gdf_PIXC

In [23]:
# Set up
folder = '/nas/cee-water/cjgleason/fiona/data/small_rivers/mar_2024_ver_c/'
# SET SEASON
season = 'leaf_off' # 'leaf_off'
# SET LABEL
on_off = 'Leaf off:' # match above line

In [24]:
# Get all granules for season
files = listdir(join(folder, season))

# Remove hidden checkpoint folder
if '.ipynb_checkpoints' in files:
    files.remove('.ipynb_checkpoints')

In [25]:
files

['SWOT_L2_HR_PIXC_014_298_080L_20240428T021247_20240428T021258_PIC0_01.nc',
 'SWOT_L2_HR_PIXC_014_341_229R_20240429T152954_20240429T153005_PIC0_01.nc',
 'SWOT_L2_HR_PIXC_010_341_229R_20240206T042936_20240206T042947_PIC0_01.nc',
 'SWOT_L2_HR_PIXC_007_341_229R_20231205T141420_20231205T141431_PGC0_01.nc',
 'SWOT_L2_HR_PIXC_011_341_229R_20240227T011441_20240227T011452_PIC0_01.nc',
 'SWOT_L2_HR_PIXC_013_341_229R_20240408T184449_20240408T184500_PIC0_01.nc',
 'SWOT_L2_HR_PIXC_007_298_080L_20231204T005714_20231204T005725_PGC0_01.nc',
 'SWOT_L2_HR_PIXC_009_298_080L_20240114T182724_20240114T182735_PGC0_01.nc',
 'SWOT_L2_HR_PIXC_010_298_080L_20240204T151229_20240204T151241_PIC0_01.nc',
 'SWOT_L2_HR_PIXC_012_298_080L_20240317T084237_20240317T084248_PIC0_01.nc',
 'SWOT_L2_HR_PIXC_013_298_080L_20240407T052741_20240407T052752_PIC0_01.nc',
 'SWOT_L2_HR_PIXC_011_298_080L_20240225T115735_20240225T115746_PIC0_02.nc',
 'SWOT_L2_HR_PIXC_007_341_229R_20231205T141420_20231205T141431_PIC0_01.nc']

In [26]:
# Need to figure out how to open multiple xarrays with different
# dimensions, but for now:

# Open all granules for given season and store in list
d = []

for i in range(len(files)):
    print(i)
    # Read in xarray
    ds_PIXC = xr.open_mfdataset(paths=join(folder, season, files[i]),
                            group='pixel_cloud', engine='h5netcdf')
    if i == 0:
        # Make dict for legend labels
        flags = ds_PIXC.classification.flag_meanings.split() # extract each flag meaning
        codes = {idx:k for idx, k in enumerate(flags, start=1)}
    
    # Make mask
    mask = bitwiseMask(ds_PIXC)
    
    # If dataframe not empty after filtering
    if mask.shape != (0,):
        # Make PIXC
        gdf_PIXC = makeGDF(pixc=ds_PIXC)
        # Append to list
        d.append(gdf_PIXC)

0
(1425915,)
1
(1552029,)
2
(1665038,)
3
(1585773,)
4
(1403521,)
5
(0,)
6
(1321769,)
7
(1745775,)
8
(1507457,)
9
(1484858,)
10
(1639279,)
11
(1286163,)
12
(1585459,)


In [27]:
# Merge masked GeoDataFrames
gdf_PIXC = pd.concat(d)

#### Sjoin

In [28]:
# Reaches for PTs
lower_fort = [10000900041465, 10000900053916, 10000900041464, 10000900016682,
        10000900090995, 10000900090997, 10000900078861, 10000900004532,
        10000900004534, 10000900016914, 10000900091439, 10000900041902,
        10000900079115, 10000900029888] # last reach here crosses rail trail

upper_fort = [10000900091704,
              # 10000900042159, # Not in prepped data, filtered as waterbody
              10000900029890, 10000900042160,
              10000900029892, 10000900054624, 10000900005060, 10000900054628,
              10000900067144, 10000900079368, 10000900005058, 10000900017461,
              10000900005059, 10000900091709, 10000900029896, 10000900017460,
              10000900054626] 
        
cushman = [10000900091444, 10000900091445, 10000900054636,
           10000900042171, 10000900005063,10000900067147, 10000900042169,
           10000900029899, 10000900079371, 10000900005062, 10000900017467,
           10000900054634, 10000900054635, 10000900017468, 10000900042170,
           10000900054639, 10000900017746]

# This is the beaver marsh: 10000900042169, 10000900029899, 10000900079371,
# 10000900005062, 10000900017467, 10000900054634, 10000900054635, 1000090001746, 

In [29]:
# Get just the reaches of interest
# SET REACHES
reaches = basin[basin['NHDPlusID'].isin(cushman)] #

# SET LABEL
river = 'cushman_brook' # 'upper_fort_river'

In [30]:
# Get reach boundaries
bounds = []
for i in range(reaches.shape[0]-1):
    pts = reaches.iloc[i,-1].boundary
    bounds.append(pts)
bounds = gpd.GeoSeries(bounds)

In [31]:
# Buffer reach by 1/2 channel width
reaches['geometry'] = reaches['geometry'].buffer(distance=(reaches['WidthM']/2), cap_style='flat')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)


In [32]:
# Join pixel cloud and buffered reaches 
inside = gpd.sjoin(gdf_PIXC, reaches, how='inner', predicate='within').rename(columns={'index_right': 'segment'})

In [33]:
# Which reaches have pixels?
list(set(inside.NHDPlusID) & set(reaches.NHDPlusID))

[10000900005063.0,
 10000900054636.0,
 10000900054639.0,
 10000900017746.0,
 10000900091444.0,
 10000900091445.0,
 10000900042170.0,
 10000900042171.0]

#### Plotting

In [72]:
pwd

'/nas/cee-water/cjgleason/fiona/narrow_rivers_PIXC'

In [73]:
outpath = './figures_for_PTs_2024_08_05/'

In [74]:
# Make color palette
# palette = {2: 'darkolivegreen', 3: 'slateblue', 4: 'steelblue', 5: 'hotpink',
#            6: 'saddlebrown', 7: 'darkslategray'}
palette = {2: 'darkkhaki', 3: 'cornflowerblue', 4: 'blue', 5: 'hotpink',
           6: 'lightsalmon', 7: 'lightblue'}

In [None]:
# Plot the full set of reaches
# Get labels for plot
nums = list(np.unique(inside.klass).astype('int'))
labels = [codes[x] for x in nums]

# Make cmap
cmap = colors.ListedColormap([palette[x] for x in nums])

# Plot
fig, ax = plt.subplots(figsize=(15,15))

reaches.plot(color='w', alpha=0.2, ax=ax)
inside.plot(column=inside.klass, categorical=True, 
            markersize=10, cmap=cmap, legend=True,
            legend_kwds={'labels': labels,
                         'framealpha': 1, 
                         'title_fontsize': 'medium',
                         'loc': 'upper left'},
            ax=ax)
bounds.plot(color='r', marker="s", markersize=50, ax=ax)

# Basemap
cx.add_basemap(ax, crs=gdf_PIXC.crs, source=cx.providers.CartoDB.DarkMatter, alpha=0.9)

# 
leg = ax.get_legend()
leg.set_title("Class")          
ax.title.set_text(on_off + '\n' + str(len(d)) + ' PIXC granules for ' + reaches.iloc[0, 1] + ', ' + str(inside.shape[0]) + ' pixels')
plt.savefig(fname = outpath + river +'_' + season + '_masked'+ '.png', bbox_inches='tight')

In [None]:
# Plot each reach individually
count = 0

for plusid in reaches['NHDPlusID']:
    # Subset to reach, get vars
    reach = reaches[reaches['NHDPlusID'] == plusid]
    name = reach['GNIS_Name'].iloc[0]
    wd = str(np.round(reach['WidthM'].iloc[0], decimals=2))
    ln = str(np.round(reach['LengthKM'].iloc[0], decimals=2))
    
    points = inside[inside['NHDPlusID'] == plusid]
    
    # Get labels for plot
    nums = list(np.unique(points.klass).astype('int'))
    labels = [codes[x] for x in nums]

    # Make cmap
    cmap = colors.ListedColormap([palette[x] for x in nums])

    # Plot
    fig, ax = plt.subplots(figsize=(15,15))

    reach.plot(color='w', alpha=0.2, ax=ax)
    if points.size != 0:
        points.plot(column=points.klass, categorical=True,
                    # markersize=10,
                    cmap=cmap, legend=True,
                    legend_kwds={'labels': labels,
                                 'framealpha': 1, 
                                 'title_fontsize': 'medium',
                                 'loc': 'upper left'},
                    ax=ax)
        leg = ax.get_legend()
        leg.set_title("Class")
    if name == None:
        ax.title.set_text(on_off + ' NULL' + ', NHDPlusID ' + str(int(plusid)) + ', ' + wd + ' m wide, ' + ln + ' km long, \n' + str(points.shape[0]) + ' pixels')
    else:
        ax.title.set_text(on_off + ' ' + name + ', NHDPlusID ' + str(int(plusid)) + ', ' + wd + ' m wide, ' + ln + ' km long, \n' + str(points.shape[0]) + ' pixels')

    # Basemap
    cx.add_basemap(ax, crs=gdf_PIXC.crs, source=cx.providers.CartoDB.DarkMatter,
                   # zoom=20, 
                   alpha=0.9)
    
    # Save fig
    plt.savefig(fname = outpath + river +'_' + season + '_masked_' + str(int(plusid)) 
                # + '_' + str(count) 
                + '.png', bbox_inches='tight')    
    count += 1

#### Scraps

In [None]:
# Get bounds of PIXC, construct polygon
bounds_PIXC = gdf_PIXC.total_bounds
poly = box(bounds_PIXC[0], bounds_PIXC[1], bounds_PIXC[2], bounds_PIXC[3])

In [None]:
crop_basin = basin.clip(poly)

In [None]:
bins = np.unique(crop_basin.Bin)
bins