In [1]:
from argparse import ArgumentParser
import os
import sys
import time

# import contextily as ctx
import geopandas as gpd
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import rasterio
import shapely
import xarray as xr

# from matplotlib import colors
from pandarallel import pandarallel
# from shapely.geometry import box

from reaches import *
from utils import *

### Parse arguments

In [2]:
# parser = ArgumentParser(description='Please specify whether you would\
#                         like to use the min, mean, or max predicted\
#                         bankfull width for this analysis.')
# parser.add_argument('width_set', type=str, help='min, mean, or max')
# args=parser.parse_args()
# width_set = args.width_set

In [3]:
# FOR NOW, SET
width_set = 'min'

# Control flow
if width_set == 'mean':
    width = 'WidthM'
elif width_set == 'min':
    width = 'WidthM_Min'
elif width_set == 'max':
    width = 'WidthM_Max'
else:
    print('Invalid width option specified, exiting.')
    # sys.exit()

### Pixel Cloud

In [4]:
# Get PIXC index metadata
mdata_path = '/nas/cee-water/cjgleason/fiona/narrow_rivers_PIXC/data/'
dtype_dic= {'cycle': str, 'pass': str, 'tile': str, 'version': str}

# Read in HUC lookup table
pixc_lookup = pd.read_csv(os.path.join(mdata_path,
                                  'PIXC_v2_0_HUC2_01_best_files_no_exits.csv'),
                     dtype=dtype_dic).drop(columns='index')

In [5]:
# Get job index
# slurm = int(os.environ['SLURM_ARRAY_TASK_ID'])
slurm = 0 # 3032

In [6]:
# Get filepath for this tile
file_name = pixc_lookup.loc[slurm, 'files']
granule_name = file_name[:-3]
tile_name = file_name[20:28]
pass_num = int(file_name[20:23])

In [34]:
file_name

'SWOT_L2_HR_PIXC_001_242_071L_20230729T202412_20230729T202423_PGC0_01.nc'

#### Read in PIXC

In [7]:
# Set PIXC filepath
data_path = '/nas/cee-water/cjgleason/fiona/data/PIXC_v2_0_HUC2_01/'
pixc_path = os.path.join(data_path, file_name)

In [8]:
# Read in pixel group
ds_PIXC = xr.open_mfdataset(paths=pixc_path, group = 'pixel_cloud', engine='h5netcdf')

In [33]:
ds_PIXC.false_detection_rate

Unnamed: 0,Array,Chunk
Bytes,10.38 MiB,3.46 MiB
Shape,"(2720989,)","(906997,)"
Dask graph,3 chunks in 2 graph layers,3 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 10.38 MiB 3.46 MiB Shape (2720989,) (906997,) Dask graph 3 chunks in 2 graph layers Data type float32 numpy.ndarray",2720989  1,

Unnamed: 0,Array,Chunk
Bytes,10.38 MiB,3.46 MiB
Shape,"(2720989,)","(906997,)"
Dask graph,3 chunks in 2 graph layers,3 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,20.76 MiB,3.46 MiB
Shape,"(2720989,)","(453499,)"
Dask graph,6 chunks in 2 graph layers,6 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 20.76 MiB 3.46 MiB Shape (2720989,) (453499,) Dask graph 6 chunks in 2 graph layers Data type float64 numpy.ndarray",2720989  1,

Unnamed: 0,Array,Chunk
Bytes,20.76 MiB,3.46 MiB
Shape,"(2720989,)","(453499,)"
Dask graph,6 chunks in 2 graph layers,6 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,20.76 MiB,3.46 MiB
Shape,"(2720989,)","(453499,)"
Dask graph,6 chunks in 2 graph layers,6 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 20.76 MiB 3.46 MiB Shape (2720989,) (453499,) Dask graph 6 chunks in 2 graph layers Data type float64 numpy.ndarray",2720989  1,

Unnamed: 0,Array,Chunk
Bytes,20.76 MiB,3.46 MiB
Shape,"(2720989,)","(453499,)"
Dask graph,6 chunks in 2 graph layers,6 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray


In [30]:
ds_PIXC.bright_land_flag

Unnamed: 0,Array,Chunk
Bytes,10.38 MiB,10.38 MiB
Shape,"(2720989,)","(2720989,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 10.38 MiB 10.38 MiB Shape (2720989,) (2720989,) Dask graph 1 chunks in 2 graph layers Data type float32 numpy.ndarray",2720989  1,

Unnamed: 0,Array,Chunk
Bytes,10.38 MiB,10.38 MiB
Shape,"(2720989,)","(2720989,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,20.76 MiB,3.46 MiB
Shape,"(2720989,)","(453499,)"
Dask graph,6 chunks in 2 graph layers,6 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 20.76 MiB 3.46 MiB Shape (2720989,) (453499,) Dask graph 6 chunks in 2 graph layers Data type float64 numpy.ndarray",2720989  1,

Unnamed: 0,Array,Chunk
Bytes,20.76 MiB,3.46 MiB
Shape,"(2720989,)","(453499,)"
Dask graph,6 chunks in 2 graph layers,6 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,20.76 MiB,3.46 MiB
Shape,"(2720989,)","(453499,)"
Dask graph,6 chunks in 2 graph layers,6 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 20.76 MiB 3.46 MiB Shape (2720989,) (453499,) Dask graph 6 chunks in 2 graph layers Data type float64 numpy.ndarray",2720989  1,

Unnamed: 0,Array,Chunk
Bytes,20.76 MiB,3.46 MiB
Shape,"(2720989,)","(453499,)"
Dask graph,6 chunks in 2 graph layers,6 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray


In [8]:
# Make mask
mask = bitwiseMask(ds_PIXC)

if mask.shape[0] == 0:
    print('This granule has no pixels after masking, exiting.')
    # sys.exit(1)    

(288527,)


In [9]:
# Set desired data vars
variables = ['azimuth_index', 'range_index', 'cross_track',
             'pixel_area', 'height', 'geoid',
             'dlatitude_dphase', 'dlongitude_dphase',
             'dheight_dphase', 'classification']

In [10]:
# Make PIXC into GeoDataFrame
gdf_PIXC = makeGDF(ds=ds_PIXC, mask=mask, data_vars=variables)

### Find correct HUC4s

In [11]:
# Read in tile and HUC4 intersection data
dtype_dic= {'tile': str, 'huc4': str, 'coverage': float}
tile_huc4 = pd.read_csv(os.path.join(mdata_path,
                                    'huc4_swot_science_tiles.csv'),
                        dtype=dtype_dic)

In [12]:
# Make list of HUC4s that intersect our tile
hucs = list(tile_huc4[tile_huc4['tile'] == tile_name]['huc4'])

In [13]:
hucs

['0101']

In [14]:
# Get NHD index metadata
# Define dtypes for lookup tables to preserve leading zeros
dtype_dic= {'HUC4': str, 'HUC2': str, 'toBasin': str, 'level': str}
# Read in HUC lookup table
huc_lookup = pd.read_csv(os.path.join(mdata_path,
                                  'HUC4_lookup_no_great_lakes.csv'),
                     dtype=dtype_dic)

In [15]:
# Extract indices for read-in
indices = list(huc_lookup[huc_lookup['HUC4'].isin(hucs)]['slurm_index'])

### Read in HUC4 flowliness

In [16]:
# Create merged dataframe of all flowliness intersected
if len(indices) == 1:
    # Read prepped NHD
    flowlines, huc4_list, huc2_list = readNHD(index=indices[0])

else:
    # Initialize lists
    d = []
    huc4_list = []
    huc2_list = []
    
    # Loop through indices and store in lists
    for i in indices:

        # Read prepped NHD
        flowlines, huc4, huc2 = readNHD(index=i)

        # Append to lists
        d.append(flowlines)
        huc4_list.append(huc4)
        huc2_list.append(huc2)
        
    # Merge GeoDataFrames
    flowlines = pd.concat(d)

type: normal
NHDPLUS_H_0101_HU4_GDB
flowlines read-in
exploded


In [17]:
# Project CRS (currently to WGS 84 / UTM zone 18N)
flowlines = flowlines.to_crs(epsg=32618)

In [18]:
pandarallel.initialize(nb_workers=int(os.environ.get('SLURM_CPUS_PER_TASK')))

INFO: Pandarallel will run on 8 workers.
INFO: Pandarallel will use Memory file system to transfer data between the main process and workers.


In [19]:
# xxx HOW TO MAKE THIS FASTER?
# Buffer with an extra 50 m on each side to be safe
# This is beyond the max distance that the pixels
# could extend once converted to pseudo pixels
# start = time.time()
# flowlines['buffer'] = flowlines.buffer(distance=((flowlines.WidthM/2)+50), cap_style='flat')
# end = time.time()
# print(end - start)
# #  = 

In [20]:
start = time.time()
flowlines['buffer'] = flowlines.parallel_apply(user_defined_function=specialBuffer,
                                                         args=(width,
                                                               'flat', False, True),
                                                         axis=1)
end = time.time()
print(end - start)

1.7450525760650635


In [21]:
# Set geometry to buffered reaches
flowlines = flowlines.set_geometry('buffer').set_crs(epsg=32618)

In [23]:
# flowlines.crs

In [22]:
# Clip masked pixels to buffered reaches
gdf_PIXC_clip = gpd.sjoin(gdf_PIXC, flowlines, how='inner', predicate='within')

In [23]:
if gdf_PIXC_clip.shape[0] == 0:
    print('This granule has no pixels that intersect reaches, exiting.')
    # sys.exit() 

In [24]:
# Drop unneeded cols
gdf_PIXC_clip = gdf_PIXC_clip.drop(columns=['index_right',
                                            'Bin', 'GNIS_Name',
                                            'LengthKM', 'NHDPlusID',
                                            'WidthM', 'geometry_right'])

### Land Cover

In [27]:
data_path = '/nas/cee-water/cjgleason/data/NLCD/Annual_NLCD_LndCov_2023_CU_C1V0.tif'

In [28]:
land = rasterio.open(data_path)

In [34]:
land.plot()

AttributeError: 'DatasetReader' object has no attribute 'plot'

In [None]:
import rasterio
import geopandas
from rasterio.mask import mask

# Read the raster data
with rasterio.open("path/to/your/raster.tif") as src:
    raster_img = src.read(1)
    raster_profile = src.profile

# Read the polygon shapefile
gdf = geopandas.read_file("path/to/your/polygon.shp")

# Ensure the polygon's CRS matches the raster's CRS
if gdf.crs != raster_profile['crs']:
    gdf = gdf.to_crs(raster_profile['crs'])

# Get the geometry of the polygon
geoms = gdf.geometry.values

# Clip the raster with the polygon
clipped_raster, transform = mask(dataset=src, shapes=geoms, crop=True)

# Update the raster profile with new transform and dimensions
raster_profile.update({
    "height": clipped_raster.shape[1],
    "width": clipped_raster.shape[2],
    "transform": transform
})

# Save the clipped raster
with rasterio.open("path/to/output/clipped_raster.tif", "w", **raster_profile) as dest:
    dest.write(clipped_raster)

### Nadir track

In [28]:
# Get single pixel for selecting correct nadir segment
pixel_pt = gdf_PIXC_clip.iloc[0].geometry

In [29]:
# Find correct nadir segment and return its geometry
nadir_segment_ln = findNadir(pass_num=pass_num, pixel_pt=pixel_pt)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)


### Make pseudo pixels

In [30]:
# Set along-track pixel resolution
azimuth_res = 21 # meters

In [31]:
# Make pseudo pixels
start = time.time()
gdf_PIXC_clip['pseudo_pixel'] = gdf_PIXC_clip.parallel_apply(user_defined_function=makePseudoPixels,
                                                         args=(nadir_segment_ln,
                                                               azimuth_res),
                                                         axis=1)
end = time.time()
print(end - start)

6.060180902481079


In [32]:
# fig, ax = plt.subplots(figsize=(8,8))
# gdf_PIXC_clip.iloc[5:10]['pseudo_pixel'].plot(ax=ax, alpha=0.5, color='y')
# gdf_PIXC_clip.iloc[5:10].plot(ax=ax, markersize=5, color='hotpink')

In [33]:
# xxxWHY NOT JUST KEEP THE SAME DATA FRAME AND DROP THE UNWANTED COLS?
# pseudo = gdf_PIXC_clip.drop(columns='geometry').set_geometry('pseudo_pixel').set_crs(crs=gdf_PIXC_clip.crs)
gdf_PIXC_clip = gdf_PIXC_clip.rename(columns={'geometry': 'pixel_centroid'}).set_geometry('pseudo_pixel')

In [34]:
# Get bounds of PIXC tile
pseudo_bounds = gdf_PIXC_clip.total_bounds
# Copy geometry column as sjoin will discard it
gdf_PIXC_clip['pseudo_geom'] = gdf_PIXC_clip.geometry
# pseudo_poly = box(pseudo_bounds[0], pseudo_bounds[1],
#                       pseudo_bounds[2], pseudo_bounds[3])
# gdf_pseudo_bounds = gpd.GeoDataFrame({'geometry': [pseudo_bounds]}, crs=pseudo.crs)

In [35]:
# save_path = '/nas/cee-water/cjgleason/fiona/narrow_rivers_PIXC_data/PIXC_v2_0_pseudo_pixels_filtered/'

In [36]:
# gdf_PIXC.to_parquet(path=save_path + granule_name + '.parquet')

### Read in segments

In [40]:
# Create merged dataframe of all basins intersected
if len(indices) == 1:
    # Read prepped NHD
    segments, _, _ = readNHD(index=indices[0], segmented=True)
else:
    # Initialize lists
    d = []
    # Loop through indices and store in lists
    for i in indices:
        # Read prepped NHD
        segments, huc4, _ = readNHD(index=i, segmented=True)
        # Make column with HUC4 id
        segments['huc4_long'] = huc4
        segments['huc4'] = segments['huc4_long'].str[10:14]
        # Rename segments to geometry
        # segments = segments.rename(columns={'segments': 'geometry'}).set_geometry('geometry')
        # Append to list
        d.append(segments)
    # Merge GeoDataFrames
    segments = pd.concat(d)

type: segmented
NHDPLUS_H_0202_HU4_GDB
/nas/cee-water/cjgleason/fiona/narrow_rivers_PIXC_data/NHD_prepped_segmented/HUC2_02/NHDPLUS_H_0202_HU4_GDB_prepped_segmented.parquet
segments read-in
type: segmented
NHDPLUS_H_0430_HU4_GDB
/nas/cee-water/cjgleason/fiona/narrow_rivers_PIXC_data/NHD_prepped_segmented/HUC2_04/NHDPLUS_H_0430_HU4_GDB_prepped_segmented.parquet
segments read-in


In [43]:
segments.columns

Index(['NHDPlusID', 'GNIS_Name', 'LengthKM', 'WidthM', 'WidthM_Min',
       'WidthM_Max', 'Bin', 'geometry', 'huc4_long', 'huc4'],
      dtype='object')

In [38]:
# # Cast objects to string type so they aren't dropped in groupby()
# segments['Bin'] = segments['Bin'].astype('|S')
# segments['GNIS_Name'] = segments['GNIS_Name'].astype(str).str.encode('utf-8', errors='replace').str.decode('utf-8')

In [39]:
# segments['GNIS_Name'] = segments['GNIS_Name'].astype('|S')

In [40]:
# Project CRS (currently to WGS 84 / UTM zone 18N)
segments = segments.to_crs(epsg='32618')

In [41]:
segments = segments.reset_index().rename(columns={'index': 'index_old'})

In [42]:
# Assign a unique counter within each index group
segments['counter'] = segments.groupby('NHDPlusID').cumcount()

In [43]:
# Keep only first ten segments (some reaches repeat)
segments = segments[segments['counter'] < 10]

# For HUC4_0109, 5000700035256 and 5000700072690

In [44]:
segments = segments.clip(pseudo_bounds)

In [45]:
# Keep only reaches that are fully contained in PIXC granule
segments = segments.groupby('NHDPlusID').filter(lambda x: len(x) == 10)

In [46]:
# Buffer segments
## PARALLELIZE
start = time.time()
segments['buffer'] = segments.parallel_apply(user_defined_function=specialBuffer,
                                                         args=(width,
                                                               'flat', True, False),
                                                         axis=1)
end = time.time()
print(end - start)
# segments['buffered'] = segments.buffer(distance=(segments.WidthM/2), cap_style='flat')

3.546569585800171


In [47]:
segments = segments.set_geometry('buffer')

In [48]:
# Calculate segment area
segments['segment_area'] = segments.geometry.area

In [None]:
# segments.sort_values(['NHDPlusID', 'counter'])

In [None]:
# segments_buff = segments.geometry.buffer(distance=(segments.WidthM/2), cap_style='flat')

In [None]:
# segment_bounds = segments_buff.bounds

In [None]:
# ## Clip the pseudo pixels to the bounds of the reach
# pseudo_all = pseudo.union_all()

In [None]:
# fig, ax = plt.subplots(figsize=(8,8))
# pseudo.plot(ax=ax, column='klass')
# segments.plot(ax=ax, color='k', alpha=0.6)
# plt.xlim(824000, 826000)
# plt.ylim(4741000, 4743000)

In [49]:
# Merge the segments and pseudo-puxels by intersection
sj = gpd.sjoin(segments, gdf_PIXC_clip, how='left', predicate='intersects')

In [50]:
sj = sj.drop(columns=['index_right', 'points', 'azimuth_index',
                      'range_index', 'cross_track', 'pixel_area',
                      'height', 'geoid', 'dlatitude_dphase',
                      'dlongitude_dphase', 'dheight_dphase',
                      'klass', 'latitude', 'longitude', ])

In [51]:
sj = sj.set_geometry('pseudo_geom')

In [52]:
sj = sj.groupby('NHDPlusID', as_index=False).parallel_apply(user_defined_function=specialDissolve)

In [53]:
sj = sj.reset_index().drop(columns=['level_0', 'level_1'])

In [None]:
# def specialClip(df):
#     '''
#     XXX
#     '''
#     left = gpd.GeoSeries(df.pseudo_geom)
#     right = gpd.GeoSeries(df.buffer)
#     pseudo_geom_clip = left.clip(right)
#     return pseudo_geom_clip

In [54]:
sj['pseudo_geom_clip'] = sj.parallel_apply(user_defined_function=specialClip,
                                                         axis=1)

In [None]:
# sj[sj['NHDPlusID'] == 10000900090399].pseudo_geom.plot(cmap='hsv')

In [55]:
# Calculate the pseudo-pixel area within each node
sj['pseudo_area'] = sj.pseudo_geom_clip.area

In [56]:
sj['coverage'] = sj.pseudo_area/sj.segment_area

In [None]:
# sj_w_zero = sj.copy()

In [58]:
# sj_w_zero['coverage'] = sj_w_zero['coverage'].fillna(0)
sj['coverage'] = sj['coverage'].fillna(0)

In [None]:
# mask = sj.duplicated(subset=('NHDPlusID', 'counter'), keep=False)

In [None]:
# sj[mask][['NHDPlusID', 'counter', 'overlap_area']]

In [None]:
# dupe_mask = sj.duplicated(subset=['NHDPlusID', 'counter'], keep=False)

In [None]:
# sj[dupe_mask].segment_area

In [None]:
# len(sj[dupe_mask].index.unique())

In [None]:
# sj = sj.groupby(['NHDPlusID', 'counter'], as_index=False).sum('overlap_area')

In [None]:
# sj.sort_values(by=['NHDPlusID', 'counter'])

In [None]:
# sj['coverage'] = sj.overlap_area/sj.segment_area

In [None]:
# sj.sort_values(by=['NHDPlusID', 'counter'])

In [None]:
# sj[sj['overlap_area'] > 0].sort_values(['index_old', 'counter'])

In [None]:
# Merge back on Bin and GNIS_Name (dropped in groupby because they are objects)
# sj = pd.merge(left=sj, right=flowlines[['NHDPlusID', 'Bin', 'GNIS_Name']], on='NHDPlusID', how='left')

In [None]:
# sj = sj.rename(columns={'Bin_x': 'Bin', 'GNIS_Name_x': 'GNIS_Name'}).drop(columns=['Bin_y', 'GNIS_Name_y'])

### Do stats

In [60]:
bins = sj.Bin.unique()

#### Nodes

In [None]:
# nodes_mean = sj.groupby('Bin')['coverage'].mean().to_list()

In [None]:
# nodes_std = sj.groupby('Bin')['coverage'].std().to_list()

In [None]:
# nodes_count = sj.groupby('Bin')['coverage'].count().to_list()

In [None]:
# node_desc = sj.groupby('Bin')['coverage'].describe().reset_index()
# node_desc['with_zero'] = 0

In [63]:
# node_quant = pd.DataFrame(sj.groupby('Bin')['coverage'].quantile(q=[x / 100.0 for x in range(0,100,1)])).reset_index().rename(columns={'level_1': 'quantile'})
# node_quant['with_zero'] = 0

In [None]:
# plt.errorbar(x=node_desc.Bin, y=node_desc['mean'], yerr=node_desc['std'])

In [None]:
# d = {'mean': nodes_mean, 'std': nodes_std, 'count': nodes_count}

In [None]:
# nodes = pd.DataFrame(data=d).T

In [None]:
# nodes.columns = bins

#### Nodes with zeros

In [None]:
node_desc_w_zero = sj_w_zero.groupby('Bin')['coverage'].describe().reset_index()
node_desc_w_zero['with_zero'] = 1
node_desc_w_zero

In [None]:
node_quant_w_zero = pd.DataFrame(sj_w_zero.groupby('Bin')['coverage'].quantile(q=[x / 100.0 for x in range(0,100,1)])).reset_index().rename(columns={'level_1': 'quantile'})
node_quant_w_zero['with_zero'] = 1
node_quant_w_zero

#### Reaches

In [77]:
d = {}
# d_q = {}
for i in range(1, 10):
    threshold = i/10
    
    detected = sj.groupby(['Bin', 'NHDPlusID'])['coverage'].apply(lambda x: (x > threshold).sum()) / 10
    
    reach = detected.groupby('Bin').quantile(q=[x / 100.0 for x in range(0,100,1)]).reset_index()
    
    d[threshold] = reach

In [79]:
# Add a column for each DataFrame indicating the key
# for threshold, data in d.items():
#     data['threshold'] = threshold
    
for threshold, data in d.items():
    data['threshold'] = threshold

In [75]:
# Concatenate all DataFrames into one
# reaches_desc = pd.concat(d.values())

reaches_cent = pd.concat(d.values()).rename(columns={'level_1': 'quantile'})

In [76]:
reaches_cent

Unnamed: 0,Bin,quantile,coverage,threshold
0,"(0, 10]",0.00,0.0,0.1
1,"(0, 10]",0.01,0.0,0.1
2,"(0, 10]",0.02,0.0,0.1
3,"(0, 10]",0.03,0.0,0.1
4,"(0, 10]",0.04,0.0,0.1
...,...,...,...,...
1195,"(90, 100]",0.95,0.0,0.9
1196,"(90, 100]",0.96,0.0,0.9
1197,"(90, 100]",0.97,0.0,0.9
1198,"(90, 100]",0.98,0.0,0.9


In [None]:
# reaches = pd.DataFrame(data=d).T

In [None]:
# reaches.columns = bins

### Write out

In [None]:
save_path = os.path.join('/nas/cee-water/cjgleason/fiona/narrow_rivers_PIXC_data/', 'PIXC_v2_0_HUC2_01')

In [None]:
# Combine node_desc
node_desc_both = pd.concat([node_desc, node_desc_w_zero], ignore_index=True)
node_desc_both

In [None]:
# Combine node_quant
node_quant_both = pd.concat([node_quant, node_quant_w_zero], ignore_index=True)
node_quant_both

In [None]:
# nodes_desc_both.to_csv(os.path.join(save_path, granule_name + '_nodes_describe.csv'))
# nodes_quant_both.to_csv(os.path.join(save_path, granule_name + '_nodes_quantile.csv'))

In [None]:
# reaches_desc.to_csv(os.path.join(save_path, granule_name + '_reaches_describe.csv'))
# reaches_quant.to_csv(os.path.join(save_path, granule_name + '_reaches_quantile.csv'))

In [3]:
test = pd.read_parquet('/nas/cee-water/cjgleason/fiona/narrow_rivers_PIXC_output/PIXC_v2_0_HUC2_01_2025_02_04_min/SWOT_L2_HR_PIXC_001_270_077L_20230730T202544_20230730T202555_PGC0_01_reaches_cent.parquet')

In [4]:
test

Unnamed: 0,Bin,quantile,coverage,threshold
0,"(0, 10]",0.00,0.000,0.1
1,"(0, 10]",0.01,0.000,0.1
2,"(0, 10]",0.02,0.000,0.1
3,"(0, 10]",0.03,0.000,0.1
4,"(0, 10]",0.04,0.000,0.1
...,...,...,...,...
995,"(90, 100]",0.95,0.510,0.9
996,"(90, 100]",0.96,0.624,0.9
997,"(90, 100]",0.97,0.693,0.9
998,"(90, 100]",0.98,0.762,0.9


### Conn tests

In [None]:
conn = sj[sj['NHDPlusID'] == 10000900090399]

In [None]:
conn = conn.drop(columns=['index_right', 'points', 'azimuth_index',
                      'range_index', 'cross_track', 'pixel_area',
                      'height', 'geoid', 'dlatitude_dphase',
                      'dlongitude_dphase', 'dheight_dphase',
                      'klass', 'latitude', 'longitude', ])

In [None]:
conn = conn.set_geometry('pseudo_geom')

In [None]:
conn = conn.dissolve(by='counter')

In [None]:
# conn

In [None]:
# fig, ax = plt.subplots(figsize=(15,15))
# conn.buffered.plot(ax=ax, cmap='hsv')
# conn.plot(ax=ax, color='k', alpha=0.6)

In [None]:
def specialClip(sj):
    left = gpd.GeoSeries(sj.pseudo_geom)
    right = gpd.GeoSeries(sj.buffered)
    pseudo_geom_clip = left.clip(right)
    return pseudo_geom_clip

In [None]:
conn['pseudo_geom_clip'] = conn.pseudo_geom.clip(conn.buffered)

In [None]:
conn['pseudo_geom_test'] = conn.parallel_apply(user_defined_function=specialClip,
                                               axis=1)

In [None]:
conn

In [None]:
# fig, ax = plt.subplots(figsize=(15,15))
# conn.buffered.plot(ax=ax, cmap='hsv', alpha=0.7)
# conn.pseudo_geom.plot(ax=ax, color='k')

# # Basemap
# ctx.add_basemap(ax, crs=conn.crs, source=ctx.providers.CartoDB.Positron)

# plt.axis('off')

In [None]:
# fig, ax = plt.subplots(figsize=(15,15))
# conn.buffered.plot(ax=ax, cmap='hsv', alpha=0.7)
# gpd.GeoSeries(conn.iloc[7].pseudo_geom).plot(ax=ax, color='k')

# # Basemap
# ctx.add_basemap(ax, crs=conn.crs, source=ctx.providers.CartoDB.Positron)

# plt.axis('off')

In [None]:
# fig, ax = plt.subplots(figsize=(15,15))
# conn.buffered.plot(ax=ax, cmap='hsv', alpha=0.7)
# gpd.GeoSeries(conn.iloc[7].pseudo_geom_clip).plot(ax=ax, color='k')

# # Basemap
# ctx.add_basemap(ax, crs=conn.crs, source=ctx.providers.CartoDB.Positron)

# plt.axis('off')

In [None]:
# fig, ax = plt.subplots(figsize=(15,15))
# conn.buffered.plot(ax=ax, cmap='hsv', alpha=0.7)
# gpd.GeoSeries(conn.iloc[7].pseudo_geom_test).plot(ax=ax, color='k')

# # Basemap
# ctx.add_basemap(ax, crs=conn.crs, source=ctx.providers.CartoDB.Positron)

# plt.axis('off')

In [None]:
# Calculate segment area
conn['overlap_total'] = conn.pseudo_geom.area

In [None]:
# Calculate segment area
conn['overlap_new'] = conn.pseudo_geom_clip.area

In [None]:
conn

In [None]:
conn.overlap_total / conn.segment_area

In [None]:
conn.overlap_new / conn.segment_area

In [None]:
# sj.columns

In [None]:
# sj.dtypes

In [None]:
# sj.index.unique()

In [None]:
# Find the overlap area
sj['overlap_area'] = sj.parallel_apply(lambda x: x['buffer'].intersection(x['pseudo_geom']).area if x['pseudo_geom'] is not None else 0, axis=1)

In [None]:
# sj.sort_values(by=['NHDPlusID', 'counter'])

In [None]:
conn['overlap_total'] = conn.groupby(['NHDPlusID', 'counter'])['overlap_area'].transform('sum')

In [None]:
conn