In [1]:
from argparse import ArgumentParser
import os
import sys
import time

import contextily as ctx
import dask_geopandas
import geopandas as gpd
import matplotlib.pyplot as plt
import matplotlib.colors as colors
import numpy as np
import pandas as pd
import piecewise_regression
import rasterio
import seaborn as sns
import shapely
import xarray as xr

# from matplotlib import colors
from matplotlib.ticker import MaxNLocator
from pandarallel import pandarallel
from scipy import stats

# caution: path[0] is reserved for script path (or '' in REPL)
sys.path.insert(1, '../scripts/')

from reaches import *
from utils import *

## For plotting

In [2]:
def truncate_colormap(cmap, minval=0.0, maxval=1.0, n=100):
    new_cmap = colors.LinearSegmentedColormap.from_list(
        'trunc({n},{a:.2f},{b:.2f})'.format(n=cmap.name, a=minval, b=maxval),
        cmap(np.linspace(minval, maxval, n)))
    return new_cmap

In [3]:
cmap = plt.get_cmap('cubehelix')
new_cmap = truncate_colormap(cmap, 0.2, 0.75)

In [4]:
# cmap = plt.get_cmap('cubehelix', 5)
# new_cmap = truncate_colormap(cmap, 0.2, 0.75)

In [5]:
# color = []

# for i in np.linspace(0, 1, 3):
#     color.append(colors.rgb2hex(new_cmap(i)))

In [6]:
# palette = sns.color_palette(color)

### Parse arguments

In [7]:
# FOR NOW, SET
width_set = 'mean'

# Control flow
if width_set == 'mean':
    width = 'WidthM'
    binn = 'Bin'
elif width_set == 'min':
    width = 'WidthM_Min'
    binn = 'Bin_Min'
elif width_set == 'max':
    width = 'WidthM_Max'
    binn = 'Bin_Max'
else:
    print('Invalid width option specified, exiting.')
    # sys.exit()

In [8]:
huc2 = '01' ### SET THIS
data_path = '/nas/cee-water/cjgleason/data/SWOT/PIXC_v2_0_HUC2_' + huc2 ## HERE
# save_dir =

### Pixel Cloud

In [9]:
# Get job index
# slurm = int(os.environ['SLURM_ARRAY_TASK_ID'])
index = 106

In [10]:
mdata_path = '/nas/cee-water/cjgleason/fiona/narrow_rivers_PIXC/data/' ## HERE
file_path = os.path.join(mdata_path, 'PIXC_v2_0_HUC2_' + huc2 + '_filtered.json') ## HERE
data = open_json(file_path)

In [11]:
file_name = data[index]

In [12]:
# Get data for this tile
granule_name = file_name[:-3]
tile_name = file_name[20:28]
pass_num = int(file_name[20:23])

print(granule_name)

SWOT_L2_HR_PIXC_002_007_235R_20230811T080638_20230811T080649_PGC0_01


#### Read in PIXC

In [13]:
# Set PIXC filepath
pixc_path = os.path.join(data_path, file_name)

In [14]:
pixc_path

'/nas/cee-water/cjgleason/data/SWOT/PIXC_v2_0_HUC2_01/SWOT_L2_HR_PIXC_002_007_235R_20230811T080638_20230811T080649_PGC0_01.nc'

In [15]:
# Read in pixel group
ds_PIXC = xr.open_mfdataset(paths=pixc_path, group = 'pixel_cloud', engine='h5netcdf')

In [16]:
def bitwiseMask(ds):
    '''
    This function masks a PIXC granules: for now, it ony remove pixels
    with land classification and those with bad geolocation_qual.
    # See page 65 of PIXC PDD: https://podaac.jpl.nasa.gov/SWOT?tab=datasets-information&sections=about%2Bdata
    '''
    # Fow now, eliminate the really bad stuff
    mask = np.where((ds.classification > 1) & 
                    (ds.interferogram_qual < 2**7) & (ds.classification_qual < 2**7) &
                    (ds.geolocation_qual < 2**7) & (ds.sig0_qual < 2**7) &
                    (np.abs(ds.cross_track) > 10000) & (np.abs(ds.cross_track) < 60000))[0]
    
    print(mask.shape)
    return mask

In [17]:
# Make mask
mask = bitwiseMask(ds_PIXC)

if mask.shape[0] == 0:
    print('This granule has no pixels after masking, exiting.')
    # sys.exit(1)    

(742270,)


In [18]:
# Set desired data vars
variables = ['azimuth_index', 'range_index', 'cross_track',
             'pixel_area', 'height', 'geoid', 'solid_earth_tide', ## HERE
             'load_tide_fes', 'pole_tide', 'prior_water_prob', ## HERE
             'classification']

In [19]:
# Convert PIXC to GeoDataFrame
gdf_PIXC = makeGDF(ds=ds_PIXC, mask=mask, data_vars=variables)

In [20]:
del ds_PIXC

### Find correct HUC4s

In [21]:
### NHDPlus HR
## Find correct HUC4s
# Read in tile and HUC4 intersection data
# mdata_path = '/nas/cee-water/cjgleason/fiona/narrow_rivers_PIXC/data/' ## HERE
dtype_dic= {'tile': str, 'huc4': str, 'coverage': float}
tile_huc4 = pd.read_csv(os.path.join(mdata_path,
                                    'huc4_swot_science_tiles.csv'),
                        dtype=dtype_dic)

In [22]:
# Make list of HUC4s that intersect the tile
hucs = list(tile_huc4[(tile_huc4['tile'] == tile_name)]['huc4'])
# Limit to the current HUC2
hucs = [x for x in hucs if x.startswith(huc2)]

In [23]:
hucs

['0101', '0102', '0105']

### Read in buffered flowlines with extra

In [24]:
data_path = '/nas/cee-water/cjgleason/fiona/narrow_rivers_PIXC_data/NHD_prepped_buffered_extra/HUC2_' + huc2 + '/'

In [25]:
file_paths = []

for huc in hucs:
    file_path = data_path + 'NHDPLUS_H_' + huc + '_HU4_GDB_prepped_buffered_extra_' + width_set + '.parquet'
    file_paths.append(file_path)

In [26]:
reach_mask = dask_geopandas.read_parquet(path=file_paths, columns=['NHDPlusID', 'buffers'])

In [27]:
reach_mask = reach_mask.compute()

In [28]:
# Clip masked pixels to buffered reaches
gdf_PIXC_clip = gpd.sjoin(gdf_PIXC, reach_mask, how='inner', predicate='within').reset_index().drop(columns=['index', 'index_right'])

In [29]:
if gdf_PIXC_clip.shape[0] == 0:
    print('This granule has no pixels that intersect reaches, exiting.')
    # sys.exit() 

### Nadir track

In [30]:
# Get single pixel for selecting correct nadir segment
pixel_pt = gdf_PIXC_clip.iloc[0].geometry

In [31]:
# Find correct nadir segment and return its geometry
nadir_segment_ln = findNadir(pass_num=pass_num, pixel_pt=pixel_pt)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)


### Find alignment

In [32]:
az_nadir = calcAzimuth(line=nadir_segment_ln)

In [33]:
az_nadir

72.41249500094567

#### Read in flowlines

In [34]:
data_path = '/nas/cee-water/cjgleason/fiona/narrow_rivers_PIXC_data/NHD_prepped/HUC2_' + huc2 + '/'

In [35]:
file_paths = []

for huc in hucs:
    file_path = data_path + 'NHDPLUS_H_' + huc + '_HU4_GDB_prepped.parquet'
    file_paths.append(file_path)

In [36]:
### SHOULD THIS BE FEWER
fields = ['NHDPlusID', 'GNIS_Name', 'LengthKM', 'WidthM', 'WidthM_Min',
          'WidthM_Max', 'Bin', 'Bin_Min', 'Bin_Max', 'StreamOrde',
          'Slope', 'geometry']

In [37]:
flowlines = dask_geopandas.read_parquet(path=file_paths, columns=fields)

In [38]:
flowlines = flowlines.compute()

In [39]:
flowlines.loc[:,'geometry'] = flowlines.geometry.explode().force_2d()

In [40]:
pandarallel.initialize(nb_workers=int(os.environ.get('SLURM_CPUS_PER_TASK')))

INFO: Pandarallel will run on 2 workers.
INFO: Pandarallel will use Memory file system to transfer data between the main process and workers.


In [41]:
flowlines['temp'] = flowlines.parallel_apply(user_defined_function=calcAzSin, axis=1)

In [42]:
flowlines[['alignment', 'sinuosity']] = pd.DataFrame(flowlines['temp'].tolist(), index=flowlines.index)
flowlines = flowlines.drop(columns='temp')

### Make pseudo pixels

In [43]:
# Set along-track pixel resolution
azimuth_res = 22 # meters

In [44]:
# Make pseudo pixels
start = time.time()
gdf_PIXC_clip['pseudo_pixel'] = gdf_PIXC_clip.parallel_apply(user_defined_function=makePseudoPixels,
                                                         args=(nadir_segment_ln,
                                                               azimuth_res),
                                                         axis=1)
end = time.time()
print(end - start)

3.786194086074829


In [45]:
gdf_PIXC_clip = gdf_PIXC_clip.rename(columns={'geometry': 'pixel_centroid'}).set_geometry('pseudo_pixel').set_crs(epsg=3857)

In [46]:
## DO I NEED THIS--for areas, yes, for heights, no
# Copy geometry column as sjoin will discard it
gdf_PIXC_clip['pseudo_geom'] = gdf_PIXC_clip.geometry

# Get bounds of PIXC tile with pseudo-pixels
pseudo_bounds = gdf_PIXC_clip.total_bounds

In [47]:
gdf_PIXC_clip.columns

Index(['points', 'azimuth_index', 'range_index', 'cross_track', 'pixel_area',
       'height', 'geoid', 'solid_earth_tide', 'load_tide_fes', 'pole_tide',
       'prior_water_prob', 'klass', 'latitude', 'longitude', 'pixel_centroid',
       'NHDPlusID', 'pseudo_pixel', 'pseudo_geom'],
      dtype='object')

### Read in segments

In [None]:
data_path = '/nas/cee-water/cjgleason/fiona/narrow_rivers_PIXC_data/NHD_prepped_segmented_buffered/HUC2_' + huc2 + '/'

In [None]:
file_paths = []

for huc in hucs:
    file_path = data_path + 'NHDPLUS_H_' + huc + '_HU4_GDB_prepped_segmented_buffered_' + width_set + '.parquet'
    file_paths.append(file_path)

In [None]:
segments = dask_geopandas.read_parquet(path=file_paths)

In [None]:
segments = segments.compute()

In [None]:
# Clip segments to the extent of the pseudo-pixels
segments = segments.clip(pseudo_bounds)

In [None]:
# Keep only reaches that are fully contained in PIXC granule
segments = segments.groupby('NHDPlusID').filter(lambda x: len(x) == 10)

In [None]:
segments = segments.sort_values(by=['NHDPlusID', 'counter']).reset_index()

In [None]:
segments = segments.drop(columns='index')

In [None]:
# Keep only reaches that are fully contained in PIXC granule
segments = segments.groupby('NHDPlusID').filter(lambda x: len(x) == 10)

In [None]:
# # Calculate segment area
# segments['segment_area'] = segments.geometry.area

### Read in buffered flowlines

In [48]:
data_path = '/nas/cee-water/cjgleason/fiona/narrow_rivers_PIXC_data/NHD_prepped_buffered/HUC2_' + huc2 + '/'

In [49]:
file_paths = []

for huc in hucs:
    file_path = data_path + 'NHDPLUS_H_' + huc + '_HU4_GDB_prepped_buffered_' + width_set + '.parquet'
    file_paths.append(file_path)

In [50]:
reach_extent = dask_geopandas.read_parquet(path=file_paths, columns=['NHDPlusID', 'Slope', 'buffers'])

In [51]:
reach_extent = reach_extent.compute()

### For heights

In [52]:
height_klass = [3, 4, 6, 7]

In [53]:
for_heights = gdf_PIXC_clip[gdf_PIXC_clip.klass.isin(height_klass)].reset_index().drop(columns='index')

In [54]:
len(gdf_PIXC_clip.NHDPlusID.unique())

533

In [55]:
len(for_heights.NHDPlusID.unique())

451

In [59]:
for_heights['wse'] = for_heights.height - for_heights.geoid - for_heights.solid_earth_tide - for_heights.load_tide_fes - for_heights.pole_tide

In [60]:
for_heights = for_heights.drop(columns=['height', 'geoid', 'solid_earth_tide', 'load_tide_fes', 'pole_tide'])

In [63]:
# Clip masked pixels to buffered reaches
test = gpd.sjoin(for_heights, reach_extent, how='inner', predicate='intersects').reset_index()
test = test.drop(columns=['index', 'index_right', 'NHDPlusID_left', 'pseudo_geom'])

In [64]:
test

Unnamed: 0,points,azimuth_index,range_index,cross_track,pixel_area,prior_water_prob,klass,latitude,longitude,pixel_centroid,pseudo_pixel,wse,NHDPlusID_right,Slope
0,157,161.0,1236.0,23140.648438,556.804932,0.0,3.0,45.388645,-67.751008,POINT (-7542007.719 5682914.67),"POLYGON ((-7542017.116 5682928.557, -7542022.7...",104.567116,5.000200e+12,0.000010
1,303,161.0,1688.0,31692.291016,406.816620,0.0,3.0,45.368263,-67.644580,POINT (-7530160.255 5679684.403),"POLYGON ((-7530166.358 5679697.413, -7530172.0...",64.555763,5.000200e+12,0.001725
2,304,161.0,1689.0,31709.978516,406.590179,0.0,3.0,45.368224,-67.644382,POINT (-7530138.197 5679678.374),"POLYGON ((-7530144.294 5679691.382, -7530149.9...",64.450737,5.000200e+12,0.001725
3,305,161.0,1690.0,31727.929688,406.360626,0.0,3.0,45.368219,-67.644354,POINT (-7530135.093 5679677.527),"POLYGON ((-7530141.186 5679690.533, -7530146.8...",63.792355,5.000200e+12,0.001725
4,957,162.0,1234.0,23106.451172,557.627563,0.0,3.0,45.388916,-67.751366,POINT (-7542047.562 5682957.598),"POLYGON ((-7542056.977 5682971.489, -7542062.6...",105.092285,5.000200e+12,0.000010
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8404,1601866,1663.0,1050.0,15584.335938,826.115723,0.9,4.0,45.689281,-67.732523,POINT (-7539949.952 5730695.312),"POLYGON ((-7539965.205 5730710.831, -7539970.9...",127.364258,5.000200e+12,0.000100
8405,1601867,1663.0,1051.0,15621.580078,824.151550,0.0,4.0,45.689197,-67.732084,POINT (-7539901.11 5730681.988),"POLYGON ((-7539916.321 5730697.496, -7539922.0...",127.316948,5.000200e+12,0.000100
8406,1601868,1663.0,1052.0,15659.371094,822.167969,0.0,4.0,45.689121,-67.731682,POINT (-7539856.398 5730669.79),"POLYGON ((-7539871.564 5730685.286, -7539877.3...",127.211662,5.000200e+12,0.000100
8407,1601869,1663.0,1053.0,15697.072266,820.198608,0.0,3.0,45.689053,-67.731325,POINT (-7539816.651 5730658.947),"POLYGON ((-7539831.774 5730674.431, -7539837.5...",127.035927,5.000200e+12,0.000100


In [65]:
test = test.rename(columns={'NHDPlusID_right': 'NHDPlusID'})

In [66]:
test = pd.merge(left=test, right=flowlines[['NHDPlusID', 'geometry']], on='NHDPlusID')

In [67]:
test = test.rename(columns={'geometry': 'flowline'})

In [None]:
# del flowlines ????

In [68]:
def project_point(line, point):
    # Project point onto line
    return line.project(point)

In [69]:
test['distance'] = test.apply(lambda x: project_point(x['flowline'], x['pixel_centroid']), axis=1)

In [70]:
test.columns

Index(['points', 'azimuth_index', 'range_index', 'cross_track', 'pixel_area',
       'prior_water_prob', 'klass', 'latitude', 'longitude', 'pixel_centroid',
       'pseudo_pixel', 'wse', 'NHDPlusID', 'Slope', 'flowline', 'distance'],
      dtype='object')

In [71]:
test = test.set_geometry('pixel_centroid')

In [72]:
test = test.drop(columns=['pseudo_pixel', 'flowline'])
# for_heights = for_heights.drop(columns=['flowline'])

In [None]:
# ## ASSUMING THIS IS CORRECT--NEED TO COM BACK AND THINK ABOUT IT
# ## SHOULD I JUST BE INTERSECTING THE CENTROID? SHOULD IT BE WITH THE BUFFER? AGH
# gdf_height = gpd.sjoin(reach_extent, for_heights, predicate='intersects') # [['NHDPlusID', 'counter', 'GNIS_Name', 'height']]

In [None]:
# del for_heights

In [None]:
# gdf_height = gdf_height.rename(columns={'NHDPlusID_left': 'NHDPlusID'})
# gdf_height = gdf_height.sort_values(by=['NHDPlusID']).reset_index().drop(columns=['index', 'NHDPlusID_right'])

In [73]:
# ids = gdf_height['NHDPlusID'].unique()
ids = test['NHDPlusID'].unique()

In [76]:
len(ids)

367

In [77]:
slope_swot = []

for i in ids:
    temp = test[test['NHDPlusID'] == i][['distance', 'wse']]
    dist = temp['distance'].tolist()
    wse = temp['wse'].tolist()
    
    if len(set(dist)) == 1:
        slope_swot.append(np.nan)
    else:
        slope_swot.append(stats.linregress(x=dist, y=wse).slope)

In [78]:
temp = pd.DataFrame({'NHDPlusID': ids, 'slope_swot': slope_swot})

In [79]:
temp = pd.merge(left=temp, right=test[['NHDPlusID', 'Slope']], how='left', on='NHDPlusID')

In [80]:
temp['slope_swot'] = np.abs(temp['slope_swot'])

In [92]:
temp = temp.drop_duplicates(subset='NHDPlusID').reset_index()

In [94]:
temp[temp['slope_match'] == True]

Unnamed: 0,index,NHDPlusID,slope_swot,Slope,slope_match
1,3,5.000200e+12,0.007466,0.001725,True
2,11,5.000200e+12,0.000775,0.000144,True
5,34,5.000200e+12,0.005471,0.003958,True
6,42,5.000200e+12,0.002710,0.002230,True
7,49,5.000200e+12,0.008722,0.005288,True
...,...,...,...,...,...
356,7595,5.000200e+12,0.000825,0.000100,True
357,7642,5.000200e+12,0.004196,0.002166,True
358,7813,5.000200e+12,0.004470,0.001869,True
359,7823,5.000200e+12,0.000016,0.000010,True


In [85]:
len(temp.NHDPlusID.unique())

367

In [86]:
def checkMag(df):
    # if df['slope_swot'] > df['Slope']:
    #     ratio = df['slope_swot'] / df['Slope']
    # else:
    #     ratio = df['Slope'] / df['slope_swot']
    if np.isnan(df['slope_swot']):
        return False
    
    else:
        order1 = math.floor(math.log10(df['slope_swot']))
        order2 = math.floor(math.log10(df['Slope']))
        
    # if (ratio > 0.1) and (ratio < 10):
    #     return True
    # else:
    #     return False

        if order1 == order2:
            return True
        else:
            return False

In [87]:
temp['slope_match'] = temp.apply(func = checkMag, axis=1)

In [88]:
temp

Unnamed: 0,NHDPlusID,slope_swot,Slope,slope_match
0,5.000200e+12,0.020520,0.000010,False
1,5.000200e+12,0.020520,0.000010,False
2,5.000200e+12,0.020520,0.000010,False
3,5.000200e+12,0.007466,0.001725,True
4,5.000200e+12,0.007466,0.001725,True
...,...,...,...,...
8404,5.000200e+12,0.000004,0.000100,False
8405,5.000200e+12,0.000004,0.000100,False
8406,5.000200e+12,0.000004,0.000100,False
8407,5.000200e+12,0.000004,0.000100,False


In [None]:
ids = segments['NHDPlusID'].unique()

In [None]:
segments

In [None]:
segments[segments['NHDPlusID'] == ids[0]].Slope.unique()

In [None]:
test = gpd.sjoin(segments, gdf_PIXC_clip, predicate='intersects').sort_values(by=['NHDPlusID', 'counter']).reset_index().drop(columns=['level_0', 'index_old']) # [['NHDPlusID', 'counter', 'GNIS_Name', 'height']]

In [None]:
for_height = [3, 4, 6, 7]

In [None]:
test = test[test.klass.isin(for_height)].reset_index().drop(columns='level_0')

In [None]:
test['wse'] = test.height - test.geoid - test.solid_earth_tide - test.load_tide_fes - test.pole_tide

In [None]:
test.columns

In [None]:
test[test['NHDPlusID'] == 5000200008760].counter.unique()

In [None]:
test[test['NHDPlusID'] == 5000200008760].WidthM.unique()

In [None]:
def project_point(line, point):
    # Project point onto line
    return line.project(point)

In [None]:
test.shape

In [None]:
test = pd.merge(left=test, right=flowlines[['NHDPlusID', 'geometry']], on='NHDPlusID')

In [None]:
test = test.rename(columns={'geometry': 'flowline'})

In [None]:
test['distance'] = test.apply(lambda x: project_point(x['flowline'], x['pixel_centroid']), axis=1)

In [None]:
ids = test['NHDPlusID'].unique()

In [None]:
data_path = './figures_for_2025_03_17/'

In [None]:
palette = {#2: 'forestgreen', 
           3: 'cornflowerblue', 4: 'blue', 5: 'hotpink',
           6: 'darkkhaki', 7: 'springgreen'} # 6: 'lightsalmon', 7: 'lightblue'

In [None]:
# Make dict for legend labels
flags = ds_PIXC.classification.flag_meanings.split() # extract each flag meaning
codes = {str(idx) + '.0':k for idx, k in enumerate(flags, start=1)}

In [None]:
codes

In [None]:
test.columns

In [None]:
data = test[test['NHDPlusID'] == 5000200008760].sort_values(by='distance')

graph = sns.lmplot(data=data,
                   x='distance', y='wse',
                   hue='klass', palette=palette, fit_reg=False,
                   height=7, aspect=1.42, legend=False)
sns.regplot(data=data, x='distance', y='wse',
            color='gray',
            scatter=False,ax=graph.axes[0, 0])

ax.axline((0, 5000), slope=3., color='C0', label='by slope')

# Get the axes object
ax = graph.axes[0, 0]

plt.plot(data['distance'], abline_values, 'b')

# Get the handles and labels
handles, labels = ax.get_legend_handles_labels()
# Update the labels
new_labels = [codes[label] for label in labels]
# Update the legend
ax.legend(handles, new_labels,
          title='Classification', loc='upper right')

plt.xlabel('Distance along reach [m]')
plt.ylabel('WSE [m]')
ax.xaxis.set_major_locator(MaxNLocator(integer=True))
# plt.title('NHDPlusID: ' + str(id_.astype('int')) +
#           ', width: ' + str(round(data.iloc[0]['WidthM'], 2)) +
#           ' [m], length: ' + str(round(data.iloc[0]['LengthKM'], 2)) +
#           ' [km]')

In [None]:
test

In [None]:
test[test['NHDPlusID'] == 5000200008760]['Slope'].unique()

In [None]:
nhd_slope = flowlines[flowlines['NHDPlusID'] == 5000200008760].Slope

In [None]:
math.floor(nhd_slope)

In [None]:
temp = stats.linregress(data['distance'], data.wse)

In [None]:
temp.slope

In [None]:
# Find the slope and intercept of the best fit line
slope, intercept = np.polyfit(data['distance'], data['wse'], 1)

In [None]:
# Create a list of values in the best fit line
abline_values = [slope * i + intercept for i in data['distance']]

In [None]:
# abline_values

In [None]:
temp.intercept

In [None]:
(temp.rvalue)**2

In [None]:
# data = test[test['NHDPlusID'] == ids[30]].sort_values(by='distance')#.drop_duplicates(subset='distance')
data = test[test['NHDPlusID'] == 5000200008760].sort_values(by='distance')
x = np.array(data['distance'])
y = np.array(data['wse'])
c=np.array(data['klass'])

plt.scatter(x=x, y=y,
            c=c
            # cmap=palette
           )
plt.xlabel('Distance along reach [m]')
plt.legend()
plt.ylabel('WSE [m]')

In [None]:
pw_fit = piecewise_regression.ModelSelection(x, y, max_breakpoints=6)

In [None]:
model = pw_fit.models[0]

print("Plotting fit for model with {} breakpoint(s) . . . ".format(model.n_breakpoints))
model.plot()
pw_fit.models[0].plot_fit(color="red", linewidth=2)
plt.xlabel('Distance along reach [m]')
plt.ylabel('WSE [m]')
plt.title("Fit with {} breakpoints".format(model.n_breakpoints))
plt.show()

In [None]:
# pw_fit = piecewise_regression.Fit(x, y, start_values=[5], n_breakpoints=2)

In [None]:
# pw_fit.summary()

In [None]:
pw_fit.models[0].plot_data(color="grey", s=20)
# Pass in standard matplotlib keywords to control any of the plots
pw_fit.models[0].plot_fit(color="red", linewidth=4)
pw_fit.models[0].plot_breakpoints()
pw_fit.models[0].plot_breakpoint_confidence_intervals()
plt.xlim(x.min()-10, x.max()+10)
plt.xlabel("x")
plt.ylabel("y")
plt.show()
plt.close()

In [None]:
spl = make_splrep(x=x, y=y, k=1, s=0.90)

In [None]:
xs = np.linspace(x.min(), x.max(), 1000)

In [None]:
fig, ax = plt.subplots()
ax.scatter(x, y, color="red", s=20, zorder=20)
ax.plot(xs, spl(xs), linestyle="--", linewidth=1, color="blue", zorder=10)
ax.grid(color="grey", linestyle="--", linewidth=.5, alpha=.5)
ax.set_ylabel("WSE [m]")
ax.set_xlabel("Distance along reach [m]")
plt.show()

In [None]:
for id_ in ids:

    data=test[test['NHDPlusID'] == id_]
    if data.shape[0] > 3:

        # fig, ax = plt.subplots(figsize=(10,7))
        # sns.regplot(data=data,
        #                 x='distance', y='wse',
        #             column='klass', palette=new_cmap
        #            )
        
        graph = sns.lmplot(data=data,
                           x='distance', y='wse',
                           hue='klass', palette=palette, fit_reg=False,
                           height=7, aspect=1.42, legend=False)
        sns.regplot(data=data, x='distance', y='wse',
                    color='gray',
                    scatter=False,ax=graph.axes[0, 0])
        
        # Get the axes object
        ax = graph.axes[0, 0]

        # Get the handles and labels
        handles, labels = ax.get_legend_handles_labels()

        # Update the labels
        new_labels = [codes[label] for label in labels]

        # Update the legend
        ax.legend(handles, new_labels,
                  title='Classification', loc='upper right')
        
        plt.xlabel('Distance along reach [m]')
        plt.ylabel('WSE [m]')
        ax.xaxis.set_major_locator(MaxNLocator(integer=True))
        plt.title('NHDPlusID: ' + str(id_.astype('int')) +
                  ', width: ' + str(round(data.iloc[0]['WidthM'], 2)) +
                  ' [m], length: ' + str(round(data.iloc[0]['LengthKM'], 2)) +
                  ' [km]')
        
        plt.savefig(data_path + str(id_.astype('int')) + '.png', bbox_inches='tight')

In [None]:
fig, ax = plt.subplots()
sns.regplot(data=test[test['NHDPlusID'] == 5000200001544],
                x='counter', y='wse'
            # hue='counter', palette=new_cmap
           )

plt.xlabel('Node')
plt.ylabel('WSE [m]')
ax.xaxis.set_major_locator(MaxNLocator(integer=True))

In [None]:
fig, ax = plt.subplots()
sns.regplot(data=test[test['NHDPlusID'] == 5000200006404],
                x='counter', y='wse'
            # hue='counter', palette=new_cmap
           )

plt.xlabel('Node')
plt.ylabel('WSE [m]')
ax.xaxis.set_major_locator(MaxNLocator(integer=True))

In [None]:
fig, ax = plt.subplots()
sns.regplot(data=test[test['NHDPlusID'] == 5000200001746],
                x='counter', y='wse'
            # hue='counter', palette=new_cmap
           )

plt.xlabel('Node')
plt.ylabel('WSE [m]')
ax.xaxis.set_major_locator(MaxNLocator(integer=True))

In [None]:
# Merge the segments and pseudo-puxels by intersection
sj = gpd.sjoin(segments, gdf_PIXC_clip, how='left', predicate='intersects')

In [None]:
sj.columns

In [None]:
sj = sj.drop(columns=['index_right', 'points', 'azimuth_index',
                      'range_index',
                      # 'height', 'geoid',
                      # 'klass',
                      'latitude', 'longitude'])

In [None]:
sj

In [None]:
sj = sj.set_geometry('pseudo_geom')

In [None]:
sj = sj.groupby('NHDPlusID', as_index=False).parallel_apply(user_defined_function=specialDissolve)

In [None]:
sj = sj.reset_index().drop(columns=['level_0', 'level_1'])

In [None]:
sj['pseudo_geom_clip'] = sj.parallel_apply(user_defined_function=specialClip,
                                                         axis=1)

In [None]:
# sj[sj['NHDPlusID'] == 10000900090399].pseudo_geom.plot(cmap='hsv')

In [None]:
# Calculate the pseudo-pixel area within each node
sj['pseudo_area'] = sj.pseudo_geom_clip.area

In [None]:
sj['coverage'] = sj.pseudo_area/sj.segment_area

In [None]:
# sj_w_zero = sj.copy()

In [None]:
# sj_w_zero['coverage'] = sj_w_zero['coverage'].fillna(0)
sj['coverage'] = sj['coverage'].fillna(0)

### Do stats

In [None]:
bins = sj.Bin.unique()

#### Reaches

In [None]:
reaches_cent, reaches_thresh, reaches_min = summarizeCoverage(df=sj, binn=binn,
                                            bins=bins, counts=counts)

In [None]:
reaches_min

In [None]:
reaches_min.sort_values(by=['NHDPlusID'])[::10]

In [None]:
# d = {}
# # d_q = {}
# for i in range(1, 10):
#     threshold = i/10
#     # print(threshold)
    
#     detected = sj.groupby([binn, 'NHDPlusID'])['coverage'].apply(lambda x: (x > threshold).sum()) / 10
#     reach = detected.reset_index()
    
#     # reach = detected.groupby(binn).quantile(q=[x / 100.0 for x in range(0,100,1)]).reset_index()
        
#     d[threshold] = reach

In [None]:
# Add a column for each DataFrame indicating the key
# for threshold, data in d.items():
#     data['threshold'] = threshold
    
for threshold, data in d.items():
    data['threshold'] = threshold

In [None]:
# Concatenate all DataFrames into one
# reaches_desc = pd.concat(d.values())

reaches_cent = pd.concat(d.values()).rename(columns={'level_1': 'centile'})

In [None]:
reaches_cent

In [None]:
# reaches_cent = pd.merge(left=reaches_cent, right=counts, how='left', on=binn)

In [None]:
reaches_cent

In [None]:
reaches_min = pd.DataFrame(sj.groupby('NHDPlusID')['coverage'].min()).reset_index()

In [None]:
reaches_min = pd.merge(left=reaches_min, right=sj[['NHDPlusID', binn]], how='left', on='NHDPlusID')

In [None]:
min_cov

In [None]:
# reaches = pd.DataFrame(data=d).T

In [None]:
# reaches.columns = bins

### Write out

In [None]:
save_path = os.path.join('/nas/cee-water/cjgleason/fiona/narrow_rivers_PIXC_data/', 'PIXC_v2_0_HUC2_01')

In [None]:
# Combine node_desc
node_desc_both = pd.concat([node_desc, node_desc_w_zero], ignore_index=True)
node_desc_both

In [None]:
# Combine node_quant
node_quant_both = pd.concat([node_quant, node_quant_w_zero], ignore_index=True)
node_quant_both

In [None]:
# nodes_desc_both.to_csv(os.path.join(save_path, granule_name + '_nodes_describe.csv'))
# nodes_quant_both.to_csv(os.path.join(save_path, granule_name + '_nodes_quantile.csv'))

In [None]:
# reaches_desc.to_csv(os.path.join(save_path, granule_name + '_reaches_describe.csv'))
# reaches_quant.to_csv(os.path.join(save_path, granule_name + '_reaches_quantile.csv'))

In [None]:
test = pd.read_parquet('/nas/cee-water/cjgleason/fiona/narrow_rivers_PIXC_output/PIXC_v2_0_HUC2_01_2025_03_02_min/SWOT_L2_HR_PIXC_004_242_074L_20230930T103957_20230930T104008_PGC0_01_reaches_thresh.parquet')

In [None]:
test