In [1]:
from argparse import ArgumentParser
import os
import sys
import time

import geopandas as gpd
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import rasterio
import shapely
import xarray as xr

# from matplotlib import colors
from pandarallel import pandarallel

from reaches import *
from utils import *

### Parse arguments

In [2]:
# FOR NOW, SET
width_set = 'min'

# Control flow
if width_set == 'mean':
    width = 'WidthM'
    binn = 'Bin'
elif width_set == 'min':
    width = 'WidthM_Min'
    binn = 'Bin_Min'
elif width_set == 'max':
    width = 'WidthM_Max'
    binn = 'Bin_Max'
else:
    print('Invalid width option specified, exiting.')
    # sys.exit()

In [3]:
huc2 = '01' ### SET THIS
data_path = '/nas/cee-water/cjgleason/fiona/data/PIXC_v2_0_HUC2_' + huc2
# save_dir =

### Pixel Cloud

In [4]:
# Get job index
# slurm = int(os.environ['SLURM_ARRAY_TASK_ID'])
index = 3032

In [5]:
huc2 = '01'
file_path = '/nas/cee-water/cjgleason/fiona/narrow_rivers_PIXC/data/PIXC_v2_0_HUC2_' + huc2 + '_filtered.json'
data = open_json(file_path)

In [6]:
file_name = data[index]

In [7]:
# Get data for this tile
granule_name = file_name[:-3]
tile_name = file_name[20:28]
pass_num = int(file_name[20:23])

print(granule_name)

SWOT_L2_HR_PIXC_019_063_232R_20240802T005329_20240802T005340_PIC0_01


#### Read in PIXC

In [8]:
# Set PIXC filepath
pixc_path = os.path.join(data_path, file_name)

In [9]:
# Read in pixel group
ds_PIXC = xr.open_mfdataset(paths=pixc_path, group = 'pixel_cloud', engine='h5netcdf')

In [10]:
# Make mask
mask = bitwiseMask(ds_PIXC)

if mask.shape[0] == 0:
    print('This granule has no pixels after masking, exiting.')
    # sys.exit(1)    

(140257,)


In [11]:
# Set desired data vars
variables = ['azimuth_index', 'range_index', 'cross_track',
             'pixel_area', 'height', 'geoid', 'prior_water_prob',
             'classification']

In [12]:
# Convert PIXC to GeoDataFrame
gdf_PIXC = makeGDF(ds=ds_PIXC, mask=mask, data_vars=variables)

### Find correct HUC4s

In [13]:
### NHDPlus HR
## Find correct HUC4s
# Read in tile and HUC4 intersection data
mdata_path = '/nas/cee-water/cjgleason/fiona/narrow_rivers_PIXC/data/'
dtype_dic= {'tile': str, 'huc4': str, 'coverage': float}
tile_huc4 = pd.read_csv(os.path.join(mdata_path,
                                    'huc4_swot_science_tiles.csv'),
                        dtype=dtype_dic)

In [14]:
# Make list of HUC4s that intersect the tile
hucs = list(tile_huc4[(tile_huc4['tile'] == tile_name)]['huc4'])
# Limit to the current HUC2
hucs = [x for x in hucs if x.startswith(huc2)]

In [15]:
hucs

['0108']

In [16]:
# Get NHD index metadata
# Define dtypes for lookup tables to preserve leading zeros
dtype_dic= {'HUC4': str, 'HUC2': str, 'toBasin': str, 'level': str}
# Read in HUC lookup table
huc_lookup = pd.read_csv(os.path.join(mdata_path,
                                  'HUC4_lookup_no_great_lakes.csv'),
                     dtype=dtype_dic)

In [17]:
# Extract indices for read-in
indices = list(huc_lookup[huc_lookup['HUC4'].isin(hucs)]['slurm_index'])

### Read in HUC4 flowliness

In [18]:
# Create merged dataframe of all flowlines intersected
if len(indices) == 1:
    # Read prepped NHD
    flowlines, _, _,  = readNHD(index=indices[0])
    # huc4_list, huc2_list = readNHD(index=indices[0])
else:
    # Initialize lists
    d = []
    # huc4_list = []
    # huc2_list = []
    # Loop through indices and store in lists
    for i in indices:
        # Read prepped NHD
        flowlines, _, _ = readNHD(index=i)
        # huc4, huc2 = readNHD(index=i)
        # Append to lists
        d.append(flowlines)
        # huc4_list.append(huc4) # I DON'T DO ANYTHING WITH THIS
        # huc2_list.append(huc2) # I DON'T DO ANYTHING WITH THIS
    # Merge GeoDataFrames
    flowlines = pd.concat(d)

type: normal
NHDPLUS_H_0108_HU4_GDB
flowlines read-in
exploded


In [19]:
# Project CRS (currently to WGS 84 / UTM zone 18N) 
flowlines = flowlines.to_crs(epsg=32618)

In [20]:
pandarallel.initialize(nb_workers=int(os.environ.get('SLURM_CPUS_PER_TASK')))

INFO: Pandarallel will run on 8 workers.
INFO: Pandarallel will use Memory file system to transfer data between the main process and workers.


In [21]:
start = time.time()
# Args are the width, cap_style, segmented=False, extra=False
# Buffering with extra distance to capture pixels that would overlap
# once converted to pseudo-pixels
flowlines['buffer'] = flowlines.parallel_apply(user_defined_function=specialBuffer,
                                                         args=(width,
                                                               'flat', False, True),
                                                         axis=1)
end = time.time()
print(end - start)

4.612061977386475


In [22]:
# Set geometry to buffered reaches
flowlines = flowlines.set_geometry('buffer').set_crs(epsg=32618)

In [27]:
# Clip masked pixels to buffered reaches
gdf_PIXC_clip = gpd.sjoin(gdf_PIXC, flowlines, how='inner', predicate='within')

In [28]:
if gdf_PIXC_clip.shape[0] == 0:
    print('This granule has no pixels that intersect reaches, exiting.')
    # sys.exit() 

In [29]:
gdf_PIXC_clip.columns

Index(['points', 'azimuth_index', 'range_index', 'cross_track', 'pixel_area',
       'height', 'geoid', 'prior_water_prob', 'klass', 'latitude', 'longitude',
       'geometry', 'index_right', 'NHDPlusID', 'GNIS_Name', 'LengthKM',
       'WidthM', 'WidthM_Min', 'WidthM_Max', 'Bin', 'Bin_Min', 'Bin_Max',
       'geometry_right'],
      dtype='object')

In [30]:
# Drop unneeded cols
gdf_PIXC_clip = gdf_PIXC_clip.drop(columns=['index_right', 'NHDPlusID',
                                            'GNIS_Name', 'LengthKM',
                                            'WidthM', 'WidthM_Min',
                                            'WidthM_Max', 'Bin', 'Bin_Min',
                                            'Bin_Max', 'geometry_right'])

In [31]:
### STOPPED HERE 02-25-25

### Nadir track

In [32]:
# Get single pixel for selecting correct nadir segment
pixel_pt = gdf_PIXC_clip.iloc[0].geometry

In [33]:
# Find correct nadir segment and return its geometry
nadir_segment_ln = findNadir(pass_num=pass_num, pixel_pt=pixel_pt)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)


### Make pseudo pixels

In [34]:
# Set along-track pixel resolution
azimuth_res = 21 # meters

In [35]:
# Make pseudo pixels
start = time.time()
gdf_PIXC_clip['pseudo_pixel'] = gdf_PIXC_clip.parallel_apply(user_defined_function=makePseudoPixels,
                                                         args=(nadir_segment_ln,
                                                               azimuth_res),
                                                         axis=1)
end = time.time()
print(end - start)

0.7610683441162109


In [36]:
# fig, ax = plt.subplots(figsize=(8,8))
# gdf_PIXC_clip.iloc[5:10]['pseudo_pixel'].plot(ax=ax, alpha=0.5, color='y')
# gdf_PIXC_clip.iloc[5:10].plot(ax=ax, markersize=5, color='hotpink')

In [37]:
# xxxWHY NOT JUST KEEP THE SAME DATA FRAME AND DROP THE UNWANTED COLS?
# pseudo = gdf_PIXC_clip.drop(columns='geometry').set_geometry('pseudo_pixel').set_crs(crs=gdf_PIXC_clip.crs)
gdf_PIXC_clip = gdf_PIXC_clip.rename(columns={'geometry': 'pixel_centroid'}).set_geometry('pseudo_pixel')

In [38]:
# Get bounds of PIXC tile
pseudo_bounds = gdf_PIXC_clip.total_bounds
# Copy geometry column as sjoin will discard it
gdf_PIXC_clip['pseudo_geom'] = gdf_PIXC_clip.geometry
# pseudo_poly = box(pseudo_bounds[0], pseudo_bounds[1],
#                       pseudo_bounds[2], pseudo_bounds[3])
# gdf_pseudo_bounds = gpd.GeoDataFrame({'geometry': [pseudo_bounds]}, crs=pseudo.crs)

In [39]:
# save_path = '/nas/cee-water/cjgleason/fiona/narrow_rivers_PIXC_data/PIXC_v2_0_pseudo_pixels_filtered/'

In [40]:
# gdf_PIXC.to_parquet(path=save_path + granule_name + '.parquet')

### Read in segments

In [111]:
# Create merged dataframe of all basins intersected
if len(indices) == 1:
    # Read prepped NHD
    segments, _, _ = readNHD(index=indices[0], segmented=True)
else:
    # Initialize lists
    d = []
    # Loop through indices and store in lists
    for i in indices:
        # Read prepped NHD
        segments, huc4, _ = readNHD(index=i, segmented=True)
        # Make column with HUC4 id
        segments['huc4_long'] = huc4
        segments['huc4'] = segments['huc4_long'].str[10:14]
        # Rename segments to geometry
        # segments = segments.rename(columns={'segments': 'geometry'}).set_geometry('geometry')
        # Append to list
        d.append(segments)
    # Merge GeoDataFrames
    segments = pd.concat(d)

type: segmented
NHDPLUS_H_0108_HU4_GDB
/nas/cee-water/cjgleason/fiona/narrow_rivers_PIXC_data/NHD_prepped_segmented/HUC2_01/NHDPLUS_H_0108_HU4_GDB_prepped_segmented.parquet
segments read-in


In [43]:
# # Cast objects to string type so they aren't dropped in groupby()
# segments['Bin'] = segments['Bin'].astype('|S')
# segments['GNIS_Name'] = segments['GNIS_Name'].astype(str).str.encode('utf-8', errors='replace').str.decode('utf-8')

In [44]:
# segments['GNIS_Name'] = segments['GNIS_Name'].astype('|S')

In [113]:
# Project CRS (currently to WGS 84 / UTM zone 18N)
segments = segments.to_crs(epsg='32618')

In [114]:
segments = segments.reset_index().rename(columns={'index': 'index_old'})

In [115]:
# Assign a unique counter within each index group
segments['counter'] = segments.groupby('NHDPlusID').cumcount()

In [116]:
# Keep only first ten segments (some reaches repeat)
segments = segments[segments['counter'] < 10]

# For HUC4_0109, 5000700035256 and 5000700072690

In [117]:
segments = segments.clip(pseudo_bounds)

In [118]:
# Keep only reaches that are fully contained in PIXC granule
segments = segments.groupby('NHDPlusID').filter(lambda x: len(x) == 10)

In [119]:
# Get number of reaches per bin
counts = pd.DataFrame(segments[binn].value_counts()).reset_index()

In [120]:
counts

Unnamed: 0,Bin_Min,count
0,"(0, 10]",15330
1,"(10, 20]",860
2,"(20, 30]",270
3,"(30, 40]",70


In [121]:
# Buffer segments
## PARALLELIZE
start = time.time()
segments['buffer'] = segments.parallel_apply(user_defined_function=specialBuffer,
                                                         args=(width,
                                                               'flat', True, False),
                                                         axis=1)
end = time.time()
print(end - start)
# segments['buffered'] = segments.buffer(distance=(segments.WidthM/2), cap_style='flat')

1.2981388568878174


In [122]:
segments = segments.set_geometry('buffer')

In [123]:
# Calculate segment area
segments['segment_area'] = segments.geometry.area

In [54]:
# segments.sort_values(['NHDPlusID', 'counter'])

In [55]:
# segments_buff = segments.geometry.buffer(distance=(segments.WidthM/2), cap_style='flat')

In [56]:
# segment_bounds = segments_buff.bounds

In [57]:
# ## Clip the pseudo pixels to the bounds of the reach
# pseudo_all = pseudo.union_all()

In [58]:
# fig, ax = plt.subplots(figsize=(8,8))
# pseudo.plot(ax=ax, column='klass')
# segments.plot(ax=ax, color='k', alpha=0.6)
# plt.xlim(824000, 826000)
# plt.ylim(4741000, 4743000)

In [59]:
# Merge the segments and pseudo-puxels by intersection
sj = gpd.sjoin(segments, gdf_PIXC_clip, how='left', predicate='intersects')

In [60]:
sj.columns

Index(['index_old', 'NHDPlusID', 'GNIS_Name', 'LengthKM', 'WidthM',
       'WidthM_Min', 'WidthM_Max', 'Bin', 'segments', 'Bin_Min', 'Bin_Max',
       'counter', 'buffer', 'segment_area', 'index_right', 'points',
       'azimuth_index', 'range_index', 'cross_track', 'pixel_area', 'height',
       'geoid', 'prior_water_prob', 'klass', 'latitude', 'longitude',
       'pixel_centroid', 'pseudo_geom'],
      dtype='object')

In [61]:
sj = sj.drop(columns=['index_right', 'points', 'azimuth_index',
                      'range_index',
                      'height', 'geoid',
                      'klass', 'latitude', 'longitude'])

In [62]:
sj = sj.set_geometry('pseudo_geom')

In [63]:
sj = sj.groupby('NHDPlusID', as_index=False).parallel_apply(user_defined_function=specialDissolve)

In [64]:
sj = sj.reset_index().drop(columns=['level_0', 'level_1'])

In [65]:
# def specialClip(df):
#     '''
#     XXX
#     '''
#     left = gpd.GeoSeries(df.pseudo_geom)
#     right = gpd.GeoSeries(df.buffer)
#     pseudo_geom_clip = left.clip(right)
#     return pseudo_geom_clip

In [66]:
sj['pseudo_geom_clip'] = sj.parallel_apply(user_defined_function=specialClip,
                                                         axis=1)

In [67]:
# sj[sj['NHDPlusID'] == 10000900090399].pseudo_geom.plot(cmap='hsv')

In [68]:
# Calculate the pseudo-pixel area within each node
sj['pseudo_area'] = sj.pseudo_geom_clip.area

In [69]:
sj['coverage'] = sj.pseudo_area/sj.segment_area

In [70]:
# sj_w_zero = sj.copy()

In [71]:
# sj_w_zero['coverage'] = sj_w_zero['coverage'].fillna(0)
sj['coverage'] = sj['coverage'].fillna(0)

In [72]:
# mask = sj.duplicated(subset=('NHDPlusID', 'counter'), keep=False)

In [73]:
# sj[mask][['NHDPlusID', 'counter', 'overlap_area']]

In [74]:
# dupe_mask = sj.duplicated(subset=['NHDPlusID', 'counter'], keep=False)

In [75]:
# sj[dupe_mask].segment_area

In [76]:
# len(sj[dupe_mask].index.unique())

In [77]:
# sj = sj.groupby(['NHDPlusID', 'counter'], as_index=False).sum('overlap_area')

In [78]:
# sj.sort_values(by=['NHDPlusID', 'counter'])

In [79]:
# sj['coverage'] = sj.overlap_area/sj.segment_area

In [80]:
# sj.sort_values(by=['NHDPlusID', 'counter'])

In [81]:
# sj[sj['overlap_area'] > 0].sort_values(['index_old', 'counter'])

In [82]:
# Merge back on Bin and GNIS_Name (dropped in groupby because they are objects)
# sj = pd.merge(left=sj, right=flowlines[['NHDPlusID', 'Bin', 'GNIS_Name']], on='NHDPlusID', how='left')

In [83]:
# sj = sj.rename(columns={'Bin_x': 'Bin', 'GNIS_Name_x': 'GNIS_Name'}).drop(columns=['Bin_y', 'GNIS_Name_y'])

### Do stats

In [84]:
bins = sj.Bin.unique()

#### Nodes

In [85]:
# nodes_mean = sj.groupby('Bin')['coverage'].mean().to_list()

In [86]:
# nodes_std = sj.groupby('Bin')['coverage'].std().to_list()

In [87]:
# nodes_count = sj.groupby('Bin')['coverage'].count().to_list()

In [88]:
# node_desc = sj.groupby('Bin')['coverage'].describe().reset_index()
# node_desc['with_zero'] = 0

In [89]:
# node_quant = pd.DataFrame(sj.groupby('Bin')['coverage'].quantile(q=[x / 100.0 for x in range(0,100,1)])).reset_index().rename(columns={'level_1': 'quantile'})
# node_quant['with_zero'] = 0

In [90]:
# plt.errorbar(x=node_desc.Bin, y=node_desc['mean'], yerr=node_desc['std'])

In [91]:
# d = {'mean': nodes_mean, 'std': nodes_std, 'count': nodes_count}

In [92]:
# nodes = pd.DataFrame(data=d).T

In [93]:
# nodes.columns = bins

#### Nodes with zeros

In [94]:
node_desc_w_zero = sj_w_zero.groupby(binn)['coverage'].describe().reset_index()
node_desc_w_zero['with_zero'] = 1
node_desc_w_zero

NameError: name 'sj_w_zero' is not defined

In [None]:
node_quant_w_zero = pd.DataFrame(sj_w_zero.groupby(binn)['coverage'].quantile(q=[x / 100.0 for x in range(0,100,1)])).reset_index().rename(columns={'level_1': 'quantile'})
node_quant_w_zero['with_zero'] = 1
node_quant_w_zero

#### Reaches

In [131]:
d = {}
# d_q = {}
for i in range(1, 10):
    threshold = i/10
    
    detected = sj.groupby([binn, 'NHDPlusID'])['coverage'].apply(lambda x: (x > threshold).sum()) / 10
    
    reach = detected.groupby(binn).quantile(q=[x / 100.0 for x in range(0,100,1)]).reset_index()
    
    d[threshold] = reach

In [132]:
# Add a column for each DataFrame indicating the key
# for threshold, data in d.items():
#     data['threshold'] = threshold
    
for threshold, data in d.items():
    data['threshold'] = threshold

In [133]:
# Concatenate all DataFrames into one
# reaches_desc = pd.concat(d.values())

reaches_cent = pd.concat(d.values()).rename(columns={'level_1': 'centile'})

In [134]:
reaches_cent = pd.merge(left=reaches_cent, right=counts, how='left', on=binn)

In [135]:
reaches_cent

Unnamed: 0,Bin_Min,centile,coverage,threshold,count
0,"(0, 10]",0.00,0.0,0.1,15330
1,"(0, 10]",0.01,0.0,0.1,15330
2,"(0, 10]",0.02,0.0,0.1,15330
3,"(0, 10]",0.03,0.0,0.1,15330
4,"(0, 10]",0.04,0.0,0.1,15330
...,...,...,...,...,...
3595,"(30, 40]",0.95,0.0,0.9,70
3596,"(30, 40]",0.96,0.0,0.9,70
3597,"(30, 40]",0.97,0.0,0.9,70
3598,"(30, 40]",0.98,0.0,0.9,70


In [143]:
reaches_min = pd.DataFrame(sj.groupby('NHDPlusID')['coverage'].min()).reset_index()

In [144]:
reaches_min = pd.merge(left=reaches_min, right=sj[['NHDPlusID', binn]], how='left', on='NHDPlusID')

In [145]:
min_cov

Unnamed: 0,NHDPlusID,coverage,Bin_Min
0,1.000090e+13,0.0,"(0, 10]"
1,1.000090e+13,0.0,"(0, 10]"
2,1.000090e+13,0.0,"(0, 10]"
3,1.000090e+13,0.0,"(0, 10]"
4,1.000090e+13,0.0,"(0, 10]"
...,...,...,...
1648,1.000090e+13,0.0,"(0, 10]"
1649,1.000090e+13,0.0,"(0, 10]"
1650,1.000090e+13,0.0,"(0, 10]"
1651,1.000090e+13,0.0,"(0, 10]"


In [None]:
# reaches = pd.DataFrame(data=d).T

In [None]:
# reaches.columns = bins

### Write out

In [None]:
save_path = os.path.join('/nas/cee-water/cjgleason/fiona/narrow_rivers_PIXC_data/', 'PIXC_v2_0_HUC2_01')

In [None]:
# Combine node_desc
node_desc_both = pd.concat([node_desc, node_desc_w_zero], ignore_index=True)
node_desc_both

In [None]:
# Combine node_quant
node_quant_both = pd.concat([node_quant, node_quant_w_zero], ignore_index=True)
node_quant_both

In [None]:
# nodes_desc_both.to_csv(os.path.join(save_path, granule_name + '_nodes_describe.csv'))
# nodes_quant_both.to_csv(os.path.join(save_path, granule_name + '_nodes_quantile.csv'))

In [None]:
# reaches_desc.to_csv(os.path.join(save_path, granule_name + '_reaches_describe.csv'))
# reaches_quant.to_csv(os.path.join(save_path, granule_name + '_reaches_quantile.csv'))

In [None]:
test = pd.read_parquet('/nas/cee-water/cjgleason/fiona/narrow_rivers_PIXC_output/PIXC_v2_0_HUC2_01_2025_02_04_min/SWOT_L2_HR_PIXC_001_270_077L_20230730T202544_20230730T202555_PGC0_01_reaches_cent.parquet')

In [None]:
test

### Conn tests

In [None]:
conn = sj[sj['NHDPlusID'] == 10000900090399]

In [None]:
conn = conn.drop(columns=['index_right', 'points', 'azimuth_index',
                      'range_index', 'cross_track', 'pixel_area',
                      'height', 'geoid', 'dlatitude_dphase',
                      'dlongitude_dphase', 'dheight_dphase',
                      'klass', 'latitude', 'longitude', ])

In [None]:
conn = conn.set_geometry('pseudo_geom')

In [None]:
conn = conn.dissolve(by='counter')

In [None]:
# conn

In [None]:
# fig, ax = plt.subplots(figsize=(15,15))
# conn.buffered.plot(ax=ax, cmap='hsv')
# conn.plot(ax=ax, color='k', alpha=0.6)

In [None]:
def specialClip(sj):
    left = gpd.GeoSeries(sj.pseudo_geom)
    right = gpd.GeoSeries(sj.buffered)
    pseudo_geom_clip = left.clip(right)
    return pseudo_geom_clip

In [None]:
conn['pseudo_geom_clip'] = conn.pseudo_geom.clip(conn.buffered)

In [None]:
conn['pseudo_geom_test'] = conn.parallel_apply(user_defined_function=specialClip,
                                               axis=1)

In [None]:
conn

In [None]:
# fig, ax = plt.subplots(figsize=(15,15))
# conn.buffered.plot(ax=ax, cmap='hsv', alpha=0.7)
# conn.pseudo_geom.plot(ax=ax, color='k')

# # Basemap
# ctx.add_basemap(ax, crs=conn.crs, source=ctx.providers.CartoDB.Positron)

# plt.axis('off')

In [None]:
# fig, ax = plt.subplots(figsize=(15,15))
# conn.buffered.plot(ax=ax, cmap='hsv', alpha=0.7)
# gpd.GeoSeries(conn.iloc[7].pseudo_geom).plot(ax=ax, color='k')

# # Basemap
# ctx.add_basemap(ax, crs=conn.crs, source=ctx.providers.CartoDB.Positron)

# plt.axis('off')

In [None]:
# fig, ax = plt.subplots(figsize=(15,15))
# conn.buffered.plot(ax=ax, cmap='hsv', alpha=0.7)
# gpd.GeoSeries(conn.iloc[7].pseudo_geom_clip).plot(ax=ax, color='k')

# # Basemap
# ctx.add_basemap(ax, crs=conn.crs, source=ctx.providers.CartoDB.Positron)

# plt.axis('off')

In [None]:
# fig, ax = plt.subplots(figsize=(15,15))
# conn.buffered.plot(ax=ax, cmap='hsv', alpha=0.7)
# gpd.GeoSeries(conn.iloc[7].pseudo_geom_test).plot(ax=ax, color='k')

# # Basemap
# ctx.add_basemap(ax, crs=conn.crs, source=ctx.providers.CartoDB.Positron)

# plt.axis('off')

In [None]:
# Calculate segment area
conn['overlap_total'] = conn.pseudo_geom.area

In [None]:
# Calculate segment area
conn['overlap_new'] = conn.pseudo_geom_clip.area

In [None]:
conn

In [None]:
conn.overlap_total / conn.segment_area

In [None]:
conn.overlap_new / conn.segment_area

In [None]:
# sj.columns

In [None]:
# sj.dtypes

In [None]:
# sj.index.unique()

In [None]:
# Find the overlap area
sj['overlap_area'] = sj.parallel_apply(lambda x: x['buffer'].intersection(x['pseudo_geom']).area if x['pseudo_geom'] is not None else 0, axis=1)

In [None]:
# sj.sort_values(by=['NHDPlusID', 'counter'])

In [None]:
conn['overlap_total'] = conn.groupby(['NHDPlusID', 'counter'])['overlap_area'].transform('sum')

In [None]:
conn