In [1]:
from argparse import ArgumentParser
import os
import sys
import time

# import contextily as ctx
import geopandas as gpd
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import shapely
import xarray as xr

# from matplotlib import colors
from pandarallel import pandarallel
from shapely.geometry import box

from reaches import readNHD
from reaches import findNadir
# from reaches import readSegments
from reaches import bitwiseMask
from reaches import makeGDF
from reaches import makePseudoPixels

from utils import specialBuffer
from utils import specialClip
from utils import specialDissolve

### Parse arguments

In [25]:
# parser = ArgumentParser(description='Please specify whether you would\
#                         like to use the min, mean, or max predicted\
#                         bankfull width for this analysis.')
# parser.add_argument('width_set', type=str, help='min, mean, or max')
# args=parser.parse_args()
# width_set = args.width_set

In [24]:
# FOR NOW, SET
width_set = 'mean'

# Width option -> name of the bankfull width column it selects
WIDTH_COLS = {'mean': 'WidthM', 'min': 'WidthM_Min', 'max': 'WidthM_Max'}

if width_set not in WIDTH_COLS:
    # Stop here: continuing would leave `width_col` undefined and the
    # buffering cells further down would fail with a NameError instead
    raise ValueError('Invalid width option specified: ' + repr(width_set)
                     + '. Expected one of: mean, min, max.')

width_col = WIDTH_COLS[width_set]

### Pixel Cloud

In [2]:
# PIXC index metadata: directory, and dtypes that read the identifier
# columns as strings
mdata_path = '/nas/cee-water/cjgleason/fiona/narrow_rivers_PIXC/data/'
dtype_dic = {'cycle': str, 'pass': str, 'tile': str, 'version': str}

# Load the HUC lookup table, then discard its stored 'index' column
pixc_lookup_path = os.path.join(mdata_path,
                                'PIXC_v2_0_HUC2_01_best_files_no_exits.csv')
pixc_lookup = pd.read_csv(pixc_lookup_path, dtype=dtype_dic)
pixc_lookup = pixc_lookup.drop(columns='index')

In [3]:
# Get job index: use the SLURM array task ID when one is set in the
# environment, otherwise fall back to a fixed lookup row for interactive runs
slurm = int(os.environ.get('SLURM_ARRAY_TASK_ID', 3032))

In [4]:
# Look up this job's file name, then slice out the identifiers that sit at
# fixed character positions in that name
file_name = pixc_lookup.loc[slurm, 'files']
tile_chars = slice(20, 28)
pass_chars = slice(20, 23)
granule_name = file_name[:-3]  # name without its last three characters
tile_name = file_name[tile_chars]
pass_num = int(file_name[pass_chars])

In [5]:
pass_num

20

In [6]:
granule_name

'SWOT_L2_HR_PIXC_020_020_079L_20240821T082047_20240821T082058_PIC0_01'

#### Read in PIXC

In [7]:
# Set PIXC filepath
data_path = '/nas/cee-water/cjgleason/fiona/data/PIXC_v2_0_HUC2_01/'
pixc_path = os.path.join(data_path, file_name)

In [8]:
# Read in pixel group
ds_PIXC = xr.open_mfdataset(paths=pixc_path, group = 'pixel_cloud', engine='h5netcdf')

In [9]:
# Make mask
mask = bitwiseMask(ds_PIXC)

if mask.shape[0] == 0:
    # Stop here rather than printing and carrying on with an empty mask
    raise RuntimeError('This granule has no pixels after masking, exiting.')

(630986,)


In [10]:
# Data variables to pull from the pixel cloud, assembled from small groups
index_vars = ['azimuth_index', 'range_index', 'cross_track']
surface_vars = ['pixel_area', 'height', 'geoid']
phase_vars = ['dlatitude_dphase', 'dlongitude_dphase', 'dheight_dphase']
variables = index_vars + surface_vars + phase_vars + ['classification']

In [11]:
# Make PIXC into GeoDataFrame
gdf_PIXC = makeGDF(ds=ds_PIXC, mask=mask, data_vars=variables)

### Find correct HUC4s

In [12]:
# Load the tile / HUC4 intersection table; identifiers are read as strings
# and coverage as a float
dtype_dic = {'tile': str, 'huc4': str, 'coverage': float}
tile_huc4_path = os.path.join(mdata_path, 'huc4_swot_science_tiles.csv')
tile_huc4 = pd.read_csv(tile_huc4_path, dtype=dtype_dic)

In [13]:
# HUC4 codes from the rows of the intersection table that match this tile
on_this_tile = tile_huc4['tile'] == tile_name
hucs = list(tile_huc4.loc[on_this_tile, 'huc4'])

In [14]:
hucs

['0202', '0430']

In [15]:
# NHD index metadata: read the HUC lookup table with its code columns typed
# as strings so leading zeros are preserved
dtype_dic = {'HUC4': str, 'HUC2': str, 'toBasin': str, 'level': str}
lookup_path = os.path.join(mdata_path, 'HUC4_lookup_no_great_lakes.csv')
lookup = pd.read_csv(lookup_path, dtype=dtype_dic)

In [16]:
# Read-in indices for the lookup rows whose HUC4 is one of `hucs`
in_hucs = lookup['HUC4'].isin(hucs)
indices = list(lookup.loc[in_hucs, 'slurm_index'])

### Read in HUC4 basins

In [40]:
# Create merged dataframe of all basins intersected.
# One loop handles any number of basins, so `huc4_list` and `huc2_list` are
# always lists (one entry per basin read), including the single-basin case.
if len(indices) == 0:
    # Give a clear reason instead of failing inside pd.concat on an empty list
    raise ValueError(f'No HUC4 basins found for tile {tile_name}.')

# Initialize lists
d = []
huc4_list = []
huc2_list = []

# Loop through indices and store in lists
for idx in indices:

    # Read prepped NHD
    basin, huc4, huc2 = readNHD(index=idx)

    # Append to lists
    d.append(basin)
    huc4_list.append(huc4)
    huc2_list.append(huc2)

# Merge GeoDataFrames
basin = pd.concat(d)

NHDPLUS_H_0202_HU4_GDB
read in
exploded
NHDPLUS_H_0430_HU4_GDB
read in
exploded


In [41]:
# Project CRS (currently to WGS 84 / UTM zone 18N)
basin = basin.to_crs(epsg=32618)

In [42]:
pandarallel.initialize()

INFO: Pandarallel will run on 24 workers.
INFO: Pandarallel will use Memory file system to transfer data between the main process and workers.


In [21]:
# xxx HOW TO MAKE THIS FASTER?
# Buffer with an extra 50 m on each side to be safe
# This is beyond the max distance that the pixels
# could extend once converted to pseudo pixels
# start = time.time()
# basin['buffer'] = basin.buffer(distance=((basin.WidthM/2)+50), cap_style='flat')
# end = time.time()
# print(end - start)
# #  = 

70.56808090209961


In [43]:
# Buffer every reach row-wise in parallel and print the elapsed seconds
buffer_args = (width_col, 'flat', True)
start = time.time()
basin['buffer'] = basin.parallel_apply(
    user_defined_function=specialBuffer,
    args=buffer_args,
    axis=1,
)
end = time.time()
print(end - start)

16.742348432540894


In [44]:
# Make the buffered reaches the active geometry, then set their CRS
basin = basin.set_geometry('buffer')
basin = basin.set_crs(epsg=32618)

In [29]:
# basin.crs

In [45]:
# Keep only the masked pixels that fall within a buffered reach
gdf_PIXC_clip = gpd.sjoin(left_df=gdf_PIXC,
                          right_df=basin,
                          how='inner',
                          predicate='within')

In [46]:
if gdf_PIXC_clip.shape[0] == 0:
    # Stop here: the nadir step takes the first clipped pixel with iloc[0],
    # which fails with an unhelpful IndexError on an empty frame
    raise RuntimeError('This granule has no pixels that intersect reaches, exiting.')

In [47]:
# Remove the columns that the join brought in and are no longer needed
unneeded_cols = ['index_right', 'Bin', 'GNIS_Name', 'LengthKM',
                 'NHDPlusID', 'WidthM', 'geometry_right']
gdf_PIXC_clip = gdf_PIXC_clip.drop(columns=unneeded_cols)

In [63]:
# gdf_PIXC_clip

### Nadir track

In [49]:
# Get single pixel for selecting correct nadir segment
pixel_pt = gdf_PIXC_clip.iloc[0].geometry

In [50]:
# Find correct nadir segment and return its geometry
nadir_segment_ln = findNadir(pass_num=pass_num, pixel_pt=pixel_pt)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)


### Make pseudo pixels

In [51]:
# Set along-track pixel resolution
azimuth_res = 21 # meters

In [52]:
# Build a pseudo pixel for every clipped pixel (row-wise, in parallel) and
# print the elapsed seconds
pseudo_args = (nadir_segment_ln, azimuth_res)
start = time.time()
gdf_PIXC_clip['pseudo_pixel'] = gdf_PIXC_clip.parallel_apply(
    user_defined_function=makePseudoPixels,
    args=pseudo_args,
    axis=1,
)
end = time.time()
print(end - start)

7.336681842803955


In [38]:
# fig, ax = plt.subplots(figsize=(8,8))
# gdf_PIXC_clip.iloc[5:10]['pseudo_pixel'].plot(ax=ax, alpha=0.5, color='y')
# gdf_PIXC_clip.iloc[5:10].plot(ax=ax, markersize=5, color='hotpink')

In [53]:
# xxx Open question: why not keep the same data frame and drop the unwanted cols?
# Earlier alternative, kept for reference:
# pseudo = gdf_PIXC_clip.drop(columns='geometry').set_geometry('pseudo_pixel').set_crs(crs=gdf_PIXC_clip.crs)
# Current approach: rename the original geometry column to 'pixel_centroid'
# and make the pseudo pixels the active geometry
gdf_PIXC_clip = gdf_PIXC_clip.rename(columns={'geometry': 'pixel_centroid'})
gdf_PIXC_clip = gdf_PIXC_clip.set_geometry('pseudo_pixel')

In [59]:
# Get bounds of PIXC tile
pseudo_bounds = gdf_PIXC_clip.total_bounds
# pseudo_poly = box(pseudo_bounds[0], pseudo_bounds[1],
#                       pseudo_bounds[2], pseudo_bounds[3])
# gdf_pseudo_bounds = gpd.GeoDataFrame({'geometry': [pseudo_bounds]}, crs=pseudo.crs)

In [41]:
# save_path = '/nas/cee-water/cjgleason/fiona/narrow_rivers_PIXC_data/PIXC_v2_0_pseudo_pixels_filtered/'

In [42]:
# gdf_PIXC.to_parquet(path=save_path + granule_name + '.parquet')

### Read in segments

In [98]:
# Create merged dataframe of the segments of all basins intersected.
# Every basin goes through the same preparation, so the merged frame has the
# same columns whether the tile intersects one HUC4 or several.
# Initialize list
d = []
# Loop through indices and store in list
for idx in indices:
    # Read prepped NHD
    segments, huc4, _ = readNHD(index=idx, segmented=True)
    # Make column with HUC4 id (characters 10-13 of the long name)
    segments['huc4_long'] = huc4
    segments['huc4'] = segments['huc4_long'].str[10:14]
    # Rename segments to geometry
    segments = segments.rename(columns={'segments': 'geometry'}).set_geometry('geometry')
    # Append to list
    d.append(segments)

# Merge GeoDataFrames
segments = pd.concat(d)

NHDPLUS_H_0202_HU4_GDB
read in
NHDPLUS_H_0430_HU4_GDB
read in


In [55]:
# # Cast objects to string type so they aren't dropped in groupby()
# segments['Bin'] = segments['Bin'].astype('|S')
# segments['GNIS_Name'] = segments['GNIS_Name'].astype(str).str.encode('utf-8', errors='replace').str.decode('utf-8')

In [56]:
# segments['GNIS_Name'] = segments['GNIS_Name'].astype('|S')

In [99]:
# Project CRS (currently to WGS 84 / UTM zone 18N); the EPSG code is passed
# as an integer, matching the basin reprojection earlier in the notebook
segments = segments.to_crs(epsg=32618)

In [100]:
segments = segments.reset_index().rename(columns={'index': 'index_old'})

In [101]:
# Assign a unique counter within each index group
segments['counter'] = segments.groupby('NHDPlusID').cumcount()

In [102]:
# Keep only first ten segments (some reaches repeat)
segments = segments[segments['counter'] < 10]

# For HUC4_0109, 5000700035256 and 5000700072690

In [103]:
segments_clip = segments.clip(pseudo_bounds)

In [104]:
# Keep only reaches that are fully contained in PIXC granule
segments_clip = segments_clip.groupby('NHDPlusID').filter(lambda x: len(x) == 10)

In [105]:
# Buffer each segment row-wise in parallel and print the elapsed seconds
segment_buffer_args = (width_col, 'flat', False)
start = time.time()
segments_clip['buffer'] = segments_clip.parallel_apply(
    user_defined_function=specialBuffer,
    args=segment_buffer_args,
    axis=1,
)
end = time.time()
print(end - start)
# segments_clip['buffered'] = segments_clip.buffer(distance=(segments_clip.WidthM/2), cap_style='flat')

6.392529010772705


In [110]:
segments_clip = segments_clip.set_geometry('buffer')

In [111]:
# Calculate segment area
segments_clip['segment_area'] = segments_clip.geometry.area

In [67]:
# segments_clip.sort_values(['NHDPlusID', 'counter'])

In [68]:
# segments_buff = segments_clip.geometry.buffer(distance=(segments_clip.WidthM/2), cap_style='flat')

In [69]:
# segment_bounds = segments_buff.bounds

In [70]:
# ## Clip the pseudo pixels to the bounds of the reach
# pseudo_all = pseudo.union_all()

In [71]:
# fig, ax = plt.subplots(figsize=(8,8))
# pseudo.plot(ax=ax, column='klass')
# segments_clip.plot(ax=ax, color='k', alpha=0.6)
# plt.xlim(824000, 826000)
# plt.ylim(4741000, 4743000)

In [113]:
# Copy geometry column as sjoin will discard it
gdf_PIXC_clip['pseudo_geom'] = gdf_PIXC_clip.geometry

In [114]:
# Merge the segments and pseudo-pixels by intersection; the left join keeps
# every segment row
sj = gpd.sjoin(left_df=segments_clip,
               right_df=gdf_PIXC_clip,
               how='left',
               predicate='intersects')

In [115]:
# Pixel-level columns brought in by the join that are not used afterwards
pixel_cols = ['index_right', 'points', 'azimuth_index', 'range_index',
              'cross_track', 'pixel_area', 'height', 'geoid',
              'dlatitude_dphase', 'dlongitude_dphase', 'dheight_dphase',
              'klass', 'latitude', 'longitude']
sj = sj.drop(columns=pixel_cols)

In [116]:
sj = sj.set_geometry('pseudo_geom')

In [117]:
dissolved = sj.groupby('NHDPlusID', as_index=False).parallel_apply(user_defined_function=specialDissolve)

In [118]:
dissolved = dissolved.reset_index().drop(columns=['level_0', 'level_1'])

In [120]:
def specialClip(df):
    '''
    Clip one row's pseudo-pixel geometry to the same row's buffered segment.

    Written for row-wise use (e.g. an apply with axis=1). Defining it here
    shadows the `specialClip` imported from `utils` at the top of the notebook.

    Parameters
    ----------
    df : row-like
        Must provide a 'pseudo_geom' entry (geometry to clip) and a 'buffer'
        entry (geometry used as the clip mask).

    Returns
    -------
    geopandas.GeoSeries
        Result of clipping 'pseudo_geom' with 'buffer' as the mask.
    '''
    # Key lookup instead of attribute access: `buffer` is also the name of a
    # GeoPandas method, so `df.buffer` is ambiguous on geo-typed objects
    left = gpd.GeoSeries(df['pseudo_geom'])
    right = gpd.GeoSeries(df['buffer'])
    pseudo_geom_clip = left.clip(right)
    return pseudo_geom_clip

In [121]:
dissolved['pseudo_geom_clip'] = dissolved.parallel_apply(user_defined_function=specialClip,
                                                         axis=1)

In [82]:
# dissolved[dissolved['NHDPlusID'] == 10000900090399].pseudo_geom.plot(cmap='hsv')

In [122]:
# Calculate the pseudo-pixel area within each node
dissolved['pseudo_area'] = dissolved.pseudo_geom_clip.area

In [123]:
dissolved['coverage'] = dissolved.pseudo_area/dissolved.segment_area

In [124]:
dissolved_w_zero = dissolved.copy()

In [125]:
dissolved_w_zero['coverage'] = dissolved_w_zero['coverage'].fillna(0)

In [None]:
# mask = sj.duplicated(subset=('NHDPlusID', 'counter'), keep=False)

In [None]:
# sj[mask][['NHDPlusID', 'counter', 'overlap_area']]

In [None]:
# dupe_mask = sj.duplicated(subset=['NHDPlusID', 'counter'], keep=False)

In [None]:
# sj[dupe_mask].segment_area

In [None]:
# len(sj[dupe_mask].index.unique())

In [None]:
# sj = sj.groupby(['NHDPlusID', 'counter'], as_index=False).sum('overlap_area')

In [None]:
# sj.sort_values(by=['NHDPlusID', 'counter'])

In [None]:
# sj['coverage'] = sj.overlap_area/sj.segment_area

In [None]:
# sj.sort_values(by=['NHDPlusID', 'counter'])

In [None]:
# sj[sj['overlap_area'] > 0].sort_values(['index_old', 'counter'])

In [95]:
# Merge back on Bin and GNIS_Name (dropped in groupby because they are objects)
# dissolved = pd.merge(left=dissolved, right=basin[['NHDPlusID', 'Bin', 'GNIS_Name']], on='NHDPlusID', how='left')

In [102]:
# dissolved = dissolved.rename(columns={'Bin_x': 'Bin', 'GNIS_Name_x': 'GNIS_Name'}).drop(columns=['Bin_y', 'GNIS_Name_y'])

### Do stats

In [126]:
bins = dissolved.Bin.unique()

#### Nodes

In [98]:
# nodes_mean = dissolved.groupby('Bin')['coverage'].mean().to_list()

In [99]:
# nodes_std = dissolved.groupby('Bin')['coverage'].std().to_list()

In [105]:
# nodes_count = dissolved.groupby('Bin')['coverage'].count().to_list()

In [127]:
# Summary statistics of node coverage per width bin; with_zero = 0 marks
# this table as built from `dissolved` (coverage not zero-filled)
coverage_by_bin = dissolved.groupby('Bin')['coverage']
node_desc = coverage_by_bin.describe()
node_desc = node_desc.reset_index()
node_desc['with_zero'] = 0

In [128]:
# Coverage quantiles per width bin at levels 0.00, 0.01, ..., 0.99
quantile_levels = [x / 100.0 for x in range(0, 100, 1)]
node_quant = dissolved.groupby('Bin')['coverage'].quantile(q=quantile_levels)
node_quant = pd.DataFrame(node_quant).reset_index()
node_quant = node_quant.rename(columns={'level_1': 'quantile'})
node_quant['with_zero'] = 0

In [153]:
# plt.errorbar(x=node_desc.Bin, y=node_desc['mean'], yerr=node_desc['std'])

In [106]:
# d = {'mean': nodes_mean, 'std': nodes_std, 'count': nodes_count}

In [107]:
# nodes = pd.DataFrame(data=d).T

In [108]:
# nodes.columns = bins

#### Nodes with zeros

In [129]:
# Same per-bin summary, computed on the frame whose missing coverage was
# filled with 0; with_zero = 1 marks that
coverage_by_bin_w_zero = dissolved_w_zero.groupby('Bin')['coverage']
node_desc_w_zero = coverage_by_bin_w_zero.describe()
node_desc_w_zero = node_desc_w_zero.reset_index()
node_desc_w_zero['with_zero'] = 1
node_desc_w_zero

Unnamed: 0,Bin,count,mean,std,min,25%,50%,75%,max,with_zero
0,"(0, 10]",53440.0,0.040235,0.166289,0.0,0.0,0.0,0.0,1.0,1
1,"(10, 20]",5590.0,0.059441,0.179918,0.0,0.0,0.0,0.0,1.0,1
2,"(100, 150]",3430.0,0.289019,0.352858,0.0,0.0,0.035988,0.615996,0.985134,1
3,"(150, 200]",530.0,0.288955,0.277612,0.0,0.026549,0.209815,0.489319,0.903172,1
4,"(20, 30]",1880.0,0.118453,0.269037,0.0,0.0,0.0,0.015941,1.0,1
5,"(30, 40]",700.0,0.046481,0.163406,0.0,0.0,0.0,0.0,0.975981,1
6,"(40, 50]",320.0,0.111567,0.199191,0.0,0.0,0.0,0.148886,0.923407,1
7,"(50, 60]",120.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1
8,"(60, 70]",710.0,0.083975,0.208769,0.0,0.0,0.0,0.0,0.966461,1
9,"(70, 80]",750.0,0.053269,0.207748,0.0,0.0,0.0,0.0,0.96642,1


In [215]:
# Coverage quantiles per width bin (levels 0.00 ... 0.99) on the zero-filled frame
quantile_levels = [x / 100.0 for x in range(0, 100, 1)]
node_quant_w_zero = dissolved_w_zero.groupby('Bin')['coverage'].quantile(q=quantile_levels)
node_quant_w_zero = pd.DataFrame(node_quant_w_zero).reset_index()
node_quant_w_zero = node_quant_w_zero.rename(columns={'level_1': 'quantile'})
node_quant_w_zero['with_zero'] = 1
node_quant_w_zero

Unnamed: 0,Bin,quantile,coverage,with_zeros
0,"(0, 10]",0.00,0.000000,1
1,"(0, 10]",0.01,0.000000,1
2,"(0, 10]",0.02,0.000000,1
3,"(0, 10]",0.03,0.000000,1
4,"(0, 10]",0.04,0.000000,1
...,...,...,...,...
895,"(60, 70]",0.95,0.989875,1
896,"(60, 70]",0.96,0.992942,1
897,"(60, 70]",0.97,0.997144,1
898,"(60, 70]",0.98,0.999938,1


#### Reaches

In [216]:
# For coverage thresholds 0.1 ... 0.9: count, per reach, the rows whose
# coverage exceeds the threshold and divide by 10 (presumably the number of
# segments kept per reach -- confirm), then summarise those values per bin
d = {}
d_q = {}
quantile_levels = [x / 100.0 for x in range(0, 100, 1)]

for i in range(1, 10):
    threshold = i/10

    per_reach = dissolved.groupby(['Bin', 'NHDPlusID'])['coverage']
    detected = per_reach.apply(lambda x: (x > threshold).sum()) / 10

    detected_by_bin = detected.groupby('Bin')
    reach = detected_by_bin.describe()
    reach_q = detected_by_bin.quantile(q=quantile_levels).reset_index()

    d[threshold] = reach
    d_q[threshold] = reach_q

In [217]:
# Tag every per-threshold table (describe tables first, then quantile
# tables) with the threshold it was computed for
for tables in (d, d_q):
    for threshold, data in tables.items():
        data['threshold'] = threshold

In [218]:
# Stack the per-threshold tables into one frame each
reaches_desc = pd.concat(d.values())

reaches_quant = pd.concat(d_q.values())
reaches_quant = reaches_quant.rename(columns={'level_1': 'quantile'})

In [219]:
reaches_quant

Unnamed: 0,Bin,quantile,coverage,threshold
0,"(0, 10]",0.00,0.000,0.1
1,"(0, 10]",0.01,0.000,0.1
2,"(0, 10]",0.02,0.000,0.1
3,"(0, 10]",0.03,0.000,0.1
4,"(0, 10]",0.04,0.000,0.1
...,...,...,...,...
895,"(60, 70]",0.95,0.800,0.9
896,"(60, 70]",0.96,0.912,0.9
897,"(60, 70]",0.97,1.000,0.9
898,"(60, 70]",0.98,1.000,0.9


In [175]:
# reaches = pd.DataFrame(data=d).T

In [184]:
# reaches.columns = bins

### Write out

In [None]:
save_path = os.path.join('/nas/cee-water/cjgleason/fiona/narrow_rivers_PIXC_data/', 'PIXC_v2_0_HUC2_01')

In [221]:
# Combine node_desc
node_desc_both = pd.concat([node_desc, node_desc_w_zero], ignore_index=True)
node_desc_both

Unnamed: 0,Bin,count,mean,std,min,25%,50%,75%,max,with_zeros
0,"(0, 10]",8316.0,0.525414,0.378327,1.724403e-07,0.160617,0.464608,0.999745,1.0,0
1,"(10, 20]",1720.0,0.525567,0.354701,4.884105e-07,0.167442,0.530201,0.879635,1.0,0
2,"(150, 200]",770.0,0.664324,0.254113,0.0007181689,0.506618,0.734146,0.877187,0.992291,0
3,"(20, 30]",938.0,0.639324,0.310956,8.823649e-05,0.382331,0.718682,0.917814,1.0,0
4,"(200, 500]",15.0,0.615063,0.231852,0.001785946,0.473321,0.675602,0.787632,0.878283,0
5,"(30, 40]",620.0,0.478617,0.270736,4.08688e-06,0.258722,0.505946,0.682432,1.0,0
6,"(40, 50]",244.0,0.447759,0.28622,0.0001965358,0.172138,0.465598,0.708916,0.94031,0
7,"(50, 60]",497.0,0.705233,0.256108,0.002652534,0.590697,0.768183,0.904054,1.0,0
8,"(60, 70]",417.0,0.653624,0.322674,0.0006189416,0.36234,0.791855,0.929824,1.0,0
9,"(0, 10]",64630.0,0.067606,0.222184,0.0,0.0,0.0,0.0,1.0,1


In [224]:
# Combine node_quant
node_quant_both = pd.concat([node_quant, node_quant_w_zero], ignore_index=True)
node_quant_both

Unnamed: 0,Bin,quantile,coverage,with_zeros
0,"(0, 10]",0.00,1.724403e-07,0
1,"(0, 10]",0.01,6.799768e-04,0
2,"(0, 10]",0.02,2.507470e-03,0
3,"(0, 10]",0.03,5.758259e-03,0
4,"(0, 10]",0.04,9.941229e-03,0
...,...,...,...,...
1795,"(60, 70]",0.95,9.898752e-01,1
1796,"(60, 70]",0.96,9.929422e-01,1
1797,"(60, 70]",0.97,9.971435e-01,1
1798,"(60, 70]",0.98,9.999383e-01,1


In [220]:
# node_desc_both.to_csv(os.path.join(save_path, granule_name + '_nodes_describe.csv'))
# node_quant_both.to_csv(os.path.join(save_path, granule_name + '_nodes_quantile.csv'))

In [None]:
# reaches_desc.to_csv(os.path.join(save_path, granule_name + '_reaches_describe.csv'))
# reaches_quant.to_csv(os.path.join(save_path, granule_name + '_reaches_quantile.csv'))

### Conn tests

In [220]:
conn = sj[sj['NHDPlusID'] == 10000900090399]

In [221]:
# Drop the pixel-level columns if they are present. `sj` has already had
# these columns dropped earlier in the notebook, so when the cells run in
# order they are absent and must not raise a KeyError.
conn = conn.drop(columns=['index_right', 'points', 'azimuth_index',
                      'range_index', 'cross_track', 'pixel_area',
                      'height', 'geoid', 'dlatitude_dphase',
                      'dlongitude_dphase', 'dheight_dphase',
                      'klass', 'latitude', 'longitude', ],
                 errors='ignore')

In [222]:
conn = conn.set_geometry('pseudo_geom')

In [223]:
conn = conn.dissolve(by='counter')

In [151]:
# conn

In [262]:
# fig, ax = plt.subplots(figsize=(15,15))
# conn.buffered.plot(ax=ax, cmap='hsv')
# conn.plot(ax=ax, color='k', alpha=0.6)

In [230]:
def specialClip(sj):
    '''
    Clip one row's pseudo-pixel geometry to the same row's buffered segment.

    NOTE(review): this redefines, and from this point on shadows, the
    `specialClip` defined earlier in the notebook and the one imported from
    `utils`. Consider removing one of the definitions.

    Parameters
    ----------
    sj : row-like
        Must provide a 'pseudo_geom' entry and a buffered-segment entry named
        either 'buffer' or 'buffered'.

    Returns
    -------
    geopandas.GeoSeries
        Result of clipping 'pseudo_geom' with the buffered segment as the mask.
    '''
    # The rest of the notebook names the buffered-segment column 'buffer';
    # this cell originally read 'buffered', so fall back to it when needed
    buffer_col = 'buffer' if 'buffer' in sj else 'buffered'
    left = gpd.GeoSeries(sj['pseudo_geom'])
    right = gpd.GeoSeries(sj[buffer_col])
    pseudo_geom_clip = left.clip(right)
    return pseudo_geom_clip

In [231]:
# NOTE(review): the whole `buffered` column is passed as the clip mask here,
# so this does not look like a row-by-row clip -- compare with the row-wise
# `specialClip` result stored in 'pseudo_geom_test' below; confirm intent.
conn['pseudo_geom_clip'] = conn.pseudo_geom.clip(conn.buffered)

Use `to_crs()` to reproject one of the input geometries to match the CRS of the other.

Left CRS: EPSG:32618
Right CRS: None

  return geopandas.clip(self, mask=mask, keep_geom_type=keep_geom_type, sort=sort)


In [232]:
conn['pseudo_geom_test'] = conn.parallel_apply(user_defined_function=specialClip,
                                               axis=1)

In [233]:
conn

Unnamed: 0_level_0,pseudo_geom,index_old,Bin,GNIS_Name,LengthKM,NHDPlusID,WidthM,segments,buffered,segment_area,pseudo_geom_clip,pseudo_geom_test
counter,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
0,"MULTIPOLYGON (((695435.301 4690008.574, 695438...",49682,"(150, 200]",Connecticut River,3.323,10000900000000.0,191.820877,"LINESTRING (695478.047 4690091.994, 695486.9 4...","POLYGON ((695531.445 4690172.29, 695532.454 46...",63995.053794,"MULTIPOLYGON (((695438.262 4690009.382, 695438...","MULTIPOLYGON (((695438.262 4690009.382, 695438..."
1,"MULTIPOLYGON (((695825.742 4689867.135, 695825...",49682,"(150, 200]",Connecticut River,3.323,10000900000000.0,191.820877,"LINESTRING (695766.443 4689927.077, 695772.404...","POLYGON ((695811.349 4690012.076, 695811.84 46...",64015.389023,"MULTIPOLYGON (((695825.744 4689867.128, 695800...","MULTIPOLYGON (((695825.744 4689867.128, 695800..."
2,"MULTIPOLYGON (((696028.687 4689674.71, 696022....",49682,"(150, 200]",Connecticut River,3.323,10000900000000.0,191.820877,"LINESTRING (696066.573 4689782.304, 696069.393...","POLYGON ((696104.403 4689870.482, 696113.4 468...",63795.413096,"MULTIPOLYGON (((696026.276 4689696.857, 696026...","MULTIPOLYGON (((696051.738 4689703.808, 696051..."
3,"MULTIPOLYGON (((696371.735 4689633.256, 696366...",49682,"(150, 200]",Connecticut River,3.323,10000900000000.0,191.820877,"LINESTRING (696381.304 4689769.626, 696383.144...","POLYGON ((696423.97 4689855.547, 696425.76 468...",63981.497856,"MULTIPOLYGON (((696328.88 4689689.148, 696327....","MULTIPOLYGON (((696373.119 4689724.041, 696377..."
4,"POLYGON ((696697.854 4689452.232, 696697.932 4...",49682,"(150, 200]",Connecticut River,3.323,10000900000000.0,191.820877,"LINESTRING (696652.497 4689576.263, 696659.552...","POLYGON ((696714.855 4689649.608, 696722.113 4...",63992.327552,"MULTIPOLYGON (((696697.932 4689452.253, 696692...","MULTIPOLYGON (((696697.932 4689452.253, 696692..."
5,"MULTIPOLYGON (((696936.504 4689269.287, 696936...",49682,"(150, 200]",Connecticut River,3.323,10000900000000.0,191.820877,"LINESTRING (696931.327 4689393.349, 696935.67 ...","POLYGON ((696954.964 4689275.586, 696949.513 4...",64014.006224,"MULTIPOLYGON (((696871.63 4689319.441, 696883....","MULTIPOLYGON (((696943.324 4689384.099, 696944..."
6,"POLYGON ((697279.939 4689160.329, 697277.768 4...",49682,"(150, 200]",Connecticut River,3.323,10000900000000.0,191.820877,"LINESTRING (697230.791 4689247.944, 697232.266...","POLYGON ((697267.873 4689336.407, 697276.597 4...",64045.190958,"MULTIPOLYGON (((697277.768 4689159.737, 697272...","MULTIPOLYGON (((697277.768 4689159.737, 697272..."
7,"POLYGON ((697544.209 4689029.726, 697520.377 4...",49682,"(150, 200]",Connecticut River,3.323,10000900000000.0,191.820877,"LINESTRING (697547.788 4689143.494, 697555.383...","POLYGON ((697582.167 4689233.371, 697591.48 46...",64059.656482,"POLYGON ((697507.869 4689065.145, 697513.094 4...","POLYGON ((697584.064 4689221.196, 697585.043 4..."
8,"POLYGON ((697905.339 4688993.211, 697899.615 4...",49682,"(150, 200]",Connecticut River,3.323,10000900000000.0,191.820877,"LINESTRING (697873.818 4689074.515, 697877.864...","POLYGON ((697887.754 4689169.495, 697888.369 4...",64068.472595,"POLYGON ((697899.615 4689014.179, 697895.787 4...","POLYGON ((697899.615 4689014.179, 697895.787 4..."
9,"MULTIPOLYGON (((698262.416 4688955.843, 698256...",49682,"(150, 200]",Connecticut River,3.323,10000900000000.0,191.820877,"LINESTRING (698205.422 4689041.412, 698205.605...","POLYGON ((698210.029 4689137.212, 698219.747 4...",64068.952184,"MULTIPOLYGON (((698256.76 4688976.564, 698242....","MULTIPOLYGON (((698256.76 4688976.564, 698242...."


In [261]:
# fig, ax = plt.subplots(figsize=(15,15))
# conn.buffered.plot(ax=ax, cmap='hsv', alpha=0.7)
# conn.pseudo_geom.plot(ax=ax, color='k')

# # Basemap
# ctx.add_basemap(ax, crs=conn.crs, source=ctx.providers.CartoDB.Positron)

# plt.axis('off')

In [259]:
# fig, ax = plt.subplots(figsize=(15,15))
# conn.buffered.plot(ax=ax, cmap='hsv', alpha=0.7)
# gpd.GeoSeries(conn.iloc[7].pseudo_geom).plot(ax=ax, color='k')

# # Basemap
# ctx.add_basemap(ax, crs=conn.crs, source=ctx.providers.CartoDB.Positron)

# plt.axis('off')

In [260]:
# fig, ax = plt.subplots(figsize=(15,15))
# conn.buffered.plot(ax=ax, cmap='hsv', alpha=0.7)
# gpd.GeoSeries(conn.iloc[7].pseudo_geom_clip).plot(ax=ax, color='k')

# # Basemap
# ctx.add_basemap(ax, crs=conn.crs, source=ctx.providers.CartoDB.Positron)

# plt.axis('off')

In [258]:
# fig, ax = plt.subplots(figsize=(15,15))
# conn.buffered.plot(ax=ax, cmap='hsv', alpha=0.7)
# gpd.GeoSeries(conn.iloc[7].pseudo_geom_test).plot(ax=ax, color='k')

# # Basemap
# ctx.add_basemap(ax, crs=conn.crs, source=ctx.providers.CartoDB.Positron)

# plt.axis('off')

In [208]:
# Area of each row's full (unclipped) pseudo-pixel geometry
conn['overlap_total'] = conn['pseudo_geom'].area

In [209]:
# Area of each row's clipped pseudo-pixel geometry
conn['overlap_new'] = conn['pseudo_geom_clip'].area

In [212]:
conn

Unnamed: 0_level_0,pseudo_geom,index_old,Bin,GNIS_Name,LengthKM,NHDPlusID,WidthM,segments,buffered,segment_area,pseudo_geom_clip,pseudo_geom_test,overlap_total,overlap_new
counter,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
0,"MULTIPOLYGON (((696235.571 4691354.078, 696229...",64496,"(150, 200]",Connecticut River,1.663,10000900000000.0,191.781368,"LINESTRING (696420.217 4691423.489, 696408.212...","POLYGON ((696398.233 4691329.375, 696397.183 4...",32077.594224,"MULTIPOLYGON (((696229.846 4691375.053, 696250...","MULTIPOLYGON (((696256.307 4691427.076, 696254...",29941.427264,22394.328463
1,"MULTIPOLYGON (((696052.865 4691371.743, 696047...",64496,"(150, 200]",Connecticut River,1.663,10000900000000.0,191.781368,"LINESTRING (696253.668 4691435.249, 696248.762...","POLYGON ((696253.29 4691339.233, 696250.384 46...",32075.535908,"MULTIPOLYGON (((696047.139 4691392.718, 696071...","MULTIPOLYGON (((696077.439 4691378.468, 696087...",28153.602711,23042.776807
2,"MULTIPOLYGON (((695882.903 4691370.088, 695877...",64496,"(150, 200]",Connecticut River,1.663,10000900000000.0,191.781368,"LINESTRING (696088.226 4691456.278, 696080.746...","POLYGON ((696057.599 4691365.138, 696049.65 46...",32075.246963,"MULTIPOLYGON (((695884.398 4691393.301, 695878...","MULTIPOLYGON (((695920.563 4691448.255, 695926...",31402.399143,26467.001127
3,"MULTIPOLYGON (((695737.343 4691420.439, 695724...",64496,"(150, 200]",Connecticut River,1.663,10000900000000.0,191.781368,"LINESTRING (695922.877 4691479.671, 695922.4 4...","POLYGON ((695903.493 4691385.759, 695902.24 46...",32074.382248,"MULTIPOLYGON (((695727.948 4691417.875, 695724...","MULTIPOLYGON (((695766.1 4691496.171, 695766.0...",30619.545584,27462.803469
4,"MULTIPOLYGON (((695566.999 4691441.757, 695592...",64496,"(150, 200]",Connecticut River,1.663,10000900000000.0,191.781368,"LINESTRING (695758.657 4691510.698, 695755.384...","POLYGON ((695743.382 4691415.987, 695733.839 4...",32076.502887,"MULTIPOLYGON (((695582.729 4691446.051, 695581...","MULTIPOLYGON (((695592.08 4691448.603, 695596....",31323.096196,26996.463965
5,"MULTIPOLYGON (((695464.092 4691480.957, 695464...",64496,"(150, 200]",Connecticut River,1.663,10000900000000.0,191.781368,"LINESTRING (695592.468 4691529.541, 695586.398...","POLYGON ((695575.311 4691435.002, 695565.411 4...",32100.005933,"MULTIPOLYGON (((695464.034 4691480.941, 695465...","MULTIPOLYGON (((695464.034 4691480.941, 695465...",33353.98978,28230.845393
6,"POLYGON ((695308.777 4691506.379, 695303.124 4...",64496,"(150, 200]",Connecticut River,1.663,10000900000000.0,191.781368,"LINESTRING (695434.535 4691580.212, 695425.91 ...","POLYGON ((695304.423 4691512.111, 695304.901 4...",32051.301502,"MULTIPOLYGON (((695306.462 4691514.862, 695306...","POLYGON ((695306.787 4691513.673, 695306.927 4...",34613.088602,29162.100931
7,"POLYGON ((695118.697 4691476.952, 695113.041 4...",64496,"(150, 200]",Connecticut River,1.663,10000900000000.0,191.781368,"LINESTRING (695272.163 4691599.574, 695270.823...","POLYGON ((695307.341 4691510.365, 695298.637 4...",32068.440603,"MULTIPOLYGON (((695113.041 4691497.674, 695096...","MULTIPOLYGON (((695110.339 4691609.885, 695104...",31332.750204,28890.502008
8,"POLYGON ((694957.338 4691455.388, 694963.941 4...",64496,"(150, 200]",Connecticut River,1.663,10000900000000.0,191.781368,"LINESTRING (695113.298 4691550.204, 695109.041...","POLYGON ((695120.388 4691454.495, 695111.652 4...",32073.487171,"MULTIPOLYGON (((694963.941 4691457.19, 694958....","POLYGON ((694920.509 4691603.063, 694932.315 4...",35230.243973,30673.515486
9,"POLYGON ((694802.28 4691367.926, 694796.623 46...",64496,"(150, 200]",Connecticut River,1.663,10000900000000.0,191.781368,"LINESTRING (694950.601 4691512.59, 694943.713 ...","POLYGON ((694793.84 4691539.614, 694766.186 46...",32132.553777,"MULTIPOLYGON (((694800.798 4691373.354, 694798...","POLYGON ((694800.798 4691373.354, 694798.242 4...",31761.453577,27426.347291


In [210]:
conn.overlap_total / conn.segment_area

counter
0    0.933406
1    0.877728
2    0.979023
3    0.954642
4    0.976512
5    1.039065
6    1.079928
7    0.977059
8    1.098423
9    0.988451
dtype: float64

In [211]:
conn.overlap_new / conn.segment_area

counter
0    0.698130
1    0.718391
2    0.825153
3    0.856222
4    0.841627
5    0.879465
6    0.909857
7    0.900901
8    0.956351
9    0.853538
dtype: float64

In [None]:
# sj.columns

In [None]:
# sj.dtypes

In [None]:
# sj.index.unique()

In [80]:
# Find the overlap area
def _overlap_area(row):
    '''Area shared by the row's buffer and its pseudo pixel; 0 when the row has no pseudo pixel.'''
    pseudo = row['pseudo_geom']
    if pseudo is None:
        return 0
    return row['buffer'].intersection(pseudo).area


sj['overlap_area'] = sj.parallel_apply(_overlap_area, axis=1)

In [83]:
# sj.sort_values(by=['NHDPlusID', 'counter'])

In [102]:
conn['overlap_total'] = conn.groupby(['NHDPlusID', 'counter'])['overlap_area'].transform('sum')

In [103]:
conn

Unnamed: 0,index_old,Bin,GNIS_Name,LengthKM,NHDPlusID,WidthM,segments,counter,buffer,segment_area,pseudo_geom,overlap_area,overlap_total
644963,64496,"(150, 200]",Connecticut River,1.663,1.000090e+13,191.781368,"LINESTRING (695922.877 4691479.671, 695922.4 4...",3,"POLYGON ((695903.493 4691385.759, 695902.24 46...",32074.382248,"POLYGON ((695842.109 4691404.281, 695847.902 4...",311.008664,36960.232366
644963,64496,"(150, 200]",Connecticut River,1.663,1.000090e+13,191.781368,"LINESTRING (695922.877 4691479.671, 695922.4 4...",3,"POLYGON ((695903.493 4691385.759, 695902.24 46...",32074.382248,"POLYGON ((695877.11 4691391.311, 695882.903 46...",165.198850,36960.232366
644963,64496,"(150, 200]",Connecticut River,1.663,1.000090e+13,191.781368,"LINESTRING (695922.877 4691479.671, 695922.4 4...",3,"POLYGON ((695903.493 4691385.759, 695902.24 46...",32074.382248,"POLYGON ((695892.827 4691395.609, 695898.62 46...",124.243686,36960.232366
644963,64496,"(150, 200]",Connecticut River,1.663,1.000090e+13,191.781368,"LINESTRING (695922.877 4691479.671, 695922.4 4...",3,"POLYGON ((695903.493 4691385.759, 695902.24 46...",32074.382248,"POLYGON ((695899.508 4691397.434, 695905.301 4...",54.124712,36960.232366
644963,64496,"(150, 200]",Connecticut River,1.663,1.000090e+13,191.781368,"LINESTRING (695922.877 4691479.671, 695922.4 4...",3,"POLYGON ((695903.493 4691385.759, 695902.24 46...",32074.382248,"POLYGON ((695759.886 4691404.342, 695765.679 4...",0.827977,36960.232366
...,...,...,...,...,...,...,...,...,...,...,...,...,...
644969,64496,"(150, 200]",Connecticut River,1.663,1.000090e+13,191.781368,"LINESTRING (694950.601 4691512.59, 694943.713 ...",9,"POLYGON ((694793.84 4691539.614, 694766.186 46...",32132.553777,"POLYGON ((694809.465 4691482.573, 694815.258 4...",591.197327,35192.051275
644969,64496,"(150, 200]",Connecticut River,1.663,1.000090e+13,191.781368,"LINESTRING (694950.601 4691512.59, 694943.713 ...",9,"POLYGON ((694793.84 4691539.614, 694766.186 46...",32132.553777,"POLYGON ((694809.465 4691482.573, 694815.258 4...",591.197327,35192.051275
644969,64496,"(150, 200]",Connecticut River,1.663,1.000090e+13,191.781368,"LINESTRING (694950.601 4691512.59, 694943.713 ...",9,"POLYGON ((694793.84 4691539.614, 694766.186 46...",32132.553777,"POLYGON ((694899.461 4691484.638, 694905.254 4...",589.694275,35192.051275
644969,64496,"(150, 200]",Connecticut River,1.663,1.000090e+13,191.781368,"LINESTRING (694950.601 4691512.59, 694943.713 ...",9,"POLYGON ((694793.84 4691539.614, 694766.186 46...",32132.553777,"POLYGON ((694907.967 4691486.964, 694913.76 46...",588.973633,35192.051275
