In [93]:
import os
import sys
import time

# import contextily as ctx
import geopandas as gpd
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import shapely
import xarray as xr

# from matplotlib import colors
from pandarallel import pandarallel
from shapely.affinity import translate
from shapely.geometry import box, LineString, Point, Polygon
from shapely.ops import substring

from reaches import readNHD
from reaches import readSegments
from reaches import bitwiseMask
from reaches import makeGDF
from reaches import makePseudoPixels

### Pixel Cloud

In [2]:
data_path = '/nas/cee-water/cjgleason/fiona/data/PIXC_v2_0_HUC2_01/'

# index = 4 # HUC4 0108, Connecticut

pixc_path = os.path.join(data_path, 'SWOT_L2_HR_PIXC_005_035_230R_20231013T222132_20231013T222143_PGC0_01.nc')

tile_name = pixc_path[-71:-3]

#### Check if tile intersects NHD

In [3]:
# Read in xarray, global
ds_GLOB = xr.open_mfdataset(paths=pixc_path, engine='h5netcdf')

In [4]:
# Get bounding coordinates for SWOT tile
west_lon = ds_GLOB.geospatial_lon_min
south_lat = ds_GLOB.geospatial_lat_min
east_lon = ds_GLOB.geospatial_lon_max
north_lat = ds_GLOB.geospatial_lat_max

In [5]:
# Clip width polygons to current points
bbox = box(west_lon, south_lat, east_lon, north_lat)
bbox = gpd.GeoDataFrame({'geometry': [bbox]}, crs="EPSG:4326")
bbox = bbox.to_crs(epsg='32618')

In [6]:
# Read in HUC4 boundaries (no Great Lakes)
data_path = '/nas/cee-water/cjgleason/fiona/narrow_rivers_PIXC/data/all_wbd_no_great_lakes.parquet'
wbd = gpd.read_parquet(path=data_path)

In [7]:
# Project CRS
wbd = wbd.to_crs(epsg=32618)

In [8]:
test = gpd.sjoin(wbd, bbox, how='inner', predicate='intersects')

In [9]:
if test.shape[0] == 0:
    print('This granule does not intersect the NHD, exiting.')
    # sys.exit()    

#### Proceed

In [10]:
pass_num = ds_GLOB.pass_number
pass_num

np.int16(35)

In [11]:
swath_side = ds_GLOB.swath_side
swath_side

'R'

In [12]:
# Read in xarray, pixel group
ds_PIXC = xr.open_mfdataset(paths=pixc_path, group = 'pixel_cloud', engine='h5netcdf')

In [13]:
# Make mask
mask = bitwiseMask(ds_PIXC)

if mask.shape[0] == 0:
    print('This granule has no pixels after masking, exiting.')
    # sys.exit()    

(2996874,)


In [14]:
variables = ['azimuth_index', 'range_index', 'cross_track',
             'pixel_area', 'height', 'geoid',
             'dlatitude_dphase', 'dlongitude_dphase',
             'dheight_dphase', 'classification']

In [15]:
# Make PIXC into GeoDataFrame
gdf_PIXC = makeGDF(ds=ds_PIXC, mask=mask, data_vars=variables)

### Find correct HUC4

In [16]:
# Get index metadata
mdata_path = '/nas/cee-water/cjgleason/fiona/narrow_rivers_PIXC/data/'
# Define dtypes for lookup tables to preserve leading zeros
dtype_dic= {'HUC4': str, 'HUC2': str, 'toBasin': str, 'level': str}
# Read in HUC lookup table
lookup = pd.read_csv(os.path.join(mdata_path,
                                  'HUC4_lookup_no_great_lakes.csv'),
                     dtype=dtype_dic)

In [17]:
# Merge on index metadata
wbd = pd.merge(left=wbd, right=lookup, how='inner', left_on='huc4',
          right_on='HUC4').drop(columns=['HUC4', 'HUC2', 'toBasin',
                                         'level'])

In [18]:
# Get bounds of PIXC tile
bounds_PIXC = gdf_PIXC.union_all().convex_hull
gdf_bounds = gpd.GeoDataFrame({'geometry': [bounds_PIXC]}, crs=wbd.crs)

In [19]:
# Get slurm indices (from mdata) of basins that intersect PIXC tile
indices = gpd.sjoin(wbd, gdf_bounds, how='inner', predicate='intersects')['slurm_index'].to_list()

### Read in HUC4 basins

In [20]:
# Create merged dataframe of all basins intersected
if len(indices) == 1:
    # Read prepped NHD
    basin, huc4_list, huc2_list = readNHD(index=idx)

else:
    # Initialize lists
    d = []
    huc4_list = []
    huc2_list = []
    
    # Loop through indices and store in lists
    for idx in indices:

        # Read prepped NHD
        basin, huc4, huc2 = readNHD(index=idx)

        # Append to lists
        d.append(basin)
        huc4_list.append(huc4)
        huc2_list.append(huc2)
        
    # Merge GeoDataFrames
    basin = pd.concat(d)

NHDPLUS_H_0107_HU4_GDB
read in
exploded
NHDPLUS_H_0106_HU4_GDB
read in
exploded
NHDPLUS_H_0109_HU4_GDB
read in
exploded


In [21]:
# Project crs
basin = basin.to_crs(epsg=32618)

In [22]:
# Buffer with an extra 40 m on each side to be safe
basin['buffer'] = basin.buffer(distance=((basin.WidthM/2)+50), cap_style='flat')

In [23]:
# Set geometry to buffered reaches
basin = basin.set_geometry('buffer')

In [24]:
# Get only pixels within buffered reaches
gdf_PIXC_clip = gpd.sjoin(gdf_PIXC, basin, how='inner', predicate='within')

In [25]:
# Drop unneeded cols
gdf_PIXC_clip = gdf_PIXC_clip.drop(columns=['index_right',
                                            'Bin', 'GNIS_Name',
                                            'LengthKM', 'NHDPlusID',
                                            'WidthM', 'geometry_right'])

### Nadir track

In [26]:
# Get single pixel for selecting correct nadir segment
pixel = gdf_PIXC_clip.sample(n=1, random_state=0)

In [27]:
pixel

Unnamed: 0,points,azimuth_index,range_index,cross_track,pixel_area,height,geoid,dlatitude_dphase,dlongitude_dphase,dheight_dphase,klass,latitude,longitude,geometry
634135,3514024,1213.0,3027.0,50916.714844,253.294296,-28.749895,-27.780602,-0.000268,0.001409,7.700175,3.0,42.738824,-70.818031,POINT (842329.879 4740298.797)


In [28]:
# Get geometry of single pixel
pixel_pt = pixel.geometry[pixel.index[0]]

In [29]:
# Read in nadir (science orbit)
nadir = gpd.read_file('/nas/cee-water/cjgleason/data/SWOT/swath/swot_science_hr_Aug2021-v05_shapefile_nadir/swot_science_hr_2.0s_4.0s_Aug2021-v5_nadir.shp')

In [30]:
# Convert CRS to WGS 84 / UTM zone 18N
nadir = nadir.to_crs(epsg=32618)

In [31]:
# Find candidate nadir segments
candidates = nadir[nadir['ID_PASS'] == pass_num]

In [32]:
# Find distance from each candidate to single pixel
candidates['dist'] = candidates.loc[:,'geometry'].distance(pixel_pt)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)


In [33]:
# Get nadir segment closest to single pixel
nadir_segment = candidates[candidates.dist == candidates.dist.min()]

In [34]:
# Get nadir segment geoemtry
nadir_segment_ln = nadir_segment.geometry[nadir_segment.index[0]]

### Make pseudo pixels

In [35]:
# Set along-track pixel resolution
azimuth_res = 22 # meters

In [36]:
pandarallel.initialize()

INFO: Pandarallel will run on 24 workers.
INFO: Pandarallel will use Memory file system to transfer data between the main process and workers.


In [37]:
# Make pseudo pixels
start = time.time()
gdf_PIXC_clip['pseudo_pixel'] = gdf_PIXC_clip.parallel_apply(user_defined_function=makePseudoPixels,
                                                         args=(nadir_segment_ln,
                                                               azimuth_res),
                                                         axis=1)
end = time.time()
print(end - start)

6.569878339767456


In [51]:
pseudo = gdf_PIXC_clip.drop(columns='geometry').set_geometry('pseudo_pixel').set_crs(crs=gdf_PIXC_clip.crs)

In [100]:
# Get bounds of PIXC tile
# pseudo_bounds = pseudo.union_all().convex_hull
pseudo_bounds = pseudo.total_bounds
pseudo_poly = box(pseudo_bounds[0], pseudo_bounds[1],
                      pseudo_bounds[2], pseudo_bounds[3])
# gdf_pseudo_bounds = gpd.GeoDataFrame({'geometry': [pseudo_bounds]}, crs=pseudo.crs)

In [38]:
# save_path = '/nas/cee-water/cjgleason/fiona/narrow_rivers_PIXC_data/PIXC_v2_0_pseudo_pixels_filtered/'

In [39]:
# gdf_PIXC.to_parquet(path=save_path + tile_name + '.parquet')

### Read in segments

In [40]:
# Create merged dataframe of all basins intersected
if len(indices) == 1:
    # Read prepped NHD
    segments, _, _ = readSegments(index=idx)

else:
    # Initialize lists
    d = []
    
    # Loop through indices and store in lists
    for idx in indices:

        # Read prepped NHD
        segments, _, _ = readSegments(index=idx)
        # Append to list
        d.append(segments)
        
    # Merge GeoDataFrames
    segments = pd.concat(d)

read in
read in
read in


In [44]:
segments = segments.to_crs(epsg='32618')

In [90]:
segments_clip = segments.clip(pseudo_bounds)

In [102]:
segments_clip

Unnamed: 0,Bin,GNIS_Name,LengthKM,NHDPlusID,WidthM,segments
5544,"(0, 10]",,0.558,5.000700e+12,1.672232,"LINESTRING (822724.851 4719272.715, 822719.726..."
5544,"(0, 10]",,0.558,5.000700e+12,1.672232,"LINESTRING (822670.703 4719284.646, 822615.166..."
5544,"(0, 10]",,0.558,5.000700e+12,1.672232,"LINESTRING (822776.243 4719293.569, 822730.561..."
5544,"(0, 10]",,0.558,5.000700e+12,1.672232,"LINESTRING (822615.077 4719277.205, 822605.495..."
5544,"(0, 10]",,0.558,5.000700e+12,1.672232,"LINESTRING (822933.027 4719279.359, 822889.605..."
...,...,...,...,...,...,...
16028,"(0, 10]",York River,1.915,5.000600e+12,8.728796,"LINESTRING (845027.681 4787168.946, 845051.96 ..."
16028,"(0, 10]",York River,1.915,5.000600e+12,8.728796,"LINESTRING (844884.028 4787277.778, 844895.678..."
17472,"(0, 10]",York River,2.449,5.000600e+12,7.971184,"LINESTRING (844981.615 4787648.01, 844983.616 ..."
15645,"(0, 10]",Rogers Brook,3.893,5.000600e+12,4.011965,"LINESTRING (845194.975 4788511.416, 845235.064..."


In [105]:
segments['index1'] = segments.index

In [106]:
segments

Unnamed: 0,Bin,GNIS_Name,LengthKM,NHDPlusID,WidthM,segments,index1
0,"(0, 10]",,0.221,5.000900e+12,2.356104,"LINESTRING (740161.42 4790619.761, 740152.358 ...",0
0,"(0, 10]",,0.221,5.000900e+12,2.356104,"LINESTRING (740150.119 4790638.721, 740145.984...",0
0,"(0, 10]",,0.221,5.000900e+12,2.356104,"LINESTRING (740133.563 4790653.398, 740120.86 ...",0
0,"(0, 10]",,0.221,5.000900e+12,2.356104,"LINESTRING (740115.711 4790666.402, 740114.042...",0
0,"(0, 10]",,0.221,5.000900e+12,2.356104,"LINESTRING (740093.841 4790669.566, 740083.427...",0
...,...,...,...,...,...,...,...
26407,"(0, 10]",,0.215,5.000700e+12,7.173304,"LINESTRING (847688.595 4627231.959, 847707.46 ...",26407
26407,"(0, 10]",,0.215,5.000700e+12,7.173304,"LINESTRING (847707.46 4627242.562, 847714.371 ...",26407
26407,"(0, 10]",,0.215,5.000700e+12,7.173304,"LINESTRING (847726.889 4627252.051, 847746.645...",26407
26407,"(0, 10]",,0.215,5.000700e+12,7.173304,"LINESTRING (847746.645 4627260.896, 847763.411...",26407


In [103]:
segments.set_index('NHDPlusID')

Unnamed: 0_level_0,Bin,GNIS_Name,LengthKM,WidthM,segments
NHDPlusID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
5.000900e+12,"(0, 10]",,0.221,2.356104,"LINESTRING (740161.42 4790619.761, 740152.358 ..."
5.000900e+12,"(0, 10]",,0.221,2.356104,"LINESTRING (740150.119 4790638.721, 740145.984..."
5.000900e+12,"(0, 10]",,0.221,2.356104,"LINESTRING (740133.563 4790653.398, 740120.86 ..."
5.000900e+12,"(0, 10]",,0.221,2.356104,"LINESTRING (740115.711 4790666.402, 740114.042..."
5.000900e+12,"(0, 10]",,0.221,2.356104,"LINESTRING (740093.841 4790669.566, 740083.427..."
...,...,...,...,...,...
5.000700e+12,"(0, 10]",,0.215,7.173304,"LINESTRING (847688.595 4627231.959, 847707.46 ..."
5.000700e+12,"(0, 10]",,0.215,7.173304,"LINESTRING (847707.46 4627242.562, 847714.371 ..."
5.000700e+12,"(0, 10]",,0.215,7.173304,"LINESTRING (847726.889 4627252.051, 847746.645..."
5.000700e+12,"(0, 10]",,0.215,7.173304,"LINESTRING (847746.645 4627260.896, 847763.411..."
