In [6]:
# Fiona Bennitt
# 08/07/2024
# Based on the cookbook here:
# https://podaac.github.io/tutorials/notebooks/datasets/SWOT_PIXC_Area_localmachine.html

In [7]:
import dask # get error with xr.open without
import earthaccess
import geopandas as gpd
import h5netcdf
import numpy as np
import pandas as pd
import xarray as xr

In [8]:
# Suppress scientific notation when NumPy prints floating-point values
np.set_printoptions(suppress=True)

#### Get Pixel Cloud

In [9]:
# Relative path to the NetCDF granule whose 'pixel_cloud' group is opened below
filepath = '../data/small_rivers/mar_2024_ver_c/SWOT_L2_HR_PIXC_011_345_239R_20240227T044209_20240227T044220_PIC0_01.nc'

In [10]:
# Open only the 'pixel_cloud' group of the granule, read via h5netcdf
ds_PIXC = xr.open_mfdataset(
    filepath,
    group='pixel_cloud',
    engine='h5netcdf',
)

#### Look at distribution of geolocation_qual

In [22]:
# Variables to pull out of the pixel cloud for inspection.
# Excluding layover_impact as it is only useful for aggregated heights.
# The commented-out names are other candidate variables that can be
# re-enabled; only 'geolocation_qual' is currently active.
variables = [
             # 'azimuth_index', 'range_index', 'water_frac', 'water_frac_uncert',
             # 'classification', 'false_detection_rate', 'missed_detection_rate',
             # 'prior_water_prob', 'bright_land_flag', 'height', 'geoid',
             # 'ancillary_surface_classification_flag', 'interferogram_qual',
             # 'classification_qual', 
             'geolocation_qual'
             # , 'sig0_qual'
            ]

In [23]:
# Map each requested variable name to its array in the pixel cloud
d = {var: ds_PIXC[var] for var in variables}

In [24]:
# Make DataFrame: one column per selected variable, with a 0..N-1 index
# where N is the size of the dataset's 'points' dimension
df = pd.DataFrame(data=d, index=range(ds_PIXC.sizes['points']))

In [25]:
# Check out the distribution of flag values present, ordered by flag value
counts = df['geolocation_qual'].value_counts().sort_index()
counts

geolocation_qual
64.0         688758
65.0          24286
66.0            135
67.0             49
68.0        3612433
69.0         773722
70.0           2001
71.0            694
4160.0          229
4161.0           18
4162.0            5
4163.0            1
4164.0        28971
4165.0         6575
4166.0           28
4167.0            8
524352.0     123872
524353.0       2727
524356.0     439747
524357.0      33387
524358.0        389
524359.0         41
528448.0         89
528449.0          3
528452.0       5001
528453.0        319
528454.0          1
528455.0          1
Name: count, dtype: int64

#### Make masked GeoDataFrame

In [18]:
# See page 65 of PIXC PDD: https://podaac.jpl.nasa.gov/SWOT?tab=datasets-information&sections=about%2Bdata

def bitwiseMask(ds, min_class=1, max_qual=2**12):
    """Return the indices of pixels that pass the classification/quality cuts.

    A pixel is kept when ``ds.classification > min_class`` AND
    ``ds.geolocation_qual < max_qual``. Comparisons against NaN are False,
    so pixels with a missing value in either field are dropped.

    Parameters
    ----------
    ds : object exposing ``classification`` and ``geolocation_qual``
        Typically the pixel-cloud dataset; any object whose two attributes
        are equal-length array-likes works.
    min_class : number, optional
        Exclusive lower bound on ``classification``. Defaults to 1; may
        want 2 if land_near_water is not desired.
    max_qual : number, optional
        Exclusive upper bound on ``geolocation_qual``. Defaults to 2**12
        (4096, medium_phase_suspect): because the flag is a sum of
        powers of two, any value below 2**12 has no bit at or above
        bit 12 set.

    Returns
    -------
    numpy.ndarray
        Integer indices (along the first axis) of the retained pixels.

    Notes
    -----
    For now, just eliminating medium_phase_suspect (4096) and beyond.
    Ideally would discard all pixels with xovercal_suspect, but many
    granules have 0 pixels that pass that filter.
    """
    keep = np.logical_and(ds.classification > min_class,
                          ds.geolocation_qual < max_qual)
                          # could add additional flags here

    # np.where returns one index array per dimension; take the first
    mask = np.where(keep)[0]

    # Report how many pixels survived the filter
    print(mask.shape)
    return mask

In [19]:
def makeGDF(pixc, point_mask=None):
    """Build a projected GeoDataFrame of classification values from a pixel cloud.

    Takes the pixel cloud, makes a GeoDataFrame with just the
    classification values at the selected points, names that column
    'klass', reprojects to EPSG:3857 and returns the result.

    Parameters
    ----------
    pixc : dataset exposing ``classification``, ``longitude`` and ``latitude``
        The pixel cloud to convert. (Previously this argument was ignored
        and the notebook-level ``ds_PIXC`` was read instead.)
    point_mask : array-like of int, optional
        Indices of the points to keep. When omitted, the notebook-level
        ``mask`` variable is used so that existing ``makeGDF(pixc=...)``
        calls keep working; ``mask`` must then already be defined.

    Returns
    -------
    geopandas.GeoDataFrame
        Point geometries with a single 'klass' column, in EPSG:3857.
    """
    # Fall back to the global mask only when no explicit one is supplied
    idx = mask if point_mask is None else point_mask

    # Create GDF
    gdf_PIXC = gpd.GeoDataFrame(pixc.classification[idx],
                        geometry=gpd.points_from_xy(pixc.longitude[idx],
                                                    pixc.latitude[idx]),
                        crs="EPSG:4326") # PIXC has no native CRS,
                                         # setting same as River_SP

    # The data column gets an automatic label; give it a meaningful name
    # (returns a new frame rather than mutating in place)
    gdf_PIXC = gdf_PIXC.rename(columns={gdf_PIXC.columns[0]: 'klass'})

    # Convert the crs to WGS 84 / Pseudo-Mercator
    return gdf_PIXC.to_crs(epsg=3857)

In [20]:
# Indices of the pixels that pass the classification/quality filter
mask = bitwiseMask(ds_PIXC)

# GeoDataFrame of the masked pixel cloud
gdf_PIXC = makeGDF(ds_PIXC)

(1628640,)
