In [1]:
# ----- Import required packages -----
import netCDF4
import numpy as np
import os
from scipy import io
import matplotlib.pyplot as plt
import os.path
from datetime import datetime
import xarray as xr
import rasterio as rio
import datetime as dt
import matplotlib
import pandas as pd
from datetime import datetime, timedelta
import datetime as dt
from pathlib import Path
from tqdm.notebook import tqdm
from scipy.cluster.vq import kmeans2
from scipy.stats import iqr, mode
from sklearn.cluster import KMeans
import hvplot.pandas
import geopandas as gpd

import holoviews as hv

# set dask client to None
client = None

hv.extension('bokeh')

# Select reservoir

In [1]:
start_date = '2022-01-01'
end_date = '2024-10-30'
RESERVOIR = '0810'
MIN_INTERSECTION_THRESHOLD = 900
buffer_amt = 50 # meters. unlike other types of data, nadir altimetry data works better with a smaller buffer around the reservoir. 
                 # The exact boundary of the reservoir is ideal, but is difficult to obtain automatically, so 50m buffer is a good compromise.

Code to process each swot file and generate altimeter heights.

In [2]:
secd2day=86400 #secd2day=1/86400
mjd_of_time_org=51544

def detect_missing_data(var_FillValue, data):
    if data==var_FillValue:
        data_Flag=1
    else:
        data_Flag=0
    return data_Flag

def process_swot_file(filepath, lat_boundary):
  cycno = filepath.name.split('_')[2][-3:]
  data=netCDF4.Dataset(filepath, 'r')
  elevations = []

  time_01 = data.groups['data_01']['time'][:]
  dim_01hz=len(time_01)
  time_intv1=time_01-0.5
  time_intv2=time_01+0.5
  time20ku= data.groups['data_20']['time'][:]
  dim_20hz=len(time20ku)
  mjd_20_ku = (time20ku/secd2day)+mjd_of_time_org
  lon= data.groups['data_20']['longitude'][:]
  lat= data.groups['data_20']['latitude'][:]

  # Geophysical Corrections
  poletide= data.groups['data_01']['pole_tide'][:]
  solid_earth_tide= data.groups['data_01']['solid_earth_tide'][:]

  # Atmospheric Corrections
  iono_cor_alt= data.groups['data_01']['ku']['iono_cor_alt'][:]
  iono_cor_gim= data.groups['data_01']['ku']['iono_cor_gim'][:]

  mod_dry_tropo_cor_meas= data.groups['data_01']['model_dry_tropo_cor_measurement_altitude'][:]
  # mod_dry_tropo_cor_zero= data.groups['data_20']['model_dry_tropo_cor_zero_altitude'][:]
  mod_wet_tropo_cor_meas= data.groups['data_01']['model_wet_tropo_cor_measurement_altitude'][:]
  # mod_wet_tropo_cor_zero= data.groups['data_20']['model_wet_tropo_cor_zero_altitude'][:]

  # Satellite Altitude
  altimetry= data.groups['data_20']['altitude'][:]
  # Ice Retracking
  range_ice= data.groups['data_20']['ku']['range_ice2'][:]
  sig0_ice= data.groups['data_20']['ku']['sig0_ice2'][:]
  # Ocean Retracking
  range_ocean= data.groups['data_20']['ku']['range_ocean'][:]
  sig0_ocean= data.groups['data_20']['ku']['sig0_ocean'][:]
  range_ocean_flag= data.groups['data_20']['ku']['range_ocean_compression_qual'][:]
  sig0_ocean_flag= data.groups['data_20']['ku']['sig0_ocean_compression_qual'][:]
  # OCOG Retracking
  range_ocog= data.groups['data_20']['ku']['range_ocog'][:]
  sig0_ocog= data.groups['data_20']['ku']['sig0_ocog'][:]
  range_mle3= data.groups['data_20']['ku']['range_ocean_mle3'][:]
  sig0_mle3= data.groups['data_20']['ku']['sig0_ocean_mle3'][:]
  # Sea-Ice Retracking
  range_sea_ice= data.groups['data_20']['ku']['range_seaice'][:]
  sig0_sea_ice= data.groups['data_20']['ku']['sig0_seaice'][:]

  # Surface Class/Type
  surf_class= data.groups['data_20']['surface_classification_flag'][:]

  sel_range = range_ocog.copy()
  sel_sig0 = sig0_ocog.copy()

  ## Fill Value
  pole_tide_01_FillValue=32767
  solid_earth_tide_01_FillValue=32767
  iono_cor_gim_01_ku_FillValue=32767
  mod_dry_tropo_cor_meas_altitude_01_FillValue=32767
  mod_wet_tropo_cor_meas_altitude_01_FillValue=32767

  alt_FillValue=2147483647
  range_FillValue=2147483647
  sig0_FillValue=32767
  elev_FillValue=2147483647

  for i in range(dim_01hz):
    pole_tide_Flag=detect_missing_data(pole_tide_01_FillValue,poletide[i])
    solid_earth_tide_Flag=detect_missing_data(solid_earth_tide_01_FillValue,solid_earth_tide[i])
    iono_cor_gim_ku_Flag=detect_missing_data(solid_earth_tide_01_FillValue,iono_cor_gim[i])
    mod_dry_tropo_cor_meas_altitude_Flag=detect_missing_data(solid_earth_tide_01_FillValue,mod_dry_tropo_cor_meas[i])
    mod_wet_tropo_cor_meas_altitude_Flag=detect_missing_data(solid_earth_tide_01_FillValue,mod_wet_tropo_cor_meas[i])

    correction=poletide[i]+solid_earth_tide[i]+iono_cor_gim[i]+mod_dry_tropo_cor_meas[i]+mod_wet_tropo_cor_meas[i]
    Flags=pole_tide_Flag+solid_earth_tide_Flag+iono_cor_gim_ku_Flag+mod_dry_tropo_cor_meas_altitude_Flag+mod_wet_tropo_cor_meas_altitude_Flag
    mtone = np.where(lat>=lat_boundary[0])
    mttwo = np.where(lat<=lat_boundary[1])
    idxarry = np.intersect1d(mtone,mttwo)
    for j in idxarry:
      alt_F=detect_missing_data(alt_FillValue,altimetry[j])
      range_F=detect_missing_data(range_FillValue,sel_range[j])
      sig0_F=detect_missing_data(sig0_FillValue,sel_sig0[j])

      Flags1=Flags
      Flags2=Flags1+alt_F+range_F+sig0_F
      if Flags2 == 0: 
        if time20ku[j]>=time_intv1[i] and time20ku[j]<=time_intv2[i]:
          correction2=correction
          hgt_20hz=altimetry[j]-(correction2+sel_range[j])
          mjd_20hz=mjd_20_ku[j]
          lon_20hz=lon[j]
          lat_20hz=lat[j]
          sig0_20hz=sig0_ocog[j]
          if hgt_20hz!=np.nan: # and np.abs(hgt_20hz - elev_ocog[j])<=10: ##
              elevations.append((0, 0, 0, 0, 0, 0, 0, 0, 0, cycno, mjd_20hz, lon_20hz, lat_20hz, hgt_20hz, sig0_20hz))
  return elevations


In [5]:
val_pts = gpd.read_file(Path('/tiger1/pdas47/tmsosPP/data/validation-locations/validation-reservoirs-grand-rise-pts.geojson'))
val_polys = gpd.read_file(Path('/tiger1/pdas47/tmsosPP/data/validation-locations/validation-reservoirs-grand-rise.geojson'))

selected_reservoirs = val_pts['tmsos_id'].tolist()
res_names = val_pts[['tmsos_id', 'name']].set_index('tmsos_id').to_dict()['name']

RESERVOIR_NAME = res_names[RESERVOIR]
print(f'{RESERVOIR}: {RESERVOIR_NAME}')

val_res_pt = val_pts.loc[val_pts['tmsos_id'].isin(selected_reservoirs)]
val_res_poly = val_polys.loc[val_polys['tmsos_id'].isin(selected_reservoirs)]


global_map = (
    val_res_pt.hvplot(
        geo=True, tiles='OSM'
    ) * val_res_pt[val_res_pt['tmsos_id'] == RESERVOIR].hvplot(
        geo=True, color='red', size=100, 
    )
).opts(
    title=f"Locations of validation reservoirs. {RESERVOIR_NAME}, highlighted in red"
)

global_map

0586: Webster, US


In [6]:
import hvplot.pandas
import geoviews as gv

swot_nadir_tracks_fp = Path("/tiger1/pdas47/tmsosPP/data/swot_orbit/sph_science_nadir/swot_science_orbit_sept2015-v2_nadir.shp")

roi = val_res_poly[val_res_poly['tmsos_id']==RESERVOIR]
utm_crs = roi.estimate_utm_crs()
buffered_roi_utm = roi.to_crs(utm_crs).geometry.buffer(buffer_amt)
buffered_roi = buffered_roi_utm.to_crs('epsg:4326')

# get lat-lon boundary
gd_track = gpd.read_file(swot_nadir_tracks_fp)
gd_track_subset = gd_track[gd_track.intersects(roi.geometry.values[0])]

plot_lims = buffered_roi.buffer(0.02).bounds
minx = plot_lims.values[0][0]
maxx = plot_lims.values[0][2]
miny = plot_lims.values[0][1]
maxy = plot_lims.values[0][3]

roi.hvplot(color='gray') * gd_track_subset.hvplot(
     by='ID_PASS', xlim=(minx, maxx), ylim=(miny, maxy), line_width=3, cmap='viridis'
).opts(aspect='equal', xlabel='longitude', ylabel='latitude', title=f"{RESERVOIR_NAME} - NADIR Passes")


  plot_lims = buffered_roi.buffer(0.02).bounds


IndexError: list index out of range

In [29]:
# ----- Functions of processing -----
# Deoutlier functions
def iqr_deoutlier(cyc_hgt_profile):
    IQR = iqr(cyc_hgt_profile, nan_policy='omit')
    high_prct = np.nanquantile(cyc_hgt_profile, 0.75)
    low_prct = np.nanquantile(cyc_hgt_profile, 0.25)
    index_retain_iqr = np.logical_and(cyc_hgt_profile>low_prct-1.5*IQR, cyc_hgt_profile<high_prct+1.5*IQR).flatten()

    return index_retain_iqr

def kmean_water_cluster(cyc_hgt_profile, cyc_sig_profile, hgt_cyc_range_thrd, hgt_cyc_std_thrd):
    hgt_cyc_range = np.nanmax(cyc_hgt_profile) - np.nanmin(cyc_hgt_profile)
    while hgt_cyc_range > hgt_cyc_range_thrd:
        kmeans_cluster = KMeans(n_clusters=2, random_state=42, n_init=10).fit_predict(cyc_hgt_profile)
        cyc_hgt_profile = cyc_hgt_profile[kmeans_cluster==mode(kmeans_cluster, keepdims=True)[0]]
        cyc_sig_profile = cyc_sig_profile[kmeans_cluster==mode(kmeans_cluster, keepdims=True)[0]]
        hgt_cyc_range = np.nanmax(cyc_hgt_profile) - np.nanmin(cyc_hgt_profile)

    hgt_cyc_mean = np.nanmean(cyc_hgt_profile)
    hgt_cyc_std = np.nanstd(cyc_hgt_profile)
    hgt_cyc_errmean = cyc_hgt_profile[:,0] - hgt_cyc_mean
    while hgt_cyc_std > hgt_cyc_std_thrd:
        if np.count_nonzero(~np.isnan(cyc_hgt_profile))==2:
            break
        hgt_cyc_errmean = cyc_hgt_profile - hgt_cyc_mean
        cyc_hgt_profile = cyc_hgt_profile[np.abs(hgt_cyc_errmean)!=np.nanmax(np.abs(hgt_cyc_errmean))]
        cyc_sig_profile = cyc_sig_profile[np.abs(hgt_cyc_errmean)!=np.nanmax(np.abs(hgt_cyc_errmean))]
        hgt_cyc_std = np.nanstd(cyc_hgt_profile)
        hgt_cyc_mean = np.nanmean(cyc_hgt_profile)

    return cyc_hgt_profile, cyc_sig_profile

def decimal_years_to_calendar(excel_date):
  formatted_dates = []

  for decimal_year in excel_date:
    year = int(decimal_year)
    days_fraction = (decimal_year) - (year)
    start_date = datetime(year, 1, 1)
    end_date = datetime(year + 1, 1, 1)
    delta = end_date - start_date

    result_date = start_date + timedelta(days=(days_fraction * delta.days)-1)
    formatted_date = result_date.strftime('%Y-%m-%d')
    formatted_dates.append(formatted_date)

  return formatted_dates

def get_time_series(input_data, index_retain, hgt_cyc_range_thrd, hgt_cyc_std_thrd):
    input_data = input_data[index_retain,:]
    cycno_list = input_data[:,0]
    uniq_cycno = np.unique(cycno_list)

    ct_cyc=0
    FinalSeries=np.empty((len(uniq_cycno),8))
    FinalSeries[:]=np.nan
    for cycno in uniq_cycno[:]:

        index_cyc = cycno_list==cycno
        cyc_data = input_data[index_cyc,:]

        mjd = cyc_data[:,1].reshape(-1,1)
        lon = cyc_data[:,2].reshape(-1,1)
        lat = cyc_data[:,3].reshape(-1,1)
        hgt = cyc_data[:,4].reshape(-1,1)
        sig = cyc_data[:,5].reshape(-1,1)

        hgt, sig = kmean_water_cluster(hgt, sig, hgt_cyc_range_thrd, hgt_cyc_std_thrd)

        cyc_time = (np.nanmean(mjd)+2108-50000)/365.25 +1990
        cyc_lon = np.nanmean(lon)
        cyc_lat = np.nanmean(lat)

        cyc_hgt = np.nanmean(hgt)
        #cyc_unc_hgt = uncertainty(hgt)
        cyc_std_hgt = np.nanstd(hgt)
        cyc_sig = np.nanmean(sig)
        cyc_retain_rate = hgt.shape[0] / cyc_data.shape[0]

        FinalSeries[ct_cyc,:] = [cycno, cyc_time, cyc_lon, cyc_lat, cyc_hgt, cyc_std_hgt, cyc_sig, cyc_retain_rate]

        ct_cyc=ct_cyc+1

    return FinalSeries

def get_time_series_egm08(ip, FinalSeries):
    ## Time-series of water levels (convert to w.r.t. EGM-2008)
    N=ip((FinalSeries[:,3],FinalSeries[:,2]))
    FinalSeries[:,4]=FinalSeries[:,4]-N

    egm08_timeseries = FinalSeries[:,[0,4,5,1,2,3]]

    return egm08_timeseries

In [30]:
from scipy.interpolate import RegularGridInterpolator

# Setting in cluster refinement (Okeowo et al., 2017)
hgt_cyc_range_thrd = 5
hgt_cyc_std_thrd = 0.3

def okeowo_2017(extracted_elevations_fp, min_lat, max_lat, hgt_cyc_range_thrd=5, hgt_cyc_std_thrd=0.3, egm2008_fp="/tiger1/pdas47/tmsosPP/data/geoid/geoidegm2008grid.mat"):
  module1 = extracted_elevations_fp  # extracted path
  module2 = min_lat
  module3 = max_lat

  DataFolder2 = module1

  lat_min = float(module2)
  lat_max = float(module3)

  lat_boundary = [lat_min, lat_max]

  # please input the path of the EGM2008
  lonbp = io.loadmat(egm2008_fp)['lonbp']
  latbp = io.loadmat(egm2008_fp)['latbp']
  grid=io.loadmat(egm2008_fp)['grid']

  ip=RegularGridInterpolator(points=(latbp.flatten(),lonbp.flatten()), values=grid, bounds_error=False, fill_value=np.nan)

  # 4. Altimetry data
  try:
    input_data = np.loadtxt(DataFolder2)[:,9:]
    lat_all=input_data[:,3]
    index_lat_range = np.logical_and(lat_all>lat_boundary[0], lat_all<lat_boundary[1])
    input_data=input_data[index_lat_range,:]

    lon_all = input_data[:,2].reshape(-1,1)
    lat_all = input_data[:,3].reshape(-1,1)
    hgt_all = input_data[:,4].reshape(-1,1)
  except:
    input_data = np.loadtxt(DataFolder2)[9:]

    lat_all=input_data[3]
    index_lat_range = np.logical_and(lat_all>lat_boundary[0], lat_all<lat_boundary[1])
    input_data=input_data[index_lat_range]

    lon_all = input_data[0][2]
    lat_all = input_data[0][3]
    hgt_all = input_data[0][4]

  index_retain_iqr = iqr_deoutlier(hgt_all)
  FinalSeries_iqr = get_time_series(input_data, index_retain_iqr, hgt_cyc_range_thrd, hgt_cyc_std_thrd)

  egm08_timeseries = get_time_series_egm08(ip, FinalSeries_iqr)

  # convert to dataframe
  df = pd.DataFrame(egm08_timeseries, columns=['cycle', 'wse_egm08', 'uncertainty', 'time', 'lon', 'lat'])

  return df

In [50]:
def classify_swot_nadir_altimeter(
        roi, start_date, end_date,
        intersection_length_threshold=300, # meters. jason class altimeters require at least 300 m of intersection with the ROI.
        swot_nadir_tracks_fp = Path("/tiger1/pdas47/tmsosPP/data/swot_orbit/sph_science_nadir/swot_science_orbit_sept2015-v2_nadir.shp"),
        swot_nadir_raw_dir = Path("/tiger1/pdas47/tmsosPP/data/swot_nadir_hlee/raw"),
        swot_nadir_extracted_dir = Path("/tiger1/pdas47/tmsosPP/data/swot_nadir_hlee/extracted"),
        dask_client = None,
        force_extract = False
    ):
    # create output directory if it doesn't exist
    swot_nadir_extracted_dir.mkdir(exist_ok=True)

    # get lat-lon boundary
    gd_track = gpd.read_file(swot_nadir_tracks_fp)
    gd_track_subset = gd_track[gd_track.intersects(roi.geometry.values[0])]

    # filter by intersection length threshold
    nadir_intersects = gpd.GeoDataFrame(geometry=gd_track_subset.intersection(roi.geometry.values[0]).explode(index_parts=False)) # explode = split multi-geometry into single geometries
    nadir_intersects['intersect_length_m'] = nadir_intersects.to_crs(roi.estimate_utm_crs()).length
    nadir_intersects = nadir_intersects[nadir_intersects['intersect_length_m'] > intersection_length_threshold]
    if nadir_intersects.empty:
        raise ValueError(f"No intersections found with length greater than the intersection length threshold ({intersection_length_threshold} m).")
    nadir_intersects['pass_num'] = gd_track_subset['ID_PASS'].loc[nadir_intersects.index]
    nadir_intersects['min_lat'] = nadir_intersects.bounds['miny']
    nadir_intersects['max_lat'] = nadir_intersects.bounds['maxy']
    
    swot_nadir_timeseries_all = []
    # for each pass in ROI, extract data
    for idx, row in nadir_intersects.iterrows():
        extracted_elevations_fp = swot_nadir_extracted_dir / f"SWOT_{RESERVOIR}_{row['pass_num']}_{row['min_lat']}_{row['max_lat']}_info.txt"
        pass_num = row['pass_num']
        lat_min = row['min_lat']
        lat_max = row['max_lat']
        intersect_length = row['intersect_length_m']

        if extracted_elevations_fp.exists() and force_extract==False:  # skip if the processed file already exists
            print(f"Skipping {extracted_elevations_fp} as it already exists")
        else:
            # obtain file paths that match the pass number
            swot_nadir_fps = list(swot_nadir_raw_dir.glob(f'SWOT_GPN_*_{pass_num:03}_*.nc'))
            ipn_files = list(swot_nadir_raw_dir.glob(f'SWOT_IPN_*_{pass_num:03}_*.nc'))
            if len(ipn_files) > 0:
                swot_nadir_fps = swot_nadir_fps + ipn_files


            # filter by start and end date
            swot_nadir_fps = list(filter(
                lambda fp: pd.to_datetime(fp.name.split('_')[4], format='%Y%m%d') >= pd.to_datetime(start_date) \
                    and pd.to_datetime(fp.name.split('_')[4], format='%Y%m%d') <= pd.to_datetime(end_date),
                        swot_nadir_fps
            ))
            
            # process the files. If dask client is provided, process the files parallely
            futures = []
            if dask_client is None:
                with open(extracted_elevations_fp, "w") as text_file:
                    print(f"writing to {extracted_elevations_fp}")
                    for fp in swot_nadir_fps:
                        processed_elevation_info = process_swot_file(fp, lat_boundary=[lat_min, lat_max])

                        for elevation_line in processed_elevation_info:
                            text_file.write("%4s\t%4s\t%4s\t%4s\t%4s\t%4s\t%4s\t%4s\t%4s\t%4s\t%20.6f\t%20.6f\t%20.6f\t%20.6f\t%10.3f\n" % elevation_line)
            else:
                # dask client is provided. Process the files parallely
                futures = []
                for fp in swot_nadir_fps:
                    future = dask_client.submit(process_swot_file, fp, lat_boundary=[lat_min, lat_max])
                    futures.append(future)
                processed_elevation_infos = dask_client.gather(futures)
                
                # write the processed info into text file
                with open(extracted_elevations_fp, "w") as text_file:
                    print(f"Writing to {extracted_elevations_fp}")
                    for processed_elevation_info in processed_elevation_infos:
                        for elevation_line in processed_elevation_info:
                            text_file.write("%4s\t%4s\t%4s\t%4s\t%4s\t%4s\t%4s\t%4s\t%4s\t%4s\t%20.6f\t%20.6f\t%20.6f\t%20.6f\t%10.3f\n" % elevation_line)

        swot_nadir_timeseries = okeowo_2017(
            extracted_elevations_fp, lat_min, lat_max, 
            hgt_cyc_range_thrd, hgt_cyc_std_thrd, 
            egm2008_fp="/tiger1/pdas47/tmsosPP/data/geoid/geoidegm2008grid.mat"
        )
        swot_nadir_timeseries['lon'] = swot_nadir_timeseries['lon'].apply(lambda lon: lon - 360 if lon > 180 else lon)
        swot_nadir_timeseries['pass_num'] = pass_num
        swot_nadir_timeseries['intersect_length_m'] = intersect_length
        swot_nadir_timeseries_all.append(swot_nadir_timeseries)

    return pd.concat(swot_nadir_timeseries_all)

swot_elevations = classify_swot_nadir_altimeter(
    roi=buffered_roi,
    intersection_length_threshold=900, # meters
    start_date=start_date,
    end_date=end_date,
    dask_client=client,
    # dask_client=None,
    force_extract=True
)


ValueError: No intersections found with length greater than the intersection length threshold (900 m).

In [32]:
swot_elevations

Unnamed: 0,cycle,wse_egm08,uncertainty,time,lon,lat,pass_num,intersect_length_m
0,1.0,323.61723,0.3234,2023.570282,-114.154781,36.017235,205,1026.599558
1,2.0,324.157282,0.20185,2023.627406,-114.162073,36.015233,205,1026.599558
2,3.0,324.754739,0.15525,2023.68453,-114.161026,36.016022,205,1026.599558
3,4.0,325.150077,0.17515,2023.741654,-114.152457,36.015243,205,1026.599558
4,5.0,325.00282,0.0136,2023.798779,-114.157427,36.015859,205,1026.599558
5,6.0,323.968395,0.327,2023.855903,-114.161861,36.015561,205,1026.599558
6,7.0,324.337429,0.262891,2023.913027,-114.159431,36.014935,205,1026.599558
7,8.0,325.511604,0.171334,2023.970151,-114.153672,36.014609,205,1026.599558
8,9.0,326.083465,0.053393,2024.027276,-114.155272,36.014792,205,1026.599558
9,10.0,326.863837,0.244966,2024.0844,-114.158119,36.015075,205,1026.599558


In [33]:
elevations_avg = swot_elevations.groupby(['cycle', 'pass_num']).agg({"wse_egm08": 'mean', "time": "mean"}).reset_index()
elevations_avg

Unnamed: 0,cycle,pass_num,wse_egm08,time
0,1.0,205,323.788644,2023.570282
1,2.0,205,324.331968,2023.627406
2,3.0,205,325.176717,2023.68453
3,4.0,205,325.328648,2023.741654
4,5.0,205,325.213859,2023.798779
5,6.0,205,324.28891,2023.855903
6,7.0,205,324.574268,2023.913027
7,8.0,205,325.566767,2023.970151
8,9.0,205,326.140721,2024.027276
9,10.0,205,327.109558,2024.0844


In [34]:
from datetime import datetime, timedelta

def decimalyear_to_timestamp(time):
    year = int(time)
    rem = time - year
    base = datetime(year, 1, 1)
    result = base + timedelta(seconds=(base.replace(year=base.year + 1) - base).total_seconds() * rem)
    
    return pd.Timestamp(result)

elevations_avg['time_parsed'] = elevations_avg['time'].apply(decimalyear_to_timestamp)
elevations_avg

Unnamed: 0,cycle,pass_num,wse_egm08,time,time_parsed
0,1.0,205,323.788644,2023.570282,2023-07-28 03:39:58.538808
1,2.0,205,324.331968,2023.627406,2023-08-18 00:04:30.412524
2,3.0,205,325.176717,2023.68453,2023-09-07 20:29:02.399031
3,4.0,205,325.328648,2023.741654,2023-09-28 16:53:31.761115
4,5.0,205,325.213859,2023.798779,2023-10-19 13:18:01.781569
5,6.0,205,324.28891,2023.855903,2023-11-09 09:42:33.122664
6,7.0,205,324.574268,2023.913027,2023-11-30 06:07:04.770596
7,8.0,205,325.566767,2023.970151,2023-12-21 02:31:34.851655
8,9.0,205,326.140721,2024.027276,2024-01-10 23:35:22.899693
9,10.0,205,327.109558,2024.0844,2024-01-31 21:22:10.881432


In [35]:
elevations_avg.hvplot(
    x='time_parsed', y='wse_egm08', kind='scatter', by='pass_num'
).opts(
    title=f"SWOT NADIR altimeter: {RESERVOIR_NAME}"
)

In [36]:
roi.hvplot(color='gray') * gd_track_subset.hvplot(
    by='ID_PASS', xlim=(minx, maxx), ylim=(miny, maxy), line_width=3, cmap='viridis'
).opts(aspect='equal', xlabel='longitude', ylabel='latitude', title=f"{RESERVOIR_NAME} - NADIR Passes") * swot_elevations.hvplot(
    x='lon', y='lat', color='wse_egm08', kind='points', cmap='brg', clabel='Elevation (m)'
)

save data.

In [37]:
time_frac = swot_elevations['time']

In [38]:
swot_elevations['time'] = pd.to_datetime(decimal_years_to_calendar(time_frac))
swot_elevations_ds = swot_elevations.rename({'pass_num': 'pass_ids', 'wse_egm08': 'elevation'}, axis=1).set_index('time').to_xarray()

# add attributes
swot_elevations_ds['cycle'].attrs['long_name'] = 'Cycle number'
swot_elevations_ds['cycle'].attrs['description'] = 'Cycle number of the SWOT altimeter'
swot_elevations_ds['elevation'].attrs['long_name'] = 'Water Surface Elevation'
swot_elevations_ds['elevation'].attrs['description'] = 'Water Surface Elevation above EGM2008'
swot_elevations_ds['elevation'].attrs['units'] = 'm'
swot_elevations_ds['uncertainty'].attrs['long_name'] = 'Uncertainty in Water Surface Elevation'
swot_elevations_ds['uncertainty'].attrs['description'] = 'Uncertainty in Water Surface Elevation'
swot_elevations_ds['uncertainty'].attrs['units'] = 'm'
swot_elevations_ds['lon'].attrs['long_name'] = 'Longitude'
swot_elevations_ds['lon'].attrs['description'] = 'Longitude of the SWOT altimeter'
swot_elevations_ds['lon'].attrs['units'] = 'degrees_east'
swot_elevations_ds['lat'].attrs['long_name'] = 'Latitude'
swot_elevations_ds['lat'].attrs['description'] = 'Latitude of the SWOT altimeter'
swot_elevations_ds['lat'].attrs['units'] = 'degrees_north'
swot_elevations_ds['pass_ids'].attrs['long_name'] = 'Pass IDs of the SWOT altimeter'
swot_elevations_ds['pass_ids'].attrs['description'] = 'Pass IDs of the SWOT altimeter which represents it\'s ground tracks that intersect with the reservoir'
swot_elevations_ds['intersect_length_m'].attrs['long_name'] = 'Intersection Length'
swot_elevations_ds['intersect_length_m'].attrs['description'] = 'Length of intersection between the SWOT altimeter\'s ground track and the reservoir'
swot_elevations_ds['intersect_length_m'].attrs['units'] = 'm'

swot_elevations_ds

In [39]:
DATA_DIR = Path('/tiger1/pdas47/tmsosPP/data')
ALG_VERSION = '0.1' # h.lee. 2024 with Okeowo et al. 2017

save_fp = DATA_DIR / 'swot' / 'output' / f'{RESERVOIR}_swot_{ALG_VERSION}.nc'
swot_elevations_ds.to_netcdf(save_fp)
print(f'saved at {save_fp}')

saved at /tiger1/pdas47/tmsosPP/data/swot/output/1078_swot_0.1.nc


save in csv format

In [42]:
csv_save_fp = DATA_DIR / 'elevation' / 'swot_nadir' / f'{RESERVOIR}.csv'
csv_save_fp.parent.mkdir(exist_ok=True, parents=True)

swot_elevations_ds.to_pandas().reset_index().to_csv(csv_save_fp, index=False)
print(f"Saved at {csv_save_fp}")

Saved at /tiger1/pdas47/tmsosPP/data/elevation/swot_nadir/1078.csv


end of notebook.