Goal: pull out one buoy record that has SSC and chl-a data, then search for matching ICESat-2 granules.

To do:
- Load info for 1 relevant station
- Find all matching ATL03 granules and save the acquisition times parsed from their file names
- Compare the granule times with the buoy record times to see whether any of them overlap

In [1]:
%pip install --quiet "sliderule @ git+https://github.com/SlideRuleEarth/sliderule#subdirectory=clients/python"

Note: you may need to restart the kernel to use updated packages.


In [1]:
%pip install --quiet erddapy

Note: you may need to restart the kernel to use updated packages.


In [1]:
from erddapy import ERDDAP
import pandas as pd
import numpy as np
from sliderule import sliderule, icesat2, earthdata
from datetime import datetime, timezone, timedelta

# show every column when a DataFrame is displayed (None removes the column limit)
pd.set_option('display.max_columns', None)
# optional: also remove the row limit (left disabled)
# pd.set_option('display.max_rows', None)

# initialise the SlideRule client with verbose output switched off
# NOTE(review): the recorded output of this cell reports client version 4.5.3 against
# server version 4.8.3 -- consider upgrading the client before a long run.
sliderule.init(verbose=False)

Client (version (4, 5, 3)) is out of date with the server (version (4, 8, 3))


False

In [2]:
def fname2datetime(fname):
    """Parse the UTC timestamp embedded in a granule file name.

    Characters 6-19 of ``fname`` are read as ``YYYYMMDDHHMMSS`` (i.e. the
    14 digits that follow a 6-character prefix such as ``ATL03_``).

    Parameters
    ----------
    fname : str
        Granule file name containing the timestamp at fixed positions.

    Returns
    -------
    datetime.datetime
        Timezone-aware datetime (UTC) built from the parsed fields.

    Raises
    ------
    ValueError
        If any of the fixed-position fields is not an integer or the
        fields do not form a valid date/time.
    """
    # consecutive slice boundaries for year, month, day, hour, minute, second
    edges = (6, 10, 12, 14, 16, 18, 20)
    fields = [int(fname[lo:hi]) for lo, hi in zip(edges[:-1], edges[1:])]
    return datetime(*fields, tzinfo=timezone.utc)

def buoy_bound_box(lat,lon,buffer_km):
    """Build a closed rectangular polygon centred on a buoy position.

    Parameters
    ----------
    lat, lon : float
        Buoy latitude and longitude in degrees.
    buffer_km : float
        Distance (km) from the buoy to each edge of the rectangle.

    Returns
    -------
    list of dict
        Five ``{'lon': ..., 'lat': ...}`` vertices ordered SW, SE, NE, NW
        and back to SW, so the ring is explicitly closed.
    """
    # one degree of latitude is taken as 111 km; a degree of longitude is
    # scaled by cos(latitude) so the box keeps the same width in km
    dlat = buffer_km/111
    dlon = buffer_km/(111*np.cos(lat*np.pi/180))

    west, east = lon - dlon, lon + dlon
    south, north = lat - dlat, lat + dlat

    # corner sequence; the first corner is repeated at the end to close the loop
    ring = [(west, south), (east, south), (east, north), (west, north), (west, south)]
    return [{'lon': x, 'lat': y} for x, y in ring]


In [3]:
# load a list of all the relevant ERDDAPs and their urls
# NOTE(review): pd.read_pickle unpickles arbitrary Python objects, so this file must
# come from a trusted source. The path is relative to the notebook's working directory.
FF = pd.read_pickle("labeled_relevant_stations.pkl")

The next 2 cells contain the different searches that I've done:
1. Any buoy station with photo (phot) or radiation (rad) data
2. All of the GCOOS CTD profiles

In [4]:
# Select the buoy stations that carry radiation and/or photo data, discard the
# stations whose PAR data is measured in air (or is otherwise unusable), renumber
# the rows, convert the bounding coordinates to numbers and add a flag column that
# the granule search later sets to True when photon data is found.
FF = (
    FF[FF["buoy"].eq(True) & (FF["radiation"].eq(True) | FF["photo"].eq(True))]
    .iloc[19:]                               # skip the first 19 filtered rows (positional)
    .drop(index=[34,35,36,37,39,40])         # drop by original index label
    .reset_index(drop=True)
    .assign(
        geospatial_lat_min=lambda d: pd.to_numeric(d["geospatial_lat_min"]),
        geospatial_lon_min=lambda d: pd.to_numeric(d["geospatial_lon_min"]),
        geospatial_lat_max=lambda d: pd.to_numeric(d["geospatial_lat_max"]),
        geospatial_lon_max=lambda d: pd.to_numeric(d["geospatial_lon_max"]),
        photon_data=False,
    )
)

# only trying to get a few extra stations missed in the original search (10/28/2024)
# idx = list([537,538,556,557])
# FF = FF.iloc[idx]
FF

In [None]:
# Search parameters
search_hrs = 3  # half-width (hours) of the time window searched around each granule time
search_km = 3   # buffer distance (km) passed to buoy_bound_box for the search region
time_fmt = "%Y-%m-%dT%H:%M:%SZ"  # UTC timestamp format used for the buoy and ATL03 requests

# Loop through each buoy asset. For every ATL03 granule that crosses the box around
# the buoy, check whether the buoy reported data within +/- search_hrs of the granule
# time; if so, request the ATL03 photons and flag the station in FF["photon_data"].
# (for a quick test run, restrict the loop to a few rows, e.g. range(4,7))
for jj in range(len(FF)):
    print('working on ' + str(jj+1)+'/'+str(len(FF)))
    # define a search region around the buoy; it depends only on the station, so it
    # is built once here and reused for both the CMR search and the ATL03 request
    lat = FF.loc[jj, 'geospatial_lat_min']
    lon = FF.loc[jj, 'geospatial_lon_min']
    poly = buoy_bound_box(lat,lon,search_km)

    # search CMR for ATL03 granules in the bounding box
    grns = earthdata.cmr(short_name="ATL03",
                         polygon=poly,
                         version='006')
    # save the time parsed from each granule file name as a datetime object
    icesat_times = [fname2datetime(fname) for fname in grns]

    # now check if buoy data exists for these granules
    e = ERDDAP(server=FF.loc[jj, 'url'],
               protocol="tabledap",
               response="csv")
    e.dataset_id = FF.loc[jj, 'sites']

    for t in icesat_times:
        # add a time buffer (+/- search_hrs hours) to search for relevant buoy data
        # for each granule
        t_start = (t-timedelta(hours=search_hrs)).strftime(time_fmt)
        t_end = (t+timedelta(hours=search_hrs)).strftime(time_fmt)
        e.constraints = {"time>=": t_start,
                        "time<=": t_end}

        # try to download the associated buoy data; a failed request is treated as
        # "no buoy data for this window" and the granule is skipped.
        # Catch Exception rather than using a bare except so that a kernel
        # interrupt (KeyboardInterrupt) still stops this long-running loop.
        try:
            buoy = e.to_pandas()
        except Exception:
            continue
        # a successful request that returns no rows is not a match either
        if buoy.empty:
            continue

        # buoy data exists: download the ATL03 photons in the bounding box at this time
        # Build ATL03 Request
        parms = {"poly": poly,
                 "t0": t_start,
                 "t1": t_end,
                 "track": 0,
                 "len": 20.0,
                 "pass_invalid": True,
                 "cnf": -2, # returns all photons
                 "srt": icesat2.SRT_OCEAN}
        atl_gdb = icesat2.atl03sp(parms)

        if len(atl_gdb)>0:
            FF.loc[jj,"photon_data"] = True
            # atl_gdb.to_pickle('icesat2_'+str(e.dataset_id)+'_'+str(t_start)+'.pkl')
            print('success '+ e.dataset_id + ' ' + str(t_start))
            print('no. of photons: '+str(len(atl_gdb)))
            # buoy.to_csv('data_'+str(e.dataset_id)+'_'+str(t_start)+'.csv')


working on 1/1576
working on 2/1576
working on 3/1576
working on 4/1576
working on 5/1576
working on 6/1576
working on 7/1576
working on 8/1576
working on 9/1576
working on 10/1576
working on 11/1576
working on 12/1576
working on 13/1576
working on 14/1576
working on 15/1576
working on 16/1576
working on 17/1576
working on 18/1576
working on 19/1576
working on 20/1576
working on 21/1576
working on 22/1576
working on 23/1576
