Goal: pull out one buoy record that has both SSC and chlorophyll-a (chl-a) data, and search for matching ICESat-2 granules

To do:
- Load info for 1 relevant station
- Find all matching ATL (ICESat-2) granules and save the times parsed from their file names
- Compare those times with the buoy record to see whether any of them match buoy times

In [1]:
%pip install --quiet "sliderule @ git+https://github.com/SlideRuleEarth/sliderule#subdirectory=clients/python"

Note: you may need to restart the kernel to use updated packages.


In [2]:
%pip install --quiet erddapy

Note: you may need to restart the kernel to use updated packages.


In [1]:
from erddapy import ERDDAP
import pandas as pd
import numpy as np
import glob
from sliderule import sliderule, icesat2, earthdata
from datetime import datetime, timezone, timedelta

# Display options: passing None removes the limit, so every column and every
# row of a DataFrame is rendered when it is displayed.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

# Initialise the SlideRule client with verbose output turned off.
sliderule.init(verbose=False)

True

In [2]:
def fname2datetime(fname):
    """Build a UTC datetime from the timestamp digits embedded in a file name.

    Characters 6 through 19 of ``fname`` are read as ``YYYYMMDDHHMMSS``:
    the year from ``fname[6:10]``, followed by two characters each for
    month, day, hour, minute and second.

    Parameters
    ----------
    fname : str
        File name containing the timestamp at the fixed positions above.

    Returns
    -------
    datetime
        Timezone-aware datetime with ``tzinfo=timezone.utc``.
    """
    # (start, stop) slice bounds for year, month, day, hour, minute, second
    field_bounds = ((6, 10), (10, 12), (12, 14), (14, 16), (16, 18), (18, 20))
    fields = [int(fname[lo:hi]) for lo, hi in field_bounds]
    return datetime(*fields, tzinfo=timezone.utc)

def buoy_bound_box(lat,lon,buffer_km):
    """Return a closed rectangular polygon centred on ``(lat, lon)``.

    The rectangle extends ``buffer_km`` from the centre in each direction.
    The distance is converted to degrees using 111 km per degree, with the
    longitude extent additionally divided by ``cos(lat)``.

    Parameters
    ----------
    lat, lon : float
        Centre of the box in degrees.
    buffer_km : float
        Distance from the centre to each edge, in kilometres.

    Returns
    -------
    list of dict
        Five ``{'lon': ..., 'lat': ...}`` vertices; the last repeats the
        first so the ring is closed.
    """
    # half-height of the box in degrees of latitude
    half_height = buffer_km/111
    # half-width of the box in degrees of longitude at this latitude
    half_width = buffer_km/(111*np.cos(lat*np.pi/180))

    west, east = lon - half_width, lon + half_width
    south, north = lat - half_height, lat + half_height

    # corners in the order SW, SE, NE, NW, then SW again to close the loop
    ring = ((west, south), (east, south), (east, north), (west, north), (west, south))
    return [{'lon': x, 'lat': y} for x, y in ring]


In [29]:
# load a list of all the relevant ERDDAPs and their urls
# NOTE: unpickling can execute arbitrary code -- only load a pickle file you trust.
FF = pd.read_pickle("labeled_relevant_stations.pkl")

# there are lots of different searches to do...
# this one keeps rows flagged radiation == True and buoy == False,
# and leaves out two ERDDAP servers
keep = (
    (FF.url != 'https://gcoos5.geos.tamu.edu/erddap/')
    & (FF.radiation == True)
    & (FF.buoy == False)
    & (FF.url != 'https://gliders.ioos.us/erddap/')
)
# .copy() makes FF an independent frame, so the column assignments below act on
# FF itself and do not raise SettingWithCopyWarning on a filtered slice
FF = FF.loc[keep].copy().reset_index(drop=True)

# the geospatial bounds are converted to numeric dtype before use
for col in ("geospatial_lat_min", "geospatial_lon_min",
            "geospatial_lat_max", "geospatial_lon_max"):
    FF[col] = pd.to_numeric(FF[col])

# flag set to True by the search loop when ICESat-2 photons are found for a row
FF["photon_data"] = False
# FF

In [30]:
#FF.url.unique()

array(['https://erddap.observations.voiceoftheocean.org/erddap/',
       'https://erddap.ogsl.ca/erddap/',
       'https://coastwatch.pfeg.noaa.gov/erddap/',
       'https://erddap.bco-dmo.org/erddap/',
       'https://erddap.sensors.ioos.us/erddap/',
       'http://erddap.cencoos.org/erddap/',
       'http://www.neracoos.org/erddap/',
       'https://pae-paha.pacioos.hawaii.edu/erddap/',
       'http://osmc.noaa.gov/erddap/', 'http://dap.onc.uvic.ca/erddap/',
       'https://erddap-goldcopy.dataexplorer.oceanobservatories.org/erddap/',
       'https://upwell.pfeg.noaa.gov/erddap/',
       'https://ferret.pmel.noaa.gov/pmel/erddap',
       'https://polarwatch.noaa.gov/erddap/',
       'https://www.smartatlantic.ca/erddap/',
       'https://erddap.griidc.org/erddap/',
       'https://cioosatlantic.ca/erddap/'], dtype=object)

In [5]:
# search parameters
search_hrs = 3  # time window: +/- this many hours around the dataset start time
search_km = 3   # spatial window: buffer distance passed to buoy_bound_box

# loop through each dataset listed in FF:
for jj in range(len(FF)):
    if jj % 10 == 0:
        print('working on ' + str(jj+1)+'/'+str(len(FF)))

    # set up erddap request:
    e = ERDDAP(server=FF.loc[jj, 'url'],
               protocol="tabledap",
               response="csv")
    e.dataset_id = FF.loc[jj, 'sites']
    # try to download the associated buoy data - skip this entry if it fails
    try:
        buoy = e.to_pandas()
    except Exception:
        # best-effort download; catching Exception rather than using a bare
        # `except:` lets KeyboardInterrupt still stop this long-running loop
        continue

    # define a search region around the dataset's minimum lat/lon corner
    lat = FF.loc[jj, 'geospatial_lat_min']
    lon = FF.loc[jj, 'geospatial_lon_min']
    poly = buoy_bound_box(lat,lon,search_km)

    # add a time buffer to search for relevant sat data
    # utc=True parses a trailing 'Z' on any Python version, treats naive
    # timestamps as UTC and converts other offsets to UTC, so the literal 'Z'
    # written by strftime below is always correct
    t = pd.to_datetime(FF.loc[jj, 'time_coverage_start'], utc=True)
    if pd.isna(t):
        # no usable start time for this dataset: skip it instead of aborting the loop
        print('skipping ' + str(e.dataset_id) + ': no time_coverage_start')
        continue
    t_start = (t-timedelta(hours=search_hrs)).strftime("%Y-%m-%dT%H:%M:%SZ")
    t_end = (t+timedelta(hours=search_hrs)).strftime("%Y-%m-%dT%H:%M:%SZ")

    # Build ATL03 Request
    parms = {"poly": poly,
             "t0": t_start,
             "t1": t_end,
             "track": 0,
             "len": 20.0,
             "pass_invalid": True,
             "cnf": -2, # returns all photons
             "srt": icesat2.SRT_OCEAN}
    atl_gdb = icesat2.atl03sp(parms)

    # when photons come back, flag the row and save both the photon table and the buoy data
    if len(atl_gdb)>0:
        FF.loc[jj,"photon_data"] = True
        atl_gdb.to_pickle('icesat2_'+str(e.dataset_id)+'.pkl')
        print('success '+ e.dataset_id + ' ' + str(t_start))
        print('no. of photons: '+str(len(atl_gdb)))
        buoy.to_csv('data_'+str(e.dataset_id)+'.csv')


working on 1331/1576
working on 1341/1576
working on 1351/1576
working on 1361/1576
working on 1371/1576
working on 1381/1576
success WS22337_WS22337_WS22337_Stn_TB1 2022-12-06T16:31:24Z
no. of photons: 73356
working on 1391/1576
success WS22337_WS22337_WS22337_Stn_TB2 2022-12-06T17:09:35Z
no. of photons: 135943
working on 1401/1576
working on 1411/1576
working on 1421/1576
working on 1431/1576
working on 1441/1576
working on 1451/1576
working on 1461/1576
working on 1471/1576
working on 1481/1576
working on 1491/1576
working on 1501/1576
working on 1511/1576
working on 1521/1576
working on 1531/1576
working on 1541/1576
working on 1551/1576
working on 1561/1576
working on 1571/1576


In [13]:
# match BCO-DMO files (each DF is a little different...)
# NOTE: this rebinds FF (a DataFrame in the station-search cells) to a list of file names.
# glob.glob returns matches in arbitrary order, so sort them: otherwise the
# start index below would skip a different set of files from run to run.
FF = sorted(glob.glob('bcodmo*.pkl'))
# search parameters
search_hrs = 3
search_km = 3
# index of the first file to process; earlier files are skipped (set to 0 to process all)
first_file = 5

# loop through each file:
for jj in range(first_file, len(FF)):
    print('working on ' + str(jj+1)+'/'+str(len(FF)))

    # load pre-downloaded dataframe with time and lat/lon
    # NOTE: unpickling can execute arbitrary code -- only load files you trust.
    e = pd.read_pickle(FF[jj])

    # search each cast entry; rows are iterated by position, so this works
    # whatever index the pickled frame carries
    for cast in e[['latitude', 'longitude', 'time']].itertuples(index=False):
        # define a search region around the cast
        lat = float(cast.latitude)
        lon = float(cast.longitude)
        poly = buoy_bound_box(lat,lon,search_km)

        # add a time buffer to search for relevant sat data for each cast
        # utc=True parses a trailing 'Z' on any Python version, treats naive
        # timestamps as UTC and converts other offsets to UTC, so the literal
        # 'Z' written by strftime below is always correct
        t = pd.to_datetime(cast.time, utc=True)
        if pd.isna(t):
            # cast has no usable time: skip it instead of aborting the loop
            continue
        t_start = (t-timedelta(hours=search_hrs)).strftime("%Y-%m-%dT%H:%M:%SZ")
        t_end = (t+timedelta(hours=search_hrs)).strftime("%Y-%m-%dT%H:%M:%SZ")

        # Build ATL03 Request
        parms = {"poly": poly,
                 "t0": t_start,
                 "t1": t_end,
                 "track": 0,
                 "len": 20.0,
                 "pass_invalid": True,
                 "cnf": -2, # returns all photons
                 "srt": icesat2.SRT_OCEAN}
        atl_gdb = icesat2.atl03sp(parms)

        # matches are only reported here; the photon table is not written to disk
        if len(atl_gdb)>0:
            print('success '+ FF[jj][:-4] + ' ' + t_start)
            print('no. of photons: '+str(len(atl_gdb)))


working on 6/10
working on 7/10
working on 8/10
working on 9/10
working on 10/10
