Goal: pull out 1 buoy record that has ssc&chla and search for matching icesat2 granules

To do:
- Load info for 1 relevant station
- Find all matching atl granules and save times from file names
- Compare times to see if any matching buoy times

In [1]:
%pip install --quiet erddapy

Note: you may need to restart the kernel to use updated packages.


In [2]:
from erddapy import ERDDAP
import pandas as pd
import numpy as np
from sliderule import sliderule, icesat2, earthdata
from datetime import datetime, timezone, timedelta

pd.set_option('display.max_columns', None)
# pd.set_option('display.max_rows', None)

sliderule.init()

Client (version (4, 5, 3)) is out of date with the server (version (4, 6, 2))


False

In [3]:
def fname2datetime(fname):
    y = int(fname[6:10])
    m = int(fname[10:12])
    d = int(fname[12:14])
    H = int(fname[14:16])
    M = int(fname[16:18])
    S = int(fname[18:20])

    t = datetime(y,m,d,H,M,S, tzinfo=timezone.utc)
    return t

def buoy_bound_box(lat,lon,buffer_km):
    # define a buffer distance around the buoy to search for icesat-2 data
    lat_buff = buffer_km/111 # convert buffer distance to frac of 1 deg lat
    lon_buff = buffer_km/(111*np.cos(lat*np.pi/180)) # convert buffer distance to frac of 1 deg lon
    # define bounding box around the buoy (WSEN)
    # example: bbox = [-108.3, 39.2, -107.8, 38.8]
    # bbox = [lon-lon_buff,lat+lat_buff,lon+lon_buff,lat-lat_buff]
    # region = sliderule.toregion(bbox)
    minx = lon - lon_buff
    miny = lat - lat_buff
    maxx = lon + lon_buff
    maxy = lat + lat_buff

    poly = [{'lon': minx, 'lat': miny},
            {'lon': maxx, 'lat': miny},
            {'lon': maxx, 'lat': maxy},
            {'lon': minx, 'lat': maxy},
            {'lon': minx, 'lat': miny}] # Closing the loop by repeating the first point
    return poly


In [4]:
# search for all possible station
e = ERDDAP(
    server='https://erddap-goldcopy.dataexplorer.oceanobservatories.org/erddap/', 
    protocol="tabledap", # Want table data (not a grid map of data) 
    response="csv") #in csv format for pandas


In [5]:
# dont run this cell if you can just load "all_endurance_array_sites"
search_url = e.get_search_url(search_for='radiation', response="csv")# search for a match to our time
temp = pd.read_csv(search_url)["Dataset ID"].unique() # try to save the data
# # make a df for all the possible sites, with time, lat and lon
FF = pd.DataFrame(temp,columns=['sites'])
FF["time_coverage_start"] = ''
FF["time_coverage_end"] = ''
FF["lat"] = np.nan
FF["lon"] = np.nan
FF["check_sum"] = 1

for jj in range(len(FF)):
    if jj % 1000 ==0:
        print(jj)# give a printout every 1000 for my sanity
    
    # make the info URL for this site for this time range
    e.dataset_id = FF['sites'][jj]
    info_url = e.get_info_url()

    # make a dataframe for all the metadata for this station
    # some of these urls are bogus - if fail then fill with NaNs
    try:
        df = pd.read_csv(info_url)# make a dataframe for all the metadata for this station
    except:
        FF.loc[jj,"check_sum"] = np.nan
        continue

    # some metadata is missing - leave blank if any is empty
    try:
        FF.loc[jj,"time_coverage_start"] = df.loc[df['Attribute Name']=='time_coverage_start', 'Value'].item()
    except:
        pass

    try:
        FF.loc[jj,"time_coverage_end"] = df.loc[df['Attribute Name']=='time_coverage_end', 'Value'].item()
    except:
        pass

    try:
        FF.loc[jj,"lat"] = df.loc[df['Attribute Name']=='lat', 'Value'].item()
    except:
        pass

    try:
        FF.loc[jj,"lon"] = df.loc[df['Attribute Name']=='lon', 'Value'].item()
    except:
        pass



FF = FF.dropna(subset="check_sum")  
FF.reset_index(drop=True, inplace=True)

# only look at sites with GPS location
FF["lat"] = pd.to_numeric(FF["lat"])
FF["lon"] = pd.to_numeric(FF["lon"])
FF = FF[~np.isnan(FF['lat'])]
FF.reset_index(drop=True, inplace=True)

FF.to_pickle('all_endurance_array_sites_radiation')


0


  FF.loc[jj,"lat"] = df.loc[df['Attribute Name']=='lat', 'Value'].item()
  FF.loc[jj,"lon"] = df.loc[df['Attribute Name']=='lon', 'Value'].item()


In [6]:
# FF = pd.read_pickle('all_endurance_array_sites_radiation')

len(FF)

457

In [None]:
# loop through each buoy asset:
for jj in range(10):
    # define a search region around the buoy 
    lat = FF['lat'][jj]
    lon = FF['lon'][jj]
    poly = buoy_bound_box(lat,lon,0.1)

    # search CMR for ATL03 granules in the bounding box
    grns = earthdata.cmr(short_name="ATL03",
                         polygon=poly,
                         version='006')
    # save the times for each granule as a datetime object
    icesat_times = [fname2datetime(fname) for fname in grns]

    # now check if buoy data exists for these granules
    e.dataset_id = FF['sites'][jj]

    for t in icesat_times:
        # add a time buffer (+/-1 hours) to search for relevant buoy data for each granule,
        t_start = (t-timedelta(hours=1)).strftime("%Y-%m-%dT%H:%M:%S+00:00")
        t_end = (t+timedelta(hours=1)).strftime("%Y-%m-%dT%H:%M:%S+00:00")
        e.constraints = {"time>=": t_start,
                        "time<=": t_end}

        # try to download the associated buoy data
        try:
            buoy = e.to_pandas(parse_dates=True)
        except:
            continue
        # if buoy data exists, download the ATL03 photons in the bounding box at this time
        print('downloading ATL03 for '+ e.dataset_id + ' ' + str(t_start))
        # Build ATL03 Request
        poly = buoy_bound_box(lat,lon,1)
        parms = {"poly": poly,
                 "t0": t_start,
                 "t1": t_end,
                 "srt": icesat2.SRT_OCEAN,
                 "track": 1,
                 "beam": 'gt1l',
                }      
        atl_gdb = icesat2.atl03sp(parms)
        print('no. of photons: '+len(atl_gdb))
