## imports

In [1]:
import argparse
import icelakes
from icelakes.utilities import encedc, decedc
from icelakes.nsidc import download_granule, edc
from icelakes.detection import read_atl03 #, detect_lakes
from icelakes.detection import make_mframe_df, find_flat_lake_surfaces, get_densities_and_2nd_peaks,\
                               merge_lakes, check_lake_surroundings, calculate_remaining_densities, prnt


## parsing arguments from command line

In [2]:
parser = argparse.ArgumentParser(description='Test script to print some stats for a given ICESat-2 ATL03 granule.')
parser.add_argument('--granule', type=str, default='ATL03_20210715182907_03381203_005_01.h5',
                    help='The producer_id of the input ATL03 granule')
parser.add_argument('--polygon', type=str, default='geojsons/jakobshavn_small.geojson',
                    help='The file path of a geojson file for spatial subsetting')
parser.add_argument('--IS2datadir', type=str, default='IS2data',
                    help='The directory into which to download ICESat-2 granules')
parser.add_argument('--download_gtxs', type=str, default='all',
                    help='String value or list of gtx names to download, also accepts "all"')

# set arguments as class for now, to run in jupyter
if parser.prog == 'ipykernel_launcher.py':
    class Args:
        granule = 'ATL03_20210715182907_03381203_005_01.h5'
        polygon = 'geojsons/jakobshavn_small.geojson'
        IS2datadir =  'IS2data'
        download_gtxs = 'all'
    args=Args()
else:
    args = parser.parse_args()

In [3]:
# from icelakes.nsidc import shp2geojson
# shp2geojson('shapefiles/weird_shape_test.shp')

## download data from NSIDC

In [4]:
# # args.polygon = 'geojsons/jakobshavn_small.geojson'
# input_filename = download_granule(args.granule, args.download_gtxs, args.polygon, args.IS2datadir, decedc(edc().u), decedc(edc().p))

In [5]:
# Testing shortcut: point directly at a granule file that is already on disk, so the NSIDC
# download does not have to be repeated on every run.
# NOTE(review): hard-coded path - presumably the file produced by an earlier download_granule()
# call for the default granule; confirm it exists before running the cells below.
input_filename = 'IS2data/processed_ATL03_20210715182907_03381203_005_01.h5'

In [6]:
import os
import h5py
import datetime
import traceback
import pandas as pd
import numpy as np
from sklearn.neighbors import KDTree
from scipy.stats import binned_statistic
from scipy.signal import find_peaks
import matplotlib.pylab as plt
from cmcrameri import cm as cmc
from mpl_toolkits.axes_grid1 import make_axes_locatable
from icelakes.utilities import convert_time_to_string
pd.set_option('mode.chained_assignment', 'raise')


##########################################################################################
def detect_lakes(photon_data, gtx, ancillary, polygon, verbose=False):
    """Run the lake detection steps on one ground track and return the lakes that were found.

    The steps are: aggregate photons to major frames, flag flat major frames, compute photon
    densities / second peaks for the flat ones, merge neighbouring lake segments, check and
    extend the lake edges, compute the remaining densities, and finally wrap every detected
    lake in a ``melt_lake`` object.

    Parameters
    ----------
    photon_data : mapping
        Photon-level data frames keyed by ground track name; ``photon_data[gtx]`` is used.
    gtx : str
        Name of the ground track to process (key into ``photon_data``).
    ancillary : mapping
        Granule metadata; ``ancillary['gtx_strength_dict'][gtx]`` is printed here, and the
        whole mapping is handed on to the helper functions and to ``melt_lake.add_data``.
    polygon : str
        Path of the geojson file, passed through to ``melt_lake.add_data``.
    verbose : bool, optional
        Forwarded as the print/debug flag of the density and merging steps.

    Returns
    -------
    list
        One ``melt_lake`` per detected lake. The list is empty when no lakes are found, so the
        result can always be appended to another list with ``+=``.
    """

    # get the data frame for the gtx and aggregate info at major frame level
    df = photon_data[gtx]
    df_mframe = make_mframe_df(df)

    # get all the flat segments and select
    print('\n-----------------------------------------------------------------------------\n')
    print('PROCESSING GROUND TRACK: %s (%s)' % (gtx, ancillary['gtx_strength_dict'][gtx]))
    print('---> finding flat surfaces', end=' ')
    df_mframe = find_flat_lake_surfaces(df_mframe, df)
    print('(%i / %i were flat)' % (df_mframe.is_flat.sum(), df_mframe.is_flat.count()))
    df_selected = df_mframe[df_mframe.is_flat]

    # calculate densities and find second peaks (where surface is flat)
    print('---> calculating densities and looking for second peaks')
    get_densities_and_2nd_peaks(df, df_mframe, df_selected, gtx, ancillary, print_results=verbose)
    print('(%i / %i pass lake quality test)' % (df_mframe.lake_qual_pass.sum(), df_mframe.lake_qual_pass.count()))

    print('---> merging lake segments iteratively')
    df_lakes = merge_lakes(df_mframe, print_progress=verbose, debug=verbose)
    if df_lakes is None:
        # Nothing to merge: return an empty list rather than a (None, df_mframe, df) tuple,
        # which a caller doing `lake_list += detect_lakes(...)` would unpack into its lake list.
        return []
    prnt(df_lakes)

    print('---> checking lake edges and extending lakes if they match')
    df_lakes = check_lake_surroundings(df_mframe, df_lakes)
    prnt(df_lakes)

    print('---> calculating remaining photon densities')
    calculate_remaining_densities(df, df_mframe, df_lakes, gtx, ancillary)

    # wrap every remaining lake (one row of df_lakes each) in a melt_lake object
    thelakes = []
    if df_lakes is not None:
        for i in range(len(df_lakes)):
            lakedata = df_lakes.iloc[i]
            thislake = melt_lake(lakedata.mframe_start, lakedata.mframe_end, lakedata.surf_elev)
            thislake.add_data(df, df_mframe, gtx, ancillary, polygon)
            thelakes.append(thislake)

    return thelakes


##########################################################################################
class melt_lake:
    """A single detected lake segment along one ground track.

    An instance is created from the first / last major frame of the lake and the elevation of
    its main (surface) peak; `add_data` then attaches the photon and major frame data plus
    descriptive metadata.
    """

    def __init__(self, mframe_start, mframe_end, main_peak):
        """Store the major frame extent (cast to int) and the main peak elevation of the lake."""
        self.mframe_start = int(mframe_start)
        self.mframe_end = int(mframe_end)
        self.main_peak = main_peak

    
    #-------------------------------------------------------------------------------------
    def add_data(self, df, df_mframe, gtx, ancillary, polygon):
        """Attach data and metadata for this lake.

        Parameters
        ----------
        df : pandas.DataFrame
            Photon-level data of the ground track; must have an 'mframe' column.
        df_mframe : pandas.DataFrame
            Major-frame-level data indexed by major frame number. The columns 'dt', 'lat',
            'lon', 'h_2nd_returns', 'xatc_2nd_returns' and 'proms_2nd_returns' are read here.
        gtx : str
            Ground track name, used as key into the per-beam dictionaries of `ancillary`.
        ancillary : mapping
            Must provide 'granule_id', 'rgt', 'gtx_beam_dict', 'gtx_strength_dict',
            'cycle_number' and 'sc_orient'.
        polygon : str
            Path of the geojson file; its base name without '.geojson' becomes `polygon_name`.
        """
        
        # useful metadata
        self.granule_id = ancillary['granule_id']
        self.rgt = ancillary['rgt']
        self.gtx = gtx
        self.polygon_filename = polygon
        # base name of the polygon file; only strip the suffix if it is actually there
        # (slicing up to str.find() would cut off the last character when it is missing)
        polygon_name = polygon[polygon.rfind('/')+1:]
        if polygon_name.endswith('.geojson'):
            polygon_name = polygon_name[:-len('.geojson')]
        self.polygon_name = polygon_name
        self.beam_number = ancillary['gtx_beam_dict'][self.gtx]
        self.beam_strength = ancillary['gtx_strength_dict'][self.gtx]
        self.cycle_number = ancillary['cycle_number']
        self.sc_orient = ancillary['sc_orient']
        
        # add the data frames at the photon level and at the major frame level
        # (copies, restricted to the major frames mframe_start..mframe_end inclusive)
        self.photon_data = df[(df['mframe'] >= self.mframe_start) & (df['mframe'] <= self.mframe_end)].copy()
        self.mframe_data = df_mframe[(df_mframe.index >= self.mframe_start) & (df_mframe.index <= self.mframe_end)].copy()
        self.date_time = convert_time_to_string(self.mframe_data['dt'].mean())
        self.photon_data.reset_index(inplace=True)
        
        # compile the second returns in simple arrays
        def flatten_inner(column):
            # concatenate the per-major-frame lists, leaving out the first two and last two
            # major frames of the lake extent
            per_mframe = list(self.mframe_data[column])[2:-2]
            return np.array([value for values in per_mframe for value in values])

        self.detection_2nd_returns = {'h': flatten_inner('h_2nd_returns'),
                                      'xatc': flatten_inner('xatc_2nd_returns'),
                                      'prom': flatten_inner('proms_2nd_returns')}
        
        # add general lat/lon info for the whole lake
        # self.detection_quality = np.sum(self.mframe_data['quality_summary']) / (len(self.mframe_data) - 4)
        self.lat = self.mframe_data['lat'].mean()
        self.lat_min = self.mframe_data['lat'].min()
        self.lat_max = self.mframe_data['lat'].max()
        self.lat_str = '%.5f°N'%(self.lat) if self.lat>=0 else '%.5f°S'%(-self.lat)
        self.lon = self.mframe_data['lon'].mean()
        self.lon_min = self.mframe_data['lon'].min()
        self.lon_max = self.mframe_data['lon'].max()
        self.lon_str = '%.5f°E'%(self.lon) if self.lon>=0 else '%.5f°W'%(-self.lon)
        
        # get the ice sheet and the melt season
        # (northern hemisphere -> 'GrIS' with the calendar year as season; otherwise 'AIS' with a
        #  season spanning two years, labelled 'YYYY-YY' and switching over after June)
        self.ice_sheet = 'GrIS' if self.lat>=0 else 'AIS'
        if self.ice_sheet == 'GrIS':
            meltseason = self.date_time[:4]
        else:
            thismonth = int(self.date_time[5:7])
            thisyear = int(self.date_time[:4])
            first_year = thisyear if thismonth > 6 else thisyear - 1
            # zero-pad the two-digit end year so that e.g. 2099/2100 gives '2099-00', not '2099-0'
            meltseason = '%d-%02d' % (first_year, (first_year + 1) % 100)
        self.melt_season = meltseason
        
        # quick-look link to OpenAltimetry
        mptyp = 'arctic' if self.lat>=0 else 'antarctic'
        lake_oa_url = 'https://openaltimetry.org/data/icesat2/elevation?product=ATL03&zoom_level=7&tab=photon&'
        lake_oa_url += 'date={date}&minx={minx}&miny={miny}&maxx={maxx}&maxy={maxy}&tracks={track}&mapType={mptyp}&beams={beam_nr}'.format(
                date=self.date_time[:10], minx=self.lon_min, miny=self.lat_min, maxx=self.lon_max, maxy=self.lat_max,
                track=self.rgt, mptyp=mptyp, beam_nr=self.beam_number)
        self.oaurl = lake_oa_url

    #-------------------------------------------------------------------------------------
    def calculate_extent(self):
        """Placeholder, not implemented yet; returns None."""
        
        return
    
    
    #-------------------------------------------------------------------------------------
    def calculate_detection_summary(self):
        """Placeholder, not implemented yet; returns None."""
        
        return
    
        
    #-------------------------------------------------------------------------------------
    def plot_detected(self, fig_dir='figs', verbose=False, min_width=100, min_depth=1.5):
        """Derive the elevation limits for a plot of this lake (work in progress).

        Requires `add_data` to have been called. No figure is drawn yet: the method only
        computes depth, segment length and plot limits from the stored data and returns None.
        The parameters `fig_dir`, `verbose`, `min_width` and `min_depth` are not used yet.
        """
        
        h_2nds = self.detection_2nd_returns['h']
        xatc_2nds = self.detection_2nd_returns['xatc']
        if len(h_2nds) == 0:
            # no second returns to derive a depth / length from (np.min would raise on empty input)
            return

        # the lake surface elevation is the main peak this object was created with
        surf_elev = self.main_peak
        lake_minh = np.min((self.mframe_data['peak'].min(), np.min(h_2nds)))
        h_range = surf_elev - lake_minh
        lake_max_depth = surf_elev - np.min(h_2nds)
        lake_segment_length = np.max(xatc_2nds) - np.min(xatc_2nds)
        lake_maxh = np.min((self.mframe_data['peak'].max(), surf_elev+5*h_range))
        # pad the elevation range: at least 1 m above and 2 m below
        buffer_top = np.max((0.2*h_range, 1.0))
        buffer_bottom = np.max((0.3*h_range, 2.0))
        lake_minh_plot = lake_minh - buffer_bottom
        lake_maxh_plot = lake_maxh + buffer_top
        return
        
        

##########################################################################################
def plot_found_lakes(df, df_mframe, df_extracted_lakes, ancillary, gtx, polygon, fig_dir='figs/', verbose=False,
                     min_width=100, min_depth=1.5):
    """Plot every extracted lake that is deep and long enough, and optionally print a summary.

    Parameters
    ----------
    df : pandas.DataFrame
        Photon-level data; the columns 'mframe', 'xatc', 'h' and 'snr' are used.
    df_mframe : pandas.DataFrame
        Major-frame-level data indexed by major frame number; the columns 'h_2nd_returns',
        'xatc_2nd_returns', 'proms_2nd_returns', 'quality_summary', 'dt', 'lat', 'lon', 'peak',
        'xatc', 'xatc_min', 'xatc_max' and 'lake_qual_pass' are used.
    df_extracted_lakes : pandas.DataFrame
        One row per lake with 'surf_elev', 'mframe_start' and 'mframe_end'.
    ancillary : mapping
        Provides 'rgt', 'cycle_number', 'granule_id', 'gtx_strength_dict' and 'gtx_beam_dict'.
    gtx : str
        Ground track name (key into the per-beam dictionaries of `ancillary`).
    polygon : str
        Path of the geojson file; its base name goes into the figure file name.
    fig_dir : str, optional
        Directory for the figures; created if missing. A trailing slash is optional.
    verbose : bool, optional
        Print a short summary for every lake.
    min_width, min_depth : float, optional
        A figure is only written if the second returns span more than `min_width` along track
        and the maximum depth is between `min_depth` and 50.

    Returns
    -------
    None
    """

    plt.close('all')

    for i in range(len(df_extracted_lakes)):
        
        thislake = df_extracted_lakes.iloc[i]
        thiselev = thislake['surf_elev']
        extent_start = thislake['mframe_start']
        extent_end = thislake['mframe_end']

        # subset the dataframes to the current lake extent
        df_lake = df[(df['mframe'] >= extent_start) & (df['mframe'] <= extent_end)]
        df_mframe_lake = df_mframe[(df_mframe.index >= extent_start) & (df_mframe.index <= extent_end)]
        # second returns, excluding the two mframes for buffer on each side
        h_2nds = [v for l in list(df_mframe_lake['h_2nd_returns'])[2:-2] for v in l]
        xatc_2nds = [v for l in list(df_mframe_lake['xatc_2nd_returns'])[2:-2] for v in l]
        prom_2nds = [v for l in list(df_mframe_lake['proms_2nd_returns'])[2:-2] for v in l]

        # Without any second return there is no depth or segment length to compute (np.min /
        # np.max raise on empty input), so skip this lake instead of aborting the whole loop.
        # Passing this check also guarantees more than four major frames, i.e. a non-zero
        # divisor for the quality average below.
        if len(h_2nds) == 0:
            if verbose:
                print('\nLAKE %i: no second returns inside the lake extent, skipped' % i)
            continue

        # get statistics
        # base name of the polygon file; only strip '.geojson' if the name really ends with it
        lake_poly = polygon[polygon.rfind('/')+1:]
        if lake_poly.endswith('.geojson'):
            lake_poly = lake_poly[:-len('.geojson')]
        # average mframe quality summary excluding the two mframes for buffer on each side
        lake_quality = np.sum(df_mframe_lake['quality_summary']) / (len(df_mframe_lake) - 4)
        lake_time = convert_time_to_string(df_mframe_lake['dt'].mean())
        lake_lat = df_mframe_lake['lat'].mean()
        lake_lat_min = df_mframe_lake['lat'].min()
        lake_lat_max = df_mframe_lake['lat'].max()
        lake_lat_str = '%.5f°N'%(lake_lat) if lake_lat>=0 else '%.5f°S'%(-lake_lat)
        lake_lon = df_mframe_lake['lon'].mean()
        lake_lon_min = df_mframe_lake['lon'].min()
        lake_lon_max = df_mframe_lake['lon'].max()
        lake_lon_str = '%.5f°E'%(lake_lon) if lake_lon>=0 else '%.5f°W'%(-lake_lon)
        lake_gtx = gtx
        lake_track = ancillary['rgt']
        lake_cycle = ancillary['cycle_number']
        lake_beam_strength = ancillary['gtx_strength_dict'][lake_gtx]
        lake_beam_nr = ancillary['gtx_beam_dict'][lake_gtx]
        lake_minh = np.min((df_mframe_lake['peak'].min(), np.min(h_2nds)))
        h_range = thiselev - lake_minh
        lake_max_depth = thiselev - np.min(h_2nds)
        lake_segment_length = np.max(xatc_2nds) - np.min(xatc_2nds)
        lake_maxh = np.min((df_mframe_lake['peak'].max(), thiselev+5*h_range))
        # pad the plotted elevation range: at least 1 m above and 2 m below
        buffer_top = np.max((0.2*h_range, 1.0))
        buffer_bottom = np.max((0.3*h_range, 2.0))
        lake_minh_plot = lake_minh - buffer_bottom
        lake_maxh_plot = lake_maxh + buffer_top
        mptyp = 'arctic' if lake_lat>=0 else 'antarctic'
        lake_oa_url = 'https://openaltimetry.org/data/icesat2/elevation?product=ATL03&zoom_level=7&tab=photon&'
        lake_oa_url += 'date={date}&minx={minx}&miny={miny}&maxx={maxx}&maxy={maxy}&tracks={track}&mapType={mptyp}&beams={beam_nr}'.format(
                date=lake_time[:10], minx=lake_lon_min, miny=lake_lat_min, maxx=lake_lon_max, maxy=lake_lat_max,
                track=lake_track, mptyp=mptyp, beam_nr=lake_beam_nr)
        
        # plot only if criteria are fulfilled
        if (min_depth < lake_max_depth < 50.0) and (lake_segment_length > min_width):
            fig, ax = plt.subplots(figsize=[9, 5], dpi=100)

            # all x values are plotted relative to the first photon of the lake extent
            x0 = df_lake.xatc.min()

            ax.scatter(df_lake.xatc-x0, df_lake.h, s=6, c='k', alpha=0.05, edgecolors='none')
            scatt = ax.scatter(df_lake.xatc-x0, df_lake.h, s=3, c=df_lake.snr, alpha=1, edgecolors='none',
                               cmap=cmc.lajolla,vmin=0,vmax=1)

            # plot surface elevation
            xmin, xmax = ax.get_xlim()
            ax.plot([xmin, xmax], [thiselev, thiselev], 'g-', lw=0.5)

            # plot mframe bounds
            ymin, ymax = ax.get_ylim()
            mframe_bounds_xatc = list(df_mframe_lake['xatc_min']) + [df_mframe_lake['xatc_max'].iloc[-1]]
            for xmframe in mframe_bounds_xatc:
                ax.plot([xmframe-x0, xmframe-x0], [ymin, ymax], 'k-', lw=0.5)

            # major frame peaks: filled green if the frame passed the lake quality test, open red if not
            dfpass = df_mframe_lake[df_mframe_lake['lake_qual_pass']]
            dfnopass = df_mframe_lake[~df_mframe_lake['lake_qual_pass']]
            ax.plot(dfpass.xatc-x0, dfpass.peak, marker='o', mfc='g', mec='g', linestyle = 'None', ms=5)
            ax.plot(dfnopass.xatc-x0, dfnopass.peak, marker='o', mfc='none', mec='r', linestyle = 'None', ms=3)

            # second returns as open blue circles, marker size scaled with prominence
            for j, prom in enumerate(prom_2nds):
                ax.plot(xatc_2nds[j]-x0, h_2nds[j], marker='o', mfc='none', mec='b', linestyle = 'None', ms=prom*4)

            divider = make_axes_locatable(ax)
            cax = divider.append_axes('right', size='4%', pad=0.05)
            fig.colorbar(scatt, cax=cax, orientation='vertical')

            ax.set_ylim((lake_minh_plot, lake_maxh_plot))
            ax.set_xlim((0.0, df_mframe_lake['xatc_max'].iloc[-1] - x0))

            ax.set_title('Lake at (%s, %s) on %s\nICESat-2 track %d %s (%s), cycle %d [lake quality: %.2f]' % \
                         (lake_lat_str, lake_lon_str, lake_time, lake_track, lake_gtx,lake_beam_strength, lake_cycle, lake_quality))
            ax.set_ylabel('elevation above geoid [m]')
            ax.set_xlabel('along-track distance [m]')

            # save figure
            os.makedirs(fig_dir, exist_ok=True)
            granid = ancillary['granule_id'][:-3]
            latid = '%dN'%(int(np.round(lake_lat*1e5))) if lake_lat>=0 else '%dS'%(-int(np.round(lake_lat*1e5)))
            lonid = '%dE'%(int(np.round(lake_lon*1e5))) if lake_lon>=0 else '%dW'%(-int(np.round(lake_lon*1e5)))
            # join instead of concatenating, so that fig_dir works with or without trailing slash
            figname = os.path.join(fig_dir, 'lake_%s_%s_%s_%s-%s.jpg' % (lake_poly, granid, lake_gtx, latid, lonid))
            fig.savefig(figname, dpi=300, bbox_inches='tight', pad_inches=0)
            plt.close(fig)
        
        if verbose:
            print('\nLAKE %i:' % i)
            print('    - %s' % lake_time)
            print('    - %s, %s' % (lake_lat_str, lake_lon_str))
            print('    - segment length: %.1f km' % (lake_segment_length/1000))
            print('    - max depth: %.1f m' % lake_max_depth)
            print('    - quality: %.2f' % lake_quality)
            print('    - quick look: %s' % lake_oa_url)
            

## read in the data

In [7]:
%%time
# load the granule: photon data per ground track, background data and ancillary metadata
photon_data, bckgrd_data, ancillary = read_atl03(input_filename, geoid_h=True)
# the keys of photon_data are the ground track names
gtx_list = [*photon_data.keys()]
gtx_names_joined = ', '.join(gtx_list)
print('  Ground Tracks to process: %s' % gtx_names_joined)

  reading in IS2data/processed_ATL03_20210715182907_03381203_005_01.h5
  reading in beam: gt1l gt1r gt2l gt2r gt3l gt3r  --> done.
  Ground Tracks to process: gt1l, gt1r, gt2l, gt2r, gt3l, gt3r
CPU times: user 5.5 s, sys: 951 ms, total: 6.45 s
Wall time: 6.3 s


In [9]:
# Display the detected lake objects.
# NOTE(review): `lake_list` is created by the detection loop in the cell further down (run
# as In [8]); on a fresh kernel run top to bottom this cell raises NameError unless it is
# moved below that loop.
lake_list

[<__main__.melt_lake at 0x24fe25e50>,
 <__main__.melt_lake at 0x24fe25fd0>,
 <__main__.melt_lake at 0x24fe36e80>,
 <__main__.melt_lake at 0x24fe8b8b0>,
 <__main__.melt_lake at 0x24fe8b910>,
 <__main__.melt_lake at 0x24ffaccd0>,
 <__main__.melt_lake at 0x24fe26640>,
 <__main__.melt_lake at 0x103fb1610>,
 <__main__.melt_lake at 0x24ffef430>,
 <__main__.melt_lake at 0x24fe263d0>,
 <__main__.melt_lake at 0x24fe84d90>]

In [None]:
# Loop over the detected lakes. The per-lake step was never written; a bare `for` header
# without a body is a SyntaxError, so keep the cell runnable with a placeholder.
for lake in lake_list:
    pass  # TODO: add the per-lake processing

In [8]:
%%time
# run the detection on every ground track and pool the results in one list
lake_list = list()
for gtx in gtx_list:
    lakes_on_track = detect_lakes(photon_data, gtx, ancillary, args.polygon, verbose=True)
    lake_list.extend(lakes_on_track)


-----------------------------------------------------------------------------

PROCESSING GROUND TRACK: gt1l (weak)
---> finding flat surfaces (16 / 666 were flat)
---> calculating densities and looking for second peaks
   mframe 687: h=1073.15m. flat=Yes. snrs= 152,   5,   3. 2nds=  0%. qual=0.00. pass= No.
   mframe 759: h=1009.55m. flat=Yes. snrs=  76,  14,   3. 2nds=  0%. qual=0.00. pass= No.
   mframe 765: h=1008.85m. flat=Yes. snrs= 317,   8,   9. 2nds= 20%. qual=0.00. pass= No.
   mframe 811: h= 969.31m. flat=Yes. snrs= 322,  10,   7. 2nds= 50%. qual=0.04. pass=Yes.
   mframe 812: h= 969.34m. flat=Yes. snrs= 327,   9,   8. 2nds=  0%. qual=0.00. pass= No.
   mframe 820: h= 972.79m. flat=Yes. snrs= 146,   6,   5. 2nds= 60%. qual=0.01. pass=Yes.
   mframe 821: h= 972.84m. flat=Yes. snrs= 194,   7,   5. 2nds= 30%. qual=0.00. pass= No.
   mframe 997: h= 946.30m. flat=Yes. snrs=  57,  20,   3. 2nds=  0%. qual=0.00. pass= No.
   mframe 018: h= 945.05m. flat=Yes. snrs= 101,   5,   5. 2

In [None]:
# thelakes = []
# gtx = 'gt2l'
# if df_lakes is not None:
#     for i in range(len(df_lakes)):
#         lakedata = df_lakes.iloc[i]
#         thislake = melt_lake(lakedata.mframe_start, lakedata.mframe_end, lakedata.surf_elev)
#         thislake.add_data(df, df_mframe, gtx, ancillary, args.polygon)
#         thelakes.append(thislake)

In [None]:
# %%time
# for gtx in gtx_list:
#     df_lakes, df_mframe, df = detect_lakes(photon_data, gtx, ancillary, args.polygon, verbose=True)

In [None]:
import os
import zipfile
# collect every regular file (no sub-directories) directly inside figs/
filelist = ['figs/'+f for f in os.listdir('figs/') if os.path.isfile(os.path.join('figs/', f))]
# The with-block closes the archive even if adding a file fails. Compression is set once for
# the whole archive instead of per write() call.
with zipfile.ZipFile("zip_testing.zip", "w", compression=zipfile.ZIP_DEFLATED) as archive:
    for file in filelist:
        archive.write(file)

In [None]:
# inspect the ancillary metadata that read_atl03() returned alongside the photon data
ancillary