In [1]:
# sort in order of appearance

import numpy as np
import sys
import logging
import concurrent.futures
import time
from datetime import datetime
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
from pyproj import Transformer, CRS
import shapely.geometry as sg
from shapely.geometry import Polygon, Point
from sliderule import sliderule, icesat2, earthdata, h5, ipysliderule, io
import warnings
from IPython import display
import json
import utils.toolshelf as t
from random import sample
import os

import fiona

warnings.filterwarnings('ignore')
%matplotlib inline



In [24]:
####################### SET PARAMETERS #################
# Every value a reader is expected to tune lives in this cell; the cells below read
# these names as notebook globals.

## Select the predetermined site and the inclusive range of cycles to process.
## `site` is passed to t.getRegion() and embedded in every output file name;
## each cycle number is zero-padded to 2 digits before use.
site = "ross1"
startCycle = 1
endCycle = 21

## Data access options

# (-1: Skip altogether, 0: process fresh, 1: load from geojson, 2: load from csv)
# accessType06 is forwarded to t.get06Data() as `accessType`.
# NOTE(review): accessType03 is not referenced by any cell visible in this notebook — confirm it is still needed.
accessType03 = 0
accessType06 = 1

# Forwarded to t.getTrackList() as `omit`.
ignoreTracks=None

## input options

# resolution (recommended 20.0)
# Used directly as the "res" request parameter; the requests below set "len" to res*2,
# so "len" and "res" are NOT equal.
res = 10.0
# Forwarded as the "cnf" request parameter of the ATL06 request.
cnf = 2

# TODO: minimum track length (no parameter is defined for it yet)

## output options

# Save flag 06 (0: dont save, 1: save as geojson (slow but everything works), 
# 2: save as csv (faster but nothing works))
# Forwarded to t.get06Data() as `sFlag06`.
sFlag06 = 0

# Save flag melange (0: dont save, 1: save as geojson)
sFlagm = 0

# Save flag 03 (0: dont save, 1: save as geojson)
sFlag03 = 1

##########

#Location specific settings
# NOTE(review): melange_cutoff is not referenced by any visible cell; the melange subset
# in the ATL06 loop is selected with the mean of h_mean instead — confirm which is intended.
melange_cutoff = 30

In [25]:
# Set up the SlideRule ICESat-2 client; this has to run before the data requests in the cells below

# Configure ICESat-2 API: use the "slideruleearth.io" service, with verbose=True
icesat2.init("slideruleearth.io", verbose=True)

In [38]:
%%time

for currentCycle in range(startCycle, endCycle+1):
    print(str(currentCycle).zfill(2))
    cycle = str(currentCycle).zfill(2)

    
    ## Some defaults for file namingd
    # _save and _load is to prevent accidentally overwriting good data
    # as such, freshly created data needs to be managed manually
    #if not os.path.exists(fig_dir): os.makedirs(fig_dir)
    datRoot = f"../data/IS2/"
    datRoot=f"/Volumes/nox/Chance/rifts/data/IS2/"
    if not os.path.exists(datRoot): os.makedirs(datRoot)
    file03_save = f"{datRoot}{site}_ATL03_cyc{cycle}_res{int(res)}"
    file06_save = f"{datRoot}{site}_ATL06_cyc{cycle}_res{int(res)}"
    fileMel_save = f"{datRoot}{site}_mel_cyc{cycle}_res{int(res)}"
    file03_load = f"{datRoot}{site}_ATL03_res{int(res)}/{site}_ATL03_cyc{cycle}_res{int(res)}"
    file06_load = f"{datRoot}{site}_ATL06_res{int(res)}/{site}_ATL06_cyc{cycle}_res{int(res)}"
    fileMel_load = f"{datRoot}{site}_ATL06_mel_res{int(res)}/{site}_mel_cyc{cycle}_res{int(res)}"
    
    #Params for this location
    parms = {
        "poly": t.getRegion(site, cycle),
        "srt": 0,
        "len": res*2,
        "res": res,
        "cnf": cnf,
        "maxi": 6,
        "ats": 5.0,
        "cnt": 5,
        "H_min_win": 3.0,
        "sigma_r_max": 5.0,
        "cycle": cycle,
        "atl03_geo_fields": ['geoid', 'dem_h', 'dem_flag', 'tide_earth', 'tide_ocean', 'geoid_free2mean', 'tide_earth_free2mean']
    }

    #Triple check this cell
    
    print('getting data')
    atl06_sr = t.get06Data(parms, file06_load=file06_load, accessType=accessType06, file06_save=file06_save, sFlag06=sFlag06, verbose=True)
    
    print('assembling track list')
    # separate all good tracks and put them into a list
    trackList, lens = t.getTrackList(atl06_sr, return_lens=True, omit=ignoreTracks, verbose=True)
    ss_atl06_sr = pd.concat([t.getTrack(atl06_sr, trackInfo)[0] for trackInfo in trackList])
    
    print('tide and geoid corrections')
    #Geoid and Tides
    ss_atl06_sr.h_mean -= ss_atl06_sr.geoid+ss_atl06_sr.tide_earth+ss_atl06_sr.tide_ocean
    
    print('grabbing melange')
    # Establish mélange cutoff from mean
    melange_sr = ss_atl06_sr[ss_atl06_sr.h_mean<ss_atl06_sr.h_mean.mean()]
    
    melange_trackList, melange_lens = t.getTrackList(melange_sr, return_lens=True, verbose=False)
    
    #new tracklist
    ss_atl06_sr_list = [t.getTrack(ss_atl06_sr, trackInfo)[0] for trackInfo in trackList]
    t.display06Info(ss_atl06_sr)
    
    
    
    if sFlagm==1: 
        print('saving melange')
        t.toGeojson(melange_sr, fileMel_save)

01
getting data
Processing new ATL06-SR dataset


KeyboardInterrupt: 

In [35]:
# Point the working cycle back at the first configured cycle, as a zero-padded
# 2-character string (the form the cells below expect)
currentCycle = startCycle
cycle = f"{currentCycle:02d}"

In [36]:
%%time
#EarthData query to acquire granule IDs

earthdata.set_max_resources=2000
# must quiery by space and time
granules_list = earthdata.cmr(short_name='ATL03', polygon=t.getRegion(site, cycle), version='006', time_start=t.getDateTime(atl06_sr.index.min()), 
    time_end=t.getDateTime(atl06_sr.index.max()), return_metadata=True)
granMD = pd.DataFrame([t.unpackGranuleID(gran) for gran in granules_list[0]]).set_index('granuleID')
granule_sizes = [float(granule['granule_size']) for granule in granules_list[1]]
print(f"{len(granule_sizes)} granules with average size {np.mean(granule_sizes)/1e6} MB, totalling {np.sum(granule_sizes)/1e6/1000} GB")



43 granules with average size 551.9367357441873 MB, totalling 23.73327963700006 GB
CPU times: user 223 ms, sys: 6.48 ms, total: 230 ms
Wall time: 767 ms


In [37]:
# Inspect the granule metadata table (one row per ATL03 granule returned by the CMR query, indexed by granuleID)
granMD

Unnamed: 0_level_0,shortName,date,time,rgt,cycle,granuleNumber,version,release
granuleID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
ATL03_20230621174417_00232012_006_01.h5,ATL03,2023-06-21,17:44:17,23,20,12,6,1
ATL03_20230622043054_00302010_006_01.h5,ATL03,2023-06-22,04:30:54,30,20,10,6,1
ATL03_20230623040514_00452010_006_01.h5,ATL03,2023-06-23,04:05:14,45,20,10,6,1
ATL03_20230625173556_00842012_006_01.h5,ATL03,2023-06-25,17:35:56,84,20,12,6,1
ATL03_20230626042233_00912010_006_01.h5,ATL03,2023-06-26,04:22:33,91,20,10,6,1
ATL03_20230627035653_01062010_006_01.h5,ATL03,2023-06-27,03:56:53,106,20,10,6,1
ATL03_20230629172731_01452012_006_01.h5,ATL03,2023-06-29,17:27:31,145,20,12,6,1
ATL03_20230701034830_01672010_006_01.h5,ATL03,2023-07-01,03:48:30,167,20,10,6,1
ATL03_20230703171913_02062012_006_01.h5,ATL03,2023-07-03,17:19:13,206,20,12,6,1
ATL03_20230704165334_02212012_006_01.h5,ATL03,2023-07-04,16:53:34,221,20,12,6,1


In [None]:
# ATL03 Processing: one track, all photons, with YAPC scores attached
#
# NOTE(review): `rgt` and `gt` are read below before this cell binds them (the
# t.getTrack() unpacking is the only assignment visible in this notebook), so they have
# to be chosen by hand beforehand; on a fresh kernel this cell stops with a NameError.
# NOTE(review): the t.getTrack() unpacking also rebinds `cycle`, `rgt` and `gt`, so
# re-running the cell starts from the values it produced last time.

# Output stem for the cycle handled HERE. The ATL06 loop leaves `file03_save` pointing at
# the last cycle it processed, which need not be the cycle selected for this cell.
file03_save = f"{datRoot}{site}_ATL03_cyc{str(cycle).zfill(2)}_res{int(res)}"

# Some track info

# Find the granule for this cycle / reference ground track. Both sides are normalised to
# int so that a zero-padded cycle string such as "01" still matches, and the masks are
# combined with the boolean `&` rather than arithmetic `*`.
granule_mask = (granMD.cycle.astype(int)==int(cycle)) & (granMD.rgt.astype(int)==int(rgt))
if not granule_mask.any():
    raise IndexError(f"no ATL03 granule found for cycle {cycle}, rgt {rgt}")
trackID = granMD[granule_mask].index[0]
# t.gtDict[gt] is indexed at [2] for the track number and at [3] for the l/r beam letter
gtNum = int(t.gtDict[gt][2])
pair = 0 if t.gtDict[gt][3]=="l" else 1

# Matching ATL06 track and its melange subset
trackInfo = (cycle, rgt, t.gtDict[gt])
track, cycle, rgt, gt = t.getTrack(ss_atl06_sr, trackInfo)
mTrack, cycle, rgt, gt = t.getTrack(melange_sr, trackInfo)

## TODO: add something to check if the data already exists

##### Set ATL03 sp parameters ##############################
parms = {
    "poly": t.getRegion(site, cycle),
    "srt": 0,
    "len": res*2,
    "res": res,
    # identify ground track
    "track": gtNum,
    # classification and checks
    # still return photon segments that fail checks
    "pass_invalid": True,
    # all photons
    #"cnf": 2,
    "cnt": 5,
    "atl03_geo_fields": ["ref_azimuth", "ref_elev", "geoid", 
        'dem_h', 'dem_flag', 'tide_earth', 'tide_ocean', 'geoid_free2mean', 'tide_earth_free2mean'],
    "atl03_ph_fields": ["delta_time", "weight_ph"],
    # all land classification flags
    "atl08_class": ["atl08_noise", "atl08_ground", "atl08_canopy", "atl08_top_of_canopy", "atl08_unclassified"],
    # all photons
    "yapc": dict(knn=0, win_h=6, win_x=11, min_ph=4, score=0), 
}

#Have a list of only one granule
gdf = icesat2.atl03sp(parms, resources=[trackID])
# Keep only the selected beam of the pair. .copy() makes this an independent frame, so
# the in-place correction below is not a chained assignment on a slice of `gdf`.
atl03_yapc = gdf[gdf.pair==pair].copy()

#Geophysical corrections: subtract geoid, ocean tide and solid-earth tide from the photon heights
atl03_yapc["height"] -= atl03_yapc.geoid+atl03_yapc.tide_ocean+atl03_yapc.tide_earth

# Display Statistics
t.display03Info(atl03_yapc)

if sFlag03==1:
    print(f'Saving file as geojson named {file03_save}.geojson')
    atl03_yapc.to_file(f"{file03_save}.geojson", driver='GeoJSON')


## Other stuff
# pTrack holds the result of t.filter_yapc(..., 0); ss_atl03_yapc stays the unfiltered frame
pTrack = t.filter_yapc(atl03_yapc, 0)
ss_atl03_yapc = atl03_yapc

In [None]:
%%time
# ATL03 Processing for a track

## add something to check if the data already exists

##### Set ATL03 sp parameters ##############################
parms = {
    "poly": region,
    "srt": 0,
    "len": res*2,
    "res": res,
    # identify ground track
    "track": tr,
    # classification and checks
    # still return photon segments that fail checks
    "pass_invalid": True,
    # all photons
    "cnf": 2,
    "cnt": 5,
    "atl03_geo_fields": ["ref_azimuth", "ref_elev", "geoid"],
    "atl03_ph_fields": ["delta_time"],
    # all land classification flags
    "atl08_class": ["atl08_noise", "atl08_ground", "atl08_canopy", "atl08_top_of_canopy", "atl08_unclassified"],
    # all photons
    #"yapc": dict(knn=0, win_h=6, win_x=11, min_ph=4, score=100), 
}

#Have a list of only one granule
gdf = icesat2.atl03sp(parms, resources=[trackID])
atl03_yapc = gdf[gdf.pair==pair]

# Display Statistics
t.display03Info(atl03_yapc)

if sFlag03==1:
    print(f'Saving file as geojson named {file03_save}.geojson')
    atl03_yapc.to_file(f"{file03_save}.geojson", driver='GeoJSON')