In [2]:
# sort in order of appearance

import numpy as np
import sys
import logging
import concurrent.futures
import time
from datetime import datetime
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
from pyproj import Transformer, CRS
import shapely.geometry as sg
from shapely.geometry import Polygon, Point
from sliderule import sliderule, icesat2, earthdata, h5, ipysliderule, io
import warnings
from IPython import display
import json
import utils.toolshelf as t
from random import sample
import os

import fiona

warnings.filterwarnings('ignore')
%matplotlib inline

In [3]:
####################### SET PARAMETERS #################
singleCycle = True
cycle = 20


## Select location and cycle (2 digit cycle number)
site = "ross1"
startCycle = 1
endCycle = 21

#Location specific settings
melange_cutoff = 30

## Data access options

# (-1: Skip altogether, 0: process fresh, 1: load from geojson, 2: load from csv)
accessType03 = 0
accessType06 = 0

ignoreTracks=None

## input options

# resolution (recommended 20.0)
# "len" and "res" (will be equal)
res = 10.0
cnf = 2

#minimum track length

## output options

# Save flag 06 (0: dont save, 1: save as geojson (slow but everything works), 
# 2: save as csv (faster but nothing works))
sFlag06 = 0

# Save flag melange (0: dont save, 1: save as geojson)
sFlagm = 0

# Save flag 03 (0: dont save, 1: save as geojson)
sFlag03 = 1

##########

In [4]:
# Get ATL06 Data 

# Configure ICESat-2 API
icesat2.init("slideruleearth.io", verbose=True)

In [5]:
%%time
if singleCycle: cycleList = [cycle]
elif not singleCycle: cycleList = range(startCycle, endCycle+1)
for currentCycle in cycleList:
    print(str(currentCycle).zfill(2))
    cycle = str(currentCycle).zfill(2)

    
    ## Some defaults for file namingd
    # _save and _load is to prevent accidentally overwriting good data
    # as such, freshly created data needs to be managed manually
    #if not os.path.exists(fig_dir): os.makedirs(fig_dir)
    datRoot = f"../data/IS2/"
    datRoot=f"/Volumes/nox/Chance/rifts/data/IS2/"
    #if not os.path.exists(datRoot): os.makedirs(datRoot)
    file03_save = f"{datRoot}{site}_ATL03_cyc{cycle}_res{int(res)}"
    file06_save = f"{datRoot}{site}_ATL06_cyc{cycle}_res{int(res)}"
    fileMel_save = f"{datRoot}{site}_mel_cyc{cycle}_res{int(res)}"
    file03_load = f"{datRoot}{site}_ATL03_res{int(res)}/{site}_ATL03_cyc{cycle}_res{int(res)}"
    file06_load = f"{datRoot}{site}_ATL06_res{int(res)}/{site}_ATL06_cyc{cycle}_res{int(res)}"
    fileMel_load = f"{datRoot}{site}_ATL06_mel_res{int(res)}/{site}_mel_cyc{cycle}_res{int(res)}"
    
    #Params for this location
    parms = {
        "poly": t.getRegion(site, cycle),
        "srt": 0,
        "len": res*2,
        "res": res,
        "cnf": cnf,
        "maxi": 6,
        "ats": 5.0,
        "cnt": 5,
        "H_min_win": 3.0,
        "sigma_r_max": 5.0,
        "cycle": cycle,
        "atl03_geo_fields": ['geoid', 'dem_h', 'dem_flag', 'tide_earth', 'tide_ocean', 'geoid_free2mean', 'tide_earth_free2mean']
    }

    #Triple check this cell
    
    print('getting data')
    atl06_sr = t.get06Data(parms, file06_load=file06_load, accessType=accessType06, file06_save=file06_save, sFlag06=sFlag06, verbose=True)
    
    print('assembling track list')
    # separate all good tracks and put them into a list
    trackList, lens = t.getTrackList(atl06_sr, return_lens=True, omit=ignoreTracks, verbose=True)
    ss_atl06_sr = pd.concat([t.getTrack(atl06_sr, trackInfo)[0] for trackInfo in trackList])
    
    print('tide and geoid corrections')
    #Geoid and Tides
    ss_atl06_sr.h_mean -= ss_atl06_sr.geoid+ss_atl06_sr.tide_earth+ss_atl06_sr.tide_ocean
    
    print('grabbing melange')
    # Establish mélange cutoff from mean
    melange_sr = ss_atl06_sr[ss_atl06_sr.h_mean<ss_atl06_sr.h_mean.mean()]
    
    melange_trackList, melange_lens = t.getTrackList(melange_sr, return_lens=True, verbose=False)
    
    #new tracklist
    ss_atl06_sr_list = [t.getTrack(ss_atl06_sr, trackInfo)[0] for trackInfo in trackList]
    #t.display06Info(ss_atl06_sr)
    
    
    
    if sFlagm==1: 
        print('saving melange')
        t.toGeojson(melange_sr, fileMel_save)

20
getting data
Processing new ATL06-SR dataset
Reference Ground Tracks: [ 23  30  45  84  91 106 167 206 221 228 289 328 343 350 389 404 465 472
 487 526 533 548 587 609 648 724 731 770 785 792 831 846 868]
Beams: ['gt3r', 'gt2r', 'gt1r']
Cycles: [20]
Received 205234 elevations
Across 102 strong tracks
Date range 2023-06-21 to 2023-08-16
Vertical range -58.44071530756748m to 295.8186065046669m
assembling track list
Finding tracks with minimum of 200 photons
88 found
[2307, 2070, 1892, 2031, 2107, 2185, 2485, 2496, 2476, 1111, 2531, 2497, 2259, 2280, 2150, 2200, 2891, 2891, 2815, 2722, 2721, 2717, 2753, 2745, 2739, 2219, 2143, 2065, 1226, 841, 837, 2781, 2773, 2767, 464, 499, 919, 2755, 2761, 2767, 2159, 2235, 2311, 2649, 2583, 2517, 2539, 2478, 2633, 1777, 1853, 1929, 2677, 2673, 2674, 2831, 2837, 2843, 2699, 2695, 2687, 2873, 2881, 2889, 930, 1843, 2224, 2743, 2737, 2731, 2304, 2265, 2193, 2579, 2655, 2731, 2771, 2765, 2759, 1883, 1886, 1553, 2745, 2741, 2732, 2214, 2149, 2081]
tide 

In [6]:
%%time
#EarthData query to acquire granule IDs'
## Is this even necessary??

#Get dates times
if accessType06==0: datetimes = ss_atl06_sr.index
elif accessType06==1: datetimes = pd.to_datetime(ss_atl06_sr.index, format='ISO8601')

earthdata.set_max_resources=2000
# must quiery by space and time
granules_list = earthdata.cmr(short_name='ATL03', polygon=t.getRegion(site, cycle), version='006', time_start=t.getDateTime(datetimes.min()), 
    time_end=t.getDateTime(datetimes.max()), return_metadata=True)
granMD = pd.DataFrame([t.unpackGranuleID(gran) for gran in granules_list[0]]).set_index('granuleID')
mask = granMD['rgt'].isin(ss_atl06_sr.rgt.unique())
granMD = granMD[mask]
granule_sizes = [float(granule['granule_size']) for granule in granules_list[1]]
print(f"{len(granule_sizes)} granules with average size {np.mean(granule_sizes)/1e6} MB, totalling {np.sum(granule_sizes)/1e6/1000} GB")
print(f"subsetted to {len(granMD)}")

if len(ss_atl06_sr.rgt.unique())!=len(granMD): print('number of rgts not matching between granMD and ss_atl06_sr')



43 granules with average size 551.9367357441873 MB, totalling 23.73327963700006 GB
subsetted to 30
CPU times: user 211 ms, sys: 8.92 ms, total: 219 ms
Wall time: 1.01 s


In [32]:
#Single track for testing
currentTrack = melange_trackList[0]

### Info for setting params 
cycle, rgt, gt = currentTrack[0], currentTrack[1], currentTrack[2]
gtNum = int(gt[2])
pair = 0 if gt[3]=="l" else 1

track, cycle, rgt, t.gtDict[gt] = t.getTrack(ss_atl06_sr, currentTrack)
mTrack, cycle, rgt, t.gtDict[gt] = t.getTrack(melange_sr, currentTrack)

##### Set ATL03 sp parameters ##############################
parms = {
    "poly": t.getRegion(site, cycle),
    "srt": 0,
    "len": res*2,
    "res": res,
    # identify ground track
    #"track": gtNum,
    # classification and checks
    # still return photon segments that fail checks
    "pass_invalid": True,
    # all photons
    "cnf": 2,
    "cnt": 5,
    "atl03_geo_fields": ["ref_azimuth", "ref_elev", "geoid", 
        'dem_h', 'dem_flag', 'tide_earth', 'tide_ocean', 'geoid_free2mean', 'tide_earth_free2mean'],
    "atl03_ph_fields": ["delta_time", "weight_ph"],
    # all land classification flags
    "atl08_class": ["atl08_noise", "atl08_ground", "atl08_canopy", "atl08_top_of_canopy", "atl08_unclassified"],
    # all photons
    #"yapc": dict(knn=0, win_h=6, win_x=11, min_ph=4, score=0), 
}

In [29]:
%%time
oneTrackgdf = icesat2.atl03sp(parms, resources=[granMD.index[2]])
onetrack = oneTrackgdf[oneTrackgdf.pair==pair]

CPU times: user 43.4 s, sys: 377 ms, total: 43.7 s
Wall time: 46.3 s


In [31]:
oneTrackgdf.rgt.unique()

array([45], dtype=uint16)

In [33]:
%%time
biggdf = icesat2.atl03sp(parms, resources=list(granMD.index))

Unable to complete request due to errors


CPU times: user 7.65 s, sys: 41 ms, total: 7.69 s
Wall time: 4min 11s


In [119]:
biggdf.pair.unique()

array([0, 1], dtype=uint8)

In [151]:
gdf = biggdf
atl03_yapc = gdf
atl03_yapc.height -= atl03_yapc.geoid+atl03_yapc.tide_ocean+atl03_yapc.tide_earth

AttributeError: 'GeoDataFrame' object has no attribute 'geoid'

In [97]:
atl03_yapc.columns

Index(['solar_elevation', 'segment_id', 'cycle', 'sc_orient', 'pair', 'rgt',
       'segment_dist', 'track', 'background_rate', 'yapc_score', 'landcover',
       'height', 'atl08_class', 'atl03_cnf', 'snowcover', 'quality_ph',
       'y_atc', 'x_atc', 'relief', 'geometry', 'spot'],
      dtype='object')

In [None]:
# ATL03 Processing

#Have a list of only one granule
gdf = icesat2.atl03sp(parms, resources=[trackID])
atl03_yapc = gdf[gdf.pair==pair]

#Geophysical corrections
atl03_yapc.height -= atl03_yapc.geoid+atl03_yapc.tide_ocean+atl03_yapc.tide_earth

# Display Statistics
t.display03Info(atl03_yapc)

if sFlag03==1:
    print(f'Saving file as geojson named {file03_save}.geojson')
    atl03_yapc.to_file(f"{file03_save}.geojson", driver='GeoJSON')
    
    
## Other stuff    
ss_atl03_yapc = t.filter_yapc(atl03_yapc, 0)
pTrack = ss_atl03_yapc
ss_atl03_yapc = atl03_yapc

In [None]:
%%time
# ATL03 Processing for a track

## add something to check if the data already exists

##### Set ATL03 sp parameters ##############################
parms = {
    "poly": region,
    "srt": 0,
    "len": res*2,
    "res": res,
    # identify ground track
    "track": tr,
    # classification and checks
    # still return photon segments that fail checks
    "pass_invalid": True,
    # all photons
    "cnf": 2,
    "cnt": 5,
    "atl03_geo_fields": ["ref_azimuth", "ref_elev", "geoid"],
    "atl03_ph_fields": ["delta_time"],
    # all land classification flags
    "atl08_class": ["atl08_noise", "atl08_ground", "atl08_canopy", "atl08_top_of_canopy", "atl08_unclassified"],
    # all photons
    #"yapc": dict(knn=0, win_h=6, win_x=11, min_ph=4, score=100), 
}

#Have a list of only one granule
gdf = icesat2.atl03sp(parms, resources=[trackID])
atl03_yapc = gdf[gdf.pair==pair]

# Display Statistics
t.display03Info(atl03_yapc)

if sFlag03==1:
    print(f'Saving file as geojson named {file03_save}.geojson')
    atl03_yapc.to_file(f"{file03_save}.geojson", driver='GeoJSON')