In [1]:
import numpy as np
import xarray as xr
import pandas as pd

import h5py
import os,json

In [2]:
#change working directory
%cd ../

/home/jovyan/icepyx


In [3]:
%load_ext autoreload
%autoreload 2

from icepyx import is2class as ipd

### Choose a region for subsetting as well. Use the same region as in the core demo.

In [4]:
region_a = ipd.Icesat2Data('ATL09',[-55, 68, -48, 71],['2019-02-22','2019-02-28'], \
                           start_time='00:00:00', end_time='23:59:59', version='2')

In [4]:
session=region_a.earthdata_login('liuzheng','liuzheng@apl.uw.edu')

Earthdata Login password: ········


In [5]:
session=region_a.earthdata_login('jessica.scheick','jessica.scheick@maine.edu')

Earthdata Login password:  ········


### Now, generate variable dictionary. 
Get the variable dictionary by parsing the dataset xml information from NSIDC, by calling ```show_custom_options(session)```. 

The data variables are stored in ```region_a._cust_options['variables']```. 

In [6]:
opts = region_a.show_custom_options(session)

Subsetting options
[{'id': 'ICESAT2',
  'maxGransAsyncRequest': '2000',
  'maxGransSyncRequest': '100',
  'spatialSubsetting': 'true',
  'spatialSubsettingShapefile': 'true',
  'temporalSubsetting': 'true',
  'type': 'both'}]
Data File Formats (Reformatting Options)
['TABULAR_ASCII', 'NetCDF4-CF', 'Shapefile', 'NetCDF-3']
Reprojection Options
[]
Data File (Reformatting) Options Supporting Reprojection
['TABULAR_ASCII', 'NetCDF4-CF', 'Shapefile', 'NetCDF-3', 'No reformatting']
Data File (Reformatting) Options NOT Supporting Reprojection
[]
Data Variables (also Subsettable)
['ds_surf_type',
 'ancillary_data/atlas_sdp_gps_epoch',
 'ancillary_data/control',
 'ancillary_data/data_end_utc',
 'ancillary_data/data_start_utc',
 'ancillary_data/end_cycle',
 'ancillary_data/end_delta_time',
 'ancillary_data/end_geoseg',
 'ancillary_data/end_gpssow',
 'ancillary_data/end_gpsweek',
 'ancillary_data/end_orbit',
 'ancillary_data/end_region',
 'ancillary_data/end_rgt',
 'ancillary_data/granule_end_utc

#### Setup the user provided variable list to subset variables

Options for inputting variables:
1. Use a default list for the dataset (not yet fully implemented across all datasets)
2. Provide a list of variable names, which will return all path-variable combinations (e.g. longitude will return longitude for both beams for all profiles)
3. Provide a list of variable names and/or specific profiles/beams (not yet implemented).

An example of each type of input is below.

In [7]:
#default variables
var_dict = region_a.build_wanted_var_list()

max needed: 2
['ancillary_data' 'orbit_info' 'profile_1' 'profile_2' 'profile_3'
 'quality_assessment']
['atmosphere' 'bckgrd_atlas' 'high_rate' 'low_rate' 'none' 'profile_1'
 'profile_2' 'profile_3']


In [8]:
#variable names
var_list = ['latitude','longitude','bsnow_h','bsnow_dens','bsnow_con','bsnow_psc','bsnow_od']
var_dict = region_a.build_wanted_var_list(var_list=var_list)

In [None]:
#variable names + beams/profiles
###STILL NEED TO MAKE THE BELOW POSSIBLE IN THE CODE

Choose ```latitude``` for ```profile_1``` only for demo purpose. 

```sc_orient``` provide info on beam strength and is stored under ```orbit_info```

In [11]:
subset_kws = {'kw1_list':['profile_1','orbit_info'],'kw2_list':['high_rate'],'var_list':['latitude','sc_orient'],
                                             'add_default_vars':True}

In [23]:
cv09 = region_a.build_subset_coverage(**subset_kws)
print(cv09)
type(cv09)

/ancillary_data/atlas_sdp_gps_epoch,/ancillary_data/control,/ancillary_data/data_end_utc,/ancillary_data/data_start_utc,/ancillary_data/end_cycle,/ancillary_data/end_delta_time,/ancillary_data/end_geoseg,/ancillary_data/end_gpssow,/ancillary_data/end_gpsweek,/ancillary_data/end_orbit,/ancillary_data/end_region,/ancillary_data/end_rgt,/ancillary_data/granule_end_utc,/ancillary_data/granule_start_utc,/ancillary_data/qa_at_interval,/ancillary_data/release,/ancillary_data/start_cycle,/ancillary_data/start_delta_time,/ancillary_data/start_geoseg,/ancillary_data/start_gpssow,/ancillary_data/start_gpsweek,/ancillary_data/start_orbit,/ancillary_data/start_region,/ancillary_data/start_rgt,/ancillary_data/version,/ancillary_data/atmosphere/aclr_use_atlas,/ancillary_data/atmosphere/alpha,/ancillary_data/atmosphere/a_m1,/ancillary_data/atmosphere/a_m2,/ancillary_data/atmosphere/asr_cal_factor,/ancillary_data/atmosphere/atlas_bandpass_fw,/ancillary_data/atmosphere/atlas_tele_fov,/ancillary_data/atm

str

### Setting params and download

In [13]:
region_a.build_CMR_params()
region_a.build_reqconfig_params('download')

In [8]:
region_a.build_subset_params(**{'Coverage':var_dict})
region_a.subsetparams

{'time': '2019-02-22T00:00:00,2019-02-28T23:59:59',
 'bbox': '-55,68,-48,71',
 'Coverage': '/profile_1/bckgrd_atlas/delta_time,/profile_1/high_rate/delta_time,/profile_1/low_rate/delta_time,/profile_2/bckgrd_atlas/delta_time,/profile_2/high_rate/delta_time,/profile_2/low_rate/delta_time,/profile_3/bckgrd_atlas/delta_time,/profile_3/high_rate/delta_time,/profile_3/low_rate/delta_time,/quality_assessment/profile_1/delta_time,/quality_assessment/profile_2/delta_time,/quality_assessment/profile_3/delta_time,/profile_1/high_rate/latitude,/profile_1/low_rate/latitude,/profile_2/high_rate/latitude,/profile_2/low_rate/latitude,/profile_3/high_rate/latitude,/profile_3/low_rate/latitude,/profile_1/high_rate/longitude,/profile_1/low_rate/longitude,/profile_2/high_rate/longitude,/profile_2/low_rate/longitude,/profile_3/high_rate/longitude,/profile_3/low_rate/longitude,/profile_1/high_rate/bsnow_h,/profile_1/low_rate/bsnow_h,/profile_2/high_rate/bsnow_h,/profile_2/low_rate/bsnow_h,/profile_3/high_r

In [10]:
#Identical to above block, but enters the keywords with a different style
region_a.build_subset_params(Coverage=var_dict)
region_a.subsetparams

{'time': '2019-02-22T00:00:00,2019-02-28T23:59:59',
 'bbox': '-55,68,-48,71',
 'Coverage': '/profile_1/bckgrd_atlas/delta_time,/profile_1/high_rate/delta_time,/profile_1/low_rate/delta_time,/profile_2/bckgrd_atlas/delta_time,/profile_2/high_rate/delta_time,/profile_2/low_rate/delta_time,/profile_3/bckgrd_atlas/delta_time,/profile_3/high_rate/delta_time,/profile_3/low_rate/delta_time,/quality_assessment/profile_1/delta_time,/quality_assessment/profile_2/delta_time,/quality_assessment/profile_3/delta_time,/profile_1/high_rate/latitude,/profile_1/low_rate/latitude,/profile_2/high_rate/latitude,/profile_2/low_rate/latitude,/profile_3/high_rate/latitude,/profile_3/low_rate/latitude,/profile_1/high_rate/longitude,/profile_1/low_rate/longitude,/profile_2/high_rate/longitude,/profile_2/low_rate/longitude,/profile_3/high_rate/longitude,/profile_3/low_rate/longitude,/profile_1/high_rate/bsnow_h,/profile_1/low_rate/bsnow_h,/profile_2/high_rate/bsnow_h,/profile_2/low_rate/bsnow_h,/profile_3/high_r

In [10]:
region_a.order_granules(session, verbose=True)

{'feed': {'updated': '2020-03-14T22:11:09.537Z', 'id': 'https://cmr.earthdata.nasa.gov:443/search/granules.json?short_name=ATL09&version=002&temporal=2019-02-22T00%3A00%3A00Z%2C2019-02-28T23%3A59%3A59Z&bounding_box=-55%2C68%2C-48%2C71&page_size=10&page_num=1', 'title': 'ECHO granule metadata', 'entry': [{'producer_granule_id': 'ATL09_20190222003738_08490201_002_01.h5', 'time_start': '2019-02-22T00:37:37.000Z', 'orbit': {'ascending_crossing': '130.68730694092687', 'start_lat': '0', 'start_direction': 'A', 'end_lat': '0', 'end_direction': 'A'}, 'updated': '2019-10-31T08:27:54.161Z', 'orbit_calculated_spatial_domains': [{'equator_crossing_date_time': '2019-02-22T00:37:38.252Z', 'equator_crossing_longitude': '130.68730694092687', 'orbit_number': '2437'}], 'dataset_id': 'ATLAS/ICESat-2 L3A Calibrated Backscatter Profiles and Atmospheric Layer Characteristics V002', 'data_center': 'NSIDC_ECS', 'title': 'SC:ATL09.002:166458094', 'coordinate_system': 'ORBIT', 'time_end': '2019-02-22T02:11:55.0

In [11]:
region_a.download_granules(session,'.')

Beginning download of zipped output...
Data request 5000000499635 of  1  order(s) is complete.


### Examine downloaded subset data file 


In [12]:
fn = '166458094/processed_ATL09_20190222003738_08490201_002_01.h5'

#### Check the downloaded dataset
Take ```latitude``` for example,

In [13]:
varname = 'latitude'
#varname = 'sc_orient'

varlist = []
def IS2h5walk(vname, h5node):
    if isinstance(h5node, h5py.Dataset):
        varlist.append(vname)
    return 

with h5py.File(fn,'r') as h5pt:
    h5pt.visititems(IS2h5walk)
    
for tvar in varlist:
    vpath,vn = os.path.split(tvar)
    if vn==varname: print(tvar) 

profile_1/high_rate/latitude


#### Compare the varaible ```latitude``` in the original data and the subsetted dat

In [14]:
region_a.variables['latitude']

['profile_1/high_rate/latitude',
 'profile_1/low_rate/latitude',
 'profile_2/high_rate/latitude',
 'profile_2/low_rate/latitude',
 'profile_3/high_rate/latitude',
 'profile_3/low_rate/latitude']

## Look at variables from various datasets to generalize code

In [4]:
region_06 = ipd.Icesat2Data('ATL06',[-55, 68, -48, 71],['2019-02-22','2019-02-28'], \
                           start_time='00:00:00', end_time='23:59:59', version='2')

In [5]:
region_07 = ipd.Icesat2Data('ATL07',[-55, 68, -48, 71],['2019-02-22','2019-02-28'], \
                           start_time='00:00:00', end_time='23:59:59', version='2')

In [6]:
region_08 = ipd.Icesat2Data('ATL08',[-55, 68, -48, 71],['2019-02-22','2019-02-28'], \
                           start_time='00:00:00', end_time='23:59:59', version='2')

In [7]:
region_09 = ipd.Icesat2Data('ATL09',[-55, 68, -48, 71],['2019-02-22','2019-02-28'], \
                           start_time='00:00:00', end_time='23:59:59', version='2')

In [8]:
region_10 = ipd.Icesat2Data('ATL10',[-55, 68, -48, 71],['2019-02-22','2019-02-28'], \
                           start_time='00:00:00', end_time='23:59:59', version='2')

In [10]:
region_12 = ipd.Icesat2Data('ATL12',[-55, 68, -48, 71],['2019-02-22','2019-02-28'], \
                           start_time='00:00:00', end_time='23:59:59', version='2')

In [11]:
session=region_06.earthdata_login('jessica.scheick','jessica.scheick@maine.edu')

Earthdata Login password:  ········


In [12]:
for dset in [region_06, region_07, region_08, region_09, region_10, region_12]:
    dset.show_custom_options(session)

Subsetting options
[{'id': 'ICESAT2',
  'maxGransAsyncRequest': '2000',
  'maxGransSyncRequest': '100',
  'spatialSubsetting': 'true',
  'spatialSubsettingShapefile': 'true',
  'temporalSubsetting': 'true',
  'type': 'both'}]
Data File Formats (Reformatting Options)
['TABULAR_ASCII', 'NetCDF4-CF', 'Shapefile', 'NetCDF-3']
Reprojection Options
[]
Data File (Reformatting) Options Supporting Reprojection
['TABULAR_ASCII', 'NetCDF4-CF', 'Shapefile', 'NetCDF-3', 'No reformatting']
Data File (Reformatting) Options NOT Supporting Reprojection
[]
Data Variables (also Subsettable)
['ancillary_data/atlas_sdp_gps_epoch',
 'ancillary_data/control',
 'ancillary_data/data_end_utc',
 'ancillary_data/data_start_utc',
 'ancillary_data/end_cycle',
 'ancillary_data/end_delta_time',
 'ancillary_data/end_geoseg',
 'ancillary_data/end_gpssow',
 'ancillary_data/end_gpsweek',
 'ancillary_data/end_orbit',
 'ancillary_data/end_region',
 'ancillary_data/end_rgt',
 'ancillary_data/granule_end_utc',
 'ancillary_da

In [21]:
d=6
for dset in [region_06, region_07, region_08, region_09, region_10, region_12]:
    vgrp, paths = dset._parse_var_list(dset._cust_options['variables'])
    print(d)
    d=d+1
    for p in paths:
        print(np.unique(np.array(p)))
    print(np.unique(np.array(vgrp.keys())))

6
['ancillary_data' 'gt1l' 'gt1r' 'gt2l' 'gt2r' 'gt3l' 'gt3r' 'orbit_info'
 'quality_assessment']
['gt1l' 'gt1r' 'gt2l' 'gt2r' 'gt3l' 'gt3r' 'land_ice' 'land_ice_segments'
 'none' 'residual_histogram' 'segment_quality']
['bias_correction' 'dem' 'fit_statistics' 'geophysical' 'ground_track'
 'none' 'signal_selection_status']
[dict_keys(['atlas_sdp_gps_epoch', 'control', 'data_end_utc', 'data_start_utc', 'end_cycle', 'end_delta_time', 'end_geoseg', 'end_gpssow', 'end_gpsweek', 'end_orbit', 'end_region', 'end_rgt', 'granule_end_utc', 'granule_start_utc', 'qa_at_interval', 'release', 'start_cycle', 'start_delta_time', 'start_geoseg', 'start_gpssow', 'start_gpsweek', 'start_orbit', 'start_region', 'start_rgt', 'version', 'dt_hist', 'fit_maxiter', 'fpb_maxiter', 'maxiter', 'max_res_ids', 'min_dist', 'min_gain_th', 'min_n_pe', 'min_n_sel', 'min_signal_conf', 'n_hist', 'nhist_bins', 'n_sigmas', 'proc_interval', 'rbin_width', 'sigma_beam', 'sigma_tx', 't_dead', 'win_nsig', 'atl06_quality_summar