In [1]:
import numpy as np
import xarray as xr
import pandas as pd

import h5py
import os,json
from pprint import pprint

In [2]:
#change working directory
%cd ../

/home/jovyan/icepyx


In [3]:
%load_ext autoreload
%autoreload 2

from icepyx import is2class as ipd

### Choose a region for subsetting as well. Use the same region as in the core demo.

In [9]:
region_a = ipd.Icesat2Data('ATL09',[-55, 68, -48, 71],['2019-02-22','2019-02-28'], \
                           start_time='00:00:00', end_time='23:59:59', version='2')

In [17]:
region_07 = ipd.Icesat2Data('ATL07',[-55, 68, -48, 71],['2019-02-22','2019-02-28'], \
                           start_time='00:00:00', end_time='23:59:59', version='2')

In [None]:
session=region_a.earthdata_login('liuzheng','liuzheng@apl.uw.edu')

In [10]:
session=region_a.earthdata_login('jessica.scheick','jessica.scheick@maine.edu')

Earthdata Login password:  ········


### Now, generate variable dictionary. 
Get the variable dictionary by parsing the dataset xml information from NSIDC, by calling ```show_custom_options(session)```. 

The data variables are stored in ```region_a._cust_options['variables']```. 

In [6]:
opts = region_a.show_custom_options(session,dictview=True)

Subsetting options
[{'id': 'ICESAT2',
  'maxGransAsyncRequest': '2000',
  'maxGransSyncRequest': '100',
  'spatialSubsetting': 'true',
  'spatialSubsettingShapefile': 'true',
  'temporalSubsetting': 'true',
  'type': 'both'}]
Data File Formats (Reformatting Options)
['TABULAR_ASCII', 'NetCDF4-CF', 'Shapefile', 'NetCDF-3']
Reprojection Options
[]
Data File (Reformatting) Options Supporting Reprojection
['TABULAR_ASCII', 'NetCDF4-CF', 'Shapefile', 'NetCDF-3', 'No reformatting']
Data File (Reformatting) Options NOT Supporting Reprojection
[]
Data Variables (also Subsettable)
{'a_m1': ['ancillary_data/atmosphere/a_m1'],
 'a_m2': ['ancillary_data/atmosphere/a_m2'],
 'aclr_true': ['profile_1/high_rate/aclr_true',
               'profile_2/high_rate/aclr_true',
               'profile_3/high_rate/aclr_true'],
 'aclr_use_atlas': ['ancillary_data/atmosphere/aclr_use_atlas'],
 'alpha': ['ancillary_data/atmosphere/alpha'],
 'apparent_surf_reflec': ['profile_1/high_rate/apparent_surf_reflec',
    

#### Setup the user provided variable list to subset variables: STILL TESTING HERE!

Options for inputting variables:
1. Use a default list for the dataset (not yet fully implemented across all datasets)
2. Provide a list of variable names, which will return all path-variable combinations (e.g. longitude will return longitude for both beams for all profiles)
3. Provide a list of variable names and/or specific profiles/beams (not yet implemented).

An example of each type of input is below.

In [None]:
#default variables
var_dict = region_a.build_wanted_var_list()
region_a.variables

In [15]:
var_dict = region_a.build_wanted_var_list(beam_list=['profile_1'],var_list=['latitude'],defaults=False)

['ancillary_data' 'orbit_info' 'profile_1' 'profile_2' 'profile_3'
 'quality_assessment']
['atmosphere' 'bckgrd_atlas' 'high_rate' 'low_rate' 'none' 'profile_1'
 'profile_2' 'profile_3']


In [18]:
region_07.show_custom_options(session,dictview=True)

Subsetting options
[{'id': 'ICESAT2',
  'maxGransAsyncRequest': '2000',
  'maxGransSyncRequest': '100',
  'spatialSubsetting': 'true',
  'spatialSubsettingShapefile': 'true',
  'temporalSubsetting': 'true',
  'type': 'both'}]
Data File Formats (Reformatting Options)
['TABULAR_ASCII', 'NetCDF4-CF', 'NetCDF-3']
Reprojection Options
[]
Data File (Reformatting) Options Supporting Reprojection
['TABULAR_ASCII', 'NetCDF4-CF', 'NetCDF-3', 'No reformatting']
Data File (Reformatting) Options NOT Supporting Reprojection
[]
Data Variables (also Subsettable)
{'across_track_distance': ['gt1l/sea_ice_segments/heights/across_track_distance',
                           'gt1r/sea_ice_segments/heights/across_track_distance',
                           'gt2l/sea_ice_segments/heights/across_track_distance',
                           'gt2r/sea_ice_segments/heights/across_track_distance',
                           'gt3l/sea_ice_segments/heights/across_track_distance',
                           'gt3r/sea_

In [19]:
var_dict = region_07.build_wanted_var_list(beam_list=['gt1l'],var_list=['height_segment_height'],defaults=False)
pprint(var_dict)

['ancillary_data' 'gt1l' 'gt1r' 'gt2l' 'gt2r' 'gt3l' 'gt3r' 'orbit_info'
 'quality_assessment']
['coarse_surface_finding' 'fine_surface_finding' 'none' 'sea_ice'
 'sea_ice_segments' 'surface_classification']
{'atlas_sdp_gps_epoch': ['ancillary_data/atlas_sdp_gps_epoch'],
 'data_end_utc': ['ancillary_data/data_end_utc'],
 'data_start_utc': ['ancillary_data/data_start_utc'],
 'end_delta_time': ['ancillary_data/end_delta_time'],
 'granule_end_utc': ['ancillary_data/granule_end_utc'],
 'granule_start_utc': ['ancillary_data/granule_start_utc'],
 'height_segment_height': ['gt1l/sea_ice_segments/heights/height_segment_height'],
 'sc_orient': ['orbit_info/sc_orient'],
 'start_delta_time': ['ancillary_data/start_delta_time']}


In [25]:
#variable names
var_list = ['latitude','longitude','bsnow_h','bsnow_dens','bsnow_con','bsnow_psc','bsnow_od']
var_dict = region_a.build_wanted_var_list(beam_list=['profile_1'],var_list=var_list,defaults=False, kw1_list=['quality_assessment'])
pprint(var_dict)

['ancillary_data' 'orbit_info' 'profile_1' 'profile_2' 'profile_3'
 'quality_assessment']
['atmosphere' 'bckgrd_atlas' 'high_rate' 'low_rate' 'none' 'profile_1'
 'profile_2' 'profile_3']
{'atlas_sdp_gps_epoch': ['ancillary_data/atlas_sdp_gps_epoch'],
 'bsnow_con': ['profile_1/high_rate/bsnow_con', 'profile_1/low_rate/bsnow_con'],
 'bsnow_dens': ['profile_1/high_rate/bsnow_dens'],
 'bsnow_h': ['profile_1/high_rate/bsnow_h', 'profile_1/low_rate/bsnow_h'],
 'bsnow_od': ['profile_1/high_rate/bsnow_od', 'profile_1/low_rate/bsnow_od'],
 'bsnow_psc': ['profile_1/high_rate/bsnow_psc', 'profile_1/low_rate/bsnow_psc'],
 'data_end_utc': ['ancillary_data/data_end_utc'],
 'data_start_utc': ['ancillary_data/data_start_utc'],
 'end_delta_time': ['ancillary_data/end_delta_time'],
 'granule_end_utc': ['ancillary_data/granule_end_utc'],
 'granule_start_utc': ['ancillary_data/granule_start_utc'],
 'latitude': ['profile_1/high_rate/latitude', 'profile_1/low_rate/latitude'],
 'longitude': ['profile_1/high_

In [None]:
#variable names + beams/profiles
###STILL NEED TO MAKE THE BELOW POSSIBLE IN THE CODE

Choose ```latitude``` for ```profile_1``` only for demo purpose. 

```sc_orient``` provide info on beam strength and is stored under ```orbit_info```

In [None]:
subset_kws = {'kw1_list':['profile_1','orbit_info'],'kw2_list':['high_rate'],'var_list':['latitude','sc_orient'],
                                             'defaults':True}

In [None]:
vgrp, paths = region_a._parse_var_list(region_a._cust_options['variables'])


### Setting params and download

In [None]:
region_a.build_CMR_params()
region_a.build_reqconfig_params('download')

In [None]:
region_a.build_subset_params(**{'Coverage':var_dict})
region_a.subsetparams

In [None]:
#Identical to above block, but enters the keywords with a different style
region_a.build_subset_params(Coverage=var_dict)
region_a.subsetparams

In [None]:
region_a.order_granules(session, verbose=True)

In [None]:
region_a.download_granules(session,'.')

### Examine downloaded subset data file 


In [None]:
fn = '166458094/processed_ATL09_20190222003738_08490201_002_01.h5'

#### Check the downloaded dataset
Take ```latitude``` for example,

In [None]:
varname = 'latitude'
#varname = 'sc_orient'

varlist = []
def IS2h5walk(vname, h5node):
    if isinstance(h5node, h5py.Dataset):
        varlist.append(vname)
    return 

with h5py.File(fn,'r') as h5pt:
    h5pt.visititems(IS2h5walk)
    
for tvar in varlist:
    vpath,vn = os.path.split(tvar)
    if vn==varname: print(tvar) 

#### Compare the varaible ```latitude``` in the original data and the subsetted dat

In [None]:
region_a.variables['latitude']

In [None]:
', '.join(x) for x in ['gt1l','gt1r']

## Look at variables from various datasets to generalize code

In [7]:
region_06 = ipd.Icesat2Data('ATL06',[-55, 68, -48, 71],['2019-02-22','2019-02-28'], \
                           start_time='00:00:00', end_time='23:59:59', version='2')

In [8]:
region_07 = ipd.Icesat2Data('ATL07',[-55, 68, -48, 71],['2019-02-22','2019-02-28'], \
                           start_time='00:00:00', end_time='23:59:59', version='2')

In [9]:
region_08 = ipd.Icesat2Data('ATL08',[-55, 68, -48, 71],['2019-02-22','2019-02-28'], \
                           start_time='00:00:00', end_time='23:59:59', version='2')

In [10]:
region_09 = ipd.Icesat2Data('ATL09',[-55, 68, -48, 71],['2019-02-22','2019-02-28'], \
                           start_time='00:00:00', end_time='23:59:59', version='2')

In [11]:
region_10 = ipd.Icesat2Data('ATL10',[-55, 68, -48, 71],['2019-02-22','2019-02-28'], \
                           start_time='00:00:00', end_time='23:59:59', version='2')

In [12]:
region_12 = ipd.Icesat2Data('ATL12',[-55, 68, -48, 71],['2019-02-22','2019-02-28'], \
                           start_time='00:00:00', end_time='23:59:59', version='2')

In [None]:
session=region_a.earthdata_login('liuzheng','liuzheng@apl.uw.edu')

In [None]:
session=region_06.earthdata_login('jessica.scheick','jessica.scheick@maine.edu')

In [None]:
dset = region_10
dset.show_custom_options(session,dictview=True)

In [13]:
## show the maximum depth for variables in dataset
for dset in [region_06, region_07, region_08, region_09, region_10, region_12]:
    dset._get_custom_options(session)
    max_dep = 0
    for vn in dset._cust_options['variables']:
        wrds = vn.split('/')
        if len(wrds)-1> max_dep: max_dep = len(wrds)-1
    #print(dset.dataset,max_dep)

In [None]:
vgrp, paths = region_10._parse_var_list(region_10._cust_options['variables'])
import pprint
pprint.pprint(vgrp)

In [None]:
vgrp.keys()

In [None]:
for dset in [region_06, region_07, region_08, region_09, region_10, region_12]:
    dset.show_custom_options(session, dictview=True)

In [14]:
d=6
for dset in [region_06, region_07, region_08, region_09, region_10, region_12]:
    vgrp, paths = dset._parse_var_list(dset._cust_options['variables'])
    print(d)
    d=d+1
    for p in paths:
        print(np.unique(np.array(p)))
    print(np.unique(np.array(vgrp.keys())))

6
['ancillary_data' 'gt1l' 'gt1r' 'gt2l' 'gt2r' 'gt3l' 'gt3r' 'orbit_info'
 'quality_assessment']
['gt1l' 'gt1r' 'gt2l' 'gt2r' 'gt3l' 'gt3r' 'land_ice' 'land_ice_segments'
 'none' 'residual_histogram' 'segment_quality']
['bias_correction' 'dem' 'fit_statistics' 'geophysical' 'ground_track'
 'none' 'signal_selection_status']
[dict_keys(['atlas_sdp_gps_epoch', 'control', 'data_end_utc', 'data_start_utc', 'end_cycle', 'end_delta_time', 'end_geoseg', 'end_gpssow', 'end_gpsweek', 'end_orbit', 'end_region', 'end_rgt', 'granule_end_utc', 'granule_start_utc', 'qa_at_interval', 'release', 'start_cycle', 'start_delta_time', 'start_geoseg', 'start_gpssow', 'start_gpsweek', 'start_orbit', 'start_region', 'start_rgt', 'version', 'dt_hist', 'fit_maxiter', 'fpb_maxiter', 'maxiter', 'max_res_ids', 'min_dist', 'min_gain_th', 'min_n_pe', 'min_n_sel', 'min_signal_conf', 'n_hist', 'nhist_bins', 'n_sigmas', 'proc_interval', 'rbin_width', 'sigma_beam', 'sigma_tx', 't_dead', 'win_nsig', 'atl06_quality_summar