In [13]:
import os
import gc
import re
import json
import shutil
import zipfile
import requests
import numpy as np
import pandas as pd
import geopandas as gpd
from shapely.geometry.polygon import orient
from shapely.geometry import Polygon, mapping
from xml.etree import ElementTree as ET
from icelakes.utilities import get_size

from icelakes.utilities import encedc, decedc
from icelakes.nsidc import edc

In [14]:
# def download_granule(granule_id, gtxs, geojson, granule_output_path, uid, pwd, vars_sub='default', spatial_sub=False): 

In [50]:
granule_id = 'ATL03_20220714010847_03381603_006_02.h5'
gtxs = 'all'
geojson = 'geojsons/simplified_GRE_2000_CW.geojson'
granule_output_path = 'IS2data'
vars_sub = 'default'
spatial_sub = True
uid = decedc(edc().u)
pwd = decedc(edc().p)

print('--> parameters: granule_id = %s' % granule_id)
print('                gtxs = %s' % gtxs)
print('                geojson = %s' % geojson)
print('                granule_output_path = %s' % granule_output_path)
print('                vars_sub = %s' % vars_sub)
print('                spatial_sub = %s\n' % spatial_sub)

short_name = 'ATL03'
version = granule_id[30:33]
granule_search_url = 'https://cmr.earthdata.nasa.gov/search/granules'
capability_url = f'https://n5eil02u.ecs.nsidc.org/egi/capabilities/{short_name}.{version}.xml'
base_url = 'https://n5eil02u.ecs.nsidc.org/egi/request'

geojson_filepath = str(os.getcwd() + '/' + geojson)

# set the variables for subsetting
if vars_sub == 'default':
    vars_sub = ['/ancillary_data/atlas_sdp_gps_epoch',
                '/ancillary_data/calibrations/dead_time/gtx',
                '/orbit_info/rgt',
                '/orbit_info/cycle_number',
                '/orbit_info/sc_orient',
                '/gtx/geolocation/segment_id',
                '/gtx/geolocation/ph_index_beg',
                '/gtx/geolocation/segment_dist_x',
                '/gtx/geolocation/segment_length',
                '/gtx/geolocation/segment_ph_cnt',
                # '/gtx/geophys_corr/dem_h',
                '/gtx/geophys_corr/geoid',
                # '/gtx/bckgrd_atlas/pce_mframe_cnt',
                # '/gtx/bckgrd_atlas/bckgrd_counts',
                # '/gtx/bckgrd_atlas/bckgrd_int_height',
                # '/gtx/bckgrd_atlas/delta_time',
                '/gtx/heights/lat_ph',
                '/gtx/heights/lon_ph',
                '/gtx/heights/h_ph',
                '/gtx/heights/delta_time',
                '/gtx/heights/dist_ph_along',
                '/gtx/heights/quality_ph',
                # '/gtx/heights/signal_conf_ph',
                '/gtx/heights/pce_mframe_cnt',
                '/gtx/heights/ph_id_pulse'
                ]
    if int(version) > 5:
        vars_sub.append('/gtx/heights/weight_ph')
beam_list = ['gt1l', 'gt1r', 'gt2l', 'gt2r', 'gt3l', 'gt3r']

if gtxs == 'all':
    var_list = sum([[v.replace('/gtx','/'+bm) for bm in beam_list] if '/gtx' in v else [v] for v in vars_sub],[])
elif type(gtxs) == str:
    var_list = [v.replace('/gtx','/'+gtxs.lower()) if '/gtx' in v else v for v in vars_sub]
elif type(gtxs) == list:
    var_list = sum([[v.replace('/gtx','/'+bm.lower()) for bm in gtxs] if '/gtx' in v else [v] for v in vars_sub],[])
else: # default to requesting all beams
    var_list = sum([[v.replace('/gtx','/'+bm) for bm in beam_list] if '/gtx' in v else [v] for v in vars_sub],[])

# search for the given granule
search_params = {
    'short_name': short_name,
    'page_size': 100,
    'page_num': 1,
    'producer_granule_id': granule_id}

granules = []
headers={'Accept': 'application/json'}
while True:
    response = requests.get(granule_search_url, params=search_params, headers=headers)
    results = json.loads(response.content)

    if len(results['feed']['entry']) == 0:
        # Out of results, so break out of loop
        break

    # Collect results and increment page_num
    granules.extend(results['feed']['entry'])
    search_params['page_num'] += 1
    
granule_list, idx_unique = np.unique(np.array([g['producer_granule_id'] for g in granules]), return_index=True)
granules = [g for i,g in enumerate(granules) if i in idx_unique] # keeps double counting, not sure why
print('\nDownloading ICESat-2 data. Found granules:')
if len(granules) == 0:
    print('None')
    print("return 'none', 404")
for result in granules:
    print('  '+result['producer_granule_id'], f', {float(result["granule_size"]):.2f} MB',sep='')
    
# Use geopandas to read in polygon file as GeoDataFrame object 
# Note: a shapefile, KML, or almost any other vector-based spatial data format could be substituted here.
gdf = gpd.read_file(geojson_filepath)

# make sure the two regions that go over the date line are adjusted 
# if ('West_Ep-F.geojson' in geojson_filepath) or ('East_E-Ep.geojson' in geojson_filepath): 
#     lon180 = np.array(gdf.geometry.iloc[0].exterior.coords.xy[0])
#     lon180[lon180 < 0] = lon180[lon180 < 0]  + 360
#     gdf['geometry'] = Polygon(list(zip(lon180, gdf.geometry.iloc[0].exterior.coords.xy[1])))
#     poly = orient(gdf.loc[0].geometry,sign=1.0)
#     lon180 = np.array(poly.exterior.coords.xy[0])
#     # lon180[lon180 >= 180] = lon180[lon180 >= 180] - 360
#     gdf['geometry'] = Polygon(list(zip(lon180, gdf.geometry.iloc[0].exterior.coords.xy[1])))
#     poly = gdf.loc[0].geometry

# Simplify polygon for complex shapes in order to pass a reasonable request length to CMR. 
# The larger the tolerance value, the more simplified the polygon.
# Orient counter-clockwise: CMR polygon points need to be provided in counter-clockwise order. 
# The last point should match the first point to close the polygon.
# poly = orient(gdf.simplify(0.05, preserve_topology=False).loc[0],sign=1.0)
# else:
poly = orient(gdf.loc[0].geometry,sign=1.0)

geojson_data = gpd.GeoSeries(poly).to_json() # Convert to geojson
geojson_data = geojson_data.replace(' ', '') #remove spaces for API call

#Format dictionary to polygon coordinate pairs for CMR polygon filtering
polygon = ','.join([str(c) for xy in zip(*poly.exterior.coords.xy) for c in xy])

print('\nInput geojson:', geojson)
print('Simplified polygon coordinates based on geojson input:', polygon)

# Create session to store cookie and pass credentials to capabilities url
session = requests.session()
s = session.get(capability_url)
response = session.get(s.url,auth=(uid,pwd))
try:
    root = ET.fromstring(response.content)
except:
    print('\nError: status code %s (%s)\n' % (response.status_code, response.reason))

#collect lists with each service option
subagent = [subset_agent.attrib for subset_agent in root.iter('SubsetAgent')]

# this is for getting possible variable values from the granule search
if len(subagent) > 0 :
    # variable subsetting
    variables = [SubsetVariable.attrib for SubsetVariable in root.iter('SubsetVariable')]  
    variables_raw = [variables[i]['value'] for i in range(len(variables))]
    variables_join = [''.join(('/',v)) if v.startswith('/') == False else v for v in variables_raw] 
    variable_vals = [v.replace(':', '/') for v in variables_join]

# make sure to only request the variables that are available
def intersection(lst1, lst2):
    lst3 = [value for value in lst1 if value in lst2]
    return lst3
if vars_sub == 'all':
    var_list_subsetting = ''
else:
    var_list_subsetting = intersection(variable_vals,var_list)

if len(subagent) < 1 :
    print('No services exist for', short_name, 'version', latest_version)
    agent = 'NO'
    coverage,Boundingshape,polygon = '','',''
else:
    agent = ''
    subdict = subagent[0]
    if (subdict['spatialSubsettingShapefile'] == 'true') and spatial_sub:
        ######################################## Boundingshape = geojson_data
        Boundingshape = polygon
    else:
        Boundingshape, polygon = '',''
    coverage = ','.join(var_list_subsetting)
if (vars_sub=='all') & (not spatial_sub):
    agent = 'NO'
    
page_size = 100
request_mode = 'stream'
request_mode = 'async'
page_num = int(np.ceil(len(granules)/page_size))

param_dict = {'short_name': short_name, 
              'producer_granule_id': granule_id,
              'version': version,  
              'polygon': polygon,
              'Boundingshape': Boundingshape,  
              'Coverage': coverage, 
              'page_size': page_size, 
              'request_mode': request_mode, 
              'agent': agent, 
              'email': 'yes'}

#Remove blank key-value-pairs
param_dict = {k: v for k, v in param_dict.items() if v != ''}

#Convert to string
param_string = '&'.join("{!s}={!r}".format(k,v) for (k,v) in param_dict.items())
param_string = param_string.replace("'","")

#Print API base URL + request parameters
endpoint_list = [] 
for i in range(page_num):
    page_val = i + 1
    API_request = api_request = f'{base_url}?{param_string}&page_num={page_val}'
    endpoint_list.append(API_request)

print('\nAPI request URL:')
print(*endpoint_list, sep = "\n") 

# Create an output folder if the folder does not already exist.
path = str(os.getcwd() + '/' + granule_output_path)
if not os.path.exists(path):
    os.mkdir(path)

# Different access methods depending on request mode:
for i in range(page_num):
    page_val = i + 1
    print('\nOrder: ', page_val)
    print('Requesting...')
    request = session.get(base_url, params=param_dict)
    print('HTTP response from order response URL: ')
    cont = str(request._content)
    print(request.status_code, ':', cont[cont.find('<Code>')+6:cont.find('</Code>')],
          '(', cont[cont.find('<Message>')+9:cont.find('</Message>')], ')\n')
    request.raise_for_status()
    d = request.headers['content-disposition']
    fname = re.findall('filename=(.+)', d)
    dirname = os.path.join(path,fname[0].strip('\"'))
    print('Downloading...')
    open(dirname, 'wb').write(request.content)
    print('Data request', page_val, 'is complete.')

# Unzip outputs
for z in os.listdir(path): 
    if z.endswith('.zip'): 
        zip_name = path + "/" + z 
        zip_ref = zipfile.ZipFile(zip_name) 
        zip_ref.extractall(path) 
        zip_ref.close() 
        os.remove(zip_name) 

# Clean up Outputs folder by removing individual granule folders 
for root, dirs, files in os.walk(path, topdown=False):
    for file in files:
        try:
            shutil.move(os.path.join(root, file), path)
        except OSError:
            pass
    for name in dirs:
        os.rmdir(os.path.join(root, name))
        
print('\nUnzipped files and cleaned up directory.')
print('Output data saved in:', granule_output_path)

filelist = [granule_output_path+'/'+f for f in os.listdir(granule_output_path) \
            if os.path.isfile(os.path.join(granule_output_path, f)) & (granule_id in f)]

if len(filelist) == 0: 
    print("return 'none'")
else:
    filename = filelist[0]
print('File to process: %s (%s)' % (filename, get_size(filename)))

print(filename, 'status:', request.status_code)
print('return filename, request.status_code')

--> parameters: granule_id = ATL03_20220714010847_03381603_006_02.h5
                gtxs = all
                geojson = geojsons/simplified_GRE_2000_CW.geojson
                granule_output_path = IS2data
                vars_sub = default
                spatial_sub = True


Downloading ICESat-2 data. Found granules:
  ATL03_20220714010847_03381603_006_02.h5, 1424.04 MB

Input geojson: geojsons/simplified_GRE_2000_CW.geojson
Simplified polygon coordinates based on geojson input: -51.44333278940379,68.31809363093925,-50.486283375267334,68.11987917978698,-47.888360504831006,68.15015250103632,-45.8852447938031,68.20688811986273,-45.58581097310032,68.40707395862792,-45.608317627349,69.12362160986147,-48.61534211860709,71.45252643441924,-48.89314373451282,71.96096228898168,-50.74725418384058,72.21898074863127,-51.61509318403716,72.03789873962877,-52.001165820260915,71.7349281448636,-51.562856327973385,71.62820997865076,-51.78569434632586,71.22300173656397,-51.207644017164796,71.03944933

KeyError: 'content-disposition'

In [48]:
request._content

b'<?xml version="1.0" encoding="UTF-8" standalone="yes"?>\n<eesi:agentResponse xmlns="" xmlns:iesi="http://eosdis.nasa.gov/esi/rsp/i" xmlns:ssw="http://newsroom.gsfc.nasa.gov/esi/rsp/ssw" xmlns:eesi="http://eosdis.nasa.gov/esi/rsp/e" xmlns:esi="http://eosdis.nasa.gov/esi/rsp" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">\n    <order>\n        <orderId>5000004475575</orderId>\n        <Instructions>You may receive an email about your order if you specified an EMAIL address. &lt;br/&gt;&lt;br/&gt;The instructions used to process this order are:  Bounding Shape=-51.44333278940379,68.31809363093925,-50.486283375267334,68.11987917978698,-47.888360504831006,68.15015250103632,-45.8852447938031,68.20688811986273,-45.58581097310032,68.40707395862792,-45.608317627349,69.12362160986147,-48.61534211860709,71.45252643441924,-48.89314373451282,71.96096228898168,-50.74725418384058,72.21898074863127,-51.61509318403716,72.03789873962877,-52.001165820260915,71.7349281448636,-51.562856327973385,

In [34]:
cont = str(request._content)
cont[cont.find('<Message>')+9:cont.find('</Message>')]

'Maximum number of concurrent synchronous requests has been reached'

In [35]:
request.status_code

503

In [33]:
str(request._content)

'b\'<?xml version="1.0" encoding="UTF-8" standalone="yes"?>\\n<eesi:Exception xmlns="" xmlns:iesi="http://eosdis.nasa.gov/esi/rsp/i" xmlns:ssw="http://newsroom.gsfc.nasa.gov/esi/rsp/ssw" xmlns:eesi="http://eosdis.nasa.gov/esi/rsp/e" xmlns:esi="http://eosdis.nasa.gov/esi/rsp" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">\\n    <Code>TooManyJobs</Code>\\n    <Message>Maximum number of concurrent synchronous requests has been reached</Message>\\n</eesi:Exception>\\n\''

In [40]:
print(request.status_code, ':', cont[cont.find('<Code>')+6:cont.find('</Code>')],
          '(', cont[cont.find('<Message>')+9:cont.find('</Message>')], ')\n')

503 : TooManyJobs ( Maximum number of concurrent synchronous requests has been reached )



In [None]:
vars(response).keys()

In [None]:
response.request

In [None]:
response.elapsed