<font size="1">Copyright 2021, by the California Institute of Technology. ALL RIGHTS RESERVED. United States Government sponsorship acknowledged. Any commercial use must be negotiated with the Office of Technology Transfer at the California Institute of Technology.</font>
    
<font size="1">This software may be subject to U.S. export control laws and regulations. By accepting this document, the user agrees to comply with all applicable U.S. export laws and regulations. User has the responsibility to obtain export licenses, or other export authority as may be required, before exporting such information to foreign countries or providing access to foreign persons.</font>

# Data Access
This notebook provides simple examples of how to access datasets stored in some of the more common data stores.



In [None]:
# General Initialization
#------------------------------------------------------------

import os
import requests, json, getpass
from requests.auth import HTTPBasicAuth
import urllib3
from urllib.parse import urlparse

urllib3.disable_warnings()

# Run-once guard: `start_dir` is only undefined the first time this cell is
# executed in a kernel session. On that first run we remember the notebook's
# directory, create the output directory and move into it. Re-running the cell
# is then a no-op, so the working directory is never nested a second time.
try:
    start_dir
except NameError:
    start_dir = os.getcwd()
    # Build the path from start_dir so it is always relative to the notebook's
    # directory, and use os.makedirs instead of shelling out to `mkdir -p`
    # (same "create parents, ignore if present" semantics, but portable).
    output_dir = os.path.join(start_dir, 'notebook_output', '02.1-Data-Access')
    os.makedirs(output_dir, exist_ok=True)
    os.chdir(output_dir)

## 1. PCM environment
This is the storage of datasets produced as output of PCM processing. Stored in S3 buckets, the locations are obtained through the Pele API, which requires pre-registration as described in notebook 01-Introduction. This notebook also assumes that you have a ~/.netrc file containing an entry appropriate to the <mozart_ip> specified below and have used the aws-login utility to refresh your AWS credentials. For the latter, run the following command in a terminal window:

```aws-login -p default```

and enter your aws login information if/when prompted.

A more detailed explanation of the steps below is found in the 02-Datasets-geospatial notebook.

In [None]:
from pele.lib.client import PeleRequests

# set the base url to interact with the goddess, Pele
mozart_ip = "100.64.122.98"
base_url = "https://{}/pele/api/v0.1".format(mozart_ip)
print("Using Pele base url {}.".format(mozart_ip))

# Instantiate PeleRequests object
#------------------------------------------------------------
pr = PeleRequests(base_url, verify=False)                                               # <--- Initialize endpoint


# Query for the dataset ID of the `L2_L_GSLC` dataset that satisifies search criteria.
#------------------------------------------------------------
search_poly=[[[-122,40],[-122,38.5],[-125,38.5],[-125,40],[-122,40]]]
search_start_time = '2012-11-05'
search_end_time = '2012-11-05T23:59:59Z'

r = pr.post(base_url + '/pele/dataset/L2_L_GSLC/dataset_ids',                           # <--- Query for dataset(s)
            json = { 'polygon' : search_poly, 'start_time' : search_start_time, 'end_time' : search_end_time })

# Make sure the request succeeded
assert r.status_code == 200

# Obtain the dataset metadata
#------------------------------------------------------------
res = r.json()

# Make sure there are qualifying datasets
assert len(res['dataset_ids']) > 0

dataset_id = res['dataset_ids'][0]
print("Result dataset id {}".format(dataset_id))
r = pr.get(base_url + '/pele/dataset/{}'.format(dataset_id))                           # <--- Pull dataset metadata
res = r.json()

# print(json.dumps(res, indent=2))    # Uncomment to see the full metadata

# Pull the dataset urls
urls = res['result']['urls']
# print("urls: {}".format(urls))      # Uncomment to see the set of dataset URLs

# Identify the S3 url
s3_url = None
for i in urls:
    if i.startswith('s3://'): s3_url = i
assert s3_url is not None

url = 's3://{}'.format(urlparse(s3_url).path[1:])
local_dir = os.path.basename(url)
print (local_dir)

# Copy the dataset files from S3 to the local filesystem
!aws s3 sync {url} {local_dir}

## 2. ASF (VERTEX) API

This example submits a query for a UAVSAR ground-projected interferogram dataset covering the Point Reyes Lighthouse on January 16, 2020. Only some of the valid search terms are covered below; for more details about using the API go to: https://asf.alaska.edu/api/. The Vertex UI, an interactive tool useful for identifying example datasets, is at: https://search.asf.alaska.edu/.

In [None]:
# Build and submit a search against the ASF API, then extract the download URL
# of the first matching product from the JSON response.
base_asf_search_url = "https://api.daac.asf.alaska.edu/services/search/param"     # <--- search URL for the ASF API

# platform can be one or more (comma separated) of: 
#      ALOS, A3, AIRSAR, AS, ERS, ERS-1, E1, ERS-2, E2, JERS-1, J1, 
#      RADARSAT-1, R1, SEASAT, SS, S1, Sentinel, Sentinel-1, 
#      Sentinel-1A, SA, Sentinel-1B, SB, SMAP, SP, UAVSAR, UA
platform = "UAVSAR"

# processing level varies by platform - see API documentation noted above.
processingLevel = "INTERFEROMETRY_GRD"

# this is the search polygon, note the format, which differs from that used in the PCM example above.
polygon = "-123.0461,37.964,-122.9467,37.964,-122.9467,38.0524,-123.0461,38.0524,-123.0461,37.964"

# start/end dates, in standard YYYY-MM-DDTHH:MI:SSZ format
start = "2020-01-16T00:00:00Z"
end = "2020-01-16T23:59:59Z"

# output format we want is JSON, other formats are: CSV, JSON, KML, METALINK, COUNT, DOWNLOAD, GEOJSON
output_format = "JSON"

# create the search
search_url_template = "{}?platform={}&processingLevel={}&polygon={}&start={}&end={}&output={}"
search_url = search_url_template.format(base_asf_search_url, platform, processingLevel, 
                                       polygon,start,end,output_format)                  # <-- create/submit search
print("Submitting request {}".format(search_url))

r = requests.get(search_url)

# Fail here with the HTTP error rather than later with a confusing JSON
# decoding error if the service rejected the request.
r.raise_for_status()

# Uncomment if you want to see the dataset metadata
# print("dataset:\n {}".format(r.content.decode()))

# The code below indexes the response as a list of lists of records; guard
# against an empty result so a search with no matches gives a clear message
# instead of an IndexError.
search_results = r.json()
if not search_results or not search_results[0]:
    raise RuntimeError("ASF search returned no datasets for {}".format(search_url))

# The dataset file is provided by 'downloadUrl' in the response JSON
downloadUrl = search_results[0][0]['downloadUrl']                                        # <-- extract download URL
print("\ndownloadUrl : {}".format(downloadUrl))

### Use the Osaka API to download the file
We will use the PCM Osaka API to perform the download for us, which will perform necessary authentications and redirections. First, make sure your earthdata login is specified in the ~/.netrc file (**anywhere *before* the line 'macdef init'**):

```machine urs.earthdata.nasa.gov login <earthdata_user> password <earthdata_password>```

**Lock Files**:
Note that Osaka employs lock files to ensure no download contention between processes and threads occurs. However, in the event the download fails (for example due to missing or incorrect credentials), the lock file is left behind which prevents subsequent attempts. If you encounter an error below to the effect of "lock file already locked", in a terminal window go to the nisar-on-demand-use-cases/notebook_output/02.1-Data-Access directory and remove the file with the ".lock" extension and retry the download.

In [None]:
import osaka.main

print("Downloading {}".format (downloadUrl))
osaka.main.get(downloadUrl, ".")                                                  # <-- download the file w/ Osaka
print("Download complete.\n\n")

download_filename = os.path.basename(downloadUrl)
!ls -l ./$download_filename

## 3. ASF via MAAP
#### (WIP: need to determine how to download a .vrt file and its source files.)

This example provides the means to access UAVSAR radar instrument data as extracted in the maap-plant-demo_20181206 notebook.

For this example, you'll need to install the maap module through the following steps:

    1. Open a terminal from the jupyter notebook home page.
    2. In the jovyan home directory, run: git clone https://github.com/MAAP-Project/maap-py.git
    3. Enter the newly created directory: cd maap-py
    4. Perform the installation: python setup.py install
    5. Create a copy of the maap.cfg file in the jovyan home directory: cp maap.cfg ~

Note that since maap isn't a component of the underlying jupyter server image, you will need to repeat steps 3 and 4 each time you restart the jupyter server.

In [None]:
from osgeo import gdal
from maap.maap import MAAP, Granule
from pprint import pprint
import multiprocessing

# Workaround for a bug in MAAP initializer
if (os.environ.get("MAAP_CONF") is None):
    os.environ["MAAP_CONF"] = "."

# Initialize MAAP
maap = MAAP()                                                                                        # <-- Initialize 

# Query for Lope National Park radar data for track 001
#------------------------------------------------------------
radarGranules = maap.searchGranule(site_name="Lope National Park Gabon", 
                                   track_number="001", 
                                   collectionConceptId='C1200000308-NASA_MAAP')                      # <-- Query
# pprint(radarGranules)           # Uncomment to see the full metadata

# extract the vrt filename from the first granule
radarGranule = list(filter(lambda granule: granule._location.endswith('.vrt'), radarGranules))[0]
# pprint(radarGranule)             # Uncomment to see the granule

if not os.path.exists(os.path.basename(radarGranule._location)):                                     # <-- Download
    print('Downloading {}'.format(os.path.basename(radarGranule._location)))
    !aws s3 cp $radarGranule._location .

# get the associated SLCs
slcs_to_download = []
for slc in radarGranules:
    granule_id = slc["Granule"]["GranuleUR"]
    if (granule_id.startswith('uavsar_AfriSAR_v1_SLC-lopenp') and granule_id.endswith('slc')):
       slcs_to_download.append(slc)

for slc in slcs_to_download:
    if not os.path.exists(slc["Granule"]["GranuleUR"]):
        print(f"downloading {slc._location}")
        data = slc.getLocalPath()
    else:
        print(f"{slc._location} already exists locally, skipping.")

<font size="1">This notebook is compatible with NISAR Jupyter Server Stack v1.4 and above</font>