In [1]:
#######
## feb 11, 2022
## testing the functions zach wrote for data download
## doesn't use wget and fully "pythonized" - uses requests instead
## using asf api to generate URLS


# import libraries
import numpy as np
import re
import zipfile
import getpass
import os  # for chdir, getcwd, path.basename, path.exists
import pandas as pd # for DatetimeIndex
import netrc
import glob
import requests
from shapely.geometry import Polygon, mapping
from datetime import datetime
from subprocess import PIPE, Popen
from os.path import join, isdir, isfile, basename
import progressbar
from tqdm import tqdm
import logging

# install the default root handler first, then create this notebook's logger
# and restrict it to warnings and above
logging.basicConfig()
log = logging.getLogger(__name__)
log.setLevel(logging.WARNING)

In [2]:
# input NASA Earthdata credentials here
# The username is read in clear text; the password is read with getpass so it is not echoed.
# NOTE(review): ASF_USER / ASF_PASS are not referenced by any of the cells visible below —
# confirm whether the download helpers are expected to authenticate with them.
ASF_USER = input("Enter Username: ")
ASF_PASS = getpass.getpass("Enter Password: ")

Enter Username:  jacktarricone
Enter Password:  ············


### Function definitions to query the ASF API

In [3]:
def build_bbox_string(polygon):
    '''
    Builds the bbox string to include in the ASF search request.

    The bbox consists of 4 comma-separated numbers: lower left
    longitude,latitude, and upper right longitude,latitude.

    Parameters
    ----------
    polygon : shapely polygon
        Area of interest. Only its axis-aligned bounding box is used.

    Returns
    ----------
    bbox_string : string
        String to include in the ASF request, formatted as
        'min_lon,min_lat,max_lon,max_lat'
    '''
    # ``bounds`` is (minx, miny, maxx, maxy), so the result does not depend on
    # which vertex the polygon starts at or on its winding direction.
    min_lon, min_lat, max_lon, max_lat = polygon.bounds
    bbox_string = f'{min_lon},{min_lat},{max_lon},{max_lat}'

    return bbox_string

In [4]:
def search_asf(platform, processingLevel, start, end, polygon, output_format):
    '''
    Search the ASF platform for images given the input parameters

    Parameters
    ----------
        platform : string
            Name of the imaging platform (e.g. 'UAVSAR'). A list of supported platforms is available on the ASF website
        processingLevel : string
            Processing level of the imaging product.
            Possible values for UAVSAR : (KMZ, PROJECTED, PAULI, PROJECTED_ML5X5, STOKES, AMPLITUDE, BROWSE, COMPLEX, DEM_TIFF, PROJECTED_ML3X3, METADATA, AMPLITUDE_GRD, INTERFEROMETRY, INTERFEROMETRY_GRD, THUMBNAIL)
        start : datetime object
            Start date of the search period.
        end : datetime object
            End date of the search period.
        polygon : shapely polygon defining the Area of Interest,
        output_format: string
            Format being returned by the ASF API. Values : CSV, JSON, KML, METALINK, COUNT, DOWNLOAD, GEOJSON
            Note that the response body is always parsed with ``.json()``, so
            formats that are not JSON will fail to parse.

    Returns
    -------
    The response body decoded from JSON.

    Raises
    ------
    requests.HTTPError
        If the API answers with a 4xx/5xx status code.
    '''
    base = 'https://api.daac.asf.alaska.edu/services/search/param'
    # Send the formatted timestamps; the original built these strings but then
    # passed the raw datetime objects in the payload instead.
    start_date = start.strftime('%Y-%m-%dT%H:%M:%SUTC')
    end_date = end.strftime('%Y-%m-%dT%H:%M:%SUTC')
    aoi_string = build_bbox_string(polygon)
    payload = {
        'platform': platform,
        'processingLevel': processingLevel,
        'start': start_date,
        'end': end_date,
        'bbox': aoi_string,
        'output': output_format
    }
    r = requests.get(base, params=payload)
    # Fail loudly on an HTTP error instead of handing an error body to the caller.
    r.raise_for_status()

    return r.json()

In [5]:
def stream_download(url, output_f):
    """
    Stream a url to disk in 1 KiB chunks while showing a tqdm progress bar.

    The data is first written to ``<output_f>.part`` and only renamed to
    ``output_f`` once the whole body has been received, so an interrupted
    download never leaves a truncated file at the final path.

    Args:
        url: url to download
        output_f: path to save the data to
    Returns:
        None. If the server does not answer with HTTP 200 a warning is logged
        and nothing is written.
    """
    part_f = f'{output_f}.part'

    # The context manager releases the connection even if the download fails.
    with requests.get(url, stream=True) as r:
        if r.status_code != 200:
            log.warning(f'HTTP CODE {r.status_code}. Skipping download!')
            return

        # Progress bar - https://towardsdatascience.com/how-to-download-files-using-python-part-2-19b95be4cdb5
        total_size = int(r.headers.get('content-length', 0))
        try:
            with open(part_f, 'wb') as f:
                with tqdm(total=total_size, unit='B', unit_scale=True , desc=f'Downloading {basename(url)}') as pbar:
                    for ch in r.iter_content(chunk_size=1024):
                        if ch:
                            f.write(ch)
                            pbar.update(len(ch))
        except BaseException:
            # Also covers KeyboardInterrupt (e.g. interrupting the notebook cell):
            # drop the partial file, then let the error propagate.
            if os.path.exists(part_f):
                os.remove(part_f)
            raise

    # Only a fully received file is moved to its final name.
    os.replace(part_f, output_f)

In [6]:
def download_image(url, output_dir, ann = False):
    """
    Downloads uavsar InSAR files from a url.
    Args:
        url (string): A url containing uavsar flight data. Can be from JPL or ASF
        output_dir (string): Directory to save the data in
        ann (bool): If True, also try to locate and download the matching
            .ann (annotation) file, unless the url itself is a .zip or .ann
    Returns:
        out_fp (string): File path to downloaded image.
    Raises:
       Nothing is raised explicitly; errors from requests or the filesystem
       propagate to the caller.
    """

    log.info(f'Starting download of {url}...')
    local = join(output_dir, basename(url))

    # Make the output dir if it doesn't exist (exist_ok avoids a check-then-create race)
    os.makedirs(output_dir, exist_ok=True)

    if not isfile(local):
        stream_download(url, local)
    else:
        log.info(f'{local} already exists, skipping download!')

    if ann:
        if url.split('.')[-1] in ('zip', 'ann'):
            log.info('Download already contains ann file, skipping download!')
        else:
            ann_url = None
            # os.path.dirname: the bare name `dirname` is not imported by this notebook
            parent = os.path.dirname(url)
            # ASF formatting - query parent directory
            if parent.split('.')[-1] == 'zip':
                log.debug(f'ASF url found for {url}')
                parent_files = requests.get(parent).json()['response']
                ann_matches = [i for i in parent_files if '.ann' in i['name']]
                if ann_matches:
                    # If several entries match, the first one is used.
                    ann_url = ann_matches[0]['url']
                    log.debug(f'Annotation url: {ann_url}')
                else:
                    log.warning('No ann url found. Unable to download ann file.')

            # JPL formatting - have to parse url to get ann
            elif 'uavsar.asfdaac.alaska.edu' in url:
                log.debug(f'JPL url found for {url}')
                ext = url.split('.')[-1]
                pols = ['VVVV','HHHH','HVHV', 'HHHV', 'HHVV','HVVV']
                slc_pol = [pol for pol in pols if (pol in url)]
                ann_src = url
                # Strip the polarization token when exactly one is present in the url
                if len(slc_pol) == 1:
                    ann_src = ann_src.replace(slc_pol[0], '')

                if ext == 'grd':
                    n_parts = len(basename(ann_src).split('.'))
                    if n_parts == 2:
                        ann_src = ann_src.replace('.grd','.ann')
                    elif n_parts == 3:
                        ann_src = ann_src.replace('.grd','')
                    ext = ann_src.split('.')[-1]
                ann_url = ann_src.replace(f'.{ext}', '.ann')
                log.debug(f'Annotation url: {ann_url}')

            else:
                log.warning('No ann url found. Unable to download ann file.')

            if ann_url:
                ann_local = join(output_dir, basename(ann_url))
                log.debug(f'Annotation local: {ann_local} and url {ann_url}')
                if not isfile(ann_local):
                    stream_download(ann_url, ann_local)
                else:
                    log.info(f'{ann_local} already exists, skipping download!')
    return local


In [7]:
# define search parameters for sierra flight line
# polygon vertices are (longitude, latitude) pairs forming a rectangle;
# the fourth vertex previously read (-119.969,38211), a typo for the upper-left corner
sierra_polygon = Polygon([(-119.9697,37.4631),(-118.9576,37.4631),(-118.9576,38.7211),(-119.9697,38.7211)])
start_date = datetime.strptime('2020-02-28 11:00:00', '%Y-%m-%d %H:%M:%S')
end_date = datetime.strptime('2020-03-11 11:00:00', '%Y-%m-%d %H:%M:%S')

In [8]:
# query the ASF API for the search window and area defined above;
# element 0 of the decoded JSON response is kept as the list of products
results = search_asf(
    platform='UAVSAR',
    processingLevel='INTERFEROMETRY_GRD',
    start=start_date,
    end=end_date,
    polygon=sierra_polygon,
    output_format='JSON',
)[0]

# report how many products were returned
print(f'{len(results)} product(s) found')

1 product(s) found


In [9]:
# print the download url of every product; after the loop `downloadUrl`
# holds the url of the last product in `results`
for product in results:
    downloadUrl = product['downloadUrl']
    print(downloadUrl)

https://datapool.asf.alaska.edu/INTERFEROMETRY_GRD/UA/sierra_17305_20014-000_20016-005_0014d_s01_L090_01_int_grd.zip


In [None]:
# download the selected product into a local folder (created by download_image if missing)
download_image(
    url=downloadUrl,
    output_dir='/Users/jacktarricone/Desktop/zach_test/',
)

Downloading sierra_17305_20014-000_20016-005_0014d_s01_L090_01_int_grd.zip:   7%

In [None]:
# display the url selected by the loop above
downloadUrl

In [None]:
# get new path for folder of insar data just downloaded
# NOTE(review): index 3 selects the fourth glob match, not the first, and glob
# does not guarantee any ordering — confirm which folder is intended.
# NOTE(review): this directory differs from the output_dir passed to
# download_image in the download cell — confirm the data was moved here.
new_path_list = glob.glob('/Users/jacktarricone/ch2_sierra_data/sierra/*')
new_path = new_path_list[3] # index 3 -> fourth element of the list
print(new_path)