In [7]:
import requests
import subprocess
import logging
import os
from loguru import logger
import csv
import urllib.parse
from sentinelsat.sentinel import read_geojson, geojson_to_wkt
import json

In [8]:
def load_credentials():
    '''
    Read 'credentials.json' from the current working directory and return
    the parsed JSON content.
    '''
    with open('credentials.json', 'r') as handle:
        raw_text = handle.read()
    return json.loads(raw_text)

In [9]:
def downloadGranule_wget(options_and_url):
    '''
    This function is used to download a granule using the wget command-line utility.
    It takes a single argument options_and_url which is a string that contains the options
    and URL for the wget command. It returns the exit code of the wget command.

    The string is split into arguments using shell-like quoting rules and wget is
    started directly, without an intermediate shell. Characters such as '&', ';'
    or '$' inside a URL or password therefore reach wget literally instead of
    being interpreted as shell syntax.

    Parameters
    ----------
    options_and_url: A string that contains the options and URL for the wget command,
                     e.g. "--http-user=me --http-password=secret https://host/file.zip".

    Return Value:
    ----------
    The integer exit code of wget (0 means success). 127 is returned when the wget
    executable cannot be found and 2 when options_and_url cannot be parsed
    (unbalanced quotes), mirroring the codes a shell would have produced.
    '''
    import shlex  # local import: only this function needs it

    try:
        extra_args = shlex.split(options_and_url)
    except ValueError as exc:
        # Unbalanced quotes: a shell would have rejected the command line as well.
        logging.error('Cannot parse wget arguments: %s', exc)
        return 2

    # NOTE: --no-check-certificate disables TLS certificate verification (kept from the original behaviour).
    cmd = ['wget', '-c', '--no-check-certificate', '-v'] + extra_args

    # Never write the password to the log.
    printable = ['--http-password=***' if arg.startswith('--http-password=') else arg
                 for arg in cmd]
    logging.debug(' '.join(printable))

    try:
        result = subprocess.run(cmd, capture_output=True)
    except FileNotFoundError:
        # wget is not installed / not on PATH; report it like a shell would.
        logging.error('wget executable not found')
        return 127
    return result.returncode

In [10]:
def downloadGranule(row, outdir):
    '''
    This function is used to download a single Sentinel-1 granule.
    It builds the list of candidate download locations from the metadata in 'row'
    (AWS first, then ASF, depending on 'Download Site'), creates the directory
    <outdir>/P<path>/F<frame> and runs wget from inside it, stopping at the first
    location that downloads successfully.

    The process working directory is changed while downloading and is always set
    to 'outdir' (resolved to an absolute path up front) when the function exits,
    even if the download raises.

    Parameters
    ----------
    row: A dictionary containing metadata information about the granule to be downloaded.
         The dictionary should contain the following keys:
         - 'Download Site': A string indicating the download site ('AWS' or 'ASF' or 'both').
         - 'Path Number': A string representing the path number of the granule (zero-padded to 3 digits).
         - 'Frame Number': A string representing the frame number of the granule (zero-padded to 4 digits).
         - 'Granule Name': A string representing the name of the granule.
         - 'Acquisition Date': A string whose first ten characters are the date as YYYY-MM-DD
                               (only read for AWS downloads).
         - 'asf_wget_str': A string containing additional options for the wget command when downloading
                           from the ASF download site (only read for ASF downloads).
         - 'URL': A string representing the URL for downloading from the ASF download site
                  (only read for ASF downloads).
    outdir: Path of the root output directory.

    Return Value:
    ----------
    None. Success or failure of each attempt is reported through the logger only.
    '''

    # NOTE(review): this base URL points at an SLC dataset path; confirm it matches the products being searched for.
    aws_baseurl = 'http://sentinel1-slc-seasia-pds.s3-website-ap-southeast-1.amazonaws.com/datasets/slc/v1.1/'
    download_site = row['Download Site']

    # Resolve outdir before any chdir so that a relative path keeps pointing at
    # the same directory for the final chdir and for later calls.
    outdir = os.path.abspath(outdir)
    frame_dir = os.path.join(
        outdir,
        'P' + row['Path Number'].zfill(3),
        'F' + row['Frame Number'].zfill(4),
    )
    logger.info(f"Downloading granule {row['Granule Name']} to directory {frame_dir}.")
    os.makedirs(frame_dir, exist_ok=True)

    # Each candidate is (text shown in the log, argument string handed to wget).
    # They are kept apart so that wget credentials never end up in the log.
    candidates = []
    if download_site in {'AWS', 'both'}:
        row_date = row['Acquisition Date']
        date_folder = f"{row_date[:4]}/{row_date[5:7]}/{row_date[8:10]}/"
        aws_url = f"{aws_baseurl}{date_folder}{row['Granule Name']}/{row['Granule Name']}.zip"
        candidates.append((aws_url, aws_url))

    if download_site in {'ASF', 'both'}:
        candidates.append((row['URL'], f"{row['asf_wget_str']} {row['URL']}"))

    # wget writes into the current directory, so download from inside frame_dir.
    os.chdir(frame_dir)
    try:
        for shown_url, wget_args in candidates:
            status = downloadGranule_wget(wget_args)
            if status == 0:
                logger.info(f"{shown_url} download succeeded.")
                break
            logger.info(f"{shown_url} download failed.")
    finally:
        os.chdir(outdir)

    return None

In [21]:
def search_sentinel_1(startdate, enddate, geom):
    '''
    This function searches for Sentinel-1 satellite images in the Alaska
    Satellite Facility (ASF) archive within a specific time range and geographic boundary.
    The query is restricted to platforms Sentinel-1A/1B, processing level GRD_HD and
    beam mode IW.

    Parameters
    ----------
    startdate: A string in the format YYYY-MM-DD, representing the start date of the time range for the search.
    enddate: A string in the format YYYY-MM-DD, representing the end date of the time range for the search.
    geom: Path to a GeoJSON file describing the search area. Only the geometry of the
          first feature in the file is used.

    Return Value:
    ----------
    A list of dictionaries, one per scene found, keyed by the column names of the CSV
    returned by the ASF API. The list is empty when nothing matches.

    Raises
    ----------
    requests.HTTPError: if the ASF API answers with an HTTP error status.
    requests.Timeout: if the ASF API does not answer in time.
    '''

    output_format = 'csv'
    # hard-coded ASF query URL:
    asf_baseurl = 'https://api.daac.asf.alaska.edu/services/search/param?'
    # Generous upper bound (seconds) so that a stalled connection cannot hang the notebook forever.
    request_timeout = 300

    # Read GeoJSON file into a dictionary and convert its first feature to WKT
    geojson_dict = read_geojson(geom)
    wkt = geojson_to_wkt(geojson_dict['features'][0]['geometry'])

    query_params = {
        'output': output_format,
        'platform': 'Sentinel-1A, Sentinel-1B',
        'processingLevel': 'GRD_HD',
        'beamMode': 'IW',
        'intersectsWith': wkt,
        'start': f'{startdate}T00:00:00UTC',
        'end': f'{enddate}T00:00:00UTC'
    }

    # URL-encode the query parameters and append them to the ASF base URL
    query_url = asf_baseurl + urllib.parse.urlencode(query_params)

    # Make the request to the ASF API
    logger.info('\nRunning ASF API query:')
    logger.info(query_url + '\n')
    response = requests.post(query_url, timeout=request_timeout)
    # Fail loudly on an HTTP error instead of parsing the error body as CSV.
    response.raise_for_status()

    rows = []
    if output_format == 'csv':
        # Parse the CSV response into a list of dictionaries (one per scene)
        rows = list(csv.DictReader(response.text.splitlines()))

        # Log the number of scenes found and their details
        num_scenes = len(rows)
        if num_scenes > 0:
            logger.info(f"Found {num_scenes} scene{'s' if num_scenes > 1 else ''}.")
            for row in rows:
                logger.info(f"Scene {row['Granule Name']}, Path {row['Path Number']} / Frame {row['Frame Number']}")

    return rows


In [22]:
def add_download_info(rows, download_site):
    '''
    This function takes a list of dictionaries containing search results for Sentinel-1 scenes and adds
    download information to each dictionary (the dictionaries are modified in place):
    - 'Download Site' is set to the value of the 'download_site' parameter.
    - 'asf_wget_str' is set to the wget options needed for ASF downloads (--http-user and --http-password,
      taken from load_credentials()), or to an empty string when the download site is 'AWS'.

    Credentials are only loaded when the download site is not 'AWS', so AWS-only use does not
    require a credentials file. Credential values are shell-quoted so that special characters
    in them survive being placed on a command line.

    Parameters
    ----------
    rows: A list of dictionaries, where each dictionary contains metadata about a Sentinel-1 scene.
    download_site: A string naming the download site ('AWS', 'ASF' or 'both').

    Return Value:
    ----------
    This function returns the same list object, with the additional download information added to each dictionary.
    '''
    import shlex  # local import: only this function needs it

    # The option string is identical for every row, so build it once.
    if download_site != 'AWS':
        credentials = load_credentials()
        # Pass http-user and http-password for ASF downloads
        asf_wget_options = {
            'http-user': credentials['user'],
            'http-password': credentials['password'],
        }
        asf_wget_str = ' '.join(
            '--%s=%s' % (option, shlex.quote(str(value)))
            for option, value in asf_wget_options.items()
        )
    else:
        asf_wget_str = ''

    for row in rows:
        row['asf_wget_str'] = asf_wget_str
        row['Download Site'] = download_site
    return rows

In [23]:
def download_sentinel_1(nproc, rows, outdir, download_site='both'):
    '''
    This function downloads the Sentinel-1 granules described by 'rows', one after the other.
    Each row is first completed with download information (add_download_info) and then handed to
    downloadGranule, which uses the 'wget' command to download the data. If 'download_site' is 'AWS',
    the data will be downloaded from the AWS S3 bucket. If it is 'ASF', the data will be downloaded from
    the ASF DAAC. If it is 'both', AWS is tried first and ASF is used when the AWS download fails.

    Parameters:
    -----------
    nproc : int
        The requested number of parallel download processes. It is currently only reported in the
        log: downloads run sequentially.
    rows : list of dict
        A list of dictionaries containing information about each granule to download.
        Each dictionary should have the following keys: 'Granule Name', 'Path Number', 'Frame Number',
        'Acquisition Date' and 'URL'. 'asf_wget_str' and 'Download Site' are added by this function.
    outdir : str
        Root directory under which the granules are saved.
    download_site : str, optional
        'AWS', 'ASF' or 'both' (default, the previously hard-coded value).
    '''

    # The previous message claimed parallel execution, which this loop does not do.
    logger.info(f"\nDownloading granules sequentially (requested nproc={nproc} is currently not applied).")
    downloadList = add_download_info(rows, download_site)

    for product in downloadList:
        logger.info("Attempting to download")
        downloadGranule(product, outdir)

In [24]:
# Area of interest: path to a GeoJSON file (polygon in EPSG:4326).
aoi_geojson = ".../polygon_example.geojson" # Placeholder - replace with the path to your AOI
 
# Start date of the search window (YYYY-MM-DD)
startdate = "2022-04-01"
# End date of the search window (YYYY-MM-DD)
enddate = "2022-04-05"

# Requested number of images to download in parallel (passed to download_sentinel_1)
nproc = 20
# Set to True if you want to download the S1 products found by the search
download = False

# Output directory in which the downloaded S1 products are saved
outdir = ".../path-to-output" # Placeholder - replace with the path to your output directory

# Run the search; the download step only runs when 'download' is True.
rows = search_sentinel_1(startdate, enddate, aoi_geojson)
if download:
    download_sentinel_1(nproc, rows, outdir)

[32m2023-12-11 15:47:55.119[0m | [1mINFO    [0m | [36m__main__[0m:[36msearch_sentinel_1[0m:[36m40[0m - [1m
Running ASF API query:[0m
[32m2023-12-11 15:47:55.120[0m | [1mINFO    [0m | [36m__main__[0m:[36msearch_sentinel_1[0m:[36m41[0m - [1mhttps://api.daac.asf.alaska.edu/services/search/param?output=csv&platform=Sentinel-1A%2C+Sentinel-1B&processingLevel=GRD_HD&beamMode=IW&intersectsWith=MULTIPOLYGON%28%28%2820.4338+39.3408%2C19.3671+39.9610%2C23.2619+42.0945%2C27.7522+41.8464%2C28.5460+37.5547%2C28.5460+34.3792%2C25.7427+33.6846%2C21.6742+34.7266%2C20.0617+36.7608%2C20.4338+39.3408%29%29%29&start=2022-04-01T00%3A00%3A00UTC&end=2022-04-05T00%3A00%3A00UTC
[0m
[32m2023-12-11 15:47:58.258[0m | [1mINFO    [0m | [36m__main__[0m:[36msearch_sentinel_1[0m:[36m53[0m - [1mFound 18 scenes.[0m
[32m2023-12-11 15:47:58.259[0m | [1mINFO    [0m | [36m__main__[0m:[36msearch_sentinel_1[0m:[36m55[0m - [1mScene S1A_IW_GRDH_1SDV_20220404T161636_20220404T161701_