In [124]:
import json
import requests
from getpass import getpass
import sys
import time
import re
import threading
import datetime
import os
import pandas as pd
import geopandas as gpd
import geojson
import pprint
from geojson import Polygon, Feature, FeatureCollection, dump

import warnings
warnings.filterwarnings("ignore")

In [125]:
# user inputs
################################################
# change tile number
tile_number = 'h11v8'
# change data set name
datasetName = 'landsat_ard_tile_c2'
#  sensor list comment uncomment as needed
sensors = [
            'LC08',
           'LE07',
           'LT05'
           ]
cloudCoverFilter = {'min' : 0, 'max' : 75}
fileType = 'band'
# user input ends
################################################

    Downloading: LT05_CU_011008_20051013_20210429_02_SR_B2.TIF...


In [126]:
# read spatial extent from a csv
aoi_df = pd.read_csv('tile_aoi.csv')
aoi_df = aoi_df[aoi_df['tile']==tile_number]
# corner coordinate
llx = aoi_df['llx'].to_list()[0]
lly = aoi_df['lly'].to_list()[0]
urx = aoi_df['urx'].to_list()[0]
ury = aoi_df['ury'].to_list()[0]
# create a spatial filter
spatialFilter = {
    "filterType": "mbr",
    "lowerLeft": {
        "latitude": lly,
        "longitude": llx
    },
    "upperRight": {
        "latitude": ury ,
        "longitude": urx
    }
}

pprint.pprint(spatialFilter)

{'filterType': 'mbr',
 'lowerLeft': {'latitude': 40.57473547, 'longitude': -106.4507224},
 'upperRight': {'latitude': 41.02315511, 'longitude': -105.6376465}}


In [127]:
# Send http request
def sendRequest(url, data, apiKey = None, exitIfNoResponse = True):
    """
    Send a request to an M2M endpoint and returns the parsed JSON response.

    Parameters:
    endpoint_url (str): The URL of the M2M endpoint
    payload (dict): The payload to be sent with the request

    Returns:
    dict: Parsed JSON response
    """
    
    json_data = json.dumps(data)
    
    if apiKey == None:
        response = requests.post(url, json_data)
    else:
        headers = {'X-Auth-Token': apiKey}              
        response = requests.post(url, json_data, headers = headers)  
    
    try:
      httpStatusCode = response.status_code 
      if response == None:
          print("No output from service")
          if exitIfNoResponse: sys.exit()
          else: return False
      output = json.loads(response.text)
      if output['errorCode'] != None:
          print(output['errorCode'], "- ", output['errorMessage'])
          if exitIfNoResponse: sys.exit()
          else: return False
      if  httpStatusCode == 404:
          print("404 Not Found")
          if exitIfNoResponse: sys.exit()
          else: return False
      elif httpStatusCode == 401: 
          print("401 Unauthorized")
          if exitIfNoResponse: sys.exit()
          else: return False
      elif httpStatusCode == 400:
          print("Error Code", httpStatusCode)
          if exitIfNoResponse: sys.exit()
          else: return False
    except Exception as e: 
          response.close()
          print(e)
          if exitIfNoResponse: sys.exit()
          else: return False
    response.close()
    
    return output['data']

In [128]:
# function definition for file download
def downloadFile(url):
    sema.acquire()
    try:
        response = requests.get(url, stream=True)
        disposition = response.headers['content-disposition']
        filename = re.findall("filename=(.+)", disposition)[0].strip("\"")
        print(f"    Downloading: {filename}...")
        
        open(os.path.join(data_dir, filename), 'wb').write(response.content)
        sema.release()
    except Exception as e:
        print(f"\nFailed to download from {url}. Will try to re-download.")
        sema.release()
        runDownload(threads, url)

In [129]:
# download resource 
def runDownload(threads, url):
    thread = threading.Thread(target=downloadFile, args=(url,))
    threads.append(thread)
    thread.start()

In [130]:
data_dir = 'data'
utils_dir = 'utils'
dirs = [ data_dir, utils_dir]

for d in dirs:
        if not os.path.exists(d): 
            try: 
                os.makedirs(d)
                print(f"Directory '{d}' created successfully.") 
            except OSError as e: 
                print(f"Error creating directory '{d}': {e}") 
        else: 
            print(f"Directory '{d}' already exists.") 

Directory 'data' already exists.
Directory 'utils' already exists.


In [131]:
maxthreads = 5 # Threads count for downloads
sema = threading.Semaphore(value=maxthreads)
label = datetime.datetime.now().strftime("%Y%m%d_%H%M%S") # Customized label using date time
threads = []

In [132]:
def prompt_ERS_login(serviceURL):
    print("Logging in...\n")

    p = ['Enter EROS Registration System (ERS) Username: ', 'Enter ERS Account Token: ']

    # Use requests.post() to make the login request
    response = requests.post(f"{serviceURL}login-token",
                             json={'username': 'ymaramb',
                                    'token':'LflumIwnm1H!ViUcdVMb!IjAP1w!MRF5qLjqX8V_svaZRu6J@awzS1@F0DZ_Wp@G'})

    if response.status_code == 200:  # Check for successful response
        apiKey = response.json()['data']
        print('\nLogin Successful, API Key Received!')
        headers = {'X-Auth-Token': apiKey}
        return apiKey
    else:
        print("\nLogin was unsuccessful, please try again or create an account at: https://ers.cr.usgs.gov/register.")
        

In [139]:
serviceUrl = "https://m2m.cr.usgs.gov/api/api/json/stable/"
apiKey = prompt_ERS_login(serviceUrl)

Logging in...


Failed to download from https://landsatlook.usgs.gov/tile/collection02/tm/2005/CU/011/008/LT05_CU_011008_20051105_20210429_02/LT05_CU_011008_20051105_20210429_02_ST_B6.TIF?requestSignature=eyJkb3dubG9hZEFwcCI6Ik0yTSIsImNvbnRhY3RJZCI6MjQ2MjU0NzMsImRvd25sb2FkSWQiOjY3NTY5NTQ0OSwiZGF0ZUdlbmVyYXRlZCI6IjIwMjQtMDktMjZUMjI6MjU6MjItMDU6MDAiLCJpZCI6IkxUMDVfQ1VfMDExMDA4XzIwMDUxMTA1XzIwMjEwNDI5XzAyX1NUX0I2LlRJRiIsInNpZ25hdHVyZSI6IiQ1JCREZS5PRWZTQUxSZ3ZGQVwvTWpIdlwvQ3BIaThLaVZGZlltYi5vOHdDdDZQNjMifQ==. Will try to re-download.

Failed to download from https://landsatlook.usgs.gov/tile/collection02/tm/2005/CU/011/008/LT05_CU_011008_20050130_20210428_02/LT05_CU_011008_20050130_20210428_02_SR_B2.TIF?requestSignature=eyJkb3dubG9hZEFwcCI6Ik0yTSIsImNvbnRhY3RJZCI6MjQ2MjU0NzMsImRvd25sb2FkSWQiOjY3NTY5NTE1NiwiZGF0ZUdlbmVyYXRlZCI6IjIwMjQtMDktMjZUMjI6MjU6MDctMDU6MDAiLCJpZCI6IkxUMDVfQ1VfMDExMDA4XzIwMDUwMTMwXzIwMjEwNDI4XzAyX1NSX0IyLlRJRiIsInNpZ25hdHVyZSI6IiQ1JCR6Mlk1NzQ0SGZ3ZTAxWmNwQ3JKZHNBTFc4cD

    Downloading: LT05_CU_011008_20050522_20210429_02_SR_B4.TIF...
    Downloading: LT05_CU_011008_20050420_20210429_02_SR_B1.TIF...


In [140]:
# single sensor overide
sensors = [
    'LT05',
    #'LE07',
    #'LC08'
    ] 
# walk over each sensor 
for sensor in sensors:
    print(sensor)
    if sensor == 'LT05':
        bandNames = {'SR_B1', 'SR_B2', 'SR_B3', 'SR_B4', 'SR_B5', 'SR_B7', 'QA_PIXEL', 'ST_B6'}
        #temporal_coverage = {'start' : '2000-01-01', 'end' : '2012-05-05'}
        temporal_coverage = {'start' : '2005-01-01', 'end' : '2012-05-05'}
        #print(bandNames)
    elif sensor == 'LE07':
        bandNames = {'SR_B1', 'SR_B2', 'SR_B3', 'SR_B4', 'SR_B5', 'SR_B7', 'QA_PIXEL', 'ST_B6'}
        #temporal_coverage = {'start' : '2000-01-01', 'end' : '2024-01-19'} # mission ended 2022-04-06
        temporal_coverage = {'start' : '2022-01-01', 'end' : '2024-01-19'}
        #print(bandNames)
    else:
        bandNames = {'SR_B2', 'SR_B3', 'SR_B4', 'SR_B5', 'SR_B6', 'SR_B7', 'QA_PIXEL', 'ST_B10'}
        temporal_coverage = {'start' : '2013-01-01', 'end' : '2024-09-01'}
        #print(bandNames)

    # temporal filter: list temporal filter  dicts
    # just devide the large temporal range to anual cycles to avoid bad response from api
    dates = pd.date_range(start=temporal_coverage['start'], 
                      end=temporal_coverage['end'], 
                      freq='YS') 
    # add final date manually
    dates = dates.append(pd.to_datetime([temporal_coverage['end']]))
    # create a list of dict using loop
    temp_list = [{'start': str(dates[i].date()), 'end': str(dates[i+1].date())} for i in range(len(dates)-1)]
    # start downloading for each: after this line
    for temporalFilter in temp_list:
            # search payload
            search_payload = {
            'datasetName' : datasetName,
            'sceneFilter' : {
                'spatialFilter' : spatialFilter,
                'acquisitionFilter' : temporalFilter,
                'cloudCoverFilter' : cloudCoverFilter}
                }
            # scene search
            scenes = sendRequest(serviceUrl + "scene-search", search_payload, apiKey)
            # idfeild to grab
            idField = 'entityId'
            # entity id list
            entityIds = []
            # take entid if bulk true
            for result in scenes['results']:
                # Add this scene to the list I would like to download if bulk is available
                if result['options']['bulk'] == True:
                    entityIds.append(result[idField])
            # filter for sensor
            # select only a single sensor: so the only one sensor goes to next list ids
            entityIds = [item for item in entityIds if item[:4] == sensor]
            if len(entityIds) == 0: continue
            listId = f"temp_{datasetName}_list" # customized list id
            # scn list payload: mesg from me to api
            scn_list_add_payload = {
                "listId": listId,
                'idField' : idField,
                "entityIds": entityIds,
                "datasetName": datasetName
                }
            # clean old requests: otherwise it mixed up new request with old
            sendRequest(serviceUrl + "scene-list-remove", {"listId": listId}, apiKey) 
            # number of image count
            count = sendRequest(serviceUrl + "scene-list-add", scn_list_add_payload, apiKey)
            print(f'number of images to download {count}')
            # add download code; rename old folder
            sendRequest(serviceUrl + "scene-list-get", {'listId' : scn_list_add_payload['listId']}, apiKey)
            # 
            download_opt_payload = {
                        "listId": listId,
                        "datasetName": datasetName
                        }
            #if fileType == 'band_group':
            #     download_opt_payload['includeSecondaryFileGroups'] = True
            products = sendRequest(serviceUrl + "download-options", download_opt_payload, apiKey)
            filegroups = sendRequest(serviceUrl + "dataset-file-groups", {'datasetName' : datasetName}, apiKey)

            # file group id
            fileGroupIds = {"ls_c2ard_sr"} # can change this

            # Select products
            print("Selecting products...")
            downloads = []
            if fileType == 'bundle':
                # Select bundle files
                print("    Selecting bundle files...")
                for product in products:        
                    if product["bulkAvailable"] and product['downloadSystem'] != 'folder':               
                        downloads.append({"entityId":product["entityId"], "productId":product["id"]})


            elif fileType == 'band':
                # Select band files
                print("    Selecting band files...")
                for product in products:  
                    if product["secondaryDownloads"] is not None and len(product["secondaryDownloads"]) > 0:
                        for secondaryDownload in product["secondaryDownloads"]:
                            for bandName in bandNames:
                                if secondaryDownload["bulkAvailable"] and bandName in secondaryDownload['displayId']:
                                    downloads.append({"entityId":secondaryDownload["entityId"], "productId":secondaryDownload["id"]})


            elif fileType == 'band_group':        
                # Get secondary dataset ID and file group IDs with the scenes
                print("    Checking for scene band groups and get secondary dataset ID and file group IDs with the scenes...")
                sceneFileGroups = []
                entityIds = []
                datasetId = None
                for product in products:  
                    if product["secondaryDownloads"] is not None and len(product["secondaryDownloads"]) > 0:
                        for secondaryDownload in product["secondaryDownloads"]:
                            if secondaryDownload["bulkAvailable"] and secondaryDownload["fileGroups"] is not None:
                                if datasetId == None:
                                    datasetId = secondaryDownload['datasetId']
                                for fg in secondaryDownload["fileGroups"]:                            
                                    if fg not in sceneFileGroups:
                                        sceneFileGroups.append(fg)
                                    if secondaryDownload['entityId'] not in entityIds:
                                        entityIds.append(secondaryDownload['entityId'])

                # Send dataset request to get the secondary dataset name by the dataset ID
                data_req_payload = {
                    "datasetId": datasetId,
                }
                results = sendRequest(serviceUrl + "dataset", data_req_payload, apiKey)
                secondaryDatasetName = results['datasetAlias']

                # Add secondary scenes to a list
                secondaryListId = f"temp_{datasetName}_scecondary_list" # customized list id
                sec_scn_add_payload = {
                    "listId": secondaryListId,
                    "entityIds": entityIds,
                    "datasetName": secondaryDatasetName
                }

                print("    Adding secondary scenes to list...")
                count = sendRequest(serviceUrl + "scene-list-add", sec_scn_add_payload, apiKey)    
                print("    Added", count, "secondary scenes\n")

                # Compare the provided file groups Ids with the scenes' file groups IDs
                if fileGroupIds:
                    fileGroups = []
                    for fg in fileGroupIds:
                        fg = fg.strip() 
                        if fg in sceneFileGroups:
                            fileGroups.append(fg)
                else:
                    fileGroups = sceneFileGroups
            else:
                # Select all available files
                for product in products:        
                    if product["bulkAvailable"]:
                        if product['downloadSystem'] != 'folder':            
                            downloads.append({"entityId":product["entityId"], "productId":product["id"]})
                        if product["secondaryDownloads"] is not None and len(product["secondaryDownloads"]) > 0:
                            for secondaryDownload in product["secondaryDownloads"]:
                                if secondaryDownload["bulkAvailable"]:
                                    downloads.append({"entityId":secondaryDownload["entityId"], "productId":secondaryDownload["id"]})

                        
            


            # sending download request
            if fileType != 'band_group':
                download_req2_payload = {
                    "downloads": downloads,
                    "label": label
                }
            else:
                if len(fileGroups) > 0:
                    download_req2_payload = {
                        "dataGroups": [
                            {
                                "fileGroups": fileGroups,
                                "datasetName": secondaryDatasetName,
                                "listId": secondaryListId
                            }
                        ],
                        "label": label
                    }
                else:
                    print('No file groups found')
                    sys.exit()

            print(f"Sending download request ...")
            download_request_results = sendRequest(serviceUrl + "download-request", download_req2_payload, apiKey)
            print(f"Done sending download request") 

            if len(download_request_results['newRecords']) == 0 and len(download_request_results['duplicateProducts']) == 0:
                print('No records returned, please update your scenes or scene-search filter')
                sys.exit()

            # Attempt the download URLs 
            for result in download_request_results['availableDownloads']:
                print(f"Get download url: {result['url']}\n" )
                runDownload(threads, result['url'])
                
            preparingDownloadCount = len(download_request_results['preparingDownloads'])
            preparingDownloadIds = []
            if preparingDownloadCount > 0:
                for result in download_request_results['preparingDownloads']:  
                    preparingDownloadIds.append(result['downloadId'])

                download_ret_payload = {"label" : label}                
                # Retrieve download URLs
                print("Retrieving download urls...\n")
                download_retrieve_results = sendRequest(serviceUrl + "download-retrieve", download_ret_payload, apiKey, False)
                if download_retrieve_results != False:
                    print(f"    Retrieved: \n" )
                    for result in download_retrieve_results['available']:
                        if result['downloadId'] in preparingDownloadIds:
                            preparingDownloadIds.remove(result['downloadId'])
                            runDownload(threads, result['url'])
                            print(f"       {result['url']}\n" )
                        
                    for result in download_retrieve_results['requested']:   
                        if result['downloadId'] in preparingDownloadIds:
                            preparingDownloadIds.remove(result['downloadId'])
                            runDownload(threads, result['url'])
                            print(f"       {result['url']}\n" )
                
                # Didn't get all download URLs, retrieve again after 30 seconds
                while len(preparingDownloadIds) > 0: 
                    print(f"{len(preparingDownloadIds)} downloads are not available yet. Waiting for 30s to retrieve again\n")
                    time.sleep(30)
                    download_retrieve_results = sendRequest(serviceUrl + "download-retrieve", download_ret_payload, apiKey, False)
                    if download_retrieve_results != False:
                        for result in download_retrieve_results['available']:                            
                            if result['downloadId'] in preparingDownloadIds:
                                preparingDownloadIds.remove(result['downloadId'])
                                print(f"    Get download url: {result['url']}\n" )
                                runDownload(threads, result['url'])
                                
            print("\nDownloading files... Please do not close the program\n")
            for thread in threads:
                thread.join() 

            # create a log file
            with open(tile_number+"_logFile.txt", "w") as file:
                # write info into logfile
                start_year = temporalFilter['start'][:4]
                text = f"Images available for year {start_year} is {count}\n"
                file.write(sensor + " ")
                file.write(text)


# rename the folder to the tile ID
os.rename('Data', tile_number)

# logging out from the system
endpoint = "logout"  
if sendRequest(serviceUrl + endpoint, None, apiKey) == None:        
    print("\nLogged Out\n")
else:
    print("\nLogout Failed\n")


LT05
number of images to download 47
Selecting products...
    Selecting band files...
Sending download request ...
Done sending download request
Get download url: https://landsatlook.usgs.gov/tile/collection02/tm/2005/CU/011/008/LT05_CU_011008_20050107_20210428_02/LT05_CU_011008_20050107_20210428_02_SR_B1.TIF?requestSignature=eyJkb3dubG9hZEFwcCI6Ik0yTSIsImNvbnRhY3RJZCI6MjQ2MjU0NzMsImRvd25sb2FkSWQiOjY3NTg3MzU3NywiZGF0ZUdlbmVyYXRlZCI6IjIwMjQtMDktMjdUMTE6NTY6MTQtMDU6MDAiLCJpZCI6IkxUMDVfQ1VfMDExMDA4XzIwMDUwMTA3XzIwMjEwNDI4XzAyX1NSX0IxLlRJRiIsInNpZ25hdHVyZSI6IiQ1JCRaWDlBZXZYc0dlTWNBNUw4bUF2TENadlV3YjBsZE0zTGNwaEhBdW9oc1c3In0=

Get download url: https://landsatlook.usgs.gov/tile/collection02/tm/2005/CU/011/008/LT05_CU_011008_20050107_20210428_02/LT05_CU_011008_20050107_20210428_02_SR_B2.TIF?requestSignature=eyJkb3dubG9hZEFwcCI6Ik0yTSIsImNvbnRhY3RJZCI6MjQ2MjU0NzMsImRvd25sb2FkSWQiOjY3NTg3MzU3OCwiZGF0ZUdlbmVyYXRlZCI6IjIwMjQtMDktMjdUMTE6NTY6MTQtMDU6MDAiLCJpZCI6IkxUMDVfQ1VfMDExMDA4XzIwMDUwMTA3Xz

KeyboardInterrupt: 

    Downloading: LT05_CU_011008_20050607_20210429_02_SR_B2.TIF...    Downloading: LT05_CU_011008_20050413_20210429_02_SR_B4.TIF...

    Downloading: LT05_CU_011008_20050319_20210428_02_SR_B3.TIF...
    Downloading: LT05_CU_011008_20050107_20210428_02_SR_B2.TIF...
    Downloading: LT05_CU_011008_20050107_20210428_02_SR_B3.TIF...
    Downloading: LT05_CU_011008_20050107_20210428_02_SR_B4.TIF...
    Downloading: LT05_CU_011008_20050107_20210428_02_SR_B5.TIF...
    Downloading: LT05_CU_011008_20050107_20210428_02_SR_B7.TIF...
    Downloading: LT05_CU_011008_20050107_20210428_02_ST_B6.TIF...
    Downloading: LT05_CU_011008_20050107_20210428_02_QA_PIXEL.TIF...
    Downloading: LT05_CU_011008_20050114_20210428_02_SR_B1.TIF...
    Downloading: LT05_CU_011008_20050114_20210428_02_SR_B2.TIF...
    Downloading: LT05_CU_011008_20050114_20210428_02_SR_B3.TIF...
    Downloading: LT05_CU_011008_20050114_20210428_02_SR_B4.TIF...
    Downloading: LT05_CU_011008_20050114_20210428_02_SR_B5.TIF...
    Dow

In [141]:
# logging out from the system
endpoint = "logout"  
if sendRequest(serviceUrl + endpoint, None, apiKey) == None:        
    print("\nLogged Out\n")
else:
    print("\nLogout Failed\n")

AUTH_EXPIRED -  API key has expired due to inactivity, please logout and re-login.


SystemExit: 

In [142]:
remove_scnlst_payload = {
    "listId": listId
}
sendRequest(serviceUrl + "scene-list-remove", remove_scnlst_payload, apiKey)

if fileType == 'band_group':    
    # Remove the secondary scene list
    remove_scnlst2_payload = {
        "listId": secondaryListId
    }
    sendRequest(serviceUrl + "scene-list-remove", remove_scnlst2_payload, apiKey)

AUTH_EXPIRED -  API key has expired due to inactivity, please logout and re-login.


SystemExit: 


Failed to download from https://landsatlook.usgs.gov/tile/collection02/tm/2005/CU/011/008/LT05_CU_011008_20050616_20210429_02/LT05_CU_011008_20050616_20210429_02_SR_B1.TIF?requestSignature=eyJkb3dubG9hZEFwcCI6Ik0yTSIsImNvbnRhY3RJZCI6MjQ2MjU0NzMsImRvd25sb2FkSWQiOjY3NTg3Mzc0OCwiZGF0ZUdlbmVyYXRlZCI6IjIwMjQtMDktMjdUMTE6NTY6MjItMDU6MDAiLCJpZCI6IkxUMDVfQ1VfMDExMDA4XzIwMDUwNjE2XzIwMjEwNDI5XzAyX1NSX0IxLlRJRiIsInNpZ25hdHVyZSI6IiQ1JCQ5a3ZNZE1jVm56YXZOeHdFNW1cL2xyWTRCQkJmcEVrbE1reU9OXC83LjNiU0EifQ==. Will try to re-download.

Failed to download from https://landsatlook.usgs.gov/tile/collection02/tm/2005/CU/011/008/LT05_CU_011008_20050630_20210429_02/LT05_CU_011008_20050630_20210429_02_SR_B3.TIF?requestSignature=eyJkb3dubG9hZEFwcCI6Ik0yTSIsImNvbnRhY3RJZCI6MjQ2MjU0NzMsImRvd25sb2FkSWQiOjY3NTY5NTI3OCwiZGF0ZUdlbmVyYXRlZCI6IjIwMjQtMDktMjZUMjI6MjU6MTQtMDU6MDAiLCJpZCI6IkxUMDVfQ1VfMDExMDA4XzIwMDUwNjMwXzIwMjEwNDI5XzAyX1NSX0IzLlRJRiIsInNpZ25hdHVyZSI6IiQ1JCRqeENqaVFqOWJFNElVcWdSZkcxOGJDXC9UcW9yUGp6U1JlaWFoW