# Inventory and Download Landsat from USGS
Workaround solution: use browser-cookie3 to "sign" the URL requests with the browser's USGS login cookies, so that a Python script can stream data from USGS.


In [None]:
#%pip install browser-cookie3

In [None]:
from pathlib import Path
import browser_cookie3
import requests
import shutil
import os
import json
from urllib.request import urlopen
from pystac.item import Item
import pandas as pd
from dateutil.rrule import rrule, DAILY
import datetime
from datetime import date
import glob
import tqdm
import calendar

## Collect inventory from USGS
Query guide: https://landsatlook.usgs.gov/stac-server/api.html#tag/Item-Search


Example URL:
https://landsatlook.usgs.gov/stac-server/collections/landsat-c2l1/items?limit=10000&datetime=2022-01-06T00:00:00Z/2022-01-07T00:00:00Z&fields=id,-type,-geometry,-bbox,-properties,-links,-assets,-collection,-features

In [None]:
# STAC collection ids of the Landsat Collection 2 products, in the order used by the loops below.
cltns = [
    "landsat-c2l2-sr",     # Level-2 UTM Surface Reflectance (SR) Product
    "landsat-c2l2-st",     # Level-2 UTM Surface Temperature (ST) Product
    "landsat-c2ard-st",    # Analysis Ready Data (ARD) Level-2 UTM Surface Temperature (ST) Product
    "landsat-c2l2alb-bt",  # Level-2 Albers Top of Atmosphere Brightness Temperature (BT) Product
    "landsat-c2l3-fsca",   # Level-3 Fractional Snow Covered Area (fSCA) Product
    "landsat-c2ard-bt",    # Analysis Ready Data (ARD) Level-2 UTM Top of Atmosphere Brightness Temperature (BT) Product
    "landsat-c2l1",        # Level-1 Product
    "landsat-c2l3-ba",     # Level-3 Burned Area (BA) Product
    "landsat-c2l2alb-st",  # Level-2 Albers Surface Temperature (ST) Product
    "landsat-c2ard-sr",    # Analysis Ready Data (ARD) Level-2 UTM Surface Reflectance (SR) Product
    "landsat-c2l2alb-sr",  # Level-2 Albers Surface Reflectance (SR) Product
    "landsat-c2l2alb-ta",  # Level-2 Albers Top of Atmosphere (TA) Reflectance Product
    "landsat-c2l3-dswe",   # Level-3 Dynamic Surface Water Extent (DSWE) Product
    "landsat-c2ard-ta",    # Analysis Ready Data (ARD) Level-2 UTM Top of Atmosphere (TA) Reflectance Product
]

In [None]:
import os
# Create one output folder per collection. exist_ok=True makes the cell safe
# to re-run and avoids the check-then-create race of os.path.exists().
for cltn in cltns:
    directory = f"../../data/landsat/stac/{cltn}"
    os.makedirs(directory, exist_ok=True)


In [None]:
# Inventory a single collection for a single day (manual check of the batch logic below).
cltn = "landsat-c2l1"

yymmdd = "2022-10-22"

url = f'''https://landsatlook.usgs.gov/stac-server/collections/{cltn}/items?limit=10000&datetime={yymmdd}T00:00:00Z/{yymmdd}T23:59:59Z&fields=id,-type,-geometry,-bbox,properties,-links,-assets,-collection,-features'''

# Fetch the day's items once and flatten the nested STAC properties into columns.
with urlopen(url) as uo:
    data = json.load(uo)
feature_df = pd.json_normalize(data['features'])

# Split by platform so each satellite can be counted separately.
l9 = feature_df.query('`properties.platform`=="LANDSAT_9"')
l8 = feature_df.query('`properties.platform`=="LANDSAT_8"')

l9[l9.id.str.startswith("LT09_L1GT_137")]

# One summary row per day; 'LC08' counts the LANDSAT_8 rows (it previously reused the LANDSAT_9 count).
stat_list = [{'date':yymmdd,'total':data['numberMatched'],'return':data['numberReturned'],'LC09':len(l9),'LC08':len(l8)}]
# The file name uses the queried day; `dt` only exists inside inventory_collection's loop.
feature_df.drop('type',axis=1).to_csv(f'../../data/landsat/stac/{cltn}_{yymmdd}.csv',index=False)


day_sum = pd.DataFrame(stat_list).query('`date` > "2021-09-27"')
day_sum.to_csv('../../data/landsat/day_sum_LC09_LC08.csv',index=False)

pd.json_normalize(data['features']).query('`properties.platform`=="LANDSAT_9"')

### Batch processing

In [None]:
cltn

In [None]:
def inventory_collection(cltn, start=None, end=None):
    """Inventory one USGS STAC collection day by day and save the results as CSV.

    For every day between ``start`` and ``end`` (both inclusive) the STAC
    server is queried for the items of that day. Days that return no features
    are skipped. For the remaining days the LANDSAT_9 rows are written to
    ``../../data/stac/{cltn}/csv/{cltn}_{YYYY-MM-DD}_L9.csv`` (only when there
    is at least one), and a per-day summary row is collected. The summary is
    written to ``../../data/stac/stat/{cltn}_day_sum_LC09_LC08.csv``.

    Parameters
    ----------
    cltn : str
        STAC collection id, e.g. ``"landsat-c2l1"``.
    start, end : datetime.date, optional
        First and last day to query. Default to 2022-05-01 and 2022-11-01,
        the window that was previously hard-coded.

    A failure on one day (network error, malformed response, ...) is printed
    and the loop moves on to the next day.
    """
    import os

    a = date(2022, 5, 1) if start is None else start
    b = date(2022, 11, 1) if end is None else end

    # Make sure both output folders exist before anything is written to them.
    csv_dir = f'../../data/stac/{cltn}/csv'
    stat_dir = '../../data/stac/stat'
    os.makedirs(csv_dir, exist_ok=True)
    os.makedirs(stat_dir, exist_ok=True)

    stat_list = []
    for dt in rrule(DAILY, dtstart=a, until=b):
        yymmdd = dt.strftime("%Y-%m-%d")
        url = f'''https://landsatlook.usgs.gov/stac-server/collections/{cltn}/items?limit=10000&datetime={yymmdd}T00:00:00Z/{yymmdd}T23:59:59Z&fields=id,-type,-geometry,-bbox,properties,-links,-assets,-collection,-features'''
        try:
            with urlopen(url) as uo:
                data = json.load(uo)
            if not data['features']:
                continue
            feature_df = pd.json_normalize(data['features'])
            l9 = feature_df.query('`properties.platform`=="LANDSAT_9"')
            l8 = feature_df.query('`properties.platform`=="LANDSAT_8"')
            stat_list.append({'date':yymmdd,'total':data['numberMatched'],'return':data['numberReturned'],'LC09':len(l9),'LC08':len(l8)})
            if len(l9)>0:
                l9.drop('type',axis=1).to_csv(f'{csv_dir}/{cltn}_{yymmdd}_L9.csv',index=False)
        except Exception as e:
            # Best-effort batch: report the failing day and keep going.
            # (The original `except e:` raised NameError instead of catching.)
            print(f"{yymmdd}: {e}")
    pd.DataFrame(stat_list).to_csv(f'{stat_dir}/{cltn}_day_sum_LC09_LC08.csv',index=False)


In [None]:
inventory_collection("landsat-c2l1")

In [None]:
# Run the inventory for the collections from position 4 up to, but not
# including, the last entry of cltns.
for cltn in cltns[4:-1]:
    print(cltn)
    inventory_collection(cltn)

In [None]:
# Print, for every collection, the sum of the LC09 column of its daily summary CSV.
# NOTE(review): inventory_collection writes its summary under ../../data/stac/stat/,
# while this cell reads from ../../data/landsat/ — confirm the files were moved there.
for cltn in cltns:
    print(cltn,pd.read_csv(f'../../data/landsat/{cltn}_day_sum_LC09_LC08.csv').LC09.sum())

## Check progress and missing files

In [None]:
# Names of scenes that have a B11 band file on disk: file name minus the 8-character "_B11.TIF" tail.
# Path(...).name is used instead of split('/') so the file name is found with any path separator.
cached_m2m = [Path(fp).name[:-8] for fp in glob.glob("../../data/landsat/m2m_download/*/*/*B11.TIF")]

In [None]:
# Names of scenes that already have a STAC json on disk: file name minus the 10-character "_stac.json" tail.
# Path(...).name is used instead of split('/') so the file name is found with any path separator.
complete_m2m = [Path(fp).name[:-10] for fp in glob.glob("../../data/landsat/m2m_download/*/*/*_stac.json")]

In [None]:
# Names of scenes that have a large preview image on disk: file name minus the 11-character "_large.jpeg" tail.
# Path(...).name is used instead of split('/') so the file name is found with any path separator.
complete_jpg = [Path(fp).name[:-11] for fp in glob.glob("../../data/landsat/m2m_download/*/*/*_large.jpeg")]

In [None]:
# Scenes that have a B11 file (cached_m2m) but no *_stac.json yet (complete_m2m); order is not preserved.
missing_stac = list(set(cached_m2m)-set(complete_m2m))

In [None]:
# Ids taken from the json files under ../../data/stac/landsat-c2l1/json: file name minus its last 10 characters.
# Path(...).name is used instead of split('/') so the file name is found with any path separator.
finished_m2m= [Path(fp).name[:-10] for fp in glob.glob("../../data/stac/landsat-c2l1/json/*/*.json")]

## Compare differences and generate task lists
First run `find . -type f -name '*.tar' -exec basename {} .tar \; > landsat_avail.txt` on local inventory

In [None]:
def export_scenes_list(many_scenes,batch_tag,batch_size=100):
    """Write scene ids to numbered task files of at most ``batch_size`` ids each.

    Files are created as ``../../data/landsat/task/{batch_tag}/scenes_NNNN.txt``
    (numbered from 0001). Each file starts with the header line
    ``landsat_ot_c2_l1|displayId`` followed by one scene id per line.
    Prints ``Done`` and the number of files written.
    """
    import os

    # Ceiling division: number of files needed to hold all scenes.
    n_batches = -(-len(many_scenes) // batch_size)

    out_dir = f'../../data/landsat/task/{batch_tag}'
    try:
        os.makedirs(out_dir)
    except FileExistsError:
        # The folder is already there; reuse it.
        pass

    for idx in range(n_batches):
        chunk = many_scenes[idx * batch_size:(idx + 1) * batch_size]
        with open(f'{out_dir}/scenes_{idx + 1:04d}.txt', 'w') as out:
            out.write("landsat_ot_c2_l1|displayId\n")
            out.writelines(f"{scene}\n" for scene in chunk)
    print('Done',n_batches)

In [None]:
import glob

In [None]:
# Load every per-day inventory CSV of landsat-c2l1 into a single DataFrame.
usgs_inventory = pd.concat([pd.read_csv(f) for f in glob.glob('../../data/stac/landsat-c2l1/csv/*.csv')])

In [None]:
# Set of already-downloaded ids for fast membership tests.
# `finished` is not defined anywhere in this notebook; `finished_m2m` is the list built above.
set_finished = set(finished_m2m)

In [None]:
usgs_inventory

In [None]:
# Parse the 'properties.datetime' column into pandas datetimes so it can be used in the date filters below.
usgs_inventory['aq_time'] = pd.to_datetime(usgs_inventory['properties.datetime'])

In [None]:
# Keep only the rows whose aq_time is before 2022-11-01.
usgs_first_year = usgs_inventory[usgs_inventory['aq_time']<"2022-11-01"]

In [None]:
usgs_first_year[usgs_first_year.duplicated("properties.landsat:scene_id",False)]

In [None]:
set(usgs_first_year["properties.landsat:scene_id"].unique()) - set(usgs_first_year[usgs_first_year.id.isin(finished_m2m)]["properties.landsat:scene_id"])

In [None]:
usgs_first_year[usgs_first_year["properties.landsat:scene_id"].isin({'LT91372062022295LGN00', 'LT91372072022295LGN00', 'LT91372082022295LGN00'})].id.to_list()

In [None]:
# Ids in usgs_first_year that are not in finished_m2m yet; order is not preserved.
export_list = list(set(usgs_first_year.id.to_list())-set(finished_m2m))

In [None]:
len(export_list)

In [None]:
# Rebinds export_list to the ids of three specific scene_ids, replacing the set-difference computed earlier.
export_list = usgs_first_year[usgs_first_year["properties.landsat:scene_id"].isin({'LT91372062022295LGN00', 'LT91372072022295LGN00', 'LT91372082022295LGN00'})].id.to_list()

In [None]:
# Ids with aq_time on or after 2022-05-01, ordered by aq_time.
m2m_batch2 = usgs_inventory[usgs_inventory['aq_time']>="2022-05-01"].sort_values('aq_time').id.to_list()

In [None]:
export_scenes_list(export_list,'batch_ms1',200)

In [None]:
# Rebinds m2m_batch2: ids with aq_time between 2022-05-01 and 2022-05-31 (both bounds inclusive), ordered by aq_time.
m2m_batch2 = usgs_inventory[("2022-05-01" <= usgs_inventory['aq_time'] )&(usgs_inventory['aq_time']<= "2022-05-31")].sort_values('aq_time').id.to_list()

In [None]:
# Rebinds m2m_batch2: ids with aq_time after 2021-11-30 and before 2022-01-01 (both bounds exclusive), ordered by aq_time.
m2m_batch2 = usgs_inventory[("2021-11-30" < usgs_inventory['aq_time'] )&(usgs_inventory['aq_time']< "2022-01-01")].sort_values('aq_time').id.to_list()

In [None]:
# Ids with aq_time from 2022-01-01 (inclusive) to 2022-02-01 (exclusive), ordered by aq_time.
batch3 = usgs_inventory[("2022-01-01" <= usgs_inventory['aq_time'] )&(usgs_inventory['aq_time']< "2022-02-01")].sort_values('aq_time').id.to_list()

In [None]:
set(batch3)-set(finished_m2m)

In [None]:
export_scenes_list(list(set(batch3)-set(finished_m2m)),'batch3',200)

In [None]:
# Build the date strings for one calendar month:
#   date1    - first day of the month
#   date1fix - last day of the month
#   date2    - first day of the following month (first day + number of days in the month)
year, month = 2022, 10
date1 = f"{year}-{month:02d}-01"
date1fix = f"{year}-{month:02d}-{calendar.monthrange(year, month)[1]}"
dt2 = datetime.datetime(year, month, 1) + datetime.timedelta(days=calendar.monthrange(year, month)[1])
date2 = dt2.strftime("%Y-%m-%d")

In [None]:
# Manual override of date1/date2; running this cell replaces the values computed from year/month.
date1 = "2022-10-01"
date2 = "2022-11-01"

In [None]:
# Ids with aq_time after date1 (exclusive) and up to date2 (inclusive), ordered by aq_time.
batch_auto = usgs_inventory[(date1 < usgs_inventory['aq_time'] )&(usgs_inventory['aq_time']<= date2)].sort_values('aq_time').id.to_list()

In [None]:
len(batch_auto)

In [None]:
len(set(batch_auto) - set(finished_m2m))

In [None]:
# Ids of batch_auto that are not in finished_m2m yet; order is not preserved.
missings = list(set(batch_auto) - set(finished_m2m))

In [None]:
len(missings)

In [None]:
export_scenes_list(missings,f'batch_name',200)

## Official USGS script for scene download

In [None]:
#official script for m2m api

import json
import requests
import sys
import time
import argparse
import re
import threading
import datetime
import os
from dotenv import load_dotenv

# Load variables from a local .env file; the login cells read M2M_USER / M2M_PSWD via os.getenv.
load_dotenv()


# Module-level settings shared by downloadFile / runDownload and the download cells.
path = "../../data/landsat/m2m_download" # Fill a valid download path
maxthreads = 10 # Threads count for downloads
sema = threading.Semaphore(value=maxthreads)  # acquired by downloadFile around each download
label = datetime.datetime.now().strftime("%Y%m%d_%H%M%S") # Customized label using date time
threads = []  # every thread started by runDownload is appended here
# The entityIds/displayIds need to save to a text file such as scenes.txt.
# The header of text file should follow the format: datasetName|displayId or datasetName|entityId. 
# sample file - scenes.txt
# landsat_ot_c2_l2|displayId
# LC08_L2SP_012025_20201231_20210308_02_T1
# LC08_L2SP_012027_20201215_20210314_02_T1
scenesFile = '../pyScripts/scenes2.txt'

# Send http request
def sendRequest(url, data, apiKey = None, exitIfNoResponse = True):
    """POST ``data`` as JSON to an M2M endpoint and return the reply's ``data`` field.

    Parameters
    ----------
    url : str
        Full endpoint URL.
    data : object
        JSON-serialisable payload (``None`` is sent as ``null``).
    apiKey : str, optional
        When given, it is sent in the ``X-Auth-Token`` header.
    exitIfNoResponse : bool
        On any failure: ``True`` terminates the program through ``sys.exit()``,
        ``False`` makes the function return ``False`` instead.

    Returns
    -------
    The value of ``output['data']`` on success, or ``False`` on failure when
    ``exitIfNoResponse`` is false.

    A failure is: a reply whose ``errorCode`` is set, an HTTP status of
    404/401/400, or any exception raised while reading or decoding the reply.
    """
    def _fail():
        # Shared failure exit: stop the program or report failure to the caller.
        if exitIfNoResponse:
            sys.exit()
        return False

    json_data = json.dumps(data)
    headers = None if apiKey is None else {'X-Auth-Token': apiKey}
    response = requests.post(url, json_data, headers = headers)

    # Guard before touching any attribute of the response.
    if response is None:
        print("No output from service")
        return _fail()

    try:
        httpStatusCode = response.status_code
        output = json.loads(response.text)
        if output['errorCode'] is not None:
            print(output['errorCode'], "- ", output['errorMessage'])
            return _fail()
        if httpStatusCode == 404:
            print("404 Not Found")
            return _fail()
        elif httpStatusCode == 401:
            print("401 Unauthorized")
            return _fail()
        elif httpStatusCode == 400:
            print("Error Code", httpStatusCode)
            return _fail()
    except Exception as e:
        print(e)
        return _fail()
    finally:
        # Runs on every path (success, early return, sys.exit), so the
        # connection is always released.
        response.close()

    return output['data']

def downloadFile(url):
    """Download ``url`` into the module-level ``path`` folder.

    The file name is taken from the ``content-disposition`` response header.
    The module-level semaphore ``sema`` limits how many downloads run at the
    same time. On any error a message is printed and the download is queued
    again through ``runDownload``.
    """
    try:
        # `with sema` releases the semaphore on every exit path, before a retry is queued.
        with sema:
            with requests.get(url, stream=True) as response:
                disposition = response.headers['content-disposition']
                filename = re.findall("filename=(.+)", disposition)[0].strip("\"")
                print(f"Downloading {filename} ...\n")
                if path != "" and path[-1] != "/":
                    filename = "/" + filename
                # Write chunk by chunk so the whole file is never held in memory
                # (reading `response.content` would defeat stream=True).
                with open(path+filename, 'wb') as out:
                    for chunk in response.iter_content(chunk_size=1024 * 1024):
                        out.write(chunk)
                print(f"Downloaded {filename}\n")
    except Exception:
        print(f"Failed to download from {url}. Will try to re-download.")
        runDownload(threads, url)
    
def runDownload(threads, url):
    """Start a background thread that runs ``downloadFile(url)`` and record it in ``threads``."""
    worker = threading.Thread(target=downloadFile, args=(url,))
    # Registered before start so callers that join over `threads` also wait for this one.
    threads.append(worker)
    worker.start()

In [None]:
# Log in to the M2M API and run a dataset search.
# Credentials are read from the environment variables M2M_USER / M2M_PSWD.
username = os.getenv("M2M_USER")
password = os.getenv("M2M_PSWD")  

print("\nRunning Scripts...\n")

serviceUrl = "https://m2m.cr.usgs.gov/api/api/json/stable/"

# login
payload = {'username' : username, 'password' : password}

# The value returned by the login endpoint is passed as apiKey (X-Auth-Token header) to later calls.
apiKey = sendRequest(serviceUrl + "login", payload)

# NOTE(review): this writes the API key into the notebook output — consider removing before sharing the notebook.
print("API Key: " + apiKey + "\n")

datasetName = "landsat_ot_c2_l1"

# Bounding-box filter; it is defined here but not included in the payload below.
# NOTE(review): upperRight longitude (-140) is lower than lowerLeft longitude (-120) — looks swapped, confirm.
spatialFilter =  {'filterType' : "mbr",
                  'lowerLeft' : {'latitude' : 30, 'longitude' : -120},
                  'upperRight' : { 'latitude' : 40, 'longitude' : -140}}

temporalFilter = {'start' : '2021-10-10', 'end' : '2025-12-10'}

payload = {'datasetName' : datasetName,
                           'temporalFilter' : temporalFilter}                     

print("Searching datasets...\n")
datasets = sendRequest(serviceUrl + "dataset-search", payload, apiKey)

print("Found ", len(datasets), " datasets\n")

In [None]:
# Scene search on landsat_ot_c2_l1, restricted by acquisition date and one metadata value.
acquisitionFilter = {"end": "2021-11-30",
                             "start": "2021-10-10" }        
# Metadata filter: field "61af93b8fad2acf5" must equal "9".
# NOTE(review): presumably this field is the satellite number (Landsat 9) — confirm against the dataset's filter list.
metadataValue = {"filterType":"value",
                  "filterId":"61af93b8fad2acf5",
                  "value":"9",
                   "operand":"="
                 }


# The spatial filter and startingNumber are left commented out, so only the two filters above apply.
payload = {'datasetName' : 'landsat_ot_c2_l1', 
                         'maxResults' : 50000,
                         #'startingNumber' : 1, 
                         'sceneFilter' : {'metadataFilter':metadataValue,
                                          #'spatialFilter' : spatialFilter,
                                          'acquisitionFilter' : acquisitionFilter}}

# Now I need to run a scene search to find data to download
print("Searching scenes...\n\n")   

scenes = sendRequest(serviceUrl + "scene-search", payload, apiKey)

In [None]:
# Log in, read the scene ids from scenesFile, and send a download-request for them.
# The reply is kept in `results` for the download cells that follow.
username = os.getenv("M2M_USER")
password = os.getenv("M2M_PSWD")
# 'bundle' selects whole products, 'band' selects their secondary downloads, anything else selects both.
filetype = 'bundle'

print("\nRunning Scripts...\n")
startTime = time.time()

serviceUrl = "https://m2m.cr.usgs.gov/api/api/json/stable/"

# Login
payload = {'username' : username, 'password' : password}    
apiKey = sendRequest(serviceUrl + "login", payload)    
# NOTE(review): this writes the API key into the notebook output — consider removing before sharing the notebook.
print("API Key: " + apiKey + "\n")

# Read scenes (the context manager closes the file even if reading fails)
with open(scenesFile, "r") as f:
    lines = f.readlines()
# The first line is the header "datasetName|idField".
header = lines[0].strip()
datasetName = header[:header.find("|")]
idField = header[header.find("|")+1:]

print("Scenes details:")
print(f"Dataset name: {datasetName}")
print(f"Id field: {idField}\n")

# Every remaining line is one id; blank lines are skipped so that no empty id is sent.
lines.pop(0)
entityIds = [line.strip() for line in lines if line.strip()]

# Search scenes 
# If you don't have a scenes text file that you can use scene-search to identify scenes you're interested in
# https://m2m.cr.usgs.gov/api/docs/reference/#scene-search
# payload = { 
#             'datasetName' : '', # dataset alias
#             'maxResults' : 10, # max results to return
#             'startingNumber' : 1, 
#             'sceneFilter' : {} # scene filter
#           }

# results = sendRequest(serviceUrl + "scene-search", payload, apiKey)  
# for result in results:
#     entityIds.append(result['entityId'])

# Add scenes to a list
listId = f"temp_{datasetName}_list" # customized list id
payload = {
    "listId": listId,
    'idField' : idField,
    "entityIds": entityIds,
    "datasetName": datasetName
}

print("Adding scenes to list...\n")
count = sendRequest(serviceUrl + "scene-list-add", payload, apiKey)    
print("Added", count, "scenes\n")

# Get download options
payload = {
    "listId": listId,
    "datasetName": datasetName
}

print("Getting product download options...\n")
products = sendRequest(serviceUrl + "download-options", payload, apiKey)
print("Got product download options\n")

# Select products
downloads = []
if filetype == 'bundle':
    # select bundle files
    for product in products:        
        if product["bulkAvailable"]:               
            downloads.append({"entityId":product["entityId"], "productId":product["id"]})
elif filetype == 'band':
    # select band files
    for product in products:  
        if product["secondaryDownloads"] is not None and len(product["secondaryDownloads"]) > 0:
            for secondaryDownload in product["secondaryDownloads"]:
                if secondaryDownload["bulkAvailable"]:
                    downloads.append({"entityId":secondaryDownload["entityId"], "productId":secondaryDownload["id"]})
else:
    # select all available files
    for product in products:        
        if product["bulkAvailable"]:               
            downloads.append({"entityId":product["entityId"], "productId":product["id"]})
            if product["secondaryDownloads"] is not None and len(product["secondaryDownloads"]) > 0:
                for secondaryDownload in product["secondaryDownloads"]:
                    if secondaryDownload["bulkAvailable"]:
                        downloads.append({"entityId":secondaryDownload["entityId"], "productId":secondaryDownload["id"]})

# Remove the list
payload = {
    "listId": listId
}
sendRequest(serviceUrl + "scene-list-remove", payload, apiKey)                

# Send download-request
payLoad = {
    "downloads": downloads,
    "label": label,
    'returnAvailable': True
}

print(f"Sending download request ...\n")
results = sendRequest(serviceUrl + "download-request", payLoad, apiKey)
print(f"Done sending download request\n") 

In [None]:
from urllib.parse import urlparse

In [None]:
# Walk the available downloads and pick out those whose URL path does not end in 'gen-browse'.
for result in results['availableDownloads']:
    u = urlparse(result['url'])
    if u.path.split('/')[-1]!='gen-browse':
        # NOTE(review): the original cell stopped here with an empty `if` body (a SyntaxError).
        # This placeholder keeps the cell runnable; fill in what should happen with the non-browse URLs.
        pass

In [None]:
# Start a download thread for every URL that is available right away.
for result in results['availableDownloads']:       
    print(f"Get download url: {result['url']}\n" )
    runDownload(threads, result['url'])

# Remember the ids of the downloads that are still being prepared.
preparingDownloadCount = len(results['preparingDownloads'])
preparingDownloadIds = []
if preparingDownloadCount > 0:
    for result in results['preparingDownloads']:  
        preparingDownloadIds.append(result['downloadId'])

    payload = {"label" : label}                
    # Retrieve download urls
    print("Retrieving download urls...\n")
    # From here on `results` holds the download-retrieve reply, not the download-request reply.
    # exitIfNoResponse=False: a failed call returns False instead of terminating the program.
    results = sendRequest(serviceUrl + "download-retrieve", payload, apiKey, False)
    if results != False:
        # First pass: both the 'available' and the 'requested' entries are checked.
        for result in results['available']:
            if result['downloadId'] in preparingDownloadIds:
                preparingDownloadIds.remove(result['downloadId'])
                print(f"Get download url: {result['url']}\n" )
                runDownload(threads, result['url'])

        for result in results['requested']:   
            if result['downloadId'] in preparingDownloadIds:
                preparingDownloadIds.remove(result['downloadId'])
                print(f"Get download url: {result['url']}\n" )
                runDownload(threads, result['url'])

    # Don't get all download urls, retrieve again after 30 seconds
    # The polling loop only looks at the 'available' entries and runs until every pending id has been seen.
    while len(preparingDownloadIds) > 0: 
        print(f"{len(preparingDownloadIds)} downloads are not available yet. Waiting for 30s to retrieve again\n")
        time.sleep(30)
        results = sendRequest(serviceUrl + "download-retrieve", payload, apiKey, False)
        if results != False:
            for result in results['available']:                            
                if result['downloadId'] in preparingDownloadIds:
                    preparingDownloadIds.remove(result['downloadId'])
                    print(f"Get download url: {result['url']}\n" )
                    runDownload(threads, result['url'])

print("\nGot download urls for all downloads\n")                
# Logout
# sendRequest returns the reply's 'data' field; a None value is treated as a successful logout.
endpoint = "logout"  
if sendRequest(serviceUrl + endpoint, None, apiKey) == None:        
    print("Logged Out\n")
else:
    print("Logout Failed\n")  

# Wait for every download thread that was started (including re-queued downloads appended meanwhile).
print("Downloading files... Please do not close the program\n")
for thread in threads:
    thread.join()

print("Complete Downloading")

# startTime is set in the download-request cell.
executionTime = round((time.time() - startTime), 2)
print(f'Total time: {executionTime} seconds')

## Direct Download Workaround

In [None]:
# Background step: log in to USGS EROS in Firefox first, so that the browser cookies let the requests skip the USGS login redirect.
# The cookie jar below holds the Firefox cookies of the usgs.gov domain and is used by cache_asset.

cj = browser_cookie3.firefox(domain_name='usgs.gov')

In [None]:
def cache_asset(url,path):
    """Download ``url`` to the local file ``path`` using the browser cookies in ``cj``.

    The body is streamed into ``{path}.part`` and moved to ``path`` only after
    the copy has finished, so an interrupted download never leaves a truncated
    file under the final name.

    Returns
    -------
    bool
        ``True`` when the file was written; ``False`` when the server did not
        answer with HTTP 200 (nothing is written and a message is printed).
    """
    import os

    # The context manager closes the connection on every path.
    with requests.get(url, stream = True,cookies=cj) as r:
        if r.status_code != 200:
            print(f"Skipped {url}: HTTP {r.status_code}")
            return False

        # Set decode_content value to True, otherwise the downloaded image file's size will be zero.
        r.raw.decode_content = True

        tmp_path = f"{path}.part"
        # Open a local file with wb ( write binary ) permission.
        with open(tmp_path,'wb') as f:
            shutil.copyfileobj(r.raw, f)

    os.replace(tmp_path, path)
    return True

In [None]:
target

In [None]:
# Fetch the STAC item of one example scene and parse it.
usgs_item_url = f"https://landsatlook.usgs.gov/stac-server/collections/landsat-c2l1/items/LC09_L1TP_094083_20211110_20220119_02_T1"
#read stac (the context manager closes the HTTP response once it has been read)
with urlopen(usgs_item_url) as resp:
    r = resp.read()
stac_json = json.loads(r)

In [None]:

def hand_pull_product(product_id,collection="landsat-c2l1",tarfile=True,verbose=False):
    """Download every asset of one product and optionally pack it into a tar file.

    product_id: e.g."LC09_L1TP_086075_20220509_20220510_02_T1"
    collection: STAC collection id the product belongs to.
    tarfile: when true, the product folder is archived as ``<folder>.tar`` and
        the folder is removed afterwards; otherwise the folder is kept.
    verbose: when true, each asset URL is printed before it is downloaded.

    The product is skipped when ``../../data/landsat/hand_pull/{product_id}.tar``
    already exists. All assets except ``index`` are fetched with ``cache_asset``
    and the STAC item is saved next to them as ``{scene.id}_stac.json``.
    If any asset file is missing after the download, the folder is left in
    place and no archive is made, so a later run does not mistake an
    incomplete product for a finished one.
    """
    directory = f"../../data/landsat/hand_pull/{product_id}"
    if os.path.exists(directory+".tar"):
        #print('skip',product_id)
        return
    usgs_item_url = f"https://landsatlook.usgs.gov/stac-server/collections/{collection}/items/{product_id}"
    #aws_item_url = "https://jcz3phgts3.execute-api.us-west-2.amazonaws.com/dev/getobject/collection02/level-1/standard/oli-tirs/2021/094/083/LC09_L1TP_094083_20211110_20220119_02_T1/LC09_L1TP_094083_20211110_20220119_02_T1_stac.json"
    #read stac (the context manager closes the HTTP response)
    with urlopen(usgs_item_url) as resp:
        stac_json = json.load(resp)
    scene = Item.from_dict(stac_json)
    assets_urls = [(k,scene.assets[k].href) for k in scene.get_assets() if k != 'index']
    directory = f"../../data/landsat/hand_pull/{scene.id}" #in case there is a miss match
    os.makedirs(directory, exist_ok=True)
    #loop run the assets
    missing = []
    for k,url in assets_urls:
        path = f"{directory}/{Path(url).name}"
        #print(k,path)
        if verbose:
            print(url)
        cache_asset(url,path)
        # cache_asset writes nothing when the server refuses the request.
        if not os.path.exists(path):
            missing.append(k)
    # save stac information
    with open(f"{directory}/{scene.id}_stac.json","w") as w:
        json.dump(stac_json,w)
    if missing:
        # Archiving now would delete the folder and mark the product as done.
        print(f"Incomplete download for {scene.id}, missing assets: {missing}; kept {directory}")
        return
    if tarfile:
        output = shutil.make_archive(directory, 'tar', Path(directory).parent, Path(directory).name)
        print(output)
        shutil.rmtree(directory)
    else:
        print(directory)

In [None]:

def hand_pull_files(product_id,collection="landsat-c2l1",verbose=False):
    """Download the assets of one product that are not on disk yet.

    product_id: e.g."LC09_L1TP_086075_20220509_20220510_02_T1"
    collection: STAC collection id the product belongs to.
    verbose: when true, each URL is printed before it is downloaded.

    Files go to ``../../data/landsat/m2m_download/{date}/{product_id}``, where
    ``{date}`` is the fourth ``_``-separated field of ``product_id``. Assets
    whose file already exists are skipped; the ``index`` asset is never
    fetched. The STAC item is saved as ``{scene.id}_stac.json`` when missing.
    Every URL that actually produced a file is appended to
    ``../../data/landsat/task/workaround_history.txt``.
    """
    dt = product_id.split('_')[3]
    directory = f"../../data/landsat/m2m_download/{dt}/{product_id}"
    history = "../../data/landsat/task/workaround_history.txt"

    # if os.path.exists(directory+f"/{product_id}_stac.json"):
    #     print('skip',product_id)
    #     return
    usgs_item_url = f"https://landsatlook.usgs.gov/stac-server/collections/{collection}/items/{product_id}"
    #aws_item_url = "https://jcz3phgts3.execute-api.us-west-2.amazonaws.com/dev/getobject/collection02/level-1/standard/oli-tirs/2021/094/083/LC09_L1TP_094083_20211110_20220119_02_T1/LC09_L1TP_094083_20211110_20220119_02_T1_stac.json"
    #read stac (the context manager closes the HTTP response)
    with urlopen(usgs_item_url) as resp:
        stac_json = json.load(resp)
    scene = Item.from_dict(stac_json)
    assets_urls = [(k,scene.assets[k].href) for k in scene.get_assets() if k != 'index']
    os.makedirs(directory, exist_ok=True)
    #loop run the assets
    for k,url in assets_urls:

        path = f"{directory}/{Path(url).name}"
        if os.path.exists(path):
            continue
        if verbose:
            print('downloading from ',url)
        cache_asset(url,path)
        # Log only what really landed on disk; cache_asset writes nothing on a refused request.
        if os.path.exists(path):
            with open(history,'a') as f:
                f.write(f"{url}\n")
    # save stac information if missing
    stac_path = f"{directory}/{scene.id}_stac.json"
    if not os.path.exists(stac_path):
        with open(stac_path,"w") as w:
            json.dump(stac_json,w)
        with open(history,'a') as f:
            f.write(f"{usgs_item_url}\n")


In [None]:
hand_pull_files("LC09_L1GT_011008_20221019_20221019_02_T2",verbose=False)

In [None]:
# Read the `id` column of missing_id.csv into a list.
# NOTE(review): this path goes up three levels and uses "Landsat" with a capital L, unlike the other data paths in this notebook — confirm.
missing_products_l1 = pd.read_csv("../../../data/Landsat/missing_id.csv").id.tolist()

In [None]:
# Sort the ids by their fourth `_`-separated field (the same field hand_pull_files uses as the date folder).
chronological_list = sorted(missing_products_l1, key=lambda x: x.split('_')[3])

In [None]:
# Hand-picked product ids that are passed to hand_pull_files by the loop in the next cell.
missing_files = ['LC09_L1TP_108078_20220520_20220520_02_T1',
 'LC09_L1TP_108082_20220520_20220520_02_T1',
 'LC09_L1TP_108083_20220520_20220520_02_T1',
 'LC09_L1GT_108057_20220520_20220520_02_T2',
 'LC09_L1TP_108233_20220520_20220520_02_T2',
 'LC09_L1TP_108062_20220520_20220520_02_T1',
 'LC09_L1TP_108070_20220520_20220520_02_T1',
 'LC09_L1TP_200044_20221015_20221015_02_T1',
 'LC09_L1TP_045016_20221017_20221018_02_T1',
 'LC09_L1TP_045014_20221017_20221018_02_T1',
 'LC09_L1TP_029047_20221017_20221017_02_T1',
 'LC09_L1TP_029041_20221017_20221017_02_T1',
 'LC09_L1TP_029035_20221017_20221017_02_T1',
 'LC09_L1TP_029044_20221017_20221017_02_T1',
 'LC09_L1TP_029033_20221017_20221017_02_T1',
 'LC09_L1TP_045007_20221017_20221018_02_T1',
 'LC09_L1TP_029046_20221017_20221017_02_T1',
 'LC09_L1TP_029042_20221017_20221017_02_T1',
 'LC09_L1TP_029043_20221017_20221017_02_T1',
 'LC09_L1TP_029036_20221017_20221017_02_T1',
 'LC09_L1TP_045008_20221017_20221018_02_T2',
 'LC09_L1TP_029037_20221017_20221017_02_T1',
 'LC09_L1TP_029031_20221017_20221017_02_T1',
 'LC09_L1TP_029045_20221017_20221017_02_T1',
 'LC09_L1TP_045015_20221017_20221018_02_T1',
 'LC09_L1TP_029032_20221017_20221017_02_T1',
 'LC09_L1TP_029034_20221017_20221017_02_T1']

In [None]:
# Fetch every listed product with hand_pull_files, wrapped in tqdm for a progress bar.
for sid in tqdm.tqdm(missing_files):
    hand_pull_files(sid)