In [1]:
import os
import json
import requests
import datetime

# Base URL of the USGS LandsatLook sat-api service; endpoint paths are appended to it.
sat_api_url = "https://landsatlook.usgs.gov/sat-api"

In [21]:
# Cloud-cover filter bounds: sent as "gte" (lower) and "lt" (upper)
min_cloud, max_cloud = 0, 50

# Date window for the baseline query
date_min = "2015-03-01"
date_max = "2015-09-30"

# Parse both bounds, then render them as the datetime strings used in the query:
# the window opens at the first second of date_min and closes at the last of date_max
start_date, end_date = (
    datetime.datetime.strptime(day, "%Y-%m-%d") for day in (date_min, date_max)
)
start = start_date.strftime("%Y-%m-%dT00:00:00Z")
end = end_date.strftime("%Y-%m-%dT23:59:59Z")

query = {
    "time": "/".join((start, end)),
    "query": {
        "bbox": [-180, 40, 180, 80],
        "eo:cloud_cover": {"gte": min_cloud, "lt": max_cloud},
        "collection": {"eq": "landsat-c2l2-sr"},
    },
    # Cap each page (request) at 500 items so sat-api doesn't fail while
    # returning a big feature collection
    "limit": 500,
}

In [38]:
def query_satapi(query, timeout=60):
    """POST a search body to the sat-api ``/stac/search`` endpoint and return the decoded JSON.

    Parameters
    ----------
    query : dict
        Search body; sent as the JSON payload of the request.
    timeout : float or tuple, optional
        Passed straight to ``requests.post``. Defaults to 60 seconds so a
        stalled connection cannot hang the notebook indefinitely.

    Returns
    -------
    The decoded JSON body of the response (callers in this notebook index it
    as a dict, e.g. ``data['meta']['found']``).

    Raises
    ------
    requests.exceptions.HTTPError
        If the server answers with a 4xx/5xx status.
    requests.exceptions.Timeout
        If the server does not respond within ``timeout``.
    """
    headers = {
        "Content-Type": "application/json",
        "Accept-Encoding": "gzip",
        "Accept": "application/geo+json",
    }

    url = f"{sat_api_url}/stac/search"
    response = requests.post(url, headers=headers, json=query, timeout=timeout)

    # Fail loudly on an HTTP error rather than handing callers an error payload
    # that lacks the fields they index into.
    response.raise_for_status()

    return response.json()

In [7]:
# Maximum possible scene count: the total number of matches the service
# reports (meta.found) for the query
data = query_satapi(query)
scenes = data["meta"]["found"]
print(f"{scenes} scenes")

144640 scenes


In [28]:
# Estimate for the full request: scenes * years * bands
n_years = 5
n_bands = 5
scenes_all = scenes * n_years * n_bands
print(f"{scenes_all} scenes")

3616000 scenes


In [31]:
# Estimate of 70 MB per band file; dividing by 1024 expresses it in GB
mb_per_band = 70
gb_per_band = mb_per_band / 1024
# total size in GB = number of band files * GB per band file
size = scenes_all * gb_per_band
print(f"{size:,} GB")

247,187.5 GB


In [32]:
# Assumes every file is requested in full exactly once.
# NOTE(review): the rates below look like a per-1,000-requests price and a
# per-GB transfer price in dollars; confirm against the provider's price list.
price_per_1000_requests = 0.0004
price_per_gb = 0.02

cost_request = round((scenes_all / 1000) * price_per_1000_requests, 2)
cost_bandwidth = round(size * price_per_gb, 2)
print(f"${cost_request} ${cost_bandwidth:,}")

$1.45 $4,943.75


## Actual Queries

This section runs one query per year to get the actual number of matching scenes. We return only the scene counts here, but the same query results, if parsed and paged through, would yield the actual list of scenes for retrieval.

In [36]:
def query_year(year):
    '''Return the number of scenes the search reports for March 1 through September 30 of `year`.

    The cloud-cover bounds are read from the notebook-level `min_cloud` and
    `max_cloud`; the request itself is delegated to `query_satapi`.
    '''
    day_format = "%Y-%m-%d"
    season_opens = datetime.datetime.strptime(str(year) + "-03-01", day_format)
    season_closes = datetime.datetime.strptime(str(year) + "-09-30", day_format)

    # The window runs from the first second of the opening day to the last
    # second of the closing day
    window = "/".join(
        (
            season_opens.strftime("%Y-%m-%dT00:00:00Z"),
            season_closes.strftime("%Y-%m-%dT23:59:59Z"),
        )
    )

    filters = {
        "bbox": [-180, 40, 180, 80],
        "eo:cloud_cover": {"gte": min_cloud, "lt": max_cloud},
        "collection": {"eq": "landsat-c2l2-sr"},
    }
    search_body = {
        "time": window,
        "query": filters,
        # Cap each page (request) at 500 items so sat-api doesn't fail while
        # returning a big feature collection
        "limit": 500,
    }

    # Only the reported match count is needed, not the features themselves
    return query_satapi(search_body)["meta"]["found"]

In [39]:
# One scene count per year, 2015 through 2019
scene_totals = list(map(query_year, range(2015, 2020)))

In [43]:
# Actual minus estimated: the per-year totals are summed and scaled by 5
# (presumably the same per-band factor used in scenes_all) before subtracting
5 * sum(scene_totals) - scenes_all

33105

In [None]:
# TODO:
# Restrict the query to scenes over land only