Note: to run this file as a Jupyter notebook, you must first authenticate with Google Earth Engine using the `Earth Engine Authentication.ipynb` notebook. Run all of the code in that notebook; it will ask you to authenticate with your Google account. After that, you can run this file.

In [1]:
import time
import subprocess

import numpy as np

import rasterio

import ee
import ee.batch

from IPython.display import Image
from IPython.core.display import HTML

ee.Initialize() # needed for google earth engine

## Misc

In [2]:
def get_bounding_box(left, bottom, right, top):
    '''Build the 'region' corner list that Google Earth Engine understands.

    The corners are returned as [x, y] pairs in the order
    top-left, bottom-left, bottom-right, top-right.
    '''
    top_left = [left, top]
    bottom_left = [left, bottom]
    bottom_right = [right, bottom]
    top_right = [right, top]
    return [top_left, bottom_left, bottom_right, top_right]

In [3]:
# bounding box for the US - calculated from the 20m TIGER Line Cartographic Boundary shapefiles +0.5 degree buffer
# The four values are the edges of the box in degrees, in the order (left, bottom, right, top).
# NOTE(review): presumably left/right are longitudes and bottom/top are latitudes - confirm.
left, bottom, right, top = -125.225839, 23.998131, -66.449895, 49.884358

In [None]:
"-125", "24", "-65", "50",

In [4]:
# Corner list (see get_bounding_box) for the whole left/bottom/right/top box.
usa_bounding_region = get_bounding_box(left, bottom, right, top)

## Download data

Here is where the magic happens...

In [5]:
# Select an image collection; here we use Landsat 8 surface reflectance.
l8sr = ee.ImageCollection('LANDSAT/LC08/C01/T1_SR')

In [6]:
def mask_and_convert(image):
    '''Mask out the pixels that the Landsat 8 QA band flags as cloud or cloud shadow.

    Adapted from the Landsat 8 Surface Reflectance example in the Earth Engine code editor.

    Note:
        this is __different__ from CLOUD_COVER as here we are filtering out pixels instead of entire images

    Args:
        image: an image with a 'pixel_qa' band; it must provide `select` and `updateMask`.

    Returns:
        The result of `image.updateMask(mask)`, where `mask` keeps only the pixels whose
        cloud shadow bit (bit 3) and cloud bit (bit 5) are both zero in 'pixel_qa'.
    '''
    cloud_shadow_bit_mask = 1 << 3
    cloud_bit_mask = 1 << 5

    qa = image.select('pixel_qa')

    no_cloud_shadow = qa.bitwiseAnd(cloud_shadow_bit_mask).eq(0)
    no_cloud = qa.bitwiseAnd(cloud_bit_mask).eq(0)

    # The two conditions must be combined with the image method `.And()`. Python's `and`
    # does not build a combined expression: it simply evaluates to one of its two operands,
    # so using it here would silently drop one of the two conditions.
    mask = no_cloud_shadow.And(no_cloud)

    return image.updateMask(mask)

In [7]:
'''Code for customizing which images we want to take the median of.

In each case we start with the _entire_ set of Landsat 8 surface reflectance images,
`ee.ImageCollection('LANDSAT/LC08/C01/T1_SR')`, then filter on multiple criteria, and finally map the remaining images
through the `mask_and_convert` function. The collection-->filter-->map steps are done separately for each year in case
we want to select images from a different part of each year (e.g. so we can do things like get the median image
over all non cloudy images from June in the range 2013-2015). Note: the CLOUD_COVER filter (currently commented out)
would remove entire images from the collection, as opposed to our mapping function, which removes pixels.
'''

CLOUD_COVER = 100
MONTH_START = "01"
MONTH_END = "03"  # zero-padded like every other month string so that the dates are well-formed
DAY_END = "31"

DO_SECOND_RANGE = True
MONTH2_START = "10"
MONTH2_END = "12"
DAY2_END = "31"


def _masked_collection(year, month_start, month_end, day_end):
    '''Return the cloud-masked Landsat 8 SR images for one date range within `year`.

    Args:
        year: integer year of the range.
        month_start: two-digit month string; the range starts on the 1st of this month.
        month_end: two-digit month string of the end date.
        day_end: two-digit day string of the end date.

    Returns:
        The date-filtered collection with `mask_and_convert` mapped over every image.
    '''
    start_date = "%d-%s-01" % (year, month_start)
    # NOTE(review): per the Earth Engine docs the end date of `filterDate` is exclusive, so
    # images acquired on the end day itself are presumably left out - confirm this is intended.
    end_date = "%d-%s-%s" % (year, month_end, day_end)
    return (ee.ImageCollection('LANDSAT/LC08/C01/T1_SR')
            .filterDate(start_date, end_date)
            #.filter(ee.Filter.lt('CLOUD_COVER', CLOUD_COVER))
            .map(mask_and_convert)
           )


# First range: 2013 seeds the collection, then 2014-2017 are merged in.
image_collection = _masked_collection(2013, MONTH_START, MONTH_END, DAY_END)
for year in range(2014, 2018):
    image_collection = image_collection.merge(
        _masked_collection(year, MONTH_START, MONTH_END, DAY_END)
    )

if DO_SECOND_RANGE:
    print("Including second range")
    # NOTE(review): the second range covers 2014-2017 only, while the first range also
    # includes 2013 - confirm that this asymmetry is intended.
    for year in range(2014, 2018):
        image_collection = image_collection.merge(
            _masked_collection(year, MONTH2_START, MONTH2_END, DAY2_END)
        )

image_collection = ee.ImageCollection(image_collection)
output_image = image_collection.median()

In [8]:
# which bands are available
all_bands = output_image.bandNames().getInfo()
print(all_bands)

# Bands to include in the export.
# A wider alternative selection is: ['B1','B2','B3','B4','B5','B6','B7','B10']
selected_bands = ["B2", "B3", "B4"]

['B1', 'B2', 'B3', 'B4', 'B5', 'B6', 'B7', 'B10', 'B11', 'sr_aerosol', 'pixel_qa', 'radsat_qa']


In [9]:
# Keep only the bands listed in `selected_bands` in the image that will be exported
# (this step isn't necessary if we want all of the bands).
output_image = output_image.select(selected_bands)

In [10]:
'''Here we start download tasks for each (y_step, x_step) sized square in our area of interest. This will
result in a grid of satellite image patches covering our entire area. These tasks will show up in the "Tasks"
tab at https://code.earthengine.google.com/, and when finished, will generate large TIFF files in your Google Drive account.

This is a fire and forget process, it creates a bunch of task objects that are running on Google's servers.
'''

task_list = [] # easy way to keep track of how far our download is

# these values should evenly divide 120 in order to have an integer number of 0.0083 x 0.0083 degree pixels in each tile
y_step = 10.0 # height of each patch (in degrees)
x_step = 10.0 # width of each patch (in degrees)

total_patches = 0 # keep track of the total number of patches we've requested
for y, row_top in enumerate(np.arange(top, bottom, -y_step)):
    # np.arange yields NumPy scalars. Convert every tile edge to a plain Python float so that
    # the str() of the region below contains bare numbers: on NumPy 2 and later a NumPy scalar
    # inside a list is rendered as e.g. `np.float64(1.0)`, which would corrupt the region string.
    t_top = float(row_top)
    t_bottom = float(max(t_top - y_step, bottom)) # clip the last row to the area of interest

    for x, col_left in enumerate(np.arange(left, right, x_step)):
        t_left = float(col_left)
        t_right = float(min(t_left + x_step, right)) # clip the last column to the area of interest

        export_name = "Export_%d_%d" % (x, y)

        task = ee.batch.Export.image.toDrive(
            output_image,
            export_name, # name of this task
            "landsat_output", # name of Google Drive folder the results will be stored in, doesn't need to exist
            fileNamePrefix=export_name,
            region=str(get_bounding_box(t_left, t_bottom, t_right, t_top)),
            scale=1000, # resolution in meters
            crs='EPSG:4269',
            maxPixels=1e10,
            # NOTE(review): per the Earth Engine docs this is presumably the size (in pixels) of the
            # tiles the image is computed in - confirm; fileDimensions has to be a multiple of it
            shardSize=256,
            # maximum height or width of a file (in pixels); an export that is taller or wider
            # than this will be broken up into several files, which is messy
            fileDimensions=16384,
            skipEmptyTiles=True
        )
        task.start()
        task_list.append(task)
        total_patches += 1

        time.sleep(1) # chill so Google doesn't get mad at us, polite but maybe unnecessary

In [11]:
# Re-run this cell every so often to see how far along the export is, instead of
# checking the "Tasks" tab on the Earth Engine website.
# NOTE(review): every task that is no longer active is counted as finished; presumably that
# also includes tasks that failed or were cancelled - confirm.

num_complete = sum(1 for export_task in task_list if not export_task.active())

print("Finished %d/%d" % (num_complete, total_patches))

Finished 0/18


## Landsat 7 TOA downloads

In [14]:
'''List every "image" that is available in the Landsat 7 TOA_1YEAR collection.

Unlike the Tier 1 Landsat 7/8 imagery collections, this collection only holds a few images - 1 for each year.
That means none of the preprocessing steps used earlier are needed: we can simply pick the image
we want and download from that instead.
'''
#L7 = ee.ImageCollection('LANDSAT/LE7_TOA_3YEAR')
L7 = ee.ImageCollection('LANDSAT/LE7_TOA_1YEAR')

l7_info = L7.getInfo()
l7_features = l7_info["features"]

# the keys of the first entry show which fields are worth looking at
print(l7_features[0].keys())

# one id per image in the collection
image_ids = [entry["id"] for entry in l7_features]
for image_id in image_ids:
    print(image_id)

dict_keys(['type', 'bands', 'version', 'id', 'properties'])
LANDSAT/LE7_TOA_1YEAR/1999
LANDSAT/LE7_TOA_1YEAR/2000
LANDSAT/LE7_TOA_1YEAR/2001
LANDSAT/LE7_TOA_1YEAR/2002
LANDSAT/LE7_TOA_1YEAR/2003
LANDSAT/LE7_TOA_1YEAR/2004
LANDSAT/LE7_TOA_1YEAR/2005
LANDSAT/LE7_TOA_1YEAR/2006
LANDSAT/LE7_TOA_1YEAR/2007
LANDSAT/LE7_TOA_1YEAR/2008
LANDSAT/LE7_TOA_1YEAR/2009
LANDSAT/LE7_TOA_1YEAR/2010
LANDSAT/LE7_TOA_1YEAR/2011
LANDSAT/LE7_TOA_1YEAR/2012
LANDSAT/LE7_TOA_1YEAR/2013
LANDSAT/LE7_TOA_1YEAR/2014


In [15]:
# Pick a single yearly image by its id; now we can use this `output_image` in the
# `ee.batch.Export.image.toDrive` function.
# The commented-out lines below select other years instead.

output_image = ee.Image("LANDSAT/LE7_TOA_1YEAR/2000")
#output_image = ee.Image("LANDSAT/LE7_TOA_1YEAR/2010")
#output_image = ee.Image("LANDSAT/LE7_TOA_1YEAR/2013")