# Fetch a random set of tiles from Landsat

The code in this notebook selects a random subset of the Landsat tiles listed in landsat5list.py (these are Landsat 5 tiles that cover the same area and time period as the corine dataset).
To use this notebook, you need to have a USGS account, and have the username and password in the USGS_USER and USGS_PASSWORD environment variables, respectively.  

Towards the bottom of the notebook are recommended methods for checking that the retrieved tiles are actually useful, and for preventing useless ones from being downloaded again.

In [1]:
from landsat5fetch import *
from pathlib import Path

In [2]:
# Open a session using get_session from the landsat5fetch star import.
# NOTE(review): presumably this is where the USGS_USER / USGS_PASSWORD
# environment variables are consumed — confirm in landsat5fetch.
s = get_session()

In [3]:
import numpy as np
import landsat5list
# Tiles already on disk are recognised by their file stem, so every name in
# the master list without a matching .tif is still a candidate for download.
storage_directory = Path('/storage/data/landsat5')
wehave = [*storage_directory.glob('*.tif')]
lst = np.array(landsat5list.potential_tiles)
candidates = list(set(lst) - {tif.stem for tif in wehave})
# Report: total listed, already downloaded, still to fetch.
print(*map(len, (lst, wehave, candidates)))

In [10]:
# USGS appears to accept at most 20 tiles per request, so shuffle the
# candidates in place and take the first 20 as this batch.

np.random.shuffle(candidates)
tofetch = [*candidates[:20]]
tofetch

In [11]:
# Submit the batch chosen above as a single order (passed as a plain list).
submit_order(list(tofetch))
# Note: a 400 (BAD REQUEST) response here most likely means that some resource limit
# has been exceeded, not that the request itself is malformed.

In [7]:
# Display the orders that are still open.
# NOTE(review): presumably these are orders USGS has not finished preparing — confirm in landsat5fetch.
get_open_orders()

In [4]:
# Fetch whatever results are available, passing storage_directory as the target.
# The entries of `downloaded` are used further down as tile names (x + ".tif").
downloaded = download_available_results(storage_directory)
# Show how many results were retrieved by this call.
len(downloaded)

In [None]:
# ls5munge.sh converts the data from the form in which we get it from Landsat (a gzipped tar file) into the form we will use (a tif file).
# Ideally we'd do the Landsat tile processing right here in the notebook, but that doesn't work, because I have not found any way to activate the right conda environment
# from within a notebook. So for now, you have to open a terminal, activate the fastai environment, then run ls5munge.sh manually.
#
# Usage:  ./ls5munge.sh /home/usgs/landsat5/*.tar.gz
#
# Note: it will automatically skip files that already have .tif, so you don't have to worry about telling it exactly which ones are the new ones.

In [4]:
import numpy as np
from pathlib import Path
import sys
sys.path.append('/home/firewise/')  # add parent to python path so that we can import some tools...
from multispectral import corine
from multispectral import coords
from multispectral import tools
import rasterio
# NOTE(review): presumably sets the default width of figures drawn by the
# multispectral.tools plotting helpers (e.g. show_bands) — confirm in tools.
tools.set_figure_width(20)

In [5]:
# Alternative: check every tif in the directory instead of only the new ones.
#tocheck = list(Path('/home/usgs/landsat5').glob('*.tif'))
# Default: check only the tiles retrieved by the download step of this session.
# NOTE(review): this directory differs from storage_directory
# ('/storage/data/landsat5') used for the download — confirm both refer to
# the same location (e.g. via a symlink).
tocheck = [Path('/home/usgs/landsat5').joinpath(tile + ".tif") for tile in downloaded]
len(tocheck)

In [7]:
# Not all the files in landsat5list.py will actually be useful.
# The checking here is similar to what we do in window generation, but in this case we are looking
# at the *entire* landsat file at once.  This loop will find tiles that can't have any valid windows
# (e.g. because they only intersect an empty part of corine).
# Files identified here should both be removed from the storage directory and commented out of landsat5list.py --- in fact
# all files with the same pathrow combo --- so that we don't download them again.


def smudge(pw1, pw2):
    """Return the two pixel windows trimmed to a shared width and height.

    Rounding can make the same geo window map to pixel windows of slightly
    different sizes in different data files.  Each window keeps its own
    column/row offset; only its width and height are reduced to the smaller
    of the pair.  No geo-registration adjustment is attempted.

    A window that already has the shared size is returned as the very same
    object; otherwise a new rasterio.windows.Window is built for it.

    Returns a tuple (pw1, pw2) of the adjusted windows.
    """
    target_width = min(pw1.width, pw2.width)
    target_height = min(pw1.height, pw2.height)

    def _fit(win):
        # Leave a window untouched when it already matches the shared size.
        if win.width == target_width and win.height == target_height:
            return win
        return rasterio.windows.Window(win.col_off, win.row_off, target_width, target_height)

    return (_fit(pw1), _fit(pw2))


# Collect every tile that cannot produce a usable window against corine.
# Each rejected file is appended to `failed` and the reason is printed.
failed = []
for file in tocheck:
    # Open the tile in a context manager so its dataset handle is closed at
    # the end of every iteration (including the early `continue` exits)
    # instead of leaving one open file behind per tile checked.
    with rasterio.open(file) as fp:
        # Fetch the corine dataset for this tile's CRS.
        # NOTE(review): cp is deliberately not closed here — fetch_corine may
        # cache or share the dataset between calls; confirm before closing it.
        cp = corine.fetch_corine(fp.crs)
        geo_common = coords.geo_window_intersect(fp.bounds, cp.bounds)
        if geo_common is None:
            # This case shouldn't happen, and probably should be investigated.
            failed.append(file)
            print("{} does not intersect {}".format(file.name, cp.name))
            continue

        # Express the shared geo window in each dataset's pixel coordinates,
        # then force both pixel windows to the same height/width.
        fp_window = coords.geo_to_pixel(fp, geo_common)
        cp_window = coords.geo_to_pixel(cp, geo_common)
        (fp_window, cp_window) = smudge(fp_window, cp_window)

        # check to see if the non-nodata bits overlap
        # (a patch counts as empty when every value in it is zero)
        # NOTE(review): band 7 is used as the landsat data indicator — confirm
        # which band that is in the munged tif.
        fp_patch = fp.read(7, window=fp_window)
        if not fp_patch.any():
            failed.append(file)
            print("{} is empty in intersection".format(file.name))
            continue

        cp_patch = cp.read(1, window=cp_window)
        if not cp_patch.any():
            failed.append(file)
            print("{} intersects empty part of corine".format(file.name))
            continue

        # Both patches contain data somewhere; require at least one pixel
        # where both are non-zero at the same position.
        common_data = np.logical_and(fp_patch, cp_patch)
        if not common_data.any():
            failed.append(file)
            print("{} and {} have no common data".format(file.name, cp.name))
    

In [8]:
# Display the tiles that the check loop rejected.
failed

In [None]:
# Show the location of the (all|failed) tiles.  You should generally expect failed tiles to be visibly outside or on the edge of the corine data.
# If they are not, it may mean the landsat data is corrupted, or a bug, or...

for file in tocheck:  # iterate over `tocheck` to show every tile, or over `failed` to show only the failed ones
    # Open the tile in a context manager so its dataset handle is closed once
    # its bounds have been used, rather than leaving every tile open.
    with rasterio.open(file) as fp:
        cp = corine.fetch_corine(fp.crs)
        # Draw corine band 1 with the tile's bounds (converted to corine pixel
        # coordinates) passed as the rectangle to show.
        tools.show_bands(cp, 1, showrect=coords.geo_to_pixel(cp, fp.bounds).flatten())