# Download Sentinel 2 data from [SciHub](https://scihub.copernicus.eu/dhus) 
(other missions are available...)

Based upon:
http://geoinformaticstutorial.blogspot.com/2015/10/batch-downloading-sentinel-images-from.html

Adjustments were required to switch to Python 3:
* urllib2 is now part of urllib
* the file that stores the response needs to be opened in binary write mode

        textfile = open('data/test.xml', 'wb')   

Also, the request string needed to be modified to include brackets around the search string, e.g.

        ?q=( <search string in here>)
The code was further modified to accept a shapefile (or other geographic format) from which the bounding box for the request query could be made.  

Note that a login is required for SciHub, so this would need to be set up first.

In [None]:
import geopandas as gpd
from urllib import request
import xml.etree.ElementTree as etree
import os

Username and password information are stored in a separate module.

In [None]:
import info

This function gets the bounding box of the supplied extent file.  It makes sure that the returned bounding box is in WGS84 coordinates (EPSG:4326).

In [None]:
def getwgs84bbox(extentfile):
    """
    Get the bounding box of all features from a shapefile or other
    geographic data format supported by geopandas.

    Parameters
    ----------
    extentfile : string
        Path to the extent file.  This can be a shapefile or other dataset
        that geopandas can read.

    Returns
    -------
    string
        The bounding box as a WKT polygon, in WGS84 (EPSG:4326) coordinates.
    """

    gdf = gpd.read_file(extentfile)

    # convert to wgs
    # (the {'init': 'epsg:4326'} dict form is deprecated; the epsg keyword
    # is the equivalent, supported spelling)
    gdf = gdf.to_crs(epsg=4326)

    # total_bounds is ordered (minx, miny, maxx, maxy)
    minx, miny, maxx, maxy = gdf.total_bounds

    botleft  = f"{minx} {miny}"
    topleft  = f"{minx} {maxy}"
    topright = f"{maxx} {maxy}"
    botright = f"{maxx} {miny}"

    # usually outer polygon rings go in a clockwise direction
    # the website returned a polygon drawn in an anti-clockwise direction from a drawn rectangle so this has been replicated.
    bboxs = f"POLYGON(({botleft},{botright},{topright},{topleft},{botleft}))"

    # hand the polygon back to the caller; without this the function
    # returned None and the search query contained "Intersects(None)"
    return bboxs

This function is the one that actually makes the request to SciHub and downloads files according to the response.

It builds a string from the inputs that is used to make a request to SciHub.  The response is in xml and this is parsed to pick out information used to build the url to download the imagery.

Note that if a file has already been downloaded, it won't be requested again.

In [None]:
def sentinel_lookout(extentfile, startdate, enddate, 
        platform, product, cloudcover):
    """
    Search for and download Sentinel files from SciHub

    The search response is saved to data/response.xml and every scene it
    lists is downloaded to data/<scene title>.zip.  Scenes whose zip file
    already exists are skipped.  Progress is reported with print().

    Parameters
    ----------
    extentfile : string
        Path to extent file.  This can be a shapefile or other dataset that 
        geopandas can read
    startdate : string
        The start date for the search in the format YYYY-MM-DD
    enddate : string
        The end date for the search in the format YYYY-MM-DD
    platform : string
        The Sentinel platform eg Sentinel-2
    product : string
        The product to use in the search
    cloudcover : string
        The percentage cloud cover to use in the search.
        Must be in the format [<start percentage> TO <end percentage>]
        eg [0 TO 5]

    Returns
    -------
    None
    """
    # quote lives in urllib.parse; urllib.request only re-exports it as an
    # undocumented implementation detail
    from urllib.parse import quote

    atom = '{http://www.w3.org/2005/Atom}'
    chunk_size = 1024 * 1024

    # get the bounding box
    bbox = getwgs84bbox(extentfile)

    # build the date element
    # the same range is applied to both beginPosition and endPosition
    daterange = f"[{startdate}T00:00:00.000Z TO {enddate}T23:59:59.999Z]"

    # authenticate at scihub webpage
    url =  'https://scihub.copernicus.eu/dhus/'
    password_mgr = request.HTTPPasswordMgrWithDefaultRealm()
    password_mgr.add_password(None, url, info.username, info.password)
    handler = request.HTTPBasicAuthHandler(password_mgr)
    opener = request.build_opener(handler)
    request.install_opener(opener)

    # build the query from single-line pieces so that no newlines or
    # source-code indentation end up inside the url
    query = (
        f'( footprint:"Intersects({bbox})" ) '
        f'AND ( beginPosition:{daterange} AND endPosition:{daterange} ) '
        f'AND (platformname:{platform} AND producttype:{product} '
        f'AND cloudcoverpercentage:{cloudcover})'
    )
    requeststring = f'{url}search?q={query}'
    urlrequest = quote(requeststring, ':()[]/?=,&')

    # everything is written below data/, so make sure it exists
    os.makedirs('data', exist_ok=True)

    # read the response into page and write it to a xml-file
    with request.urlopen(urlrequest) as response:
        page = response.read()
    with open('data/response.xml', 'wb') as textfile:
        textfile.write(page)

    # parse the xml response
    # the entry tag contains the results
    root = etree.fromstring(page)
    entries = root.findall(f'{atom}entry')
    total = len(entries)
    print ("Number of Scenes Found: ", total)

    for number, entry in enumerate(entries, start=1):
        # the uuid element is used to create the path to the file
        uuid_elem = entry.find(f'{atom}id')
        sentinel_link = f"https://scihub.copernicus.eu/dhus/odata/v1/Products('{uuid_elem.text}')/$value"

        # the title element contains the corresponding file name
        title_elem = entry.find(f'{atom}title')

        # destinationpath with filename where download to be stored
        destinationpath =  f"data/{title_elem.text}.zip"

        print(f"Scene {number} of {total}")

        # check if file has already been downloaded
        print (sentinel_link)
        if os.path.exists(destinationpath):
            print (title_elem.text, 'already downloaded')
            continue

        # if not, download the file
        print ("Downloading ", title_elem.text)

        # stream to a temporary name in chunks: products are large, and an
        # interrupted download must not leave a truncated zip behind that
        # would be treated as "already downloaded" on the next run
        partialpath = f"{destinationpath}.part"
        with request.urlopen(sentinel_link) as downloadfile, \
                open(partialpath, "wb") as outfile:
            while True:
                chunk = downloadfile.read(chunk_size)
                if not chunk:
                    break
                outfile.write(chunk)

        # only a complete download gets the final zip name
        os.replace(partialpath, destinationpath)

    print ("Download complete")


Next run the code to carry out the download, supplying:
* the start and end dates of the search, 
* a file specifying the area of interest,
* the satellite platform,
* the specific product,
* the percentage cloud cover.  Note the specific format required for this input (eg [0 TO 9.4])

In [None]:
# area of interest: any dataset that geopandas can read
extentfile = "data/iow_mask.shp"

# search window, both dates in the format YYYY-MM-DD
startdate = "2018-06-15"
enddate = "2018-07-22"

# what to search for
platform = "Sentinel-2"
product = "S2MSI2A"

# cloud cover percentage range, note the "[<start> TO <end>]" format
cloudcover = "[0 TO 5]"

# run the search and download every matching scene
sentinel_lookout(
    extentfile=extentfile,
    startdate=startdate,
    enddate=enddate,
    platform=platform,
    product=product,
    cloudcover=cloudcover,
)

Functionality could be modified so that bounding boxes of available images are derived from the xml response.  These extents could then be used to decide which files to actually download.

This is the line in the xml response that could be used for that:
    
    <str name="footprint">POLYGON ((-1.561123205595257 51.44235231685392,0.017025008525814 51.41234265506691,-0.046130286859295 50.42629366061962,-1.591284890362356 50.45527216066188,-1.561123205595257 51.44235231685392))</str>