In [None]:
# General setup, as explained earlier
import logging
import os
from pprint import pprint
from urllib3.util.retry import Retry

import requests
from requests.adapters import HTTPAdapter

PLANET_API_URL = 'https://api.planet.com/data/v1'

def setup_session(api_key=None):
    """
    Build a requests.Session preconfigured for talking to the Planet api.

    The session authenticates with basic auth (the api key as the user name,
    an empty string as the password) and has an HTTPAdapter with a retry
    policy mounted for every ``https://`` url.

    :param str api_key:
        A Planet api key. When omitted (None), the value of the PL_API_KEY
        environment variable is used instead.

    :returns requests.Session session:
        The configured Session instance.
    """
    key = os.getenv('PL_API_KEY') if api_key is None else api_key

    # Retry policy: 5 retries in total, 0.2 backoff factor, forced on HTTP 429.
    retry_policy = Retry(total=5, backoff_factor=0.2, status_forcelist=[429])
    adapter = HTTPAdapter(max_retries=retry_policy)

    planet_session = requests.Session()
    planet_session.auth = (key, '')
    planet_session.mount('https://', adapter)
    return planet_session

# Uncomment the next line to raise the log level to DEBUG and view detailed
# request information.
#logging.basicConfig(level=logging.DEBUG)

# Notebook-wide session used by the cells below. Called without arguments, the
# api key is read from the PL_API_KEY environment variable.
session = setup_session() # Or pass in an api key if the environment variable isn't set

Yeah, but how do I download data?
---------------------------------------------------

Okay, we've talked a lot about how to search for data, so let's finally start retrieving data.

Downloading data in the Planet API is a 2-step process. We need to first "activate" the asset before we can download it.  

Behind the scenes, this is because we don't store what you download in its full, ready-to-use form.  We store a much more low-level form of the data that can be processed to multiple different asset types.  However, this takes a few minutes of compute time.

Let's work with a scene you should have permission to download assets for:

In [None]:
scene = '20180227_181938_1042'
itemtype = 'PSScene4Band'

# Item metadata is served at <api root>/item-types/<item type>/items/<item id>
url = '{}/item-types/{}/items/{}'.format(PLANET_API_URL, itemtype, scene)
response = session.get(url)
response.raise_for_status()
info = response.json()

# Just for fun, let's display the thumbnail. Check the status first so that a
# failed request surfaces as an HTTP error instead of an error body being
# handed to Image.
from IPython.display import Image
thumbnail = session.get(info['_links']['thumbnail'])
thumbnail.raise_for_status()
Image(thumbnail.content)

In [None]:
# And now let's look at the response we got from the API in more detail:
# (`info` is the parsed JSON body of the item request made above.)
pprint(info)

Hopefully, you'll see `download` permissions in the `_permissions` section.

Activation
---------------

We looked at what the `assets` url (in `_links`) returned briefly in section 2. Let's look at it in more detail now.

In [None]:
# The item metadata carries the url of its assets listing under `_links`.
assets_url = info['_links']['assets']

res = session.get(assets_url)
res.raise_for_status()  # fail loudly on an HTTP error status
assets = res.json()

# `assets` is indexed by asset name in the cells below (e.g. 'analytic_sr').
pprint(assets)

Okay, that's a bit hard to read... Let's take a look at the structure for a single asset.

In [None]:
# Show only the entry for the 'analytic_sr' asset.
pprint(assets['analytic_sr'])

Note that we see `'download'` in the `_permissions` list.  Also note the `status` -- it's "inactive".  This means we need to activate the scene before we can download it.

To activate the scene, follow the `activate` url in the `_links` section:

In [None]:
# Requesting the asset's `activate` link is what asks the API to activate it.
response = session.get(assets['analytic_sr']['_links']['activate'])
response.raise_for_status()

# Let's have a closer look at the actual response code
print(response.status_code)

Okay, we've requested that the scene be activated.  Behind the scenes, a bunch of work is happening to turn the low-level data we store into something usable.  Why did we inspect the response code, though?

### Activation Response Codes

After hitting an activation url, you should get a response code back from the API:

* **`202`** - The request has been accepted and the activation will begin shortly. 
* **`204`** - The asset is already active and no further action is needed. 
* **`401`** - The user does not have permissions to download this file.

You can also get the same information by inspecting the `status` of the asset. The categories are `inactive`, `activating`, and `active`.

Waiting
-----------

We can't download the scene until it's active, as indicated by a 204 response code or `status: active`.  We could just wait around a few minutes, but let's automate the waiting.  (In other words, let's poll the api...)

In [None]:
import time

asset_name = 'analytic_sr'
assets_url = info['_links']['assets'] # From our earlier request

# We could also construct this if needed
print(assets_url)

# Poll the assets url, pausing 10 seconds between attempts, until our asset
# reports the "active" status.
while True:
    # Re-fetch the assets listing for this item; it carries the current status
    res = session.get(assets_url)
    res.raise_for_status()
    assets = res.json()

    if assets[asset_name]['status'] == 'active':
        break

    print("Waiting for activation of asset...")
    time.sleep(10)

print("Asset is active and ready to download")

# Print the asset data
pprint(assets[asset_name])

Okay! Great! We're ready to download now!

Downloading
-------------------

Note the `location` that's now in the response for our asset.  That's what we'll follow to download the data.  However, we'll also need to take a look at its headers to determine what filename we should use.

In [None]:
# The asset entry fetched while polling carries the download link under 'location'
download_url = assets[asset_name]['location']

# We don't want to download the full thing all at once, so we'll stream it
response = session.get(download_url, stream=True)
response.raise_for_status()

# Only the headers are inspected here; the body is not read in this cell.
pprint(response.headers)

There's a lot of clutter there, but it's all standard if you're familiar with what HTTP response headers look like.  If you're not, the parts we want to look at are `Content-Disposition` and possibly `Content-Type` and `Content-Length`.

The `Content-Disposition` header tells us what name we should save the file as (by default, anyway -- you can do whatever you'd like).  The others let us know what type of file it is (in very broad terms) and its size (in bytes).

In [None]:
# Pull out the three headers discussed above
disposition = response.headers['Content-Disposition']
filetype = response.headers['Content-Type']
size = response.headers['Content-Length']

# Content-Length is a size in bytes; divide by 1024**2 to express it in MB
mb_size = int(size) / 1024**2

print('This is a {:.1f}MB {} file'.format(mb_size, filetype))

Let's take a bit of a closer look at the content disposition header.

In [None]:
# Show the raw Content-Disposition header value
print(disposition)

To extract the actual filename, we'll use a regex. If you're not familiar with regular expressions, this will find what's inside the quotes with `filename="foo"`:

In [None]:
import re

# Capture what sits between the quotes of filename="...". Matching [^"]+
# (anything but a quote) stops at the filename's own closing quote, even if
# the header carries further quoted parameters after it; a greedy .+ would
# run on to the last quote in the header.
filename = re.findall(r'filename="([^"]+)"', disposition)[0]
print(filename)

Okay! On to actually downloading the file!

Remember the file size? This is over 100MB.  Therefore, it's best not to download it all at once.  Instead, we'll download it in chunks.  Fortunately, Python's standard library has functions that can do this for us, so we don't need to write the loop over small chunks (say, 1KB at a time) ourselves.

Let's repeat what we did before to start bringing things together.

In [None]:
import shutil

download_url = assets[asset_name]['location']

# We don't want to download the full thing all at once, so we'll stream it.
# Using the response as a context manager closes it (releasing the underlying
# connection) even if writing the file fails part-way through.
with session.get(download_url, stream=True) as response:
    response.raise_for_status()

    disposition = response.headers['Content-Disposition']
    # [^"]+ stops at the filename's own closing quote; basename() strips any
    # directory part so the server-supplied name cannot point outside the
    # current working directory.
    filename = os.path.basename(re.findall(r'filename="([^"]+)"', disposition)[0])
    print('Downloading {}'.format(filename))

    # Reading from response.raw bypasses requests' content decoding; ask
    # urllib3 to undo any Content-Encoding (e.g. gzip) so the bytes written to
    # disk are the file itself.
    response.raw.decode_content = True

    # shutil.copyfileobj will download this in chunks. You can do it manually if you prefer.
    with open(filename, 'wb') as outfile:
        shutil.copyfileobj(response.raw, outfile)

Let's double-check that it really did download the full thing by checking the size of the file on disk with `os.stat`:

In [None]:
# Size on disk of the file we just wrote
print("Downloaded {} bytes".format(os.stat(filename).st_size))

We'll skip working with the file -- that's for other workshops.  Let's move on to tying this back in to the searches we were doing earlier.

---

Activating and Downloading Search Results
--------------------------------------

We've spent a lot of time laying the low-level framework. Now let's walk through one of the most common tasks you'd want to use our api for.  We'll query for scenes and download them.

Again, note that there are lots of higher-level tools to do this more easily: e.g. `planet data download` in the cli tool or interactively using https://planet.com/explorer.  The point of this workshop is to show the API that those tools are using "under-the-hood".  You can do all of what we're about to do with a couple of clicks in explorer or a single command in the cli tool.  

One other note, the Orders API also helps automate this process. If you have a set of scenes and assets you want to download, it will activate them and roll them up into a single zip file for you.  It's particularly useful if you want to upload the scenes into an AWS or GCS bucket, as it can do that for you instead of delivering a zip file.  However, that's for another workshop...

For this workshop, we're going to stick with the Data API. In this case, there are 4 steps:

  1. Search for scenes
  2. Activate the asset(s) you want for those scenes
  3. Wait for them to become active
  4. Download the files for each scene/asset.

In [None]:
def search(geom, start_date, end_date, item_type, asset):
    """
    Generator over quick-search results from the Planet api.

    Posts a single quick-search request combining a permission filter, a
    geometry filter and an acquisition date-range filter, then yields every
    feature of every result page, following the ``_next`` link of each page
    until none is present.

    :param dict geom:  A geojson geometry (the AOI)
    :param str start_date: An iso-8601-formatted timestamp in UTC; sent as the
        "gt" bound on the ``acquired`` field
    :param str end_date: An iso-8601-formatted timestamp in UTC; sent as the
        "lte" bound on the ``acquired`` field
    :param str item_type: A single item type name (e.g. PSScene4Band)
    :param str asset: The asset name we're going to use.
    :returns: Yields the items of each page's ``features`` list, in order.
    """
    def _parse(reply):
        # Raise on an HTTP error status, otherwise hand back the decoded body
        reply.raise_for_status()
        return reply.json()

    request = {
        "item_types": [item_type],
        "filter": {
            "type": "AndFilter",
            "config": [
                # Download permission on the asset: skips scenes that lack
                # the asset as well as scenes we have no access to.
                {
                    "type": "PermissionFilter",
                    "config": ["assets.{}:download".format(asset)],
                },
                # The AOI
                {
                    "type": "GeometryFilter",
                    "field_name": "geometry",
                    "config": geom,
                },
                # The TOI
                {
                    "type": "DateRangeFilter",
                    "field_name": "acquired",
                    "config": {"gt": start_date, "lte": end_date},
                },
            ],
        },
    }

    # The first page comes from the POST; later pages are plain GETs.
    page = _parse(session.post("{}/quick-search".format(PLANET_API_URL), json=request))
    for feature in page['features']:
        yield feature

    next_url = page['_links'].get('_next')
    while next_url:
        page = _parse(session.get(next_url))
        next_url = page['_links'].get('_next')
        for feature in page['features']:
            yield feature

Your accounts for this exercise should have access to download data in California from October 2017 through the end of March 2018.  Feel free to substitute different AOIs and dates here! (Go to http://geojson.io to draw a different geometry if you'd like.)

For now, we'll just print the names of the scenes we find:

In [None]:
geom = {
        "type": "Polygon",
        "coordinates": [
          [
            [
              -119.68505859375,
              35.131702190832634
            ],
            [
              -119.60248947143555,
              35.06611364116525
            ],
            [
              -119.57914352416992,
              35.07679117524852
            ],
            [
              -119.6714973449707,
              35.14026553479837
            ],
            [
              -119.68505859375,
              35.131702190832634
            ]
          ]
        ]
      }

asset = 'visual'
results = search(geom, '2018-01-01T00:00:00Z', '2018-01-10T00:00:00Z', 'PSScene3Band', asset)

# That's a generator. Let's expand it to a list to make it easier to reuse these results later
results = list(results)

for feature in results:
    print(feature['id'])

Okay, let's move on to the next step - Activation of the results:

In [None]:
def _fetch_assets(feature):
    # Fetch the assets section
    assets_url = feature['_links']['assets']
    resp = session.get(assets_url)
    resp.raise_for_status()
    return resp.json()

def activate(results, asset_name):
    """Request activation of ``asset_name`` for each feature in ``results``.

    The assets listing of every feature is fetched; only assets whose status
    is 'inactive' have their ``activate`` link requested. Any other status is
    left untouched. Returns nothing.
    """
    for feature in results:
        asset_info = _fetch_assets(feature)[asset_name]
        if asset_info['status'] != 'inactive':
            continue

        reply = session.get(asset_info['_links']['activate'])
        reply.raise_for_status()

# Request activation for every search result. This will be fairly quick...
activate(results, asset)

Now we need to wait for things to become active.  This can take a while.

In [None]:
def wait_for_active(results, asset_name):
    """Block until ``asset_name`` is active for every feature in ``results``.

    Each pass re-fetches the assets listing of every feature. If any status is
    not 'active', a message is printed and the function sleeps 10 seconds
    before the next pass. Returns nothing.
    """
    while True:
        # A list comprehension: one status string per search result
        statuses = [_fetch_assets(item)[asset_name]['status'] for item in results]

        if all(status == 'active' for status in statuses):
            return

        print("Waiting for activation of assets...")
        time.sleep(10)

# Block until the asset is active for every search result. May take a while...
wait_for_active(results, asset)

And now we'll download the files!  This might take a bit... We could do this asynchronously as well. It's only a few more lines of code, but it requires a bit more familiarity with Python, so we'll leave parallelizing this out for now.

In [None]:
def download(results, asset_name):
    """
    Download ``asset_name`` for every feature in ``results``, one at a time.

    This is a generator: nothing is fetched until it is iterated, and each
    file is completely written (and its response closed) before its name is
    yielded. Files are written to the current working directory.

    :param results: Iterable of search-result features; each must provide
        ``_links.assets``.
    :param str asset_name: The asset to download. Its entry in the assets
        listing must contain a ``location`` link, so activate it and wait for
        it to become active first.
    :returns: Yields the local filename of each downloaded file.
    :raises requests.HTTPError: If a request returns an HTTP error status.
    """
    for feature in results:
        assets = _fetch_assets(feature)
        download_url = assets[asset_name]['location']

        # We don't want to download the full thing all at once, so we'll
        # stream it. The context manager closes the response (releasing the
        # connection) even if writing the file fails part-way through.
        with session.get(download_url, stream=True) as response:
            response.raise_for_status()

            # Figure out what we should call the local file. [^"]+ stops at
            # the filename's own closing quote even when more quoted
            # parameters follow; basename() strips any directory part so the
            # server-supplied name cannot point outside the working directory.
            disposition = response.headers['Content-Disposition']
            filename = os.path.basename(
                re.findall(r'filename="([^"]+)"', disposition)[0])
            print('Downloading {}'.format(filename))

            # Reading from response.raw bypasses requests' content decoding;
            # ask urllib3 to undo any Content-Encoding (e.g. gzip).
            response.raw.decode_content = True

            # Download in chunks.
            with open(filename, 'wb') as outfile:
                shutil.copyfileobj(response.raw, outfile)

        yield filename

# download() is a generator, so wrap it in list() to actually run every
# download; `files` then holds the local filenames.
files = list(download(results, asset))