# Planet API Data Collection

### Imports

In [33]:
import json
import requests
import os
import time

### API Key and URL

In [34]:
PLANET_API_KEY = 'YOUR API KEY'

In [35]:
# Helper function to print formatted JSON using the json module
def printer(data):
    print(json.dumps(data, indent=2))

### Requests

In [36]:
URL = 'https://api.planet.com/data/v1'

session = requests.Session()

res = session.get(URL, auth = (PLANET_API_KEY, '') )

In [37]:
res.status_code

200

In [38]:
res.text

'{"_links": {"_self": "https://api.planet.com/data/v1/", "asset-types": "https://api.planet.com/data/v1/asset-types/", "item-types": "https://api.planet.com/data/v1/item-types/", "spec": "https://api.planet.com/data/v1/spec"}}'

In [39]:
printer(res.json())

{
  "_links": {
    "_self": "https://api.planet.com/data/v1/",
    "asset-types": "https://api.planet.com/data/v1/asset-types/",
    "item-types": "https://api.planet.com/data/v1/item-types/",
    "spec": "https://api.planet.com/data/v1/spec"
  }
}


In [40]:
print(res.json()["_links"]["asset-types"])

https://api.planet.com/data/v1/asset-types/


In [41]:
print(res.json()["_links"]["item-types"])

https://api.planet.com/data/v1/item-types/


### Statistics endpoint

In [42]:
stats_url = "{}/stats".format(URL)
print(stats_url)

https://api.planet.com/data/v1/stats


In [43]:
from planet import api

**Filter Criteria:**
- Type (type) - The type of filter being used
- Configuration (config) - The configuration for this filter
- Field Name (field_name) - The field on which to filter

**Field Filters**
- DateRangeFilter
- RangeFilter
- StringInFilter
- PermissionFilter
- GeometryFilter

**Logical Filters**
- NotFilter
- AndFilter
- OrFilter



**Camp Fire Dates:**
- Began: 2018-11-08
- Contained: 2018-11-25
- End buring: 2018-??-??

# Camp Fire Filter

In [44]:
camp_geo_filter = {"type":"GeometryFilter",
                   "field_name":"geometry",
                   "config":{"type":"Polygon",
                             "coordinates": [[
            [-122.9150390625,     40.44694705960048],
            [-121.97021484374999, 40.44694705960048],
            [-121.97021484374999, 41.07728074262537],
            [-122.9150390625,     41.07728074262537],
            [-122.9150390625,     40.44694705960048]
          ]]}}

In [45]:
camp_date_filter = {"type":"DateRangeFilter", 
                    "field_name":"acquired", 
                    "config":{"gte":"2018-11-01T00:00:00Z",
                              "lte":"2018-12-01T00:00:00Z"}}

camp_geo_filter = {"type":"GeometryFilter",
                   "field_name":"geometry",
                   "config":{"type":"Polygon",
                             "coordinates": [[
            [-122.9150390625,     40.44694705960048],
            [-121.97021484374999, 40.44694705960048],
            [-121.97021484374999, 41.07728074262537],
            [-122.9150390625,     41.07728074262537],
            [-122.9150390625,     40.44694705960048]
          ]]}}

camp_fire_filter = {"type":"AndFilter",
               "config":[camp_date_filter, camp_geo_filter]}

In [46]:

item_types = ["PSScene4Band", "REOrthoTile"]

request = {
    "item_types" : item_types,
    "interval" : "day",
    "filter" : camp_fire_filter 
}

res = session.post(stats_url, json=request)
printer(res.json())

{
  "utc_offset": "+0h",
  "interval": "day",
  "buckets": [
    {
      "count": 60,
      "start_time": "2018-11-01T00:00:00.000000Z"
    },
    {
      "count": 63,
      "start_time": "2018-11-02T00:00:00.000000Z"
    },
    {
      "count": 62,
      "start_time": "2018-11-03T00:00:00.000000Z"
    },
    {
      "count": 61,
      "start_time": "2018-11-04T00:00:00.000000Z"
    },
    {
      "count": 60,
      "start_time": "2018-11-05T00:00:00.000000Z"
    },
    {
      "count": 48,
      "start_time": "2018-11-06T00:00:00.000000Z"
    },
    {
      "count": 65,
      "start_time": "2018-11-07T00:00:00.000000Z"
    },
    {
      "count": 54,
      "start_time": "2018-11-08T00:00:00.000000Z"
    },
    {
      "count": 58,
      "start_time": "2018-11-09T00:00:00.000000Z"
    },
    {
      "count": 91,
      "start_time": "2018-11-10T00:00:00.000000Z"
    },
    {
      "count": 62,
      "start_time": "2018-11-11T00:00:00.000000Z"
    },
    {
      "count": 55,
      "start

### Ferguson Fire Filter

In [47]:
ferguson_date_filter = {"type":"DateRangeFilter",
                        "field_name":"acquired",
                         "config":{"gte":"2018-07-01T00:00:00Z",
                                   "lte":"2018-09-01T00:00:00Z"}}

ferguson_geo_filter = {"type":"GeometryFilter",
                       "field_name":"geometry",
                       "config":{"type":"Polygon",
                       "coordinates": [[
                           [-120.39916992187499, 37.58158917213053],
                           [-119.49554443359376, 36.98280911070616],
                           [-118.67156982421875, 37.58376576718623],
                           [-119.4873046875,     38.20581359813473],
                           [-120.39916992187499, 37.58158917213053]
                                           ]]}}


ferguson_fire_filter = {"type":"AndFilter",
                   "config":[ferguson_date_filter, ferguson_geo_filter]}

In [48]:
item_types = ["PSScene4Band"]

request = {
    "item_types" : item_types,
    "interval" : "day",
    "filter" : ferguson_fire_filter 
}

res = session.post(stats_url, json=request)
printer(res.json())

{
  "utc_offset": "+0h",
  "interval": "day",
  "buckets": [
    {
      "count": 90,
      "start_time": "2018-07-01T00:00:00.000000Z"
    },
    {
      "count": 92,
      "start_time": "2018-07-02T00:00:00.000000Z"
    },
    {
      "count": 95,
      "start_time": "2018-07-03T00:00:00.000000Z"
    },
    {
      "count": 107,
      "start_time": "2018-07-04T00:00:00.000000Z"
    },
    {
      "count": 112,
      "start_time": "2018-07-05T00:00:00.000000Z"
    },
    {
      "count": 122,
      "start_time": "2018-07-06T00:00:00.000000Z"
    },
    {
      "count": 128,
      "start_time": "2018-07-07T00:00:00.000000Z"
    },
    {
      "count": 96,
      "start_time": "2018-07-08T00:00:00.000000Z"
    },
    {
      "count": 96,
      "start_time": "2018-07-09T00:00:00.000000Z"
    },
    {
      "count": 75,
      "start_time": "2018-07-10T00:00:00.000000Z"
    },
    {
      "count": 117,
      "start_time": "2018-07-11T00:00:00.000000Z"
    },
    {
      "count": 85,
      "

### Woolesly Fire

In [49]:
woolesley_data_filter =  {"type":"DateRangeFilter",
                          "field_name":"acquired",
                          "config":{"gte":"2018-11-01T00:00:00Z",
                                    "lte":"2018-12-01T00:00:00Z"}}

woolesly_geo_filter = {"type":"GeometryFilter",
                       "field_name":"geometry",
                       "config":{"type":"Polygon",
                       "coordinates": [[
                                 [-119.036865234375,   33.99575015925125],
                                 [-118.54522705078126, 33.99802726234877],
                                 [-118.54248046874999, 34.3955786154917],
                                 [-119.04235839843749, 34.40464357107094],
                                 [-119.04373168945314, 33.99916579100914]
                                                 ]]}}

woolesley_fire_filter = {"type":"AndFilter",
                        "config":[woolesley_data_filter, woolesly_geo_filter]}

In [50]:
item_types = ["PSScene4Band"]

request = {"item_types" : item_types,
           "interval" : "day",
           "filter" : woolesley_fire_filter}

res = session.post(stats_url, json=request)
printer(res.json())

{
  "utc_offset": "+0h",
  "interval": "day",
  "buckets": [
    {
      "count": 21,
      "start_time": "2018-11-01T00:00:00.000000Z"
    },
    {
      "count": 15,
      "start_time": "2018-11-02T00:00:00.000000Z"
    },
    {
      "count": 26,
      "start_time": "2018-11-03T00:00:00.000000Z"
    },
    {
      "count": 23,
      "start_time": "2018-11-04T00:00:00.000000Z"
    },
    {
      "count": 16,
      "start_time": "2018-11-05T00:00:00.000000Z"
    },
    {
      "count": 18,
      "start_time": "2018-11-06T00:00:00.000000Z"
    },
    {
      "count": 23,
      "start_time": "2018-11-07T00:00:00.000000Z"
    },
    {
      "count": 21,
      "start_time": "2018-11-08T00:00:00.000000Z"
    },
    {
      "count": 26,
      "start_time": "2018-11-09T00:00:00.000000Z"
    },
    {
      "count": 23,
      "start_time": "2018-11-10T00:00:00.000000Z"
    },
    {
      "count": 27,
      "start_time": "2018-11-11T00:00:00.000000Z"
    },
    {
      "count": 11,
      "start

## Carr Fire Filter

In [51]:
carr_data_filter = {"type":"DateRangeFilter",
                          "field_name":"acquired",
                          "config":{"gte":"2018-07-01T00:00:00Z",
                                    "lte":"2018-09-15T00:00:00Z"}}

carr_geo_filter = {"type":"GeometryFilter",
                   "field_name":"geometry",
                   "config":{"type":"Polygon",
                   "coordinates": [[
                       [-122.9150390625,     40.44694705960048],
                       [-121.97021484374999, 40.44694705960048],
                       [-121.97021484374999, 41.07728074262537],
                       [-122.9150390625,     41.07728074262537],
                       [-122.9150390625,     40.44694705960048]
                                       ]]
                            }}
                             
carr_fire_filter = {"type":"AndFilter",
                    "config":[carr_data_filter, carr_geo_filter]}

In [52]:
item_types = ["PSScene4Band"]

request = {"item_types" : item_types,
           "interval" : "day",
           "filter" : carr_fire_filter}

res = session.post(stats_url, json=request)
printer(res.json())

{
  "utc_offset": "+0h",
  "interval": "day",
  "buckets": [
    {
      "count": 68,
      "start_time": "2018-07-01T00:00:00.000000Z"
    },
    {
      "count": 69,
      "start_time": "2018-07-02T00:00:00.000000Z"
    },
    {
      "count": 39,
      "start_time": "2018-07-03T00:00:00.000000Z"
    },
    {
      "count": 70,
      "start_time": "2018-07-04T00:00:00.000000Z"
    },
    {
      "count": 60,
      "start_time": "2018-07-05T00:00:00.000000Z"
    },
    {
      "count": 11,
      "start_time": "2018-07-06T00:00:00.000000Z"
    },
    {
      "count": 44,
      "start_time": "2018-07-07T00:00:00.000000Z"
    },
    {
      "count": 61,
      "start_time": "2018-07-08T00:00:00.000000Z"
    },
    {
      "count": 40,
      "start_time": "2018-07-09T00:00:00.000000Z"
    },
    {
      "count": 57,
      "start_time": "2018-07-10T00:00:00.000000Z"
    },
    {
      "count": 62,
      "start_time": "2018-07-11T00:00:00.000000Z"
    },
    {
      "count": 57,
      "start

# FULL API QUERY function

### General overview:
- Specify AOIs/ filters
    - might need to restrict the time frame 
- query the API for each filter 
    - We want separate queries for each filter because they are separate scenes
- Compile metadata - this we sort into a list of dictionaries for easy conversion to dataframe
    - Perhaps write to df - then to csv in one step.
    - Preserving "acquired" metadata is important for classification purposes.
- Need to figure out the image_ids bug. Got it to work in an additional step but this is a little clunky


- Second function
    - Loop through image_ids and activate all the analytic or basic analytic assets.
    - These generally produce a download link
        - Important to figure out how to download these iteratively and store results in a clean, low-memory format. 
        - Otherwise I'll have to run everything on AWS, buy storage, etc. 
        - This will probably be the most challenging hurdle. 
        - Not to mention first verifying that these images are what I want...
 
 - Downloading downloading downloading...
     - convert to raster? 
     - rasterio rasterio rasterio
     - 
     
 - 

In [53]:
# would ideally build this into a function (passes a list of filters)

from requests.auth import HTTPBasicAuth


# API Key stored as an env variable
PLANET_API_KEY = 'YOUR API KEY'

item_types = ['PSScene4Band']

# each query maxing out at 250 results
filters = [ferguson_fire_filter, woolesley_fire_filter, carr_fire_filter, camp_fire_filter]

big_dict_list = []
image_ids = []

for fire_filter in filters:
    # API request object
    search_request = {"interval": "day",
                      "item_types": item_types, 
                      "filter": fire_filter}

# fire off the POST request
    search_result = requests.post('https://api.planet.com/data/v1/quick-search',
                                   auth=HTTPBasicAuth(PLANET_API_KEY, ''), 
                                   json=search_request)
    
    # test
    # pull this out of the function and print "feature". Does this differ from "scene"
    # it shouldn't....
    # why is scene['id'] not working in the next assets request step ???
    
    ids = [feature['id'] for feature in search_result.json()['features']]
    image_ids.extend(ids)

    
    temp_dict = {}
    for scene in search_result.json()['features']:
        temp_dict['Self'] = scene['_links']['_self']
        temp_dict['Assets'] = scene['_links']['assets']
        temp_dict['Coordinates'] =  scene['geometry']['coordinates']
        temp_dict['Image_id'] = scene['id']
        temp_dict['Acquired timestamp'] = scene['properties']['acquired']
        temp_dict['Anomalous pixels'] = scene['properties']['anomalous_pixels']
        temp_dict['Pct Cloud Cover'] = scene['properties']['cloud_cover']
        temp_dict['Horizontal Pixels'] = scene['properties']['columns']
        temp_dict['Vertical Pixels'] = scene['properties']['rows']
        temp_dict['Sun Azimuth'] = scene['properties']['sun_azimuth']
        temp_dict['Sun elevation'] = scene['properties']['sun_elevation']
#         temp_dict['Usable data, LOL!'] = scene['properties']['usable_data']
        
        big_dict_list.append(temp_dict)



# big_dict_list
print(image_ids)

['20180822_180706_1105', '20180822_180701_1105', '20180822_180700_1105', '20180822_180714_1105', '20180822_180712_1105', '20180822_180705_1105', '20180822_180657_1105', '20180822_180715_1105', '20180822_180658_1105', '20180822_180707_1105', '20180822_180717_1105', '20180822_180703_1105', '20180822_180708_1105', '20180822_180711_1105', '20180822_180704_1105', '20180822_180709_1105', '20180822_180716_1105', '20180822_180702_1105', '20180822_180710_1105', '20180808_180612_1105', '20180808_180605_1105', '20180808_180604_1105', '20180808_180616_1105', '20180808_180601_1105', '20180808_180614_1105', '20180808_180610_1105', '20180808_180607_1105', '20180808_180611_1105', '20180808_180602_1105', '20180808_180613_1105', '20180808_180558_1105', '20180808_180600_1105', '20180808_180603_1105', '20180808_180559_1105', '20180808_180608_1105', '20180808_180615_1105', '20180808_180606_1105', '20180725_180454_1105', '20180725_180450_1105', '20180725_180501_1105', '20180725_180444_1105', '20180725_18045

In [54]:
# will build this in a new function  
item_type = ['PSScene4Band']

for image_id in image_ids[:1]:
    id0_url = 'https://api.planet.com/data/v1/item-types/{}/items/{}/assets'.format(item_type, image_id)

    request_result = requests.get(id0_url, 
                                 auth = HTTPBasicAuth(PLANET_API_KEY, ''))
    print(request_result)
    
    status = request_result.json()['analytic']['status']
    
    time.sleep(1)
    
    # active or inactive ?
    if status == 'active' or status == 'activating':
        print('Analytic asset: {}'.format(request_result.json()['analytic']['status']))    
        pass
    
    else:
        print('Analytic asset: {}'.format(request_result.json()['analytic']['status']))         
    
        #activate asset for download

        # Parse out useful links
        links = request_result.json()[u"analytic"]["_links"]
        self_link = links["_self"]
        activation_link = links["activate"]
        

        # Request activation of the 'analytic' asset:
        activate_result = requests.get(activation_link,
                                       auth=HTTPBasicAuth(PLANET_API_KEY, ''))
        
        time.sleep(1)

        
        activation_status_result = requests.get(self_link,
                                                auth=HTTPBasicAuth(PLANET_API_KEY, ''))
    
        print(activation_status_result.json()["status"])
        
        
        download_link = activation_status_result.json()["location"]
        
        print(download_link)
    
    time.sleep(2)

<Response [404]>


KeyError: 'analytic'

 ## Activation and Downloading
 
The Data API does not pre-generate assets, so they are not always immediately availiable to download. In order to download an asset, we first have to **activate** it.

Remember, earlier we decided we wanted a color-corrected image best suited for *analytic* applications. We can check the status of the analytic asset we want to download like so:
 

In [58]:
# For demo purposes, just grab the first image ID
id0 = image_ids[134]
id0_url = 'https://api.planet.com/data/v1/item-types/{}/items/{}/assets'.format(item_type, id0)

# Returns JSON metadata for assets in this ID. Learn more: planet.com/docs/reference/data-api/items-assets/#asset
sample_result = \
  requests.get(
    id0_url,
    auth=HTTPBasicAuth(PLANET_API_KEY, '')
  )

# List of asset types available for this particular satellite image
print(sample_result.json().keys())


dict_keys(['field', 'general'])


In [56]:
# This is "inactive" if the "analytic" asset has not yet been activated; otherwise 'active'
print(sample_result.json()['analytic']['status'])

KeyError: 'analytic'

In [266]:
print(sample_result.json()[u"analytic"]["_links"])

{'_self': 'https://api.planet.com/data/v1/assets/eyJpIjogIjIwMTgwODMxXzE4MTMyOF8xMDA0IiwgImMiOiAiUFNTY2VuZTNCYW5kIiwgInQiOiAiYW5hbHl0aWMiLCAiY3QiOiAiaXRlbS10eXBlIn0', 'activate': 'https://api.planet.com/data/v1/assets/eyJpIjogIjIwMTgwODMxXzE4MTMyOF8xMDA0IiwgImMiOiAiUFNTY2VuZTNCYW5kIiwgInQiOiAiYW5hbHl0aWMiLCAiY3QiOiAiaXRlbS10eXBlIn0/activate', 'type': 'https://api.planet.com/data/v1/asset-types/analytic'}


In [267]:
#activate asset for download

# Parse out useful links
links = sample_result.json()[u"analytic"]["_links"]
self_link = links["_self"]
activation_link = links["activate"]

# Request activation of the 'analytic' asset:
activate_result = \
  requests.get(
    activation_link,
    auth=HTTPBasicAuth(PLANET_API_KEY, '')
  )

In [271]:
activation_status_result = \
  requests.get(
    self_link,
    auth=HTTPBasicAuth(PLANET_API_KEY, '')
  )
    
print(activation_status_result.json()["status"])

active


In [272]:
# Image can be downloaded by making a GET with your Planet API key, from here:
download_link = activation_status_result.json()["location"]
print(download_link)

https://api.planet.com/data/v1/download?token=eyJhbGciOiJIUzUxMiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiJzTXNjbnhXZllxbk5IQml0bDh3RFdZMlB4dE0ydXFKOTVWNmJCa3lOWi9vOVpuL2F3MFdXRDFxdisxcHZZVCtNRkxWQ0pZMkVNWEtNeklvaVBvdlFjdz09IiwiaXRlbV90eXBlX2lkIjoiUFNTY2VuZTNCYW5kIiwidG9rZW5fdHlwZSI6InR5cGVkLWl0ZW0iLCJleHAiOjE1NDg4MTAwOTUsIml0ZW1faWQiOiIyMDE4MDgzMV8xODEzMjhfMTAwNCIsImFzc2V0X3R5cGUiOiJhbmFseXRpYyJ9.KFT1ICl2zU1ThS5g7sM1tET5vTDM3a5HqpD9w2LYlxYmBfimOLWvtP7aPIdVq85PVvrWUlNVL3xmNzIitNoWhw


# Plotting the Tif

In [None]:
def sample_images(tags, n=None):
    """Randomly sample n images with the specified tags."""
    condition = True

    for tag in tags:
        condition = condition & target[tag] == 1
    if n is not None:
        return target[condition].sample(n)
    else:
        return target[condition]

In [None]:
def load_image(filename):
    """Return a 4D (r, g, b, nir) numpy array with the data in the specified TIFF filename."""
    path = os.path.abspath(os.path.join(PLANET_KAGGLE_TRAIN_ROOT, filename + ".tif"))
    if os.path.exists(path):
        with rasterio.open(path) as src:
            b, g, r, nir = src.read()
            return np.dstack([r, g, b, nir])

In [None]:
image_path = sample_images(['primary', 'water', 'road'], n=1)['image_name'].iloc[0]
rgbn_image = load_image(image_path)
rgb_image = rgbn_image[:,:,:3]
r, g, b, nir = rgbn_image[:, :, 0], rgbn_image[:, :, 1], rgbn_image[:, :, 2], rgbn_image[:, :, 3]

plt.figure(figsize = (7,5))
plt.title(label = "{}: {}".format(image_path, target.iloc[int(image_path.lstrip("train_"))]['labels']))
for slice_, name, color in ((r,'r', 'red'),(g,'g', 'green'),(b,'b', 'blue'), (nir, 'nir', 'magenta')):
    plt.hist(slice_.ravel(), bins=100, 
             range=[0,rgb_image.max()], 
             label=name, color=color, histtype='step', )
plt.legend();

In [None]:
fig = plt.figure()
fig.set_size_inches(12, 4)
for i, (x, c) in enumerate(((r, 'r'), (g, 'g'), (b, 'b'), (nir, 'near-ir'))):
    a = fig.add_subplot(1, 4, i+1)
    a.set_title(c)
    plt.imshow(x)

In [None]:
path = os.path.join(PLANET_KAGGLE_TRAIN_ROOT, image_path + '.tif')
img = io.imread(path)
bgr_image = img[:,:,:3]
rgb = bgr_image[:, :, [2,1,0]]

In [None]:
imshow(rgb)