In [1]:
import os
import datetime
import time
import json
import pandas as pd
import geopandas as gpd
import numpy as np
import requests
import shutil


import rasterio
from time import sleep
from rasterio import plot
from shapely.geometry import MultiPolygon, shape, Point
from shapely_geojson import dumps

from pathlib import Path
from pprint import pprint
from zipfile import ZipFile

from planet import api
from planet.api import filters

In [2]:
from tqdm.auto import tqdm

To use your own API keys and parameters, copy the `parameters.py.dist` file into the same folder and remove the `.dist` extension from the copy. You can then replace the placeholder strings with your own keys. Only the `.dist` file is pushed to the git repository.

In [3]:
from parameters import *

In [4]:
def save_thumb(metadata_df):
    """Download the thumbnail of every item listed in the metadata dataframe.

    Each thumbnail is written to
    ``<cwd>/thumbs/<sample_id>/<year>/it<item_type>_cc<cloud_cover>_y<year>m<month>_<id>.jpg``.

    Args:
        metadata_df (pd.DataFrame): one row per item, with the columns
            ``thumbnail`` (URL), ``date``, ``item_type``, ``cloud_cover``,
            ``id`` and ``sample_id``.

    Return:
        None. Thumbnails are stored on disk; items whose thumbnail request
        does not answer with HTTP 200 are reported and skipped.
    """
    # One authenticated session is reused for every request and is closed
    # when the block exits.
    with requests.Session() as session:
        session.auth = (PLANET_API_KEY, '')

        for index, row in metadata_df.iterrows():
            url = row.thumbnail
            date = row.date
            item_type = row.item_type
            cloud_cover = row.cloud_cover
            id_ = row.id
            sample_id = row.sample_id

            thumb_name = f'it{item_type}_cc{cloud_cover}_y{date.year}m{date.month}_{id_}.jpg'

            thumb_path = os.path.join(os.getcwd(), 'thumbs',
                                      str(sample_id),
                                      str(date.year))

            Path(thumb_path).mkdir(parents=True, exist_ok=True)

            # The response is used as a context manager so the streamed
            # connection is always released.
            with session.get(url, stream=True) as r:
                if r.status_code != 200:
                    print(f'Thumbnail for item {id_} skipped: HTTP {r.status_code}')
                    continue

                with open(os.path.join(thumb_path, thumb_name), 'wb') as f:
                    # Let urllib3 undo gzip/deflate before copying the raw stream
                    r.raw.decode_content = True
                    shutil.copyfileobj(r.raw, f)

In [5]:
def build_request(aoi_geom, start_date, stop_date, cloud_cover=100):
    """Assemble a Data API search request for PlanetScope / RapidEye ortho items.

    Args:
        aoi_geom (geojson): geometry handed to the geometry filter.
        start_date (datetime.datetime): items must be acquired after this date.
        stop_date (datetime.datetime): items must be acquired before this date.
        cloud_cover: upper bound (inclusive) for the ``cloud_cover`` property.
            NOTE(review): the notebook passes a fraction (0.02) while the
            default is 100 -- confirm the unit the API expects.

    Returns:
        Request
    """
    # REScene is left out on purpose: it is not orthorectified and
    # cannot be clipped.
    searched_item_types = [
        'PSScene3Band',
        'PSScene4Band',
        'PSOrthoTile',
        'REOrthoTile',
    ]

    conditions = [
        filters.geom_filter(aoi_geom),
        filters.range_filter('cloud_cover', lte=cloud_cover),
        filters.date_range('acquired', gt=start_date),
        filters.date_range('acquired', lt=stop_date),
    ]
    query = filters.and_filter(*conditions)

    return filters.build_search_request(query, searched_item_types)

In [6]:
# search the data api
def search_data_api(request, client, limit=1000000):
    """Run a quick search for ``request`` and iterate over the matching items.

    Args:
        request: search request, e.g. the value returned by ``build_request``.
        client: object exposing ``quick_search(request)``.
        limit (int): maximum number of items handed to ``items_iter``.

    Returns:
        Whatever ``items_iter`` returns (the original comment notes this is
        a generator).
    """
    return client.quick_search(request).items_iter(limit=limit)

In [7]:
def get_items(id_name, request):
    """Fetch every item matching ``request`` and pair the result with an id.

    The search is executed with the notebook-level ``client``.

    Args:
        id_name: identifier (the sample id in this notebook) stored next to
            the items.
        request: search request, e.g. the value returned by ``build_request``.

    Returns:
        list: ``[id_name, items]`` where ``items`` is the list of found items.
    """
    found_items = search_data_api(request, client)

    return [id_name, list(found_items)]

In [8]:
def get_dataframe(items):
    """Turn the output of ``get_items`` into a metadata dataframe.

    Args:
        items (list): ``[sample_id, features]`` where ``features`` is a list
            of item dictionaries holding the keys ``id``, ``geometry``,
            ``_permissions``, ``_links['thumbnail']`` and
            ``properties['acquired' | 'item_type' | 'cloud_cover']``.

    Returns:
        pd.DataFrame: one row per item, sorted by acquisition date with a
        fresh 0..n-1 index. An empty ``features`` list yields an empty
        dataframe that still has all the columns.
    """
    columns = [
        'date',
        'id',
        'item_type',
        'thumbnail',
        'permissions',
        'footprint',
        'cloud_cover',
        'metadata'
    ]

    items_metadata = [(f['properties']['acquired'],
                       f['id'],
                       f['properties']['item_type'],
                       f['_links']['thumbnail'],
                       f['_permissions'],
                       f['geometry'],
                       f['properties']['cloud_cover'],
                       f
                       ) for f in items[1]]

    # Naming the columns up front keeps them present when the search found
    # nothing (positional access such as df[0] fails on an empty frame).
    df = pd.DataFrame(items_metadata, columns=columns)
    df['date'] = pd.to_datetime(df['date'])
    df['sample_id'] = items[0]

    # reset_index returns a new frame, so its result has to be kept;
    # drop=True avoids adding the old index as an extra column.
    return df.sort_values(by=['date']).reset_index(drop=True)

In [9]:
def add_cover_area(metadata_df, sample_df):
    """Add, in place, the share of each sample covered by each item footprint.

    For every row the ``cover_perc`` column receives
    ``area(sample ∩ footprint) / area(sample)``, i.e. a fraction, not a
    percentage.

    Args:
        metadata_df (pd.DataFrame): item rows with the columns ``sample_id``
            and ``footprint`` (a geometry mapping accepted by ``shape``).
            Mutated in place.
        sample_df: table indexed by sample id with a ``geometry`` column.

    Returns:
        None
    """
    # Create the column up front so that it also exists when there are no
    # rows; later cells filter on metadata_df.cover_perc.
    if 'cover_perc' not in metadata_df.columns:
        metadata_df['cover_perc'] = float('nan')

    for idx, row in metadata_df.iterrows():

        sample_geom = sample_df.at[row.sample_id, 'geometry']  # sample geometry
        footprint_geom = shape(row.footprint)  # footprint geometry
        covered = sample_geom.intersection(footprint_geom).area
        metadata_df.at[idx, 'cover_perc'] = covered / sample_geom.area

In [10]:
def build_order_from_metadata(metadata_df, samples_df, sample_id):
    """Build the order request (a plain dict) for the items of one sample.

    Args:
        metadata_df (pd.DataFrame): items with the columns ``sample_id``,
            ``item_type`` and ``id``.
        samples_df: table indexed by sample id with a ``geometry`` column;
            the geometry of ``sample_id`` is used as clipping AOI.
        sample_id: the sample whose items are ordered.

    Returns:
        dict: order request holding one product entry per item type, a clip
        tool, a single-zip delivery and e-mail notifications disabled.
    """
    # Bundles requested for every supported item type.
    # Is not possible to ask for analytic_dn in PSScene3Band, so the next option is visual
    # for more info go to https://developers.planet.com/docs/orders/product-bundles-reference/
    bundle_by_item_type = {
        'PSScene3Band': "analytic,visual",
        'PSScene4Band': "analytic_udm2,analytic_sr,analytic",
        'PSOrthoTile': "analytic_5b_udm2,analytic_5b,analytic_udm2,analytic",
        'REOrthoTile': "analytic",
    }

    sample_items = metadata_df[metadata_df.sample_id == sample_id]

    # One product entry per item type, in order of first appearance
    products = []
    for item_type in sample_items.item_type.unique():
        ids_of_type = sample_items[sample_items.item_type == item_type].id.to_list()
        products.append({
            "item_ids": ids_of_type,
            "item_type": item_type,
            "product_bundle": bundle_by_item_type[item_type],
        })

    # clip to AOI
    sample_geometry = samples_df.at[sample_id, 'geometry']
    clip_tool = {'clip': {'aoi': json.loads(dumps(sample_geometry))}}

    delivery = {
        'single_archive': True,
        'archive_filename': '{{name}}_{{order_id}}.zip',
        'archive_type': 'zip',
    }

    return {
        'name': 'sample_' + str(sample_id),
        'products': products,
        'tools': [clip_tool],
        'delivery': delivery,
        'notifications': {'email': False},
    }

In [11]:
def score_items(dataframe, *args):
    """Score each item according to season, item type, clouds and coverage.

    Args:
        dataframe (pd.DataFrame): items with the columns ``date``
            (datetime-like), ``item_type``, ``cloud_cover`` and
            ``cover_perc``. It is not modified.
        *args: optionally, in this order: the item type score mapping, the
            month score mapping, the cloud scoring function and the cover
            scoring function. When fewer than four values are given, the
            notebook-level ``item_type_score``, ``months_score``,
            ``cloud_score`` and ``cover_score`` are used.

    Return:
        Scored items dataframe: a copy with the columns ``season_score``,
        ``item_score``, ``cloud_score``, ``covered_area`` and
        ``total_score`` added, sorted by ``total_score`` then ``date``,
        both descending.

    Raises:
        KeyError: if a month or item type has no entry in its score mapping.
    """
    if len(args) >= 4:
        type_scores, season_scores, cloud_fn, cover_fn = args[:4]
    else:
        type_scores, season_scores = item_type_score, months_score
        cloud_fn, cover_fn = cloud_score, cover_score

    # Create a copy to avoid mutate the initial df
    df = dataframe.copy()

    def _score_column(values):
        # Float columns aligned on df's own index; also valid for zero rows.
        return pd.Series(list(values), index=df.index, dtype='float64')

    # Every row of the frame that was passed in is scored exactly once, so
    # the score columns always exist, whatever the number of items per year.
    df['season_score'] = _score_column(season_scores[d.month] for d in df['date'])
    df['item_score'] = _score_column(type_scores[t] for t in df['item_type'])
    df['cloud_score'] = _score_column(cloud_fn(c) for c in df['cloud_cover'])
    df['covered_area'] = _score_column(cover_fn(p) for p in df['cover_perc'])

    df['total_score'] = df.season_score + \
                        df.item_score + \
                        df.cloud_score + \
                        df.covered_area

    df = df.sort_values(by=['total_score', 'date'], ascending=False)

    return df

In [12]:
def get_one_item_per_year(scored_items_df):
    """Keep the first row found for each acquisition year.

    The input order decides which row wins, so the frame is expected to be
    ordered best-first (as returned by ``score_items``).

    Args:
        scored_items_df (pd.DataFrame): items with a datetime ``date`` column.
            It is not modified.

    Returns:
        pd.DataFrame: a copy with an added ``year`` column and one row per
        year, newest first.
    """
    with_year = scored_items_df.assign(year=scored_items_df.date.dt.year)
    first_per_year = with_year.drop_duplicates(subset=['year'], keep='first')

    return first_per_year.sort_values(by=['date'], ascending=False)

In [13]:
def track_order(order_id, client, num_loops=50, poll_interval=10):
    """Poll an order until it reaches a final state or the attempts run out.

    The state is printed at every attempt.

    Args:
        order_id: id of the order to follow.
        client: object exposing ``get_individual_order(order_id).get()``,
            which returns a mapping with a ``state`` entry.
        num_loops (int): maximum number of polling attempts.
        poll_interval (float): seconds to wait between two attempts.

    Returns:
        None

    Raises:
        Exception: when the order state is ``failed``; the order information
            is attached to the exception.
    """
    success_states = ['success', 'partial']

    count = 0
    while count < num_loops:
        count += 1
        order_info = client.get_individual_order(order_id).get()
        state = order_info['state']
        print(state)

        if state == 'failed':
            # Attach the order information that was just fetched
            raise Exception(order_info)
        elif state in success_states:
            return

        # No need to wait once the last attempt has been made
        if count < num_loops:
            time.sleep(poll_interval)

    print(f'order {order_id} did not reach a final state after {num_loops} checks')

# 1. Search items
### Get the samples dataframe

From a GeoJSON plots file, create a GeoPandas dataframe that stores the geometry and the id of each plot. It will be used as a geometry filter and to calculate the percentage of the sample area covered by each item.

In [14]:
# Load the samples table from a local pickle. Later cells use its index as
# the sample id and read a `geometry` column from it.
# NOTE(review): unpickling can execute arbitrary code -- only load trusted files.
samples_gdf = pd.read_pickle('shp/samples.p')

### Connect to client

In [15]:
# Planet API client shared by the search, order and download cells.
# PLANET_API_KEY is presumably provided by `from parameters import *` -- confirm.
client = api.ClientV1(api_key=PLANET_API_KEY)

### Define filters

In [16]:
# define test data for the filter
# Acquisition window handed to build_request (used there as exclusive gt / lt bounds)
start_date = datetime.datetime(2009, 1, 1)
stop_date = datetime.datetime(2020, 12, 31)
# Upper bound (inclusive, `lte`) for the cloud_cover range filter
cloud_cover_lte = 0.02
# Minimum covered area in percent; it is divided by 100 before being
# compared with `cover_perc`, and the comparison is inclusive (>=)
minimum_covered_area = 90 # included

### Define scores

In [17]:
# item_type_score
# Points given to an item according to its item type
item_type_score = {
    'PSScene4Band':8, 
    'PSScene3Band':8, 
    'PSOrthoTile':10,
    'REOrthoTile':0,
    'SkySatScene':0,
}

# season score
# Points given to an item according to its acquisition month (1-12).
# Each line lists two months: January-June on the left, July-December on the right.
months_score = {
    1: 5, 7:0,
    2: 5, 8:0,
    3: 5, 9:0,
    4: 0, 10:7,
    5: 0, 11:10,
    6: 0, 12:10,
}

# cloud_score

def cloud_score(cloud_cover):
    """Score an item from its cloud cover.

    The value is multiplied by 100 before being compared, so the thresholds
    below are expressed on that scaled value (1 = 1%).

    Args:
        cloud_cover (float): cloud cover of the item.

    Returns:
        int: 10 when the scaled value is exactly 0, 5 when it is in (0, 1],
        0 otherwise.
    """
    scaled = cloud_cover*100

    if scaled == 0:
        return 10

    if 0 < scaled <= 1:
        return 5

    return 0
    
# Covered area

def cover_score(covered_area):
    """Score an item from the share of the sample it covers.

    The value is multiplied by 100 before being compared with the
    thresholds.

    Args:
        covered_area (float): covered share of the sample (1.0 = fully covered).

    Returns:
        int: 10 when the scaled value is at least 99, 5 when it is at
        least 95, 0 otherwise.
    """
    scaled = covered_area*100

    if scaled >= 99:
        return 10

    return 5 if scaled >= 95 else 0
    


## OPTION: 1.1 Get items for individual samples ((optional))
### Get items and metadata using filters

In [18]:
# Choose one sample by its position in the samples geodataframe
# (position 0, the first sample, in this example). Its geometry becomes the
# AOI and its index label the sample id.
row_number = 0
selected_sample = samples_gdf.iloc[row_number]
sample_id = selected_sample.name
aoi_geometry = json.loads(dumps(selected_sample.geometry))

In [19]:
# Build the search request for the selected AOI and fetch the matching items;
# `items` is the pair [sample_id, list_of_items]
request = build_request(aoi_geometry, start_date, stop_date, cloud_cover_lte)
items = get_items(sample_id, request)

# Transform items into a pandas dataframe with useful columns
metadata_df = get_dataframe(items)

### Calculate percentage of covered area

Calculate the percentage of covered area from the sample area with the item footprint

In [20]:
# Mutate metadata_df and add the percentage of cover area
# (stored in the `cover_perc` column as a fraction of the sample area)
add_cover_area(metadata_df, samples_gdf)

In [21]:
# Remove items that cover less than `minimum_covered_area` percent of the sample.
# `cover_perc` is a fraction, hence the division by 100.
# Note: this rebinds metadata_df to the filtered frame.
metadata_df = metadata_df[metadata_df.cover_perc >= (minimum_covered_area/100)]

### Score items


In [22]:
# Score the remaining items; the tables and functions passed here are the
# ones defined in the "Define scores" section
scored_items = score_items(metadata_df, item_type_score, months_score, cloud_score, cover_score)

In [23]:
# Keep a single item per acquisition year (the first one found for each year
# in the scored frame)
selected_items = get_one_item_per_year(scored_items)

### ((Optional)): Export thumbnails
Create thumbnails from the selected items (dataframe) and store them into a structured folder

In [24]:
# Download one thumbnail per selected item into thumbs/<sample_id>/<year>/
save_thumb(selected_items)

## OPTION 1.2 Get items for all plots and store into a big df
### Loop over all plots
Loop over all plots and get the items.

In [25]:
# Collect one dataframe of selected items per sample

df_list = []

# tqdm wraps the iteration directly, so the bar advances once per sample
for index, row in tqdm(samples_gdf.iterrows(), total=len(samples_gdf)):

    sample_id = row.name
    aoi_geometry = json.loads(dumps(row.geometry))

    # Search the items intersecting the sample
    request = build_request(aoi_geometry, start_date, stop_date, cloud_cover_lte)
    items = get_items(sample_id, request)

    # Transform items into a pandas dataframe with useful columns
    metadata_df = get_dataframe(items)

    # Mutate metadata_df and add the percentage of cover area
    add_cover_area(metadata_df, samples_gdf)

    # Remove items that are under the minimum_covered_area threshold
    min_cover_fraction = minimum_covered_area/100
    metadata_df = metadata_df[metadata_df.cover_perc >= min_cover_fraction]

    # Create a score for each item
    scored_items = score_items(metadata_df, item_type_score, months_score, cloud_score, cover_score)

    # Filter scored_items and get only one per year
    selected_items = get_one_item_per_year(scored_items)

    # Append selected_items to a list
    df_list.append(selected_items)

    # Drop the per-sample intermediates before moving to the next sample
    del metadata_df, scored_items, selected_items

HBox(children=(FloatProgress(value=0.0, max=5.0), HTML(value='')))




In [26]:
# Concatenate dataframes from the df_list
all_df = pd.concat(df_list)

In [33]:
# Inspect the items selected for one sample (one row per year)
all_df[all_df.sample_id=='8704_21284']

Unnamed: 0,date,id,item_type,thumbnail,permissions,footprint,cloud_cover,metadata,sample_id,cover_perc,season_score,item_score,cloud_score,covered_area,total_score,year
65,2020-02-05 10:14:07.940641+00:00,3108772_3042121_2020-02-05_0f25,PSOrthoTile,https://tiles.planet.com/data/v1/item-types/PS...,"[assets.analytic:download, assets.analytic_dn:...","{'coordinates': [[[-1.481489812916189, 6.50694...",0.0,{'_links': {'_self': 'https://api.planet.com/d...,8704_21284,1.0,5.0,10.0,10.0,10.0,35.0,2020
115,2019-12-29 10:12:49.102889+00:00,2981683_3042121_2019-12-29_103d,PSOrthoTile,https://tiles.planet.com/data/v1/item-types/PS...,"[assets.analytic:download, assets.analytic_dn:...","{'coordinates': [[[-1.52775255399863, 6.507072...",0.0,{'_links': {'_self': 'https://api.planet.com/d...,8704_21284,1.0,10.0,10.0,10.0,10.0,40.0,2019
246,2018-11-25 10:17:14.081815+00:00,20181125_101714_0f4a,PSScene3Band,https://tiles.planet.com/data/v1/item-types/PS...,"[assets.analytic:download, assets.analytic_dn:...","{'coordinates': [[[-1.652283204177246, 6.66958...",0.0,{'_links': {'_self': 'https://api.planet.com/d...,8704_21284,1.0,10.0,8.0,10.0,10.0,38.0,2018
325,2017-12-23 09:53:15.773293+00:00,1008739_3042121_2017-12-23_1035,PSOrthoTile,https://tiles.planet.com/data/v1/item-types/PS...,"[assets.analytic:download, assets.analytic_dn:...","{'coordinates': [[[-1.479816114491128, 6.50694...",0.0,{'_links': {'_self': 'https://api.planet.com/d...,8704_21284,1.0,10.0,10.0,10.0,10.0,40.0,2017
368,2016-12-19 08:30:50.989551+00:00,20161219_083050_1_0c59,PSScene4Band,https://tiles.planet.com/data/v1/item-types/PS...,"[assets.analytic:download, assets.analytic_dn:...","{'coordinates': [[[-1.48124897360219, 6.623947...",0.01,{'_links': {'_self': 'https://api.planet.com/d...,8704_21284,1.0,10.0,8.0,5.0,10.0,33.0,2016
388,2015-12-12 11:12:26+00:00,20151212_111226_3042121_RapidEye-5,REOrthoTile,https://tiles.planet.com/data/v1/item-types/RE...,"[assets.analytic:download, assets.analytic_sr:...","{'coordinates': [[[-1.488506318075318, 6.50699...",0.0,{'_links': {'_self': 'https://api.planet.com/d...,8704_21284,1.0,10.0,0.0,10.0,10.0,30.0,2015
389,2014-12-28 11:21:59+00:00,20141228_112159_3042121_RapidEye-3,REOrthoTile,https://tiles.planet.com/data/v1/item-types/RE...,"[assets.analytic:download, assets.analytic_sr:...","{'coordinates': [[[-1.7016131, 6.7336899], [-1...",0.0,{'_links': {'_self': 'https://api.planet.com/d...,8704_21284,1.0,10.0,0.0,10.0,10.0,30.0,2014
370,2013-12-21 11:35:33+00:00,20131221_113533_3042121_RapidEye-2,REOrthoTile,https://tiles.planet.com/data/v1/item-types/RE...,"[assets.analytic:download, assets.analytic_sr:...","{'coordinates': [[[-1.658991768002014, 6.73356...",0.0,{'_links': {'_self': 'https://api.planet.com/d...,8704_21284,1.0,10.0,0.0,10.0,10.0,30.0,2013
337,2012-01-08 11:25:41+00:00,20120108_112541_3042121_RapidEye-4,REOrthoTile,https://tiles.planet.com/data/v1/item-types/RE...,"[assets.analytic:download, assets.analytic_sr:...","{'coordinates': [[[-1.7016131, 6.7336899], [-1...",0.0,{'_links': {'_self': 'https://api.planet.com/d...,8704_21284,1.0,5.0,0.0,10.0,10.0,25.0,2012
338,2011-12-22 11:32:41+00:00,20111222_113241_3042121_RapidEye-1,REOrthoTile,https://tiles.planet.com/data/v1/item-types/RE...,"[assets.analytic:download, assets.analytic_sr:...","{'coordinates': [[[-1.7016131, 6.7336899], [-1...",0.02,{'_links': {'_self': 'https://api.planet.com/d...,8704_21284,1.0,10.0,0.0,0.0,10.0,20.0,2011


In [27]:
# Create thumbs for all selected items
# (one request per row; files go to thumbs/<sample_id>/<year>/)
save_thumb(all_df)

# 2. Order assets

In [28]:
# An order needs the dataframe of selected items (all_df) and samples_gdf,
# which gives the geometry used to clip the items of each sample.

# One order request per sample, in the order of the samples_gdf index
orders = [
    build_order_from_metadata(all_df, samples_gdf, sample_id=idx)
    for idx in samples_gdf.index
]

### Request order
<font color='red'>The following lines will start the order in the planet server, once the order is placed and running, there is no way to stop it.</font>

NOTE: The following loop will skip the samples that have already been ordered. However, the check is based on the list returned by the existing-orders request, and we are not sure how long orders remain listed on the Planet server.

In [29]:
# Names of the orders already known by the server (each name embeds the sample id)
existing_orders = client.get_orders().get()
ordered_sample_ids = [o['name'] for o in existing_orders['orders']]

orders_info = []
for new_order in orders:

    sample_name = new_order['name']

    # Nothing to do when an order with this name was already requested
    if sample_name in ordered_sample_ids:
        print(f'Skipping {sample_name}: already requested.')
        continue

    # The following line will create the order in the server
    order_info = client.create_order(new_order).get()
    order_id = order_info['id']
    sample_name = order_info['name']
    orders_info.append(order_info)
    print(f'order {order_id} with {sample_name} has been placed.')

    # Short pause between two consecutive order creations
    sleep(2)

Skipping sample_8848_21404: already requested.
Skipping sample_8650_21398: already requested.
Skipping sample_8704_21284: already requested.
Skipping sample_8726_21476: already requested.
Skipping sample_8698_21346: already requested.


In [30]:
# order_id = order_info['id']
# order_id
# track_order(order_id, client)

# 3. Download

In [31]:
download_path = os.path.join(os.getcwd(),'downloads')
current_server_orders = client.get_orders().get()

# Define the dates (both inclusive) between which the desired orders were created.
# Dedicated names are used so that the `start_date` / `stop_date` search window
# defined in "Define filters" is not overwritten with `datetime.date` values,
# which would break re-running the search cells.
orders_created_from = datetime.date(2020,9,10)
orders_created_until = datetime.date(2020,9,10)
success_states = ['success', 'partial']

for order in current_server_orders['orders']:

    created_on = pd.to_datetime(order['created_on']).date()
    state = order['state']

    # Only finished orders can be downloaded
    if state not in success_states:
        continue

    # Only orders created inside the requested window
    if not (orders_created_from <= created_on <= orders_created_until):
        continue

    # Create the download folder
    download_order_path = os.path.join(download_path, order['name'])
    Path(download_order_path).mkdir(parents=True, exist_ok=True)

    # A folder that already has content is taken as "already downloaded"
    if not os.listdir(download_order_path):
        print(f'downloading {order["name"]} ')
        callback = api.write_to_file(directory=f'{download_order_path}/', overwrite=True)
        locations = client.download_order(order['id'], callback=callback)
    else:
        print(f'The folder {download_order_path} is not empty ')


The folder /home/dguerrero/2_DataNotebooks/4_PLANET_DOWNLOAD/downloads/sample_8704_21284 is not empty 
The folder /home/dguerrero/2_DataNotebooks/4_PLANET_DOWNLOAD/downloads/sample_8698_21346 is not empty 
The folder /home/dguerrero/2_DataNotebooks/4_PLANET_DOWNLOAD/downloads/sample_8726_21476 is not empty 
The folder /home/dguerrero/2_DataNotebooks/4_PLANET_DOWNLOAD/downloads/sample_8704_21284 is not empty 
The folder /home/dguerrero/2_DataNotebooks/4_PLANET_DOWNLOAD/downloads/sample_8650_21398 is not empty 
The folder /home/dguerrero/2_DataNotebooks/4_PLANET_DOWNLOAD/downloads/sample_8848_21404 is not empty 
