In [None]:
from scripts.functions import *
from parameters import *

To use your own API keys and parameters, copy the `parameters.py.dist` file into the same folder and remove the `.dist` extension from the copy. You can then replace the placeholder strings with your own keys. Only the `.dist` file will be pushed to the git repository.

# 1. Search items


### Create bounding box from centroids

In [None]:
# Build the sample geometries from the centroids file.
# A projected EPSG has to be given for the centroids; EPSG:21148 is for Indonesia.
samples_gdf = read_from_centroids(
    projected_epsg='EPSG:21148',
    buffer=350,
    sep=';',
)

In [None]:
# Check the shape of the first geometry
# print(samples_gdf.iloc[0].geometry.area)
# samples_gdf.iloc[0].geometry

### Connect to client

In [None]:
# Planet API client used by the search and order cells below.
# PLANET_API_KEY is presumably provided by parameters.py -- TODO confirm.
client = api.ClientV1(api_key=PLANET_API_KEY)

### Define scores

In [None]:
# item_type_score: points given to an item according to its item type.
# Passed to score_items() together with the other scoring rules.
item_type_score = {
    'PSScene4Band':10, 
    'PSScene3Band':8, 
    'PSOrthoTile':8,
    'REOrthoTile':0,
    'SkySatScene':0,
}

# season score: points given to an item according to its month
# (presumably keys are calendar month numbers 1-12 -- TODO confirm against score_items).
# The literal is laid out in two columns: months 1-6 on the left, 7-12 on the right.
months_score = {
    1: 0, 7:8,
    2: 0, 8:10,
    3: 0, 9:10,
    4: 7, 10:8,
    5: 7, 11:0,
    6: 7, 12:0,
}

# cloud_score

def cloud_score(cloud_cover):
    """Score an item from its cloud cover fraction.

    The fraction is converted to a percentage first (0.01 -> 1%).

    Returns:
        10 when the cloud cover is exactly 0%, 5 when it is above 0% and
        at most 5%, and 0 in every other case.
    """
    cloud_pct = cloud_cover * 100

    if cloud_pct == 0:
        return 10
    if 0 < cloud_pct <= 5:
        return 5
    return 0
    
# Covered area

def cover_score(covered_area):
    """Score an item from the fraction of the sample it covers.

    The fraction is converted to a percentage first (0.01 -> 1%).

    Returns:
        10 when at least 99% is covered, 5 when at least 95% is covered,
        and 0 otherwise.
    """
    covered_pct = covered_area * 100

    if covered_pct >= 99:
        return 10
    return 5 if covered_pct >= 95 else 0
    


## OPTION 1.2 Get items for all plots and store into a big df


### Selection method
The loop searches for all the images between the given start and end dates whose cloud cover is at or below the given maximum (`cloud_cover_lte`).<br>
It then calculates how much of each sample is covered by the item footprint and removes the items that fall under the given threshold (`minimum_covered_area`).<br>
The next step is to rank the remaining items using the scores defined above. <br>
#### Temporal selection
The user has to select how often an image is picked: one per year, one per month, or one every x images.

In [None]:
# Search filter parameters; run_multiprocess() reads them as globals.
start_date = datetime.datetime(2017, 1, 1)
stop_date = datetime.datetime(2020, 12, 31)
# Cloud cover limit passed to build_request (presumably a fraction, 0.10 = 10% -- TODO confirm)
cloud_cover_lte = 0.10
minimum_covered_area = 90 # minimum cover percentage, inclusive; divided by 100 before it is compared with cover_perc

# Temporal selection, checked in this order by run_multiprocess():
#   by_every truthy -> one item every `by_every` items
#   by_month True   -> one item per month
#   otherwise       -> one item per year (the default)

by_month = False
by_every = None

### Loop over all plots in parallel
Loop over all plots and get the items.

In [None]:
def run_multiprocess(index, row, srch_log_file, by_month=False, by_every=0, skip_items=None):
    """Search, score and select the items for one sample and pickle the result.

    Reads start_date, stop_date, cloud_cover_lte, minimum_covered_area,
    client, samples_gdf, the scoring rules and OUT_PIKL_PATH from the
    module globals.

    Args:
        index: Row label coming from samples_gdf.iterrows(); not used here.
        row: Sample row; row.name is the sample id and row.geometry the
            area of interest.
        srch_log_file: Path of the log file that receives one line per
            failed sample.
        by_month: Select one item per month when true (and by_every is falsy).
        by_every: When truthy, select one item every `by_every` items; takes
            precedence over by_month.
        skip_items: Optional sequence whose entries hold an item id at
            position 1; those items are dropped from the search results.

    Returns:
        None. The selected items are written to
        <OUT_PIKL_PATH>/<sample_id>_{every,month,year}.p, and nothing is done
        when that file already exists. Errors raised while searching or
        scoring are caught, reported on stdout and appended to srch_log_file.
    """
    sample_id = row.name
    aoi_geometry = json.loads(dumps(row.geometry))

    # The pickle suffix records which temporal selection produced the file
    if by_every:
        suffix = '_every.p'
    elif by_month:
        suffix = '_month.p'
    else:
        suffix = '_year.p'
    pickle_df_name = os.path.join(OUT_PIKL_PATH, str(sample_id) + suffix)

    # An existing pickle means this sample has already been searched
    if os.path.exists(pickle_df_name):
        print(f'Search for {sample_id} already saved.')
        return

    request = build_request(aoi_geometry, start_date, stop_date, cloud_cover_lte)

    try:
        print(f'Starting {sample_id}')
        found_items = get_items(sample_id, request, client)

        # Turn the items into a pandas dataframe with useful columns
        metadata_df = get_dataframe(found_items)

        # Drop the items that were reported with errors
        if skip_items:
            ids_to_skip = [entry[1] for entry in skip_items]
            metadata_df = metadata_df[~metadata_df.id.isin(ids_to_skip)]

        # add_cover_area mutates metadata_df, adding the percentage of cover area
        add_cover_area(metadata_df, samples_gdf)

        # Keep only the items that reach the minimum_covered_area threshold
        enough_cover = metadata_df.cover_perc >= (minimum_covered_area/100)
        metadata_df = metadata_df[enough_cover]

        # Give every remaining item a score
        scored_items = score_items(metadata_df, item_type_score, months_score, cloud_score, cover_score)

        # Temporal selection: every x items, one per month, or one per year
        if by_every:
            selected_items = get_one_item_every_x(scored_items, every=by_every)
        elif by_month:
            selected_items = get_one_item_per_month(scored_items)
        else:
            selected_items = get_one_item_per_year(scored_items)

        # Store the selection as a pickled dataframe
        print(f'Final lenght: {len(selected_items)}')
        selected_items.to_pickle(pickle_df_name)

        print(f'{sample_id} pickled.')

    except Exception as e:
        print(f'there was an error with the sample {sample_id}, please check the log files.')
        with open(srch_log_file, 'a') as lf:
            lf.write(f'"{sample_id}":{e}\n')

In [None]:
# Number of samples (rows) that the search loop will go through
len(samples_gdf)

### Skip error items from logs
Uncomment the lines in the next cell if you have a log file with "no access to assets" entries, so the process will skip those items.

<br> If you are using this option, please delete the pickled search files of the failed samples first (find the failed samples with the commands in section 3, "Additional commands").

In [None]:
# Items to leave out of the search results; None disables skipping.
# When set, run_multiprocess() uses element [1] of each entry as the item id to drop.
skip_items = None
# skip_items = get_no_access_assets_from_log('logs/order_logs_20200925_14_56.txt')
# len(skip_items)

In [None]:
# Number of samples (rows) in samples_gdf
len(samples_gdf)

In [None]:
%%time
if __name__ == '__main__':

    # Create a log file
    now = datetime.datetime.now()
    formated_now = now.strftime('%Y%m%d_%H_%M')
    srch_log_file = os.path.join(LOG_PATH, f'search_logs_{formated_now}.txt')
    
    # Set the number of parallel processes
    pool = multiprocessing.Pool(10)
    
    for index, row in samples_gdf.iterrows():
        pool.apply_async(run_multiprocess, args=(index, row, srch_log_file, by_month, by_every, skip_items))
        
    pool.close()
    pool.join()

### Read all the pickled files, merge and store them in a big df

In [None]:
# Read the pickles written by run_multiprocess for the selection mode in use.
# The files end in '_every.p', '_month.p' or '_year.p', so a fixed '*every.p'
# pattern finds nothing with the default one-image-per-year configuration.
if by_every:
    pickle_suffix = '_every.p'
elif by_month:
    pickle_suffix = '_month.p'
else:
    pickle_suffix = '_year.p'

# Sorted so the merged dataframe does not depend on the file system order
pickled_files = sorted(glob.glob(os.path.join(OUT_PIKL_PATH, f'*{pickle_suffix}')))
len(pickled_files)

In [None]:
# Load every per-sample pickle and stack them into one dataframe.
# NOTE(review): unpickling can execute arbitrary code; only read pickles written by this notebook.
all_df = pd.concat(list(map(pd.read_pickle, pickled_files)))
# all_df.to_pickle('searches/NAME_OF_PICKLE_FILE.p')
# all_df = pd.read_pickle('searches/NAME_OF_PICKLE_FILE.p')

In [None]:
# Total number of items gathered from all the pickled searches
print(f'there are {len(all_df)} items in the current df')

### (OPTIONAL STEP) Add clear percent metadata (udm2) to images after 2018
This step is intended to be used when creating a dense time series (using by_every or by_month option), not for one image per year.

For more info about the udm2 metadata refer to: https://developers.planet.com/docs/data/udm-2/

In [None]:
# Extract the clear_percent and clear_confidence_percent metadata from the
# items which have this data; items without it get None.
#
# The two columns are built positionally and assigned in one go. Writing row
# by row with all_df.at[idx, ...] is unsafe here: all_df is a concat of
# per-sample dataframes, so index labels can repeat, and a label-based write
# would then overwrite every row sharing that label.
clear_percents = []
clear_confidences = []
for item_metadata in tqdm(all_df['metadata'], total=len(all_df)):
    properties = item_metadata['properties']
    if 'clear_percent' in properties:
        clear_percents.append(properties['clear_percent'])
        # .get(): tolerate an item that has clear_percent but no confidence value
        clear_confidences.append(properties.get('clear_confidence_percent'))
    else:
        clear_percents.append(None)
        clear_confidences.append(None)

all_df['clear_percent'] = clear_percents
all_df['clear_confidence_percent'] = clear_confidences

In [None]:
# After adding this metadata we can filter the images according to our specific needs.
# Minimum clear_percent an item must reach to be kept (compared with >=;
# presumably on a 0-100 scale like the udm2 clear_percent field -- TODO confirm).
min_clear_area = 55

In [None]:
# Keep the items whose clear_percent reaches min_clear_area; the ones below it are left out
reaches_min_clear = all_df.clear_percent >= min_clear_area
items_with_clear_percent = all_df[reaches_min_clear]
print(f'There are {len(items_with_clear_percent)} items with more than {min_clear_area} clear percentage')

# Items without clear_percent metadata are kept aside so they can be added back
lacks_clear_percent = all_df.clear_percent.isnull()
items_without_clear_percent = all_df[lacks_clear_percent]
print(f'There are {len(items_without_clear_percent)} items whose do not have clear_percentage metadata')

In [None]:
# Now we can merge them back into all_df: the items that reached min_clear_area
# plus the items that have no clear_percent metadata.
# NOTE: this rebinds all_df, so the items below min_clear_area are gone from here on.
all_df = pd.concat([items_with_clear_percent, items_without_clear_percent])
print(f'There is a total of {len(all_df)} in the current search')

In [None]:
# Number of items currently in all_df
len(all_df)

# 2. Order assets
### Create json request

In [None]:
# Product bundles requested for each item type, as a comma-separated string.
# Passed to build_order_from_metadata() when the orders are built.
products_bundles = {

    # It is not possible to ask for analytic_dn in PSScene3Band, so the next option is visual.
    # For more info go to https://developers.planet.com/docs/orders/product-bundles-reference/
    'PSScene3Band': "analytic,visual",
    'PSScene4Band': "analytic,analytic_udm2,analytic_sr",
    'PSOrthoTile': "analytic,analytic_5b_udm2,analytic_5b,analytic_udm2,visual",
    'REOrthoTile': "analytic,visual",
}

In [None]:
# To create the order we need a dataframe with filtered items,
# and a samples_gdf with sample_id and geometry to clip each item.

# Set a prefix for the order name:
prefix = ''
partial = False

# Build an order for each sample that has items in all_df and store it in the orders list.
orders = []
samples_ids = list(all_df.sample_id.unique())
# Set copy of samples_ids: constant-time membership test instead of scanning
# the whole list once per sample.
samples_with_items = set(samples_ids)
for idx, row in samples_gdf.iterrows():
    if idx in samples_with_items:
        order = build_order_from_metadata(all_df, idx, row, products_bundles, prefix, partial)
        orders.append(order)

In [None]:
# Number of orders built (one per sample that has items in all_df)
len(orders)

### Request order
<font color='red'>The following lines will start the order in the planet server, once the order is placed and running, there is no way to stop it.</font>

NOTE: The following loop will skip the samples that have already been downloaded, however it's based on the existing_orders request, and we are not sure how long the requests will remain in the planet server.

In [None]:
# You can use the pages parameter to limit the number of pages to be consulted.
# Every page has 20 elements, so it is highly recommended to leave it as None to avoid duplicate orders.
# Limit it only when you are sure that the samples have not been ordered before.
pages = None

# Request the existing orders and store their sample_id (name)
current_server_orders = get_existing_orders(client, pages=pages)
ordered_sample_names = [order['name'] for order in current_server_orders]
# Set copy for fast lookups; names placed during this run are added to it as well
already_ordered = set(ordered_sample_names)

now = datetime.datetime.now()
formated_now = now.strftime('%Y%m%d_%H_%M')
ordr_log_file = os.path.join(LOG_PATH, f'order_logs_{formated_now}.txt')


# Defined once instead of on every loop iteration. The call is retried with
# exponential backoff, for at most 360 seconds, on OverQuota / TooManyRequests.
@backoff.on_exception(backoff.expo,(planet.api.exceptions.OverQuota,
                                   planet.api.exceptions.TooManyRequests),max_time=360)
def place_order(order_request):
    """Create `order_request` in the Planet server and return the response body."""
    response = client.create_order(order_request).get()
    return response


orders_info = []
pbar = tqdm(total=len(orders))
for new_order in orders:

    # Make sure that the sample has not been ordered already
    sample_name = new_order['name']
    if sample_name not in already_ordered:

        try:
            # The following line will create the order in the server
            order_info = place_order(new_order)
            orders_info.append(order_info)

            # A placed order cannot be stopped, so remember the name and never
            # place it twice if it shows up again in `orders`.
            already_ordered.add(sample_name)

            order_id = order_info['id']
            sample_name = order_info['name']

            print(f'order {order_id} with {sample_name} has been placed.')

        except Exception as e:
            with open(ordr_log_file, 'a') as lf:
                print(f'there was an error with the sample {sample_name}, please check the log files.')
                lf.write(f'Sample {sample_name}:{e}\n')
    pbar.update(1)
pbar.close()
print('Finished')

# 3. Additional commands

In [None]:
# Sample ids (first element of each entry) listed as "no access to assets" in the order log
failed_samples = [x[0] for x in get_no_access_assets_from_log('logs/order_logs_20200925_14_56.txt')]
# A bare len(...) in the middle of a cell is evaluated and discarded, so print it
print(len(failed_samples))
print(failed_samples)

In [None]:
# Item ids (second element of each entry) read from the order log
failed_items_ids = [x[1] for x in get_no_access_assets_from_log('logs/order_logs_20200925_14_56.txt')]
failed_items_ids