In [None]:
from scripts.functions import *
from parameters import *

To use your own API keys and parameters, copy the `parameters.py.dist` file into the same folder and remove the `.dist` extension from the copy. You can then replace the placeholder strings with your own keys. Only the `.dist` file will be pushed to the git repository.

# 1. Search items


### Create bounding box from centroids

In [None]:
# Read the sample centroids (';'-separated) and build the samples geodataframe used by the rest of the notebook.
# NOTE(review): buffer=350 is presumably expressed in the units of the projected CRS (EPSG:21148) — confirm in read_from_centroids.
samples_gdf = read_from_centroids(projected_epsg='EPSG:21148', buffer=350, sep=';')

In [None]:
# Display the samples geodataframe to check that the samples were read as expected
samples_gdf

### Connect to client

In [None]:
# Planet API client shared by the search, order and download steps.
# NOTE(review): PLANET_API_KEY presumably comes from the star import of `parameters` — keep the key there, never in the notebook.
client = api.ClientV1(api_key=PLANET_API_KEY)

### Define filters

In [None]:
# define test data for the filter
# Search window handed to build_request (datetime.datetime objects)
start_date = datetime.datetime(2009, 1, 1)
stop_date = datetime.datetime(2020, 12, 31)
# Upper bound on the cloud cover, handed to build_request.
# NOTE(review): presumably a fraction (0.02 = 2%), since cloud_score multiplies the cloud cover by 100 — confirm.
cloud_cover_lte = 0.02
# Minimum covered area in percent: it is divided by 100 and compared with >= against cover_perc further down
minimum_covered_area = 90 # included

### Define scores

In [None]:
# item_type_score
# Score associated with each Planet item type; handed to score_items together with the other scores.
item_type_score = {
    'PSScene4Band':9, 
    'PSScene3Band':7, 
    'PSOrthoTile':8,
    'REOrthoTile':0,
    'SkySatScene':0,
}

# season score
# Score associated with each month, keyed by month number (1 to 12).
# The literal is laid out in two columns: months 1-6 on the left, months 7-12 on the right.
# NOTE(review): presumably looked up with the acquisition month of each item — confirm in score_items.
months_score = {
    1: 5, 7:0,
    2: 5, 8:0,
    3: 5, 9:0,
    4: 0, 10:7,
    5: 0, 11:10,
    6: 0, 12:10,
}

# cloud_score

def cloud_score(cloud_cover):
    """Return the score associated with a cloud cover value.

    The value is multiplied by 100 before being compared, so an input of
    0.01 is evaluated as 1 (i.e. 1%).

    Scores:
        10 -> exactly 0
         5 -> above 0 and up to 1 (included)
         0 -> anything else
    """
    cloud_percent = cloud_cover * 100

    # Completely cloud free
    if cloud_percent == 0:
        return 10

    # Some clouds, but not more than 1
    if 0 < cloud_percent <= 1:
        return 5

    return 0
    
# Covered area

def cover_score(covered_area):
    """Return the score associated with a covered area value.

    The value is multiplied by 100 before being compared against the
    thresholds below; the first threshold that is reached gives the score.

    Scores:
        10 -> 99 or more
         5 -> 95 or more
         0 -> anything else
    """
    covered_percent = covered_area * 100

    # (threshold, score) pairs, from the most to the least demanding
    for threshold, score in ((99, 10), (95, 5)):
        if covered_percent >= threshold:
            return score

    return 0
    


## OPTION 1.1 Get items for individual samples (optional)
### Get items and metadata using filters

In [None]:
# Pick the sample to work with; this example uses the first row of the samples geodataframe
row_number = 0
sample_row = samples_gdf.iloc[row_number]

# The row label is the sample id; the AOI is the row geometry serialised with dumps and parsed back with json.loads
sample_id = sample_row.name
aoi_geometry = json.loads(dumps(sample_row.geometry))

In [None]:
%%time
# Build the search request for this AOI from the filters defined above, then fetch the matching items
request = build_request(aoi_geometry, start_date, stop_date, cloud_cover_lte)
items = get_items(sample_id, request, client)

# Transform items into a pandas dataframe with useful columns
metadata_df = get_dataframe(items)

### Calculate percentage of covered area

Calculate the percentage of covered area from the sample area with the item footprint

In [None]:
# Mutate metadata_df and add the percentage of cover area
add_cover_area(metadata_df, samples_gdf)
# Remove items that are under the minimum_covered_area threshold (given in percent, hence the /100)
metadata_df = metadata_df[metadata_df.cover_perc >= (minimum_covered_area/100)]

### Score items


In [None]:
# Score the remaining items with the item type, month, cloud cover and covered area scores defined above
scored_items = score_items(metadata_df, item_type_score, months_score, cloud_score, cover_score)

In [None]:
# Temporal selection; the flags are checked in this order:
#   by_every != 0 -> one item every `by_every` items (by_month is then ignored)
#   by_month True -> one item per month
#   otherwise     -> one item per year
by_month = False
by_every = 0

if by_every:
    selected_items = get_one_item_every_x(scored_items, every=by_every)

elif by_month:
    selected_items = get_one_item_per_month(scored_items)

else:
    selected_items = get_one_item_per_year(scored_items)

In [None]:
# Number of items kept by the temporal selection
len(selected_items)

In [None]:
# Inspect the items kept by the temporal selection
selected_items

### (Optional) Export thumbnails
Create thumbnails from the selected items (dataframe) and store them into a structured folder

In [None]:
# NOTE(review): the text above talks about the *selected* items, but metadata_df holds every item that
# passed the cover filter, not only selected_items — confirm which dataframe is meant to be exported.
save_thumb(metadata_df)

## OPTION 1.2 Get items for all plots and store into a big df


### Selection method
The loop searches all the images between the given start and end dates whose cloud cover does not exceed the given maximum.<br>
It then calculates how much of the sample area is covered by each item footprint and removes the items that are under the given threshold.<br>
The next step is to rank the items by the selected parameters.<br>
#### Temporal selection
The user has to choose how the images are selected over time: one per year, one per month, or one every x items.

In [None]:
# Temporal selection handed to run_multiprocess, which checks by_every first:
#   by_every != 0 -> one item every `by_every` items (by_month is then ignored)
#   by_month True -> one item per month
#   otherwise     -> one item per year
# With the values below (by_every = 1) the "one every x items" selection is the one applied.

by_month = False
by_every = 1

### Loop over all plots in parallel
Loop over all plots and get the items.

In [None]:
def run_multiprocess(index, row, srch_log_file, by_month=False, by_every=0, skip_items=None):
    """Search, score and select the items of one sample and pickle the selection.

    The selection is written to OUT_PIKL_PATH as '<sample_id>_every.p',
    '<sample_id>_month.p' or '<sample_id>_year.p' depending on the temporal
    selection. When that file already exists the sample is skipped, so an
    interrupted run can be resumed without searching again.

    Every error raised while processing the sample (including building the
    request) is reported on stdout and appended to `srch_log_file`; nothing is
    re-raised, so one failing sample does not stop the others.

    Besides its arguments the function reads these notebook-level names:
    OUT_PIKL_PATH, client, samples_gdf, start_date, stop_date, cloud_cover_lte,
    minimum_covered_area, item_type_score, months_score, cloud_score, cover_score.

    Args:
        index: Row label as yielded by `iterrows()`; not used, kept for the callers.
        row: Sample row; `row.name` is the sample id and `row.geometry` the AOI.
        srch_log_file: Path of the text file errors are appended to.
        by_month: If True (and `by_every` is falsy) keep one item per month.
        by_every: If non-zero keep one item every `by_every` items; takes
            precedence over `by_month`.
        skip_items: Optional collection of item ids to drop before scoring.

    Returns:
        None.
    """
    sample_id = row.name

    # The file suffix records which temporal selection produced the pickle
    if by_every:
        suffix = '_every.p'
    elif by_month:
        suffix = '_month.p'
    else:
        suffix = '_year.p'
    pickle_df_name = os.path.join(OUT_PIKL_PATH, str(sample_id)+suffix)

    if os.path.exists(pickle_df_name):
        print(f'Search for {sample_id} already saved.')
        return

    try:
        # Geometry conversion and request building are inside the try block on purpose:
        # a failure here has to reach the log file like any other per-sample error.
        aoi_geometry = json.loads(dumps(row.geometry))
        request = build_request(aoi_geometry, start_date, stop_date, cloud_cover_lte)

        print(f'Starting {sample_id}')
        items = get_items(sample_id, request, client)
        # Transform items into a pandas dataframe with useful columns
        metadata_df = get_dataframe(items)

        # Skip items with errors
        if skip_items:
            metadata_df = metadata_df[~metadata_df.id.isin(skip_items)]

        # Mutate metadata_df and add the percentage of cover area
        add_cover_area(metadata_df, samples_gdf)

        # Remove items that are under the minimum_covered_area threshold
        metadata_df = metadata_df[metadata_df.cover_perc >= (minimum_covered_area/100)]

        # Create a score for each item
        scored_items = score_items(metadata_df, item_type_score, months_score, cloud_score, cover_score)

        if by_every:
            # Filter scored_items and get one item every x items
            selected_items = get_one_item_every_x(scored_items, every=by_every)
        elif by_month:
            # Filter scored_items and get only one per month
            selected_items = get_one_item_per_month(scored_items)
        else:
            # Filter scored_items and get only one per year
            selected_items = get_one_item_per_year(scored_items)

        # Save into a pickled file
        selected_items.to_pickle(pickle_df_name)

        print(f'{sample_id} pickled.')

    except Exception as e:
        # Deliberate best effort: report, log and carry on with the next sample
        print(f'there was an error with the sample {sample_id}, please check the log files.')
        with open(srch_log_file, 'a') as lf:
            lf.write(f'"{sample_id}":{e}\n')

In [None]:
# Number of samples, i.e. number of searches that will be submitted to the pool
len(samples_gdf)

### Skip error items from logs
Uncomment the next cell if you have a log file with "no access to assets" elements, so the process will skip them.

<br> If you are using this option, please delete the previously pickled search files of the failed samples (the failed samples can be listed with the commands in step 4).

In [None]:
# Item ids to leave out of the search results (None = keep everything)
skip_items = None
# NOTE(review): run_multiprocess filters with `metadata_df.id.isin(skip_items)`, while the cells in step 4
# index every entry returned by get_no_access_assets_from_log (x[0] for the sample, x[1] for the item id).
# If the entries are (sample, item id) pairs, only the item ids (x[1]) should be passed here — confirm.
# skip_items = get_no_access_assets_from_log('logs/order_logs_20200916_15_02.txt')
# len(skip_items)

In [None]:
%%time
if __name__ == '__main__':

    # Create a log file
    now = datetime.datetime.now()
    formated_now = now.strftime('%Y%m%d_%H_%M')
    srch_log_file = os.path.join(LOG_PATH, f'search_logs_{formated_now}.txt')

    # Set the number of parallel processes
    pool = multiprocessing.Pool(4)

    # Keep the AsyncResult handles: an exception raised in a worker is only stored in its
    # handle, so throwing the handle away would hide the failure completely.
    pending = []
    try:
        for index, row in samples_gdf.iterrows():
            async_result = pool.apply_async(
                run_multiprocess,
                args=(index, row, srch_log_file, by_month, by_every, skip_items),
            )
            pending.append((index, async_result))

        pool.close()
        pool.join()
    finally:
        # Harmless after a clean close()/join(); stops the workers when the cell is interrupted or fails
        pool.terminate()

    # Surface whatever escaped run_multiprocess, using the same log format as the function itself
    for sample_index, async_result in pending:
        try:
            async_result.get()
        except Exception as e:
            print(f'there was an error with the sample {sample_index}, please check the log files.')
            with open(srch_log_file, 'a') as lf:
                lf.write(f'"{sample_index}":{e}\n')

### Read all the pickled files, merge and store them in a big df

In [None]:
# Read the pickles that match the temporal selection currently configured, instead of always
# reading the '*every.p' ones (by_every takes precedence over by_month, as in run_multiprocess).
if by_every:
    pickle_pattern = '*every.p'
elif by_month:
    pickle_pattern = '*month.p'
else:
    pickle_pattern = '*year.p'

pickled_files = glob.glob(os.path.join(OUT_PIKL_PATH, pickle_pattern))
len(pickled_files)

In [None]:
# Load every pickled selection and stack them into one dataframe.
# pd.concat raises ValueError on an empty list, i.e. when pickled_files found nothing.
all_df = pd.concat([pd.read_pickle(pkl) for pkl in pickled_files])

In [None]:
# Total number of rows read from the pickled selections
len(all_df)

In [None]:
# Inspect the merged selection
all_df

# 2. Order assets
### Create json request

In [None]:
# Product bundle(s) to request for each item type; handed to build_order_from_metadata.
# NOTE(review): each value looks like a comma-separated list of bundles in order of preference — confirm
# against the product bundles reference linked below.
# NOTE(review): 'SkySatScene' has an entry in item_type_score but none here — confirm that such items
# can never reach build_order_from_metadata.
products_bundles = {

    # It is not possible to ask for analytic_dn in PSScene3Band, so the next option is visual
    # for more info go to https://developers.planet.com/docs/orders/product-bundles-reference/
    'PSScene3Band': "analytic,visual",
    'PSScene4Band': "analytic_udm2,analytic_sr,analytic",
    'PSOrthoTile': "analytic_5b_udm2,analytic_5b,analytic_udm2,analytic,visual",
    'REOrthoTile': "analytic,visual",
}

In [None]:
# An order needs the dataframe with the filtered items (all_df) and the matching row of
# samples_gdf, which provides the sample_id and the geometry used to clip each item.

# Samples that have at least one item in all_df
samples_ids = list(all_df.sample_id.unique())

# One order per sample of samples_gdf that appears in samples_ids
orders = [
    build_order_from_metadata(all_df, idx, row, products_bundles)
    for idx, row in samples_gdf.iterrows()
    if idx in samples_ids
]

In [None]:
# Number of orders that are going to be requested
len(orders)

### Request order
<font color='red'>The following lines will start the order in the planet server, once the order is placed and running, there is no way to stop it.</font>

NOTE: The following loop skips the samples that have already been ordered. The check relies on the list of existing orders returned by the Planet server, and we are not sure how long orders remain listed there.

In [None]:
# Request the existing orders and store their sample_id (name)
# NOTE(review): no `pages` argument is given here, unlike the call in the download step — confirm that
# the default returns every order, otherwise samples that were already ordered could be ordered again.
current_server_orders = get_existing_orders(client)
ordered_sample_names = [order['name'] for order in current_server_orders]

now = datetime.datetime.now()
formated_now = now.strftime('%Y%m%d_%H_%M')
ordr_log_file = os.path.join(LOG_PATH, f'order_logs_{formated_now}.txt')


# Defined once, outside the loop, and fed the order explicitly: the retry wrapper does not
# depend on the loop variable, so there is no need to rebuild it for every order.
@backoff.on_exception(backoff.expo,planet.api.exceptions.OverQuota, max_time=360)
def place_order(order_request):
    """Create `order_request` on the Planet server (retried on OverQuota) and return the response body."""
    return client.create_order(order_request).get()


orders_info = []
for new_order in orders:

    # Make sure that the sample has not been ordered already
    sample_name = new_order['name']
    if sample_name in ordered_sample_names:
        continue

    try:
        # The following line will create the order in the server
        order_info = place_order(new_order)
        orders_info.append(order_info)

        order_id = order_info['id']
        sample_name = order_info['name']

        print(f'order {order_id} with {sample_name} has been placed.')

    except Exception as e:
        # Best effort: report on stdout first, then append to the log, and go on with the next order
        print(f'there was an error with the sample {sample_name}, please check the log files.')
        with open(ordr_log_file, 'a') as lf:
            lf.write(f'Sample {sample_name}:{e}\n')
print('Finished')

## Get status
The following `get_orders_status` line has to be re-run every time we want to know the status of the orders.

In [None]:
# Use the pages to limit the search, every page will display 20 orders.
# NOTE(review): pages=None presumably means "no limit" — confirm in get_orders_status.
# The result is displayed sorted by creation date.
get_orders_status(client, pages=None).sort_values(by=['created_on'])

# 3. Download

In [None]:
%%time
# Fetch the orders known to the server; the download loop in the next cell iterates over this list
current_server_orders = get_existing_orders(client, pages=None)
len(current_server_orders)

In [None]:
# Download the finished orders that were created inside a given date window.

now = datetime.datetime.now()
formated_now = now.strftime('%Y%m%d_%H_%M')
dw_log_file = os.path.join(LOG_PATH, f'download_logs_{formated_now}.txt')

# Select the dates (both included) in which the orders to download were created.
# Dedicated names are used so that the search window of step 1 (start_date / stop_date, which are
# datetime.datetime objects) is not overwritten with datetime.date objects when this cell runs.
download_start_date = datetime.date(2020,7,16)
download_stop_date = datetime.date(2020,9,18)
success_states = ['success', 'partial']


# Defined once, outside the loop, and fed its inputs explicitly: the retry wrapper does not
# depend on the loop variables, so there is no need to rebuild it for every order.
@backoff.on_exception(backoff.expo,planet.api.exceptions.OverQuota,max_time=360)
def download(order_id, callback):
    """Download every file of `order_id` through `callback`, retrying on OverQuota."""
    return client.download_order(order_id, callback=callback)


for order in current_server_orders:

    created_on = pd.to_datetime(order['created_on']).date()
    state = order['state']

    # Only finished orders created inside the window are downloaded
    if state not in success_states:
        continue
    if not (download_start_date <= created_on <= download_stop_date):
        continue

    # Create the download folder
    download_order_path = os.path.join(DOWNLOAD_PATH, order['name'])
    Path(download_order_path).mkdir(parents=True, exist_ok=True)

    # A .zip file inside the folder means the order was downloaded before
    if any('.zip' in f for f in os.listdir(download_order_path)):
        continue

    sample_name = order["name"]
    try:
        print(f'downloading {sample_name} ')
        callback = api.write_to_file(directory=f'{download_order_path}/', overwrite=True)
        responses = download(order['id'], callback)
        sleep(0.5)

    except Exception as e:
        print(f'There was an error with {sample_name}, please check download log file.')
        # Append: opening with 'w' would erase the errors of the previous orders
        with open(dw_log_file, 'a') as lf:
            lf.write(f'Sample {sample_name}:{e}\n')
print('done!')

# 4. Additional commands

In [None]:
# First field of every entry returned by get_no_access_assets_from_log for the given order log.
# NOTE(review): the log path is hard-coded to one specific run — point it to your own order log.
failed_samples = [x[0] for x in get_no_access_assets_from_log('logs/order_logs_20200915_12_41_yelena.txt')]
len(failed_samples)

In [None]:
# Second field of every entry returned by get_no_access_assets_from_log for the same order log.
# NOTE(review): the log path is hard-coded to one specific run — point it to your own order log.
failed_items_ids = [x[1] for x in get_no_access_assets_from_log('logs/order_logs_20200915_12_41_yelena.txt')]
failed_items_ids