In [1]:
import glob
import multiprocessing

In [2]:
from scripts.functions import *

In [3]:
from parameters import *

To use your own API keys and parameters, copy the `parameters.py.dist` file into the same folder and remove the `.dist` extension. You can then replace the placeholder strings with your own keys. Only the `.dist` file will be pushed to the remote git repository.

# 1. Search items
### Get the samples dataframe

From a geojson plots file, create a GeoPandas GeoDataFrame that stores the geometry and the id of each plot. It is used as a geometry filter and to calculate the percentage of each plot's area covered by the items.

In [None]:
# Load a previously pickled samples frame (presumably a GeoDataFrame).
# NOTE(review): the next cell rebuilds `samples_gdf` from FILENAME whenever that
# file exists, so only one of the two cells is presumably needed — confirm
# which source is intended.
samples_gdf = pd.read_pickle('shp/samples.p')

In [None]:
# Build the samples GeoDataFrame from a space-separated text file of points.
if os.path.isfile(FILENAME):
    # Keep only the first `nb_rows` rows (use len(df) to keep them all).
    nb_rows = 1000
    df = pd.read_csv(FILENAME, sep=' ').head(nb_rows)

    # Build every point in one vectorised call instead of one `.loc` lookup
    # per row; geometries are matched to the ids by position.
    samples_gdf = gpd.GeoDataFrame(
        data={'geometry': gpd.points_from_xy(df[FILE_LNG], df[FILE_LAT])},
        index=df[FILE_ID],
        crs="EPSG:4326",
    )
    samples_gdf.index.names = ['id']

    # Buffer the points in the ESRI:54032 projection, then go back to
    # EPSG:4326 and replace each buffer by its bounding box.
    # NOTE(review): BUFFER_SIZE is presumably expressed in the units of
    # ESRI:54032 (metres) — confirm.
    samples_gdf = samples_gdf.to_crs('ESRI:54032')
    samples_gdf['geometry'] = samples_gdf['geometry'].buffer(BUFFER_SIZE)
    samples_gdf = samples_gdf.to_crs("EPSG:4326")
    samples_gdf['geometry'] = samples_gdf['geometry'].envelope
else:
    # Make the skipped build visible instead of silently keeping whatever
    # `samples_gdf` was defined before.
    print(f'{FILENAME} not found: samples_gdf was not rebuilt.')

### Connect to client

In [4]:
# Planet API client used by every search, order and download cell below.
# PLANET_API_KEY is presumably provided by the `parameters` module imported above.
client = api.ClientV1(api_key=PLANET_API_KEY)

### Define filters

In [None]:
# Filter values for the item search (passed to build_request) and for the
# covered-area filter applied to the results.
start_date = datetime.datetime(2009, 1, 1)
stop_date = datetime.datetime(2020, 12, 31)
# Upper bound on cloud cover; presumably a fraction (0.01 = 1%), matching the
# scaling used in cloud_score — TODO confirm against build_request.
cloud_cover_lte = 0.01
# Minimum covered area in percent; it is divided by 100 and compared with >=
# where it is used, so the bound itself is included.
minimum_covered_area = 90 # included

### Define scores

In [None]:
# item_type_score: score given to each item type. Both PSScene types score 8,
# PSOrthoTile scores 10, REOrthoTile and SkySatScene score 0.
# Presumably a higher score means a preferred item — see score_items.
item_type_score = {
    'PSScene4Band':8, 
    'PSScene3Band':8, 
    'PSOrthoTile':10,
    'REOrthoTile':0,
    'SkySatScene':0,
}

# season score: score per month number (1-12). The left column lists months
# 1-6 and the right column months 7-12: months 1-3 score 5, months 4-9 score 0,
# month 10 scores 7 and months 11-12 score 10.
months_score = {
    1: 5, 7:0,
    2: 5, 8:0,
    3: 5, 9:0,
    4: 0, 10:7,
    5: 0, 11:10,
    6: 0, 12:10,
}

# cloud_score

def cloud_score(cloud_cover):
    """Score an item from its cloud cover.

    ``cloud_cover`` is multiplied by 100 before being compared, so the
    thresholds below are expressed in percent (1 means 1%).

    Returns 10 when the cover is exactly zero, 5 when it is above zero and
    at most 1%, and 0 in every other case.
    """
    percent = cloud_cover * 100

    if percent == 0:
        return 10

    return 5 if 0 < percent <= 1 else 0
    
# Covered area

def cover_score(covered_area):
    """Score an item from the share of the sample area it covers.

    ``covered_area`` is multiplied by 100 before being compared, so the
    thresholds are expressed in percent.

    Returns 10 from 99% upwards, 5 from 95% up to (but excluding) 99%, and
    0 below 95%.
    """
    percent = covered_area * 100

    # Thresholds are checked from the highest down; the first one reached wins.
    for threshold, score in ((99, 10), (95, 5)):
        if percent >= threshold:
            return score

    return 0
    


## OPTION: 1.1 Get items for individual samples ((optional))
### Get items and metadata using filters

In [None]:
# Define the AOI from a single row of the samples geodataframe.
# row_number is a zero-based position, so 2 selects the third sample
# (not the first one).
row_number = 2
aoi_geometry = json.loads(dumps(samples_gdf.iloc[row_number].geometry))
sample_id = samples_gdf.iloc[row_number].name

In [None]:
%%time
# Build the search request from the AOI and the filter values, then fetch the
# items for this sample.
request = build_request(aoi_geometry, start_date, stop_date, cloud_cover_lte)
items, response = get_items(sample_id, request, client)

# Transform items into a pandas dataframe with useful columns
metadata_df = get_dataframe(items)

In [None]:
sample_id

In [None]:
len(metadata_df)

In [None]:
# Send the same request straight through the client's quick search.
result = client.quick_search(request)

In [None]:
result

In [None]:
# Collect the search results into a list, capped at 1,000,000 items.
# NOTE(review): this rebinds `items`, replacing the value returned by
# get_items in the earlier cell.
items = list(result.items_iter(limit=1000000))

In [None]:
result.get()

In [None]:
metadata_df['date']

### Calculate percentage of covered area

Calculate the percentage of the sample area that is covered by each item's footprint.

In [None]:
# Mutate metadata_df and add the share of the sample area covered by each item
# (read back in the next cell as the `cover_perc` column).
add_cover_area(metadata_df, samples_gdf)

In [None]:
# Keep only the items covering at least `minimum_covered_area` percent of the
# sample; cover_perc is compared as a fraction, hence the division by 100.
metadata_df = metadata_df[metadata_df.cover_perc >= (minimum_covered_area/100)]

### Score items


In [None]:
# Score the remaining items with the item-type, month, cloud and cover scoring
# rules defined in the "Define scores" section.
scored_items = score_items(metadata_df, item_type_score, months_score, cloud_score, cover_score)

In [None]:
# Reduce the scored items to a single item per year.
selected_items = get_one_item_per_year(scored_items)

### ((Optional)): Export thumbnails
Create thumbnails from the selected items (dataframe) and store them into a structured folder

In [None]:
# Write thumbnails for the selected items; the output folder layout is decided
# inside save_thumb.
save_thumb(selected_items)

## OPTION 1.2 Get items for all plots and store into a big df
### Loop over all plots
Loop over all plots and get the items.

In [None]:
# Restrict the run to the samples at positions 100-199.
# NOTE(review): this rebinds samples_gdf to a slice of itself, so running the
# cell a second time slices the already-sliced frame; reload samples_gdf
# before re-running.
samples_gdf=samples_gdf[100:200]

In [None]:
# Prepare the folders used by the search workers: one for the error log and
# one that receives a pickled result per sample.

LOG_PATH = os.path.join(os.getcwd(), 'logs')
os.makedirs(LOG_PATH, exist_ok=True)
srch_log_file = os.path.join(LOG_PATH, 'search_logs.txt')

OUT_PIKL_PATH = os.path.join(os.getcwd(), 'searches')
os.makedirs(OUT_PIKL_PATH, exist_ok=True)

def run_multiprocess(index, row):
    """Search, score and pickle the selected yearly items for one sample.

    Args:
        index: Index label of the row. It is accepted so the caller can pass
            the pair produced by ``iterrows`` but is not used here.
        row: Row of ``samples_gdf``. Its ``geometry`` is the search AOI and
            its ``name`` is the sample id.

    The frame returned by ``get_one_item_per_year`` is pickled to
    ``OUT_PIKL_PATH/<sample_id>.p``. A sample whose pickle already exists is
    skipped. Any exception raised while searching, scoring or saving is
    appended to ``srch_log_file`` and not re-raised, so one failing sample
    does not stop the others.
    """
    aoi_geometry = json.loads(dumps(row.geometry))
    sample_id = row.name

    pickle_df_name = os.path.join(OUT_PIKL_PATH, str(sample_id)+'.p')

    if os.path.exists(pickle_df_name):
        print(f'Search for {sample_id} already saved.')
        return

    request = build_request(aoi_geometry, start_date, stop_date, cloud_cover_lte)

    try:
        print(f'Starting {sample_id}')
        items, response = get_items(sample_id, request, client)
        # Transform items into a pandas dataframe with useful columns
        metadata_df = get_dataframe(items)

        # Mutate metadata_df and add the percentage of cover area
        add_cover_area(metadata_df, samples_gdf)

        # Remove items that are under the minimum_covered_area threshold
        metadata_df = metadata_df[metadata_df.cover_perc >= (minimum_covered_area/100)]

        # Create a score for each item
        scored_items = score_items(metadata_df, item_type_score, months_score, cloud_score, cover_score)

        # Filter scored_items and get only one per year
        selected_items = get_one_item_per_year(scored_items)

        # Save into a pickled file
        selected_items.to_pickle(pickle_df_name)

        print(f'{sample_id} pickled.')

    except Exception as e:
        # Append, so a failure does not erase the ones logged before it, and
        # write the message as one line (joining the string itself with '\n'
        # would put every character on its own line).
        with open(srch_log_file, 'a') as lf:
            lf.write(f'{sample_id}:{e}\n')

In [None]:
%%time
if __name__ == '__main__':
    # The context manager tears the workers down even when the cell is
    # interrupted before close()/join() are reached.
    with multiprocessing.Pool(8) as pool:
        pending = [
            (index, pool.apply_async(run_multiprocess, args=(index, row,)))
            for index, row in samples_gdf.iterrows()
        ]
        pool.close()
        pool.join()

    # apply_async keeps a worker's exception on its AsyncResult; it is only
    # surfaced by get(). Report such failures instead of losing them, without
    # aborting the cell.
    for index, async_result in pending:
        try:
            async_result.get()
        except Exception as e:
            print(f'{index} failed in worker: {e}')

In [None]:
# Collect the per-sample pickles, which are written as '<sample_id>.p'.
# The pattern is '*.p' because a bare '*p' would also match any other file
# whose name merely ends with the letter "p".
pkl_list = glob.glob(os.path.join(OUT_PIKL_PATH, '*.p'))

In [None]:
# Stack every pickled per-sample result into a single dataframe.
# NOTE(review): pd.concat raises ValueError when pkl_list is empty.
all_df = pd.concat([pd.read_pickle(pkl) for pkl in pkl_list])

In [None]:
# Persist the combined search results to a pickle in the working directory.
all_df.to_pickle('congo_100_200.p')

In [None]:
# Reload the combined search results from the pickle written in the previous
# cell (presumably so the search does not have to be repeated in a new session).
all_df = pd.read_pickle('congo_100_200.p')

In [None]:
all_df[all_df.sample_id==359827]

# 2. Order assets

In [None]:
# Orders are built from two inputs: the filtered items (all_df) and
# samples_gdf, which supplies each sample_id and the geometry used to clip
# its items.

# One order per sample, in samples_gdf row order.
orders = [
    build_order_from_metadata(all_df, samples_gdf, sample_id=sample_idx)
    for sample_idx in samples_gdf.index
]

### Request order
<font color='red'>The following lines will start the order in the planet server, once the order is placed and running, there is no way to stop it.</font>

NOTE: The following loop skips the samples that have already been ordered. However, the check relies on the `existing_orders` request, and we are not sure how long orders remain listed on the Planet server.

In [None]:
# Request the existing orders and store their sample_id (name).
# NOTE(review): only the first response of get_orders() is read here, while
# the download section walks every page — orders listed on later pages would
# not be detected as already requested. Confirm whether that is acceptable.
existing_orders = client.get_orders().get()
ordered_sample_ids = [o['name'] for o in existing_orders['orders']]


# A decorator can only be applied to a function or class definition, so the
# retried call lives in its own helper (decorating the assignment directly is
# a SyntaxError).
@backoff.on_exception(backoff.expo,
                      (planet.api.exceptions.OverQuota,
                       planet.api.exceptions.BadQuery),
                      max_time=360)
def place_order(order_request):
    """Create ``order_request`` on the server and return the order info.

    The call is retried with exponential backoff on OverQuota and BadQuery,
    within the ``max_time=360`` limit given to the decorator.
    """
    return client.create_order(order_request).get()


orders_info = []
for new_order in orders:

    # Make sure that the sample is not already requested
    if new_order['name'] in ordered_sample_ids:
        sample_name = new_order['name']
        print(f'Skipping {sample_name}: already requested.')
        continue

    # The following line will create the order in the server
    order_info = place_order(new_order)
    order_id = order_info['id']
    sample_name = order_info['name']
    orders_info.append(order_info)
    print(f'order {order_id} with {sample_name} has been placed.')
    sleep(2)

In [None]:
# order_id = order_info['id']
# order_id
# track_order(order_id, client)

# 3. Download

In [6]:
# Local folder that receives one sub-folder per downloaded order.
download_path = os.path.join(os.getcwd(),'downloads')

# Search all the requested orders per page
# Fixed api.models NEXT_KEY parameter from "_next" to "next"
# (the override is set on the first paged response here and again on every
# page inside the loop, before the page body is read).

ordered_orders = client.get_orders()
ordered_orders.NEXT_KEY = "next"
order_pages=[]

# We can limit the search to certain number of pages
# if we leave as none, will search over all of them
limit_to_x_pages = None
for page in ordered_orders.iter(limit_to_x_pages):
    page.NEXT_KEY = "next"
    order_pages.append(page.get())

# Flatten the pages into a single list of order dictionaries.
current_server_orders = [order for page in order_pages for order in page['orders']]

In [8]:
# Make sure the log folder exists and point the download log at it.
LOG_PATH = os.path.join(os.getcwd(), 'logs')
os.makedirs(LOG_PATH, exist_ok=True)
dw_log_file = os.path.join(LOG_PATH, 'download_logs.txt')

In [19]:
# Define the (inclusive) date window in which the desired orders were created.
# Dedicated names are used so that the search filter's start_date/stop_date
# (datetime objects) are not overwritten with date objects.
order_start_date = datetime.date(2020,9,10)
order_stop_date = datetime.date(2020,9,10)
success_states = ['success', 'partial']


@backoff.on_exception(backoff.expo,planet.api.exceptions.OverQuota,max_time=360)
def download_order_with_retry(order_id, callback):
    """Download the order ``order_id``, retrying with backoff on OverQuota."""
    client.download_order(order_id, callback=callback)


for order in current_server_orders:

    created_on = pd.to_datetime(order['created_on']).date()
    state = order['state']

    # Only finished orders created inside the date window are downloaded.
    if state not in success_states:
        continue
    if not (order_start_date <= created_on <= order_stop_date):
        continue

    # Create the download folder
    download_order_path = os.path.join(download_path, order['name'])
    Path(download_order_path).mkdir(parents=True, exist_ok=True)

    # A non-empty folder is treated as already downloaded.
    if os.listdir(download_order_path):
        print(f'The folder {download_order_path} is not empty ')
        continue

    try:
        print(f'downloading {order["name"]} ')
        callback = api.write_to_file(directory=f'{download_order_path}/', overwrite=True)
        download_order_with_retry(order['id'], callback)

    except Exception as e:
        # Log under the order's own name (`sample_id` is a leftover from the
        # search section), append so earlier failures are kept, and write the
        # message as a single line.
        with open(dw_log_file, 'a') as lf:
            lf.write(f'{order["name"]}:{e}\n')

In [None]:
!curl -L -H "Authorization: api-key 7779c34186f04031b0734e1b7b7ad820" \
    'https://api.planet.com/auth/v1/experimental/public/my/subscriptions'
