In [1]:
import csv
import os
import random
from datetime import datetime
from datetime import timedelta
from itertools import combinations

import matplotlib.pyplot as plt
import pandas as pd
from shapely.geometry import shape, Polygon
from shapely.ops import unary_union

from planet_helpers import load_search_files


# Root folder for the notebook's input data (a relative path).
DATA_DIR = "../data"
# Folder handed to load_search_files as the location of the saved search files.
SEARCH_DIR = DATA_DIR + "/searches"



## Import Data

In [None]:
# Load the ACLED Ukraine events timeline CSV into a DataFrame.
# NOTE(review): timeline_df is not referenced by any other cell shown in this notebook.
timeline_df = pd.read_csv(DATA_DIR + "/ACLED_Ukraine_events_timeline.csv")

## Try out Functions

In [66]:
# Select a random search: draw a start index and load a single search file from SEARCH_DIR.
# NOTE(review): 130000 is presumably the number of saved search files — confirm.
# The random module is not seeded, so each run inspects a different search.
randint = random.randint(0, 130000)

searches = load_search_files(SEARCH_DIR, num_files=1, start_index=randint)

In [67]:
# Result items stored with the loaded search.
results = searches["results"]
# First coordinate ring of the first filter entry, used as the area of interest (AOI).
aoi = searches["filter"]["config"][0]["config"]["coordinates"][0]
# Create a Polygon geometry
polygon = Polygon(aoi)

# The "gt" bound of the second filter entry is treated as five days before the event,
# so the event date (YYYY-MM-DD string) is recovered by adding five days to it.
day_minus_5 = datetime.strptime(searches["filter"]["config"][1]["config"]["gt"], "%Y-%m-%dT%H:%M:%SZ")
event_date = (day_minus_5 + timedelta(days=5)).strftime("%Y-%m-%d")


In [68]:
# Sort results from highest to lowest clear_percent; items without the key count as 0.
# NOTE(review): results_example is not used by the other cells shown here.
results_example = sorted(results, key=lambda x: x["properties"].get("clear_percent", 0), reverse=True)

In [None]:
# Pick the best before/after image combinations for the event and display the "after" pick.
# NOTE(review): find_best is defined in the "Define Functions" cell further down, so on a
# fresh kernel this cell raises NameError unless that cell has been run first.
best_before, best_after = find_best(results, event_date, polygon)
best_after

In [None]:
# Show the derived event date (YYYY-MM-DD) for comparison with the acquisition days below.
print(event_date)

In [None]:
# For every acquisition day (in ascending order) print the day, then one indented line per
# result acquired on that day with its id and clear_percent (0 when missing), then a blank gap.
days = set(result["properties"]["acquired"].split("T")[0] for result in results)
for day in sorted(days):
    print(day)
    for result in results:
        # Substring test: matches when the day string occurs in the full "acquired" timestamp.
        if day in result["properties"]["acquired"]:
            print("\t", result["id"], result["properties"].get("clear_percent", 0))
    print("\n")

## Plot geoms

In [None]:
# Rebuild the AOI polygon from the search filter so this cell does not rely on earlier cells
# other than the one that loads `searches`.
aoi = searches["filter"]["config"][0]["config"]["coordinates"][0]
# Create a Polygon geometry
polygon = Polygon(aoi)

# Plot the polygon
fig, ax = plt.subplots()

if isinstance(polygon, Polygon):
    x, y = polygon.exterior.xy
    # This shape is the search AOI, so it is labelled as such (not as a union geometry).
    ax.fill(x, y, alpha=0.5, fc='red', ec='black', label='AOI')
    ax.legend()
ax.set_title("Area of interest")
plt.show()

In [14]:
# Collect the raw "geometry" entry of every result of the loaded search.
geoms = [x["geometry"] for x in searches["results"]]

In [None]:
# Convert dictionaries to Shapely geometry objects.
# `geoms` (built in the previous cell) is the only notebook-level list of geometry dicts;
# `day_result_geoms` exists only as a local variable inside find_best.
shapely_geometries = [shape(obj) for obj in geoms]

# Compute the union of all geometries
union_geometry = unary_union(shapely_geometries)

# Print the result
print(union_geometry)

In [None]:
# Plotting: every result footprint in light blue with the AOI drawn on top in red.
fig, ax = plt.subplots()

# Plot each original geometry; only the first one gets a legend entry, later ones use a
# label starting with "_" which matplotlib leaves out of the legend.
original_label = 'Original Geometries'
for geom in shapely_geometries:
    if isinstance(geom, Polygon):
        x, y = geom.exterior.xy
        ax.fill(x, y, alpha=0.1, fc='blue', ec='black', label=original_label)
        original_label = '_nolegend_'

# `polygon` is the search AOI (not the union of the footprints), so label it accordingly.
x, y = polygon.exterior.xy
ax.fill(x, y, alpha=0.5, fc='red', ec='black', label='AOI')
ax.legend()
plt.show()

In [None]:
# Plot the union geometry
fig, ax = plt.subplots()

# unary_union returns a single Polygon when the footprints touch or overlap, and a
# multi-part geometry otherwise; draw every polygon part instead of silently drawing nothing.
if isinstance(union_geometry, Polygon):
    union_parts = [union_geometry]
else:
    union_parts = list(getattr(union_geometry, "geoms", []))

for union_part in union_parts:
    if isinstance(union_part, Polygon):
        x, y = union_part.exterior.xy
        ax.fill(x, y, alpha=0.5, fc='red', ec='black', label='Union Geometry')
plt.show()


## Define Functions

In [71]:
def append_to_csv(file_path, data, header=None):
    """Append rows to a CSV file, writing the header first when the file has no content yet.

    Args:
        file_path: Path of the CSV file; it is created if it does not exist.
        data: Iterable of rows (each an iterable of cell values) to append.
        header: Optional row of column names. It is written only when the file is
            missing or empty, so repeated calls do not duplicate it.
    """
    # An existing but empty file still needs the header, so check the size as well.
    needs_header = bool(header) and (
        not os.path.isfile(file_path) or os.path.getsize(file_path) == 0
    )
    # newline='' lets the csv module control line endings itself.
    with open(file_path, 'a', newline='') as csvfile:
        writer = csv.writer(csvfile)
        if needs_header:
            writer.writerow(header)
        writer.writerows(data)

def get_combinations(elements):
    """Return every non-empty subset of ``elements`` as a list.

    Subsets are emitted by increasing size, in ``itertools.combinations`` order.
    Within each subset the items are ordered from highest to lowest
    ``clear_percent`` (items lacking the property count as 0).
    """
    def clear_percent(item):
        return item["properties"].get("clear_percent", 0)

    return [
        sorted(subset, key=clear_percent, reverse=True)
        for size in range(1, len(elements) + 1)
        for subset in combinations(elements, size)
    ]

def calculate_overlap(geometry, aoi):
    """Return the fraction of the AOI's area that is covered by ``geometry``."""
    covered = geometry.intersection(aoi)
    return covered.area / aoi.area

def calculate_weighted_clear_percentage(combo, aoi):
    """Calculate the area-weighted average clear percentage of a combination over the AOI.

    Results are processed in the order given. Each result contributes only the part
    of the AOI it covers that no earlier result in ``combo`` already covered; that
    part's area is the weight applied to the result's ``clear_percent`` (0 when the
    property is missing).

    Args:
        combo: Iterable of result dicts with a ``"geometry"`` entry that
            ``shapely.geometry.shape`` accepts and a ``"properties"`` dict.
        aoi: Shapely geometry of the area of interest.

    Returns:
        The weighted average clear percentage, or 0.0 when the combination covers no
        part of the AOI (this includes an empty ``combo``).
    """
    visible_area = Polygon()  # empty geometry: nothing of the AOI is covered yet
    total_visible_area = 0.0
    weighted_sum = 0.0
    for result in combo:
        geom = shape(result["geometry"])
        clear_percentage = result["properties"].get("clear_percent", 0)

        # Part of the AOI this result covers
        intersection = geom.intersection(aoi)

        # Keep only what earlier results have not already covered
        new_visible_area = intersection.difference(visible_area)

        # Remember the newly covered part for the following results
        visible_area = unary_union([visible_area, new_visible_area])

        # Weight this result's clear percentage by the area it newly contributes
        new_visible_area_size = new_visible_area.area
        weighted_sum += new_visible_area_size * clear_percentage
        total_visible_area += new_visible_area_size

    # Nothing covers the AOI: avoid dividing by zero and report no clear coverage.
    if total_visible_area == 0:
        return 0.0

    return weighted_sum / total_visible_area

def find_best_combination(day_results, aoi):
    """Pick the subset of one day's results with the highest weighted clear percentage.

    Only subsets whose combined footprint covers more than 95% of ``aoi`` are
    considered. On an exact tie in score the subset with fewer results wins.

    Returns:
        ``(ids, score)`` where ``ids`` is the list of result ids of the winning
        subset (``None`` if no subset qualifies) and ``score`` its weighted clear
        percentage (``-1`` if no subset qualifies).
    """
    winner_ids = None
    winner_score = -1
    winner_size = float('inf')

    for candidate in get_combinations(day_results):
        candidate_ids = [item["id"] for item in candidate]
        footprint = unary_union([shape(item["geometry"]) for item in candidate])

        # Skip subsets that leave too much of the AOI uncovered
        if not (calculate_overlap(footprint, aoi) > 0.95):
            continue

        score = calculate_weighted_clear_percentage(candidate, aoi)
        is_clearer = score > winner_score
        ties_with_fewer_images = score == winner_score and len(candidate) < winner_size
        if is_clearer or ties_with_fewer_images:
            winner_ids = candidate_ids
            winner_score = score
            winner_size = len(candidate)

    return winner_ids, winner_score

def find_best(results, event_date, aoi):
    """Find the best image combinations before and after a given event date.

    Results are grouped by acquisition day (the part of ``properties["acquired"]``
    before ``"T"``). A day is considered only if all of its footprints together cover
    more than 95% of the AOI; for such days the best combination is taken from
    ``find_best_combination``. Days equal to ``event_date`` are ignored.

    Args:
        results: Iterable of result dicts with ``"geometry"`` and ``"properties"``.
        event_date: Event day as a ``YYYY-MM-DD`` string; compared to the day strings
            lexicographically.
        aoi: The area of interest, either as a coordinate ring or as a Polygon
            (this notebook calls the function both ways).

    Returns:
        ``(best_before, best_after)``; each is ``(ids, weighted_clear_percentage)`` for
        the highest-scoring day on that side of the event, or ``(None, None)`` if no
        day qualifies. Days are visited in ascending order, so when several days
        share the top score the earliest one is kept, independent of hash ordering.
    """
    aoi_polygon = aoi if isinstance(aoi, Polygon) else Polygon(aoi)

    # Group the results by day in a single pass instead of re-scanning them per day.
    results_by_day = {}
    for result in results:
        day = result["properties"]["acquired"].split("T")[0]
        results_by_day.setdefault(day, []).append(result)

    best_day_results = {}
    for day in sorted(results_by_day):
        day_results = results_by_day[day]
        union_geometry = unary_union([shape(result["geometry"]) for result in day_results])

        # Cheap pre-check: if even all footprints together miss the AOI, no subset can qualify.
        if calculate_overlap(union_geometry, aoi_polygon) > 0.95:
            best_combination, best_weighted_average = find_best_combination(day_results, aoi_polygon)

            if best_combination:
                best_day_results[day] = (best_combination, best_weighted_average)

    best_before = None
    best_after = None

    # best_day_results was filled in ascending day order, so this loop is deterministic.
    for day, (_, day_score) in best_day_results.items():
        if day < event_date:
            if best_before is None or day_score > best_day_results[best_before][1]:
                best_before = day
        elif day > event_date:
            if best_after is None or day_score > best_day_results[best_after][1]:
                best_after = day

    best_before_result = best_day_results.get(best_before, (None, None))
    best_after_result = best_day_results.get(best_after, (None, None))

    return best_before_result, best_after_result

def process_data(data):
    """Turn loaded searches into CSV rows describing their best before/after imagery.

    For every search the AOI and event date are read from the search's own filter,
    results are restricted to ``quality_category == "standard"`` (results without
    that property are skipped), and ``find_best`` selects the best image
    combination before and after the event.

    Args:
        data: A list of search dicts, or a single search dict.

    Returns:
        A list with one row per search:
        ``[search_id, search_name, event_date, before_image_ids,
        before_clear_percent, after_image_ids, after_clear_percent]``.
        Image ids are joined with ``";"``; when no combination qualifies the ids
        cell is ``""`` and the clear-percent cell is ``None``.
    """
    # load_search_files(..., num_files=1) is used elsewhere in this notebook as a single
    # search mapping, so accept that shape as well as a list of searches.
    if isinstance(data, dict):
        data = [data]

    processed_data = []
    for search in data:
        search_name = search["name"]
        search_id = search["id"]
        filter_configs = search["filter"]["config"]
        aoi = filter_configs[0]["config"]["coordinates"][0]

        # Read the date bound from this search (not from the notebook-level `searches`).
        # The "gt" bound is treated as five days before the event.
        day_minus_5 = datetime.strptime(filter_configs[1]["config"]["gt"], "%Y-%m-%dT%H:%M:%SZ")
        event_date = (day_minus_5 + timedelta(days=5)).strftime("%Y-%m-%d")

        quality_results = [
            result for result in search["results"]
            if result["properties"].get("quality_category") == "standard"
        ]
        (before_ids, before_clear), (after_ids, after_clear) = find_best(quality_results, event_date, aoi)

        processed_data.append([
            search_id,
            search_name,
            event_date,
            ";".join(map(str, before_ids)) if before_ids else "",
            before_clear,
            ";".join(map(str, after_ids)) if after_ids else "",
            after_clear,
        ])

    return processed_data

## Main 

In [None]:
# create best matches for each search with columns search_id, before_image_id, before_cloud_cover/clarity etc., after_image_id, after_cloud_cover

# Destination file and column names, defined here so the cell runs on a fresh kernel.
# NOTE(review): the file name is a placeholder choice — adjust if another location is intended.
OUTPUT_CSV = DATA_DIR + "/best_matches.csv"
header = [
    "search_id",
    "search_name",
    "event_date",
    "before_image_ids",
    "before_clear_percent",
    "after_image_ids",
    "after_clear_percent",
]

start_index = 0
chunk_size = 10000

# NOTE(review): rows are appended, so re-running this cell adds the same rows again unless
# OUTPUT_CSV is removed first.
while True:
    # Load a chunk of files
    # assumes load_search_files returns an empty/falsy value once start_index is past the
    # last file — TODO confirm, otherwise this loop never ends
    json_data = load_search_files(SEARCH_DIR, chunk_size, start_index=start_index)
    if not json_data:
        break

    # Process the loaded data
    processed_data = process_data(json_data)

    # Append the processed data to the CSV file
    append_to_csv(OUTPUT_CSV, processed_data, header=header)

    # Update the start index for the next chunk
    start_index += chunk_size