In [4]:
import pandas as pd
import asyncio
import nest_asyncio
nest_asyncio.apply()
import getpass
from planet import Auth, Session
import json
from planet_helpers import set_filters, parse_polygon, load_search_files
from datetime import datetime
from tqdm import tqdm
import os

# Directory locations, relative to the notebook's working directory.
# DOWNLOAD_DIR is not referenced by any cell visible in this notebook.
DOWNLOAD_DIR = '../data/planet_data'
# Holds the JSONL filter files (one JSON filter per line, one file per event type).
FILTER_DIR = '../data/filters'
# Holds one JSON file per saved search; search results are later added to the same files.
SEARCH_DIR = "../data/searches_alt"

In [2]:
# Ask for the Planet credentials interactively so they are never written into the notebook.
user = input("Username: ")
pw = getpass.getpass()  # getpass reads the password without echoing it
# Log in through the planet SDK and persist the resulting credentials with Auth.store().
# NOTE(review): presumably this is what lets the later bare Session() calls
# authenticate — confirm against the installed planet SDK version.
auth = Auth.from_login(user,pw)
auth.store()

In [None]:
# Location table that both samples are joined against.
places = pd.read_csv("../data/places.csv")

# Columns kept from each sample; the non-event sample has no event id selected.
event_columns = ['location_id', 'event_date', 'event_id_cnty', 'timeline_id']
non_event_columns = ['location_id', 'event_date', 'timeline_id']

# Trim each sample to its id/date columns, then attach the place attributes
# through the shared location_id key (pandas' default inner join).
events = (
    pd.read_csv('../data/ACLED_Ukraine_events_sample.csv')[event_columns]
    .merge(places, on='location_id')
)
non_events = (
    pd.read_csv('../data/ACLED_Ukraine_non_events_sample.csv')[non_event_columns]
    .merge(places, on='location_id')
)

In [None]:
def build_filters(frame, window_days=5, desc=None):
    """Build one search filter per row of ``frame``.

    Parameters
    ----------
    frame : pandas.DataFrame
        Must provide a ``geometry`` column that ``parse_polygon`` accepts and
        an ``event_date`` column holding ``YYYY-MM-DD`` strings.
    window_days : int, default 5
        Number of days searched before and after each event date.
    desc : str, optional
        Label shown on the progress bar.

    Returns
    -------
    list
        The value returned by ``set_filters`` for each row, in row order.

    Raises
    ------
    ValueError
        If an ``event_date`` does not match ``%Y-%m-%d``.
    """
    window = pd.DateOffset(days=window_days)
    built = []

    for _, row in tqdm(frame.iterrows(), total=len(frame), desc=desc):
        geom = {
            "type": "Polygon",
            "coordinates": parse_polygon(row['geometry']),
        }

        date = datetime.strptime(row["event_date"], "%Y-%m-%d")
        # datetime +/- DateOffset yields a pandas Timestamp, so convert back
        # to a plain datetime before handing the bounds to set_filters.
        window_start = (date - window).to_pydatetime()
        window_end = (date + window).to_pydatetime()

        built.append(set_filters(from_date=window_start, to_date=window_end, geom=geom))

    return built


# The output directory may not exist on a fresh checkout.
os.makedirs(FILTER_DIR, exist_ok=True)

# The search-creation cell reads a filter file for both event types, so both
# are generated here. "events" goes first so that all_filters still holds the
# non-event filters once the cell has finished.
for event_type, frame in (("events", events), ("non_events", non_events)):
    all_filters = build_filters(frame, desc=event_type)

    # One JSON document per line (JSONL).
    with open(FILTER_DIR + '/filters_' + event_type + '.jsonl', 'w') as file:
        for entry in all_filters:
            file.write(json.dumps(entry) + '\n')

Create a saved Planet search from each filter

In [None]:
for x in ["non_events", "events"]:
    event_type = x
    df = pd.read_csv('../data/ACLED_Ukraine_' + event_type + '_sample.csv')

    async with Session() as sess:
        cl = sess.client('data')

        with open(FILTER_DIR + '/filters_' + event_type + '.jsonl', 'r') as file:
            # Initialize tqdm with manual update
            pbar = tqdm(total=len(df))
            
            for i, line in enumerate(file):
                timeline_id = str(df['timeline_id'][i])
                name = event_type + '_' + timeline_id
                JSON_DIR = SEARCH_DIR + '/' + name + '.json'

                pbar.update(1)
                
                # Check if file already exists
                if os.path.exists(JSON_DIR):
                    # print(f"Search {name} already exists")
                    continue
                else:
                    # print(f"Creating search {name}")
                    filters = json.loads(line)
                    
                    request = await cl.create_search(name=name,
                                                    search_filter=filters,
                                                    item_types=["PSScene"])
                    with open(JSON_DIR, 'w') as f:
                        f.write(json.dumps(request))
            
            # Close the progress bar
            pbar.close()



Run the saved searches and store their results

In [None]:
# NOTE(review): process_files(files, cl) is defined in a later cell and takes two
# required arguments; this bare call passes none — confirm whether this cell is stale.
await process_files()

In [5]:
from aiofiles import open as aio_open
from tqdm.asyncio import tqdm


async def process_file(dir, cl, pbar):
    """Run one saved search and write its results back into its JSON file.

    Parameters
    ----------
    dir : str
        File name, relative to ``SEARCH_DIR``, of the saved-search JSON.
    cl
        Client exposing ``run_search(search_id=..., limit=...)`` as an async
        iterator.
    pbar
        Progress bar; advanced by one as soon as the file has been read,
        whether or not the search then has to be run.

    Files that already contain a ``"results"`` key are left untouched.
    """
    path = os.path.join(SEARCH_DIR, dir)

    async with aio_open(path, "r+") as handle:
        saved_search = json.loads(await handle.read())
        pbar.update(1)

        if "results" not in saved_search.keys():
            # Drain the async iterator; limit=100 is passed through to run_search.
            found = [
                item
                async for item in cl.run_search(search_id=saved_search['id'], limit=100)
            ]
            saved_search["results"] = found

            # Overwrite the file in place: rewind, write the new document,
            # then cut off whatever is left of the old content.
            await handle.seek(0)
            await handle.write(json.dumps(saved_search, indent=4))
            await handle.truncate()

async def process_files(files, cl):
    """Process a batch of saved-search files concurrently.

    Parameters
    ----------
    files : sequence of str
        File names (relative to ``SEARCH_DIR``) handed to ``process_file``.
    cl
        Client object passed through to ``process_file``.

    Any exception raised by a ``process_file`` call propagates out of the
    ``asyncio.gather`` await.
    """
    # The context manager closes the progress bar even when a task raises,
    # so a failed batch does not leave a dangling bar in the notebook output.
    with tqdm(total=len(files)) as pbar:
        await asyncio.gather(*(process_file(name, cl, pbar) for name in files))

def chunk_files(file_list, chunk_size):
    """Yield consecutive slices of ``file_list`` holding at most ``chunk_size`` items.

    The final slice is shorter when the length is not a multiple of
    ``chunk_size``; an empty input yields nothing.
    """
    total = len(file_list)
    for start in range(0, total, chunk_size):
        stop = start + chunk_size
        yield file_list[start:stop]

In [None]:
all_files = os.listdir(SEARCH_DIR)

async with Session() as sess:
    cl = sess.client('data')

    for file_chunk in chunk_files(all_files, 2):
        asyncio.run(process_files(file_chunk, cl))

In [36]:
# Copy the first ten files of SEARCH_DIR (by sorted name) into "../data/searches_alt".
import shutil
import os

SAMPLE_DIR = "../data/searches_alt"

# os.listdir order is arbitrary, so sort before taking the first ten.
files = sorted(os.listdir(SEARCH_DIR))[:10]

if os.path.abspath(SEARCH_DIR) == os.path.abspath(SAMPLE_DIR):
    # Source and destination are the same directory; shutil.copy would raise
    # SameFileError on the first file, and there is nothing to copy anyway.
    print(f"SEARCH_DIR already points at {SAMPLE_DIR}; nothing to copy")
else:
    os.makedirs(SAMPLE_DIR, exist_ok=True)
    for f in files:
        shutil.copy(os.path.join(SEARCH_DIR, f), os.path.join(SAMPLE_DIR, f))



In [None]:
# Print the acquisition time and the usable-data percentages of every item
# whose quality category is not 'test'.
# NOTE(review): item_list is not assigned at notebook scope in the visible cells
# (only inside process_file) — confirm where it is expected to come from.
for item in item_list:
    props = item['properties']
    if props['quality_category'] == 'test':
        continue
    print(
        f"acquired: {props['acquired']}, "
        f"clear percent: {props['clear_percent']}, "
        f"cloud percent: {props['cloud_percent']}, "
        f"heavy haze percent: {props['heavy_haze_percent']}, "
        f"light haze percent: {props['light_haze_percent']}, "
        f"snow ice percent: {props['snow_ice_percent']}, "
        f"shadow percent: {props['shadow_percent']}, "
        f"visible percent: {props['visible_percent']}, "
        f"visible confidence percent: {props['visible_confidence_percent']}"
    )