## Read CE points and export as table to GEE

In [None]:
%reload_ext autoreload
%autoreload 2

In [None]:
import pandas as pd
import geopandas as gpd
from config import config_imp as conf
import ee

ee.Initialize()

In [None]:
# Load the point table from the CSV path configured in `conf.data_file`.
data = pd.read_csv(conf.data_file)
# List the column names; the trailing semicolon suppresses the cell output.
data.columns.to_list();

In [None]:
# Attach a point geometry to every row, built from its "lon"/"lat" columns
# (x = longitude, y = latitude).
geodata = gpd.GeoDataFrame(
    data,
    geometry=gpd.points_from_xy(x=data["lon"], y=data["lat"]),
)
# Trailing semicolon keeps the notebook from rendering the whole frame.
geodata;

In [None]:
# Inspect the top-level keys of the GeoJSON-like mapping exposed by the
# GeoDataFrame before handing it to Earth Engine.
geodata.__geo_interface__.keys()

In [None]:
# Build an Earth Engine FeatureCollection from the GeoDataFrame's
# `__geo_interface__` mapping.
fc = ee.FeatureCollection(geodata.__geo_interface__)

In [None]:
# Prepare (but do not yet start) a batch task that exports the feature
# collection `fc` to an Earth Engine table asset.
task = ee.batch.Export.table.toAsset(
    collection=fc,
    description="kenya_impress",
    assetId="projects/planet-afk/assets/kenya_impress",
)

In [None]:
# Submit the export task; this call only starts it.
task.start()

## Download patches from planet using GEE

Based on https://gist.github.com/gorelick-google/4c015b79119ef85313b8bef6d654e2d9

In [None]:
import ee
import logging
import multiprocessing
import requests
import shutil
import backoff
from retry import retry
from pathlib import Path
import concurrent.futures


ee.Initialize(opt_url="https://earthengine-highvolume.googleapis.com")

In [None]:
@retry(tries=10, delay=1, backoff=2)
def getResult(_, feature):
    """Download one image patch as a GeoTIFF and save it to disk.

    The patch is the mean of the NICFI Africa basemap collection filtered
    to the dates "2022-06-01" .. "2022-08-31", clipped to the geometry of
    ``feature``. The file is written to
    ``~/1_modules/4_MultiLabel/train-impress/<id>.tif``.

    The function is wrapped in ``retry(tries=10, delay=1, backoff=2)``, so
    any exception raised here triggers another attempt.

    Args:
        _: Unused. Present because the function is called through
            ``starmap(getResult, enumerate(features))``.
        feature: GeoJSON-like feature mapping. Its geometry defines the
            patch, and ``feature["properties"]["id"]`` names the output file.

    Raises:
        requests.HTTPError: If the download URL answers with an error status.
        requests.Timeout: If the server does not respond within the timeout.
    """

    # Generate the desired image from the given feature.
    region = ee.Feature(feature).geometry()
    name = feature["properties"]["id"]

    image = (
        ee.ImageCollection("projects/planet-nicfi/assets/basemaps/africa")
        .filter(ee.Filter.date("2022-06-01", "2022-08-31"))
        .mean()
    ).clip(region)

    # Fetch the URL from which to download the image.
    url = image.getDownloadURL(
        {"scale": 4.77, "region": image.geometry(), "format": "GEO_TIFF"}
    )

    # `with_name` swaps the placeholder last component ("fn") for the real
    # file name, so the file lands directly inside "train-impress".
    file_path = (Path.home() / "1_modules/4_MultiLabel/train-impress/fn").with_name(
        f"{name}.tif"
    )
    # Make sure the target folder exists; `exist_ok` keeps this safe when
    # several workers reach this line at the same time.
    file_path.parent.mkdir(parents=True, exist_ok=True)

    # Handle downloading the actual pixels.
    # - The timeout (connect, read) stops a stalled connection from blocking
    #   a worker forever; the retry decorator then takes over.
    # - The context manager releases the connection even on failure.
    with requests.get(url, stream=True, timeout=(10, 120)) as r:
        r.raise_for_status()
        # Have urllib3 undo any Content-Encoding (e.g. gzip) so the bytes
        # written to disk are the actual file content.
        r.raw.decode_content = True
        with open(file_path, "wb") as out_file:
            shutil.copyfileobj(r.raw, out_file)

    print("Done: ", name)

### Download points from GEE

- We start from a point list in CSV (notebook 0).
- Then, as I want to keep the processed points in GEE (for display purposes), I upload them in the first step.
- Then, I do the reverse: I read the points back from GEE and create the buffer and the bounds directly in GEE (why? because I don't want to deal with reprojections or use distances in degrees).

In [None]:
def get_patch(offset):
    """Fetch one page of points from GEE as square patch footprints.

    Reads ``batch_size`` elements of the global collection ``fc`` starting
    at ``offset``, replaces each element by the bounding box of its
    ``buffer(70)``, and returns the client-side result of ``getInfo()``.

    NOTE(review): the buffer distance is presumably in metres (the notes
    above say distances in degrees are being avoided) -- confirm.
    """
    print(f"working with {offset}")
    page = ee.FeatureCollection(fc.toList(batch_size, offset))
    footprints = page.map(lambda point: point.buffer(70).bounds())
    return footprints.getInfo()


# Number of threads issuing `getInfo()` requests concurrently.
workers = 4
with concurrent.futures.ThreadPoolExecutor(max_workers=workers) as executor:

    fc = ee.FeatureCollection("projects/planet-afk/assets/kenya_impress")
    n_points = fc.size().getInfo()
    # Split the collection evenly across the workers, capped at 5000
    # elements per request (presumably the per-request limit -- confirm).
    # The lower bound of 1 avoids a zero batch size, and the division by
    # zero it caused, when there are fewer points than workers.
    batch_size = max(1, min(n_points // workers, 5000))

    # One offset per page. `range` stops before `n_points`, so no empty
    # trailing page is requested when the size divides evenly.
    offsets = list(range(0, n_points, batch_size))

    futures = [executor.submit(get_patch, offset) for offset in offsets]
    # Collect in submission order so the resulting feature order is
    # deterministic; the requests themselves still run concurrently.
    batch_features = [future.result() for future in futures]

In [None]:
# Flatten the per-batch responses into a single list of feature dicts.
features = [
    feature
    for batch in batch_features
    for feature in batch["features"]
]
len(features)

In [None]:
# Sanity check: the number of features fetched should equal the collection
# size reported by GEE (`n_points`); the cell displays True when they match.
n_points == len(features)

In [None]:
if __name__ == "__main__":

    logging.basicConfig()
    # Download every patch with 25 worker processes. `enumerate` supplies
    # the (index, feature) pairs that `getResult(_, feature)` expects.
    # The context manager shuts the pool down even if `starmap` raises;
    # `starmap` itself blocks until all downloads have finished.
    with multiprocessing.Pool(25) as pool:
        pool.starmap(getResult, enumerate(features))

In [None]:
# Display the first feature to inspect its structure.
(features[0])

In [None]:
# plots = ee.FeatureCollection("projects/planet-afk/assets/kenya_14100_to_8850")

# input_image = (ee.ImageCollection("projects/planet-nicfi/assets/basemaps/africa")
#          .filter(ee.Filter.date("2018-01-01", "2018-12-31"))
#          .mean()
# )

# input_image = ee.ImageCollection('projects/wri-datalab/TML').mosaic()

# plots = table.map(lambda center: )

In [None]:
# This is an awful way to extract the features, but it works
# I didn't want to create the geometries in pandas, I wanted to use GEE.
# NOTE(review): `plots` is only assigned in a commented-out line in the
# visible notebook, so this cell looks like it fails with NameError unless
# `plots` is defined elsewhere -- confirm.
# Two fixed pages are requested (5000 elements, then 5000 more starting at
# offset 5000), so at most the first 10000 elements are retrieved.
f5000 = ee.FeatureCollection(plots.toList(5000)).getInfo()
f5000_10000 = ee.FeatureCollection(plots.toList(5000, 5000)).getInfo()
# Merge the second page's features into the first response.
f5000["features"] = f5000["features"] + f5000_10000["features"]
# Rebinds the global `features` to a shallow copy of the merged list.
features = f5000["features"].copy()
features[:5]