In [None]:
# Enable IPython's autoreload extension in mode 2 so imported modules are
# re-imported before each cell runs; edits to modules such as `gee` and `utils`
# (presumably project-local) then take effect without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
from datetime import datetime
import os

import geopandas as gpd
import keras

import gee
import utils

In [None]:
# Name of the boundary GeoJSON (without extension) to process; also used in the output filename.
region_name = 'amazon_1'

tile_size = 576 # this is around the max size that GEE exports allow with 12-band imagery
tile_padding = 24 # passed to utils.create_tiles along with tile_size; presumably per-tile padding/overlap — TODO confirm units

# Date range handed to the data extractor; the month/year of each bound also labels the output file.
start_date = datetime(2023, 1, 1)
end_date = datetime(2023, 12, 1)
clear_threshold = 0.6 # passed to gee.S2_Data_Extractor; presumably a clear-sky/cloud-score cutoff — TODO confirm

# Cutoff passed to make_predictions; the summary printout reports chips with predictions above it.
pred_threshold = 0.5

In [None]:
# Load every member of the model ensemble from ../models.
# Each file is named '<base_model>-<version letter>_2024-01-17.h5'.
base_model = '48px_v3.7'
model_versions = list('abcdefghi')
model_names = [f'{base_model}-{suffix}_2024-01-17' for suffix in model_versions]
model_paths = [f'../models/{name}.h5' for name in model_names]
models = list(map(keras.models.load_model, model_paths))

In [None]:
# Read the boundary file and take its first geometry as a GeoJSON-like mapping.
boundary_path = f'../data/boundaries/amazon_basin/{region_name}.geojson'
boundary_gdf = gpd.read_file(boundary_path)
region = boundary_gdf.geometry[0].__geo_interface__

# Split the region into tiles using the configured size and padding.
tiles = utils.create_tiles(region, tile_size, tile_padding)
tile_count = len(tiles)
print(f"Created {tile_count:,} tiles")

# Configure the extractor for the tiles, date range and clear threshold.
data_pipeline = gee.S2_Data_Extractor(
    tiles,
    start_date,
    end_date,
    clear_threshold,
    batch_size=500,
)

In [None]:
# Run the ensemble in `models` through the data pipeline with pred_threshold as the cutoff.
# NOTE(review): presumably this populates data_pipeline.predictions, which the
# following cell reads and writes to disk — confirm in gee.S2_Data_Extractor.
data_pipeline.make_predictions(models, pred_threshold=pred_threshold)

In [None]:
# Report how many chips were predicted above the threshold, then save them as GeoJSON.
print(len(data_pipeline.predictions), 'chips with predictions above', pred_threshold)

# First two underscore-separated parts of the base model name, e.g. '48px_v3.7'.
model_version_name = '_'.join(base_model.split('_')[0:2])
time_period = f"{start_date.month}_{start_date.year}-{end_date.month}_{end_date.year}"

# Build the output directory path once so the directory that gets created is the
# same one the file is written into. Previously the ensemble directory was
# created but the file was written to '../data/outputs/{model_version_name}/',
# which could be missing and make the write fail.
output_dir = f'../data/outputs/{model_version_name}_{len(models)}-model_ensemble'
# exist_ok=True makes this safe to re-run and avoids a check-then-create race.
os.makedirs(output_dir, exist_ok=True)

output_path = f"{output_dir}/{region_name}_{model_version_name}_{pred_threshold:.2f}_{time_period}.geojson"
data_pipeline.predictions.to_file(output_path, driver="GeoJSON")