In [None]:
# Enable IPython's autoreload extension in mode 2, which reloads imported
# modules before each cell executes so edits to project code are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
from datetime import datetime
import os

import numpy as np
import geopandas as gpd
import keras

import gee
import utils

In [None]:
# --- Model versions -------------------------------------------------------
# Version of the earlier model whose predictions are read back in later.
old_model_version = '48px_v3.5'
# File stem of the model loaded for this run.
model_name = '48px_v3.7_2024-01-10'

# --- Region ---------------------------------------------------------------
region_name = 'amazon_4'

# --- Tiling ---------------------------------------------------------------
# 576 is around the maximum size that GEE exports allow with 12-band imagery.
tile_size = 576
tile_padding = 24

# --- Data extraction settings (handed to the S2 data extractor) ------------
start_date = datetime(2023, 1, 1)
end_date = datetime(2024, 1, 1)
# NOTE(review): presumably a cloud-free / clear-pixel cutoff — confirm in gee.
clear_threshold = 0.6

# --- Prediction cutoff ----------------------------------------------------
pred_threshold = 0.5

# Load the model here, ahead of processing, to keep the later printout cleaner.
model = keras.models.load_model(f'../models/{model_name}.h5')

# Read the boundary file and take its first geometry as a GeoJSON-like mapping.
region = (
    gpd.read_file(f'../data/boundaries/amazon_basin/{region_name}.geojson')
    .geometry[0]
    .__geo_interface__
)

# Split the region into tiles of the configured size and padding.
tiles = utils.create_tiles(region, tile_size, tile_padding)
print(f"Created {len(tiles):,} tiles")

In [None]:
# Load the earlier model's predictions for this region. Both the directory and
# the file name are derived from old_model_version so they cannot drift apart
# when the version is changed.
# NOTE(review): the "0.50_1_2023-1_2024" suffix (threshold and time period of
# the earlier run) is still fixed in the file name — confirm it when changing
# the dates or the threshold of the prior run.
prior_predictions = gpd.read_file(
    f"../data/outputs/{old_model_version}/{region_name}_{old_model_version}_0.50_1_2023-1_2024.geojson"
)

# Convert the list of tiles to a GeoDataFrame and set its CRS to WGS84 so it
# can be spatially joined against the predictions.
tiles_gdf = gpd.GeoDataFrame(geometry=[tile.geometry for tile in tiles])
tiles_gdf = tiles_gdf.set_crs(epsg=4326)

# Keep only the prior predictions scoring above the threshold.
threshold = 0.5
confident_predictions = prior_predictions[prior_predictions['pred'] > threshold]

# Perform the spatial join: one row per (tile, intersecting prediction) pair.
intersecting_tiles = gpd.sjoin(tiles_gdf, confident_predictions, how='inner', predicate='intersects')

# A tile may intersect several predictions, so its index can repeat in the join
# result; np.unique collapses the repeats. tiles_gdf keeps its default
# positional index, so each index value maps straight back into `tiles`.
positive_tiles = [tiles[i] for i in np.unique(intersecting_tiles.index)]
print(f"{len(positive_tiles):,} tiles intersect with the predictions at a threshold of {threshold}")

In [None]:
# Time the whole extraction + prediction run over the selected tiles.
start = datetime.now()

data_pipeline = gee.S2_Data_Extractor(
    positive_tiles,
    start_date,
    end_date,
    clear_threshold,
    batch_size=500
    )
preds = data_pipeline.make_predictions(model, pred_threshold=pred_threshold)

end = datetime.now()
duration = end - start
elapsed_seconds = duration.total_seconds()

# Area covered: each tile is tile_size pixels on a side, and the factor of 10
# converts pixels to meters (presumably 10 m pixels — confirm against gee).
area_m2 = len(positive_tiles) * (tile_size * 10) ** 2
# convert the meters squared to hectares
area_ha = area_m2 / 10000

minutes, seconds = divmod(elapsed_seconds, 60)
print(f"{area_ha:,.0f} hectares were analyzed in {minutes:.0f} minutes and {seconds:.0f} seconds")

# Reference areas used for the back-of-the-envelope extrapolation.
rhode_island_ha = 313900
# ~2203 Rhode Islands in the Amazon basin
rhode_islands_per_amazon_basin = 2203

if area_ha > 0:
    rhode_island_seconds = rhode_island_ha * elapsed_seconds / area_ha
    print(f"At this speed, you could process an area the size of Rhode Island in {rhode_island_seconds:.0f} seconds")
    # Convert straight from seconds to hours so no partial minute is dropped.
    amazon_hours = rhode_islands_per_amazon_basin * rhode_island_ha * elapsed_seconds / area_ha / 3600
    print(f"and the Amazon basin in {amazon_hours:,.1f} hours ({amazon_hours / 24:,.1f} days)")
else:
    # With no tiles there is no processed area, so a rate cannot be computed
    # (dividing by area_ha would raise ZeroDivisionError).
    print("No tiles were processed, so no throughput estimate is available")

In [None]:
# Report how many chips were returned by the prediction step.
print(len(preds), 'chips with predictions above', pred_threshold)

# The model version is the first two underscore-separated fields of model_name
# (e.g. '48px_v3.7_2024-01-10' -> '48px_v3.7').
model_version_name = '_'.join(model_name.split('_')[0:2])

# Create the outputs directory if it does not exist. exist_ok=True makes this a
# no-op when the directory is already there and avoids the race between a
# separate existence check and the creation call.
output_dir = f'../data/outputs/{model_version_name}'
os.makedirs(output_dir, exist_ok=True)

# File name records the region, new and old model versions, threshold and
# the month_year range of the analyzed period.
time_period = f"{start_date.month}_{start_date.year}-{end_date.month}_{end_date.year}"
# write the predictions to a file
preds.to_file(f"{output_dir}/{region_name}_{model_version_name}_rerun_{old_model_version}_{pred_threshold:.2f}_{time_period}.geojson")