In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
from datetime import datetime

import geopandas as gpd
import keras
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

import gee_data

In [None]:
# --- Model and region -------------------------------------------------------
# Base name of the saved Keras model; the pipeline cell loads
# ../models/<model_name>.h5 and the output file name reuses its first two
# underscore-separated parts.
model_name = "48px_v3.1_2023-12-02"
# Base name of the boundary file read from ../data/boundaries/<region_name>.geojson.
region_name = "ghana_ashanti"

# --- Tiling -----------------------------------------------------------------
# Both values are handed to gee_data.create_tiles. The timing cell multiplies
# tile_size by 10 to obtain meters, so it is presumably a pixel count at
# 10 m per pixel -- TODO confirm against gee_data.
tile_size, tile_padding = 576, 24

# --- Chipping ---------------------------------------------------------------
# Not referenced by any other cell visible in this notebook.
chip_width, chip_stride = 48, 24

# --- Imagery window and filtering -------------------------------------------
# Passed to gee_data.get_image_data together with clear_threshold.
start_date = datetime(year=2023, month=1, day=1)
end_date = datetime(year=2023, month=12, day=1)
clear_threshold = 0.6

# Only used in the summary message and the output file name in the visible cells.
pred_threshold = 0.5

In [None]:
# Wall-clock timing brackets everything below (model load, tiling, inference),
# so the reported duration covers all three steps.
start = datetime.now()

model = keras.models.load_model(f'../models/{model_name}.h5')

# Only the geometry stored at index 0 of the boundary file is used; it is
# converted to a GeoJSON-like mapping before being handed to gee_data.
region = (
    gpd.read_file(f'../data/boundaries/{region_name}.geojson')
    .geometry[0]
    .__geo_interface__
)

tiles = gee_data.create_tiles(region, tile_size, tile_padding)
print(f"{len(tiles)} tiles have been created")

# NOTE(review): pred_threshold is not passed here, although later cells
# describe the result as "predictions above pred_threshold" -- confirm that
# gee_data.get_image_data applies the same threshold internally.
preds_df = gee_data.get_image_data(
    tiles,
    start_date,
    end_date,
    model,
    clear_threshold,
)

# Stop the clock once predictions are available.
end = datetime.now()

# Report how much area was processed, how long it took, and extrapolate that
# throughput to two reference areas.

# Reference figures used for the extrapolation (previously inline literals).
rhode_island_ha = 313900  # hectares used for the Rhode Island comparison
amazon_in_rhode_islands = 2203  # Amazon basin expressed as a multiple of the figure above

# Each tile is counted as a (tile_size * 10)-meter square; the factor of 10 is
# presumably meters per pixel -- TODO confirm. NOTE(review): tile_padding is
# not accounted for, so overlapping padding (if any) is ignored here.
area_m2 = len(tiles) * (tile_size * 10) ** 2
# 1 hectare = 10,000 square meters
area_ha = area_m2 / 10000

duration = end - start
elapsed_seconds = duration.total_seconds()

# Round once *before* splitting so the seconds field can never print as "60"
# (e.g. 59.7 s used to be shown as "0 minutes and 60 seconds").
minutes, seconds = divmod(round(elapsed_seconds), 60)
print(f"{area_ha:,.0f} hectares were analyzed in {minutes} minutes and {seconds} seconds")

if area_ha > 0:
    rhode_island_seconds = rhode_island_ha * elapsed_seconds / area_ha
    print(f"At this speed, you could process an area the size of Rhode Island in {rhode_island_seconds:.0f} seconds")
    # Convert straight from seconds to hours; going through whole minutes
    # silently dropped the sub-minute remainder.
    amazon_hours = amazon_in_rhode_islands * rhode_island_seconds / 3600
    print(f"and the Amazon basin in {amazon_hours:,.1f} hours ({amazon_hours / 24:,.1f} days)")
else:
    # No tiles means no processed area; extrapolating would divide by zero.
    print("No tiles were processed, so no throughput estimate is available")

In [None]:
# Summarize how many rows the pipeline returned.
# NOTE(review): the visible pipeline call does not receive pred_threshold, so
# "above pred_threshold" holds only if gee_data applies the same value -- confirm.
print(f"{len(preds_df)} chips with predictions above {pred_threshold}")

# Keep the first two underscore-separated parts of the model name
# (e.g. '48px_v3.1' from '48px_v3.1_2023-12-02') for the output file name.
model_version_name = '_'.join(model_name.split('_')[:2])

# Persist the predictions as GeoJSON; the file name encodes region, model
# version, threshold (two decimals) and the end date of the imagery window.
preds_df.to_file(
    f"../data/outputs/{region_name}_{model_version_name}_{pred_threshold:.2f}_{end_date.date()}.geojson",
    driver="GeoJSON",
)