In [2]:
# Enable IPython's autoreload extension. Mode 2 reloads imported modules
# before each cell executes, so edits to local modules (e.g. gee_data) are
# picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [3]:
from datetime import datetime
import os

import geopandas as gpd
import keras

import gee_data

In [4]:
# ---- Run configuration -------------------------------------------------

# Stem of the saved model file, loaded later from ../models/<model_name>.h5
model_name = '48px_v3.1_2023-12-02'

# Stem of the boundary GeoJSON under ../data/boundaries/amazon_basin/
region_name = 'amazon_4'

# Tile geometry handed to gee_data.create_tiles.
# The timing cell multiplies tile_size by 10 to get metres, i.e. it assumes
# 10 m per pixel -- TODO confirm against gee_data.
tile_size, tile_padding = 576, 24

# Chip geometry. Not referenced by the cells visible in this notebook;
# presumably consumed inside gee_data -- verify.
chip_width, chip_stride = 48, 24

# Date window and clear threshold handed to gee_data.get_image_data
start_date = datetime(year=2023, month=1, day=1)
end_date = datetime(year=2023, month=12, day=1)
clear_threshold = 0.6

# Threshold quoted in the output summary and embedded in the output filename
pred_threshold = 0.5

In [5]:
# Run the full prediction pipeline over the region and report throughput.
# The timer wraps everything below, including model loading and tiling.
start = datetime.now()
model = keras.models.load_model(f'../models/{model_name}.h5')

# Take the first feature by position (.iloc) rather than by index label, so
# this keeps working even if the GeoDataFrame index does not start at 0.
region = gpd.read_file(f'../data/boundaries/amazon_basin/{region_name}.geojson').geometry.iloc[0].__geo_interface__

tiles = gee_data.create_tiles(region, tile_size, tile_padding)
print(f"{len(tiles)} tiles have been created")
preds_df = gee_data.get_image_data(tiles, start_date, end_date, model, clear_threshold)
end = datetime.now()

# Reference areas used to extrapolate the measured throughput
rhode_island_ha = 313900                # area of Rhode Island in hectares
rhode_islands_per_amazon_basin = 2203   # 2203 Rhode Islands in the Amazon basin

# Area covered: each tile is tile_size pixels per side; the factor of 10
# assumes 10 m per pixel -- TODO confirm against gee_data.
# NOTE(review): tile_padding is not included in this area estimate.
area_m2 = len(tiles) * (tile_size * 10) ** 2
# convert the meters squared to hectares
area_ha = area_m2 / 10000

duration = end - start
elapsed_seconds = duration.total_seconds()
# Round to whole seconds before splitting so the seconds field can never be
# displayed as "60" (e.g. 59.7 s previously formatted as "60 seconds").
minutes, seconds = divmod(round(elapsed_seconds), 60)
print(f"{area_ha:,.0f} hectares were analyzed in {minutes:.0f} minutes and {seconds:.0f} seconds")

if area_ha > 0:
    rhode_island_seconds = rhode_island_ha * elapsed_seconds / area_ha
    print(f"At this speed, you could process an area the size of Rhode Island in {rhode_island_seconds:.0f} seconds")
    # Convert straight from seconds to hours instead of going through
    # floor-truncated minutes.
    amazon_hours = rhode_islands_per_amazon_basin * rhode_island_seconds / 3600
    print(f"and the Amazon basin in {amazon_hours:,.1f} hours ({amazon_hours / 24:,.1f} days)")
else:
    # No tiles means zero area; extrapolating would divide by zero.
    print("No area was analyzed, so processing speed cannot be extrapolated")

Metal device set to: Apple M3 Pro

systemMemory: 36.00 GB
maxCacheSize: 13.50 GB



2023-12-04 21:17:18.778071: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:306] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2023-12-04 21:17:18.778231: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:272] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


28525 tiles have been created


2023-12-04 21:17:32.910577: W tensorflow/core/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz
2023-12-04 21:17:32.963494: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.


94,639,104 hectares were analyzed in 601 minutes and 43 seconds
At this speed, you could process an area the size of Rhode Island in 120 seconds
and the Amazon basin in 73.3 hours (3.1 days)


In [6]:
# Summarise the predictions and write them to a GeoJSON file.
# NOTE(review): pred_threshold is not passed to gee_data.get_image_data in the
# cells visible here -- confirm the threshold applied there matches this value,
# otherwise the message and filename below may be misleading.
print(len(preds_df), 'chips with predictions above', pred_threshold)

# Model version = first two underscore-separated parts of the model name,
# e.g. '48px_v3.1_2023-12-02' -> '48px_v3.1'
model_version_name = '_'.join(model_name.split('_')[0:2])

# Create the output directory if needed; exist_ok avoids the check-then-create race
output_dir = f'../data/outputs/{model_version_name}'
os.makedirs(output_dir, exist_ok=True)

time_period = f"{start_date.month}_{start_date.year}-{end_date.month}_{end_date.year}"
preds_df.to_file(f"{output_dir}/{region_name}_{model_version_name}_{pred_threshold:.2f}_{time_period}.geojson", driver="GeoJSON")

27448 chips with predictions above 0.5
