# 5. Estimate GWL from the given data on GEE

In [1]:
# Enable IPython's autoreload extension; mode 2 reloads all imported modules
# before each cell is executed, so edits to local modules are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import pandas as pd
from gee_scripts.get_sources import get_explanatory_composite
from gee_scripts.parameters import explain_vars
from gee_scripts.get_sources import get_s1_str_dates
from gee_scripts.gee import export_classifier, estimate_to_gee

import ee

Earth Engine initialized successfully, with ee-indonesia-gwl


## 1. 📰 Read the data

In [3]:
# Training data as a pandas dataframe: field measurements + explanatory variables.
# The "date" column is parsed into datetimes while reading.
df = pd.read_csv(
    "data/9_clean_training_data/all_training_data_with_extra_and_locations_and_precipSum.csv",
    parse_dates=["date"],
)

# Sanity check: the training CSV is expected to contain exactly 32783 rows.
assert df.shape[0] == 32783, "The length of the dataframe is not correct"

In [4]:
# This table contains the training data for the model (field + explanatory variables) but as a gee table
training_data = ee.FeatureCollection("projects/ee-indonesia-gwl/assets/all_training_data_with_extra_and_locations_and_precipSum")

# Check the consistency of the dataframe and the table.
# Each `getInfo()` is a blocking request to Earth Engine.

# Same number of columns (dataframe) and properties (first feature of the table)
assert len(df.columns) == len(training_data.first().propertyNames().getInfo()), "The number of features in the dataframe and the table are not the same"

# Same number of rows: compare against the dataframe itself rather than a
# hard-coded count, so the check really verifies what its message states.
assert len(df) == training_data.size().getInfo(), "The number of rows in the dataframe and the table are not the same"

print("Both the dataframe and the table have the same number of features and rows")

Both the dataframe and the table have the same number of features and rows


## 2. ✅ Subset of the data to train the model (training data)

In [5]:
# Restrict the training data according to the previous analysis:
# keep only the selected PHUs and drop the stations flagged as bad.

# Earlier candidate selection, kept for reference:
# best_kalimantan_phus = [297., 350., 351., 352., 357., ]
best_kalimantan_phus = [350, 351, 357, 379] # This is Pablo's selection
bad_stations = [
    'batok1',
    'batok2',
    'brg11',
    'brg13',
    'brg16',
    'BRG_620309_01',
    'BRG_620309_02',
    'BRG_630805_01',
    'BRG_630708_01',
]

# Features whose "phu_id" is one of the selected PHUs ...
in_best_phus = ee.Filter.inList("phu_id", best_kalimantan_phus)
# ... and whose station "id" is NOT in the bad-station list
not_bad_station = ee.Filter.inList("id", bad_stations).Not()

# Apply both conditions to the gee table
training_data = training_data.filter(ee.Filter.And(in_best_phus, not_bad_station))

# Number of samples left after filtering (shown as the cell output)
training_data.size().getInfo()

1546

In [10]:
# Train a model using the training data and save it to the gee as an asset

# Define a meaningful name for the model: it records the PHUs used for training
# (350, 351, 357, 379) and that the bad stations were excluded.
# The name is written by hand, so keep it in sync with the filter applied to `training_data`.
model_name = "kalimantan_phu_model_350_351_357_379_Pablo_no_bad_stations"

# This function exports the classifier to the gee (track the task in https://code.earthengine.google.com/tasks)
# NOTE(review): the returned value is later passed to `ee.Classifier.load`, so it is
# presumably the asset id of the exported model — confirm in gee_scripts.gee.
model_gee_id = export_classifier(training_data, model_name)

Exporting model kalimantan_phu_model_350_351_357_379_Pablo_no_bad_stations with 1546 samples
Alert!!! Model projects/ee-indonesia-gwl/assets/gwl-modeling/models/RandomForest_kalimantan_phu_model_350_351_357_379_Pablo_no_bad_stations_trees_250 already exists


## 3. 📊 Estimate using the trained model

##############################################

In [20]:
#####################################
# SET PARAMETERS FOR THE ESTIMATION #
#####################################

# Asset with the PHU polygons, and the id of the PHU to estimate over
phus_asset_id = "projects/ee-indonesia-gwl/assets/all_phus_numbered"
target_phu_number = 352

# Area of interest: bounding box of the first feature whose "phu_id" matches the target
all_phus = ee.FeatureCollection(phus_asset_id)
target_phu = all_phus.filter(ee.Filter.eq("phu_id", target_phu_number)).first()
target_aoi = target_phu.geometry().bounds()

# Range of dates for which the images will be exported
start_date = "2018-01-01"
end_date = "2024-07-01"

# Date strings to process for the AOI within the range
# NOTE(review): presumably one entry per Sentinel-1 acquisition date — confirm in gee_scripts.get_sources
all_str_date = get_s1_str_dates(target_aoi, start_date, end_date)

# Short summary of what is going to be exported
n_dates = len(all_str_date)
print(f"There are {n_dates} images/dates to export")
print(f"First date: {all_str_date[0]}")
print(f"Last date: {all_str_date[-1]}")

There are 434 images/dates to export
First date: 2018-01-05
Last date: 2024-06-27


##############################################

## ⚙️ Run the model 

In [21]:
# Create and start the export tasks (one per date)

aoi_name = f"PHU_{target_phu_number}"
# Load the trained classifier from its gee asset id
ee_classifier = ee.Classifier.load(model_gee_id)

# Build one export task per date and start it right away.
# The task object is stored BEFORE calling `.start()`: `ee.batch.Task.start()`
# returns None, so collecting the result of `.start()` would leave `tasks` as a
# list of None and lose every handle needed to inspect or cancel the tasks.
# NOTE(review): assumes `estimate_to_gee` returns an `ee.batch.Task` — confirm in gee_scripts.gee.
tasks = []
for target_date in all_str_date:
    task = estimate_to_gee(aoi_name, target_aoi, target_date, ee_classifier)
    task.start()
    tasks.append(task)

Composite for date:  2018-01-05  created.
Composite for date:  2018-01-12  created.
Composite for date:  2018-01-17  created.
Composite for date:  2018-01-24  created.
Composite for date:  2018-01-29  created.
Composite for date:  2018-02-05  created.
Composite for date:  2018-02-17  created.
Composite for date:  2018-02-22  created.
Composite for date:  2018-03-01  created.
Composite for date:  2018-03-06  created.
Composite for date:  2018-03-13  created.
Composite for date:  2018-03-18  created.
Composite for date:  2018-03-25  created.
Composite for date:  2018-03-30  created.
Composite for date:  2018-04-06  created.
Composite for date:  2018-04-11  created.
Composite for date:  2018-04-18  created.
Composite for date:  2018-04-23  created.
Composite for date:  2018-04-30  created.
Composite for date:  2018-05-05  created.
Composite for date:  2018-05-12  created.
Composite for date:  2018-05-17  created.
Composite for date:  2018-05-24  created.
Composite for date:  2018-05-29  c

In [15]:
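# Run the tasks

# The tasks are already started by the previous cell (each one calls `.start()`
# when it is created), so the line below is kept only for reference.
# [task.start() for task in tasks]

# Monitor them in https://code.earthengine.google.com/tasks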

# 🚨 Do not run the code below: it will stop all the tasks in GEE 🚨

In [None]:
from pathlib import Path, PurePosixPath

# Image collection asset that holds the estimations
image_collection = "projects/ee-indonesia-gwl/assets/gwl-modeling/estimation/best_model"

# Earth Engine asset ids always use "/" as separator. PurePosixPath keeps that
# separator on every OS, whereas Path would produce backslashes on Windows.
# NOTE(review): `list_of_images` is not defined in any visible cell — define it
# (a list of image asset ids) before running this cell.
destination_id = PurePosixPath(image_collection) / PurePosixPath(list_of_images[0]).name
str(destination_id)

In [None]:
# Handle to the image collection asset at gwl-modeling/estimation/best_model
ic = ee.ImageCollection("projects/ee-indonesia-gwl/assets/gwl-modeling/estimation/best_model")

In [None]:
# Fetch the "date" property of the first image in the collection
# (`getInfo()` is a blocking request to Earth Engine)
ic.first().get("date").getInfo()

In [22]:
# Query the operations only once: a single request is cheaper, and both lists
# are derived from the same snapshot (an operation cannot switch state between
# two separate calls and be missed or counted twice).
operations = ee.data.listOperations()

# Split the operations by their current state
running = [operation for operation in operations if operation["metadata"]["state"] == "RUNNING"]
pending = [operation for operation in operations if operation["metadata"]["state"] == "PENDING"]

print(f"Running: {len(running)}, Pending: {len(pending)}")

Running: 3, Pending: 425
