# Estimate GWL from the given data on GEE

In [2]:
%load_ext autoreload
%autoreload 2

In [30]:
import pandas as pd
from gee_scripts.get_sources import get_explanatory_composite
from gee_scripts.parameters import explain_vars
from gee_scripts.get_sources import get_s1_str_dates
from gee_scripts.gee import export_classifier
from gee_scripts.directories import get_export_folder
from pathlib import Path
from gee_scripts.directories import create_image_collection

import ee

## 1. Train the classifier in GEE and save the model as an asset

In [9]:
# Load the complete training table (field measurements + explanatory variables)
df = pd.read_csv(
    "data/9_clean_training_data/all_training_data_with_extra_and_locations_and_precipSum.csv",
    parse_dates=["date"],
)

# Guard against loading a different version of the file: the row count must match
assert len(df) == 32783, "The length of the dataframe is not correct"

In [10]:
# The same training data (field + explanatory variables), stored as a GEE table
training_data = ee.FeatureCollection("projects/ee-indonesia-gwl/assets/all_training_data_with_extra_and_locations_and_precipSum")

# Check the consistency between the local dataframe and the GEE table.
# Both sides are compared directly (instead of against a hard-coded row count)
# so the check always does what its message says, even if the dataset changes.
assert len(df.columns) == len(training_data.first().propertyNames().getInfo()), "The number of features in the dataframe and the table are not the same"
assert len(df) == training_data.size().getInfo(), "The number of rows in the dataframe and the table are not the same"

print("Both the dataframe and the table have the same number of features and rows")

Both the dataframe and the table have the same number of features and rows


## 2. Define the training data (subset of the data used to train the model)

In [26]:
# Filter the training data according to the previous analysis.

# PHUs used for training (Pablo's selection).
# An earlier candidate selection was [357, 297, 350, 351, 352].
best_kalimantan_phus = [357, 350, 351, 379]

# Stations that are excluded from the training data
bad_stations = ['batok1','batok2','brg11','brg13','brg16','BRG_620309_01','BRG_620309_02','BRG_630805_01','BRG_630708_01']

# Keep only the rows of the GEE table that belong to one of the selected PHUs
# and that do not come from one of the excluded stations.
training_data = training_data.filter(
    ee.Filter.And(
        ee.Filter.inList("phu_id", best_kalimantan_phus),
        ee.Filter.inList("id", bad_stations).Not(),
    )
)

# Number of samples left after filtering (displayed as the cell output)
training_data.size().getInfo()

1546

In [28]:
# Train a model using the (filtered) training data and save it to GEE as an asset

# Define a meaningful name for the model: it encodes the selected PHUs
# (357, 350, 351, 379) and the fact that the bad stations were excluded
model_name = "kalimantan_phu_model_357_350_351_379_Pablo_no_bad_stations"

# This function exports the classifier to GEE (track the task in https://code.earthengine.google.com/tasks)
# The returned value is later passed to ee.Classifier.load, so it is presumably
# the asset id of the exported model — confirm against export_classifier.
model_gee_id = export_classifier(training_data, model_name)

Exporting model kalimantan_phu_model_357_350_351_379_Pablo_no_bad_stations with 1546 samples
Exported model projects/ee-indonesia-gwl/assets/gwl-modeling/models/RandomForest_kalimantan_phu_model_357_350_351_379_Pablo_no_bad_stations_trees_250


## 3. Estimate using the trained model

## 3.1. Set parameters
##############################################

In [34]:
# Asset with all the numbered PHUs, and the number of the PHU to estimate
phus_asset_id = "projects/ee-indonesia-gwl/assets/all_phus_numbered"
target_phu_number = 351

# Area of interest: bounding box of the geometry of the target PHU
target_phu = (
    ee.FeatureCollection(phus_asset_id)
    .filter(ee.Filter.eq("phu_id", target_phu_number))
    .first()
    .geometry()
    .bounds()
)

# Date range for which the images are exported
start_date = "2018-01-01"
end_date = "2024-07-01"

# String dates returned for the target area within that range
all_str_date = get_s1_str_dates(target_phu, start_date, end_date)

print(f"There are {len(all_str_date)} images/dates to export")

There are 308 images/dates to export


##############################################

In [None]:
def export_estimation(aoi_name, ee_aoi, str_date, ee_classifier):
    """Build the export task for the estimated GWL image of a single date.

    The explanatory composite closest to ``str_date`` is classified with
    ``ee_classifier`` and an image-to-asset export task is created for the
    result. The task is returned without being started; the caller is
    responsible for calling ``task.start()``.

    Args:
        aoi_name: Label of the area of interest, used as prefix of the
            exported image name (``f"{aoi_name}_{str_date}"``).
        ee_aoi: Earth Engine geometry used both as the region of the
            explanatory composite and as the export region.
        str_date: Target date as a string. It is also stored in the
            ``date`` property of the exported image.
        ee_classifier: Earth Engine classifier applied to the composite.

    Returns:
        The (unstarted) Earth Engine export task.
    """

    # The model name is the last path component of the classifier id.
    # NOTE: getInfo() is a round trip to the server on every call.
    model_name = Path(ee_classifier.getInfo()["id"]).name

    # Get explanatory composite closest to target date, restricted to the
    # explanatory variables
    image = get_explanatory_composite(
        target_date=str_date,
        ee_region=ee_aoi,
    ).select(explain_vars)

    output_image_name = f"{aoi_name}_{str_date}"

    # Tag the estimation with the model and date it was produced from
    estimated_image = image.classify(ee_classifier).set(
        {"model": model_name, "date": str_date}
    )

    # Images are exported into a collection named after the model
    export_folder = get_export_folder(output_folder="estimation/best_models/")
    image_collection_path = create_image_collection(export_folder / model_name)

    # Create the export task and hand it back to the caller: without the
    # return, callers only get None and cannot start the export.
    return ee.batch.Export.image.toAsset(
        image=estimated_image,
        description=output_image_name,
        assetId=str(image_collection_path / output_image_name),
        region=ee_aoi,
        scale=100,
    )

In [None]:
# Create the tasks!!!
# export_estimation is called once per date in all_str_date, always with the
# same area of interest and the same classifier.

# Name prefix of the exported images, e.g. "PHU_351"
aoi_name = f"PHU_{target_phu_number}"
# Load the trained classifier from the id returned by export_classifier
ee_classifier = ee.Classifier.load(model_gee_id)
# NOTE(review): `phus` is not used anywhere in the visible code, and its asset id
# ("All_phus_numbered") differs in case from phus_asset_id ("all_phus_numbered") — confirm.
phus = ee.FeatureCollection("projects/ee-indonesia-gwl/assets/All_phus_numbered")

tasks = [
    export_estimation(aoi_name, target_phu, target_date, ee_classifier) 
    for target_date 
    in all_str_date
]

In [None]:
# Run the tasks
# A plain loop is used because start() is called only for its side effect.
for task in tasks:
    task.start()

# Monitor them in https://code.earthengine.google.com/tasks

## Legacy code

# Do not use the code below. It is just for reference.

In [None]:
# Legacy: build the destination asset id of the first image of a list.
# NOTE(review): `list_of_images` is not defined in the visible code; it must
# already exist in the session for this cell to run.
from pathlib import Path

image_collection = "projects/ee-indonesia-gwl/assets/gwl-modeling/estimation/best_model"
# Collection path joined with the last path component of the first image
destination_id = Path(image_collection)/Path(list_of_images[0]).name
str(destination_id)

In [None]:
# Legacy: image collection with the estimations (per its asset path).
# NOTE(review): the path ends in "best_model", while export_estimation writes
# under "estimation/best_models/" — confirm which collection is meant.
ic = ee.ImageCollection("projects/ee-indonesia-gwl/assets/gwl-modeling/estimation/best_model")

In [None]:
# Legacy: read the "date" property of the first image of the collection
ic.first().get("date").getInfo()

In [None]:
# Legacy: split the current Earth Engine operations by state.
# ee.data.listOperations() is called once per list, so the two lists come
# from two separate requests.
running = [operation for operation in ee.data.listOperations() if operation["metadata"]["state"] == "RUNNING"]
pending = [operation for operation in ee.data.listOperations() if operation["metadata"]["state"] == "PENDING"]

print(f"Running: {len(running)}, Pending: {len(pending)}")

In [None]:
# Legacy: cancel every operation listed in `pending` and `running`
[ee.data.cancelOperation(operation["name"]) for operation in pending+running]

In [None]:
# Legacy: attempt to copy the "date" property of each image of the collection
# into its "system:time_start" property.
image_ids = ic.toList(ic.size()).getInfo()
# Keep only the asset id of each image
image_ids = [image["id"] for image in image_ids]

for image_id in image_ids:

    image = ee.Image(image_id)
    date = image.get("date")
    # NOTE(review): the value returned by set() is discarded; if ee.Image.set
    # returns a new image instead of modifying `image`, this line has no
    # effect — confirm.
    image.set({"system:time_start": date})

    # NOTE(review): an ee.Image and an empty update mask are passed here —
    # confirm against the ee.data.updateAsset API.
    ee.data.updateAsset(image_id, image, [])

    # Only the first image is processed: the loop stops after one iteration
    break    