# 4. Estimate GWL from the given data on GEE

In [None]:
# Enable IPython's autoreload extension; mode 2 reloads all imported modules
# before each cell execution, so edits to local modules are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
import pandas as pd
from gee_scripts.get_sources import get_explanatory_composite
from gee_scripts.parameters import explain_vars
from gee_scripts.get_sources import get_s1_str_dates
from gee_scripts.gee import export_classifier, estimate_to_gee

import ee

## 1. 📰 Read the data

In [None]:
# Load the training data (field measurements + explanatory variables) into a
# dataframe, parsing the "date" column as datetimes.
df = pd.read_csv(
    "data/9_clean_training_data/all_training_data_with_extra_and_locations_and_precipSum.csv",
    parse_dates=["date"],
)

# Sanity check: the file is expected to contain exactly 32783 rows.
assert len(df) == 32783, "The length of the dataframe is not correct"

In [None]:
# This table contains the training data for the model (field + explanatory variables) but as a gee table
training_data = ee.FeatureCollection("projects/ee-indonesia-gwl/assets/all_training_data_with_extra_and_locations_and_precipSum")

# Check the consistency of the dataframe and the table.
# Each getInfo() call below is a blocking request to Earth Engine.
assert len(df.columns) == len(training_data.first().propertyNames().getInfo()), "The number of features in the dataframe and the table are not the same"
# Compare against the dataframe itself (not a repeated hard-coded row count),
# so the check really is "dataframe vs. table" as the message states.
assert len(df) == training_data.size().getInfo(), "The number of rows in the dataframe and the table are not the same"

print("Both the dataframe and the table have the same number of features and rows")

## 2. ✅ Subset of the data to train the model (training data)

In [None]:
# Restrict the training data to the subset selected in the previous analysis.

# PHUs that gave the best results in the previous analysis
# (an earlier candidate list was: [297., 350., 351., 352., 357., ])
best_kalimantan_phus = [350, 351, 357, 379]

# Stations to leave out of the training data
bad_stations = [
    "batok1",
    "batok2",
    "brg11",
    "brg13",
    "brg16",
    "BRG_620309_01",
    "BRG_620309_02",
    "BRG_630805_01",
    "BRG_630708_01",
]

# Keep only the features (in the gee table) whose "phu_id" is one of the best
# PHUs and whose "id" is not one of the bad stations.
training_data = training_data.filter(
    ee.Filter.And(
        ee.Filter.inList("phu_id", best_kalimantan_phus),
        ee.Filter.inList("id", bad_stations).Not(),
    )
)

# Number of features left after filtering (shown as the cell output)
training_data.size().getInfo()

In [None]:
# Train a model using the (filtered) training data and save it to the gee as an asset

# Define a meaningful name for the model; this one records the PHUs used for
# training (350, 351, 357, 379) and that the bad stations were excluded.
model_name = "kalimantan_phu_model_350_351_357_379_no_bad_stations"

# This function exports the classifier to the gee (track the task in https://code.earthengine.google.com/tasks).
# The returned value is kept as the id used later to load the classifier with ee.Classifier.load.
# NOTE(review): presumably the export must have finished before the classifier can be loaded — confirm.
model_gee_id = export_classifier(training_data, model_name)

## 3. 📊 Estimate using the trained model

##############################################

In [None]:
#####################################
# SET PARAMETERS FOR THE ESTIMATION #
#####################################

# Asset with the numbered PHUs and the number of the PHU to estimate
phus_asset_id = "projects/ee-indonesia-gwl/assets/all_phus_numbered"
target_phu_number = 253

# Area of interest: bounding box of the first feature whose "phu_id" matches
# the target PHU number.
target_aoi = ee.FeatureCollection(phus_asset_id).filter(
    ee.Filter.eq("phu_id", target_phu_number)
).first().geometry().bounds()

# Define the range of dates for which to export the images
start_date = "2024-05-01"
end_date = "2024-07-01"

# Dates (as strings) available for the AOI within the date range
all_str_date = get_s1_str_dates(target_aoi, start_date, end_date)

print(f"There are {len(all_str_date)} images/dates to export")

# Only index into the result when it is non-empty; otherwise [0] / [-1] would
# raise an IndexError. len() is used (rather than truthiness) because the
# concrete sequence type returned by get_s1_str_dates is not visible here.
if len(all_str_date) > 0:
    print(f"First date: {all_str_date[0]}")
    print(f"Last date: {all_str_date[-1]}")
else:
    print("No dates found for the selected area and date range: nothing to export")

##############################################

## ⚙️ Run the model 

In [None]:
# Create the tasks!!!

aoi_name = f"PHU_{target_phu_number}"
ee_classifier = ee.Classifier.load(model_gee_id)

# Create one export task per date, start it, and keep the task object.
# Calling .start() inside a list comprehension would store the return value of
# start() instead of the task itself, leaving nothing useful to inspect later.
# NOTE(review): assumes estimate_to_gee returns an ee.batch.Task (whose
# start() returns None) — confirm against gee_scripts.gee.
tasks = []
for target_date in all_str_date:
    task = estimate_to_gee(aoi_name, target_aoi, target_date, ee_classifier)
    task.start()
    tasks.append(task)

In [None]:
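# Run the tasks
# NOTE: the tasks are already started in the cell above (each one is started
# right after it is created), so the line below is not needed and stays
# commented out.

# [task.start() for task in tasks]

# Monitor them in https://code.earthengine.google.com/tasks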

# 🚨 Do not run the code below: it will stop all the tasks in GEE 🚨

In [None]:
from pathlib import Path

# Build the destination asset id for an image: the target image collection
# path joined with the file name (last path component) of the first image.
# NOTE(review): `list_of_images` is not defined anywhere in the visible part of
# this notebook; this cell raises NameError unless it is defined elsewhere.
image_collection = "projects/ee-indonesia-gwl/assets/gwl-modeling/estimation/best_model"
destination_id = Path(image_collection)/Path(list_of_images[0]).name
# Shown as the cell output
str(destination_id)

In [None]:
# Image collection that holds the estimation images of the best model
ic = ee.ImageCollection("projects/ee-indonesia-gwl/assets/gwl-modeling/estimation/best_model")

In [None]:
# Fetch the "date" property of the first image in the collection
# (getInfo() is a blocking request to Earth Engine).
ic.first().get("date").getInfo()

In [None]:
# Count the Earth Engine operations that are currently running or pending.
# The operations are listed once and reused for both counts: this avoids a
# second network request and guarantees both counts come from the same snapshot.
operations = ee.data.listOperations()

running = [operation for operation in operations if operation["metadata"]["state"] == "RUNNING"]
pending = [operation for operation in operations if operation["metadata"]["state"] == "PENDING"]

print(f"Running: {len(running)}, Pending: {len(pending)}")