In [1]:
%load_ext autoreload
%autoreload 2

In [81]:
from gee_scripts.get_sources import get_explanatory_composite
from gee_scripts.parameters import explain_vars
from gee_scripts.get_sources import get_s1_dates
import datetime

from gee_scripts.directories import get_export_folder
import ee

## 1. Train the classifier in GEE and save the model as an asset

In [9]:
# Earth Engine table asset holding the training samples for the model
training_asset_id = "projects/ee-marortpab/assets/FAO/indonesia/gwl/df_with_models"
training_data = ee.FeatureCollection(training_asset_id)

In [23]:
import ee.batch


def export_classifier(model_name, n_trees=250):
    """Train a random-forest regressor in GEE and export it as an asset.

    The training samples are the features of the notebook-level
    ``training_data`` collection whose ``model_name`` property equals 1.
    The classifier is trained in REGRESSION mode to predict ``gwl_cm`` from
    the ``explain_vars`` properties.

    Args:
        model_name: Name of the ``training_data`` property that flags the
            samples belonging to this model (e.g. ``"model7"``).
        n_trees: Number of trees in the random forest. Defaults to 250, the
            value that used to be hard-coded in this function.

    Raises:
        ValueError: If no sample is flagged for ``model_name``, or if an
            asset already exists at the target asset id.
    """

    features = training_data.filter(ee.Filter.eq(model_name, 1))

    # One round trip to the server; reuse the number for the check and the log.
    n_samples = features.size().getInfo()
    if n_samples == 0:
        # A misspelled or unused model name would otherwise start an export
        # task that can only fail later on the server side.
        raise ValueError(f"No training samples found for model {model_name}")
    print(f"Exporting model {model_name} with {n_samples} samples")

    classifier = ee.Classifier.smileRandomForest(n_trees).setOutputMode('REGRESSION').train(
        features=features,
        classProperty="gwl_cm",
        inputProperties=explain_vars
    )

    # NOTE(review): export_estimation loads ".../models/RandomForest_{model_name}"
    # (no "_trees_" suffix) - confirm that the id built here matches the id
    # that is loaded there.
    description = f"RandomForest_{model_name}_trees_{n_trees}"

    # Asset id of the model inside the "models" export folder
    model_gee_id = str(get_export_folder("models")/description)

    # Refuse to overwrite an existing model asset.
    if ee.data.getInfo(model_gee_id):
        raise ValueError(f"Model {model_gee_id} already exists")

    # The export runs asynchronously on the server: start() only submits it.
    task = ee.batch.Export.classifier.toAsset(classifier, description, model_gee_id)
    task.start()

    print(f"Exported model {model_gee_id}")


In [26]:
# We have identified three potential models: model5, model7, and model17
export_classifier("model7")

Exporting model model7 with 19116 samples
Exported model projects/sepal-ui-421413/assets/gwl-modeling/models/RandomForest_model7


## 2. Estimate using the trained model

https://code.earthengine.google.com/1fd31fe53d5a8cf8b812552f901325a0

## 2.1. Set parameters

In [None]:
# Parameters for the estimation step.
# NOTE(review): export_estimation does not read these two globals; it uses its
# own values (max_days_offset=1, scale=100) - confirm which ones are intended.
max_days_offset = 30
scale = 100
# NOTE(review): no visible cell reads this value; the export loop takes its
# dates from s1_dates instead - confirm whether it is still needed.
target_date = "2022-04-21"

In [30]:
# Collection of PHU polygons; the estimation is restricted to a single PHU.
phus = ee.FeatureCollection("users/marortpab/FAO/SEPAL/2023_trainings/smm/AOI__Province__865_PHUs__INDONESIA")
target_phu = phus.filter(ee.Filter.eq("PHU_NAME", "KHG Sungai Siak - Sungai Kampar")).geometry()
# max_days_offset and scale are defined once, in the parameters cell above,
# so they are not repeated here.




In [110]:
# Dates available for the target PHU (presumably Sentinel-1 acquisitions - confirm
# in get_s1_dates). The cells below divide each entry by 1000 before formatting it,
# i.e. they treat the entries as epoch timestamps in milliseconds.
s1_dates = get_s1_dates(target_phu)

In [114]:
# Preview the first date as a calendar day. The timestamp is interpreted in UTC
# (assumed to be the reference of the millisecond values - TODO confirm against
# get_s1_dates); without an explicit tz, fromtimestamp() uses the machine's local
# timezone and the day can shift by one.
str_date = datetime.datetime.fromtimestamp(
    s1_dates[0] / 1000, tz=datetime.timezone.utc
).strftime('%Y-%m-%d')
str_date

'2015-01-02'

In [115]:
def export_estimation(model_name, target_date, target_phu, max_days_offset=1, scale=100):
    """Build the export task for the estimated GWL image of one date.

    Args:
        model_name: Name of the trained model; used to locate the saved
            classifier asset and to name the output image and folder.
        target_date: Timestamp in milliseconds since the epoch (the format of
            the entries of ``s1_dates``).
        target_phu: Region used both to build the explanatory composite and
            as the export region.
        max_days_offset: Forwarded to ``get_explanatory_composite``. Defaults
            to 1, the value that used to be hard-coded here.
        scale: Export scale forwarded to the export task. Defaults to 100,
            the value that used to be hard-coded here.

    Returns:
        The export task, NOT started; the caller must call ``.start()`` on it.
    """

    # Interpret the timestamp in UTC (assumed reference of the millisecond
    # values - TODO confirm against get_s1_dates). Without an explicit tz,
    # fromtimestamp() uses the local timezone and the day can shift by one.
    str_date = datetime.datetime.fromtimestamp(
        target_date / 1000, tz=datetime.timezone.utc
    ).strftime('%Y-%m-%d')

    # Get explanatory composite closest to target date, restricted to the
    # bands the model was trained on.
    image = get_explanatory_composite(
        target_date=str_date,
        ee_region=target_phu,
        max_days_offset=max_days_offset
    ).select(explain_vars)

    # NOTE(review): the PHU name is hard-coded in the output name even though
    # the region is a parameter - confirm before using another PHU.
    output_image_name = f"{model_name}_PHU_SungaiKampar_{str_date}"
    ee_classifier = ee.Classifier.load(f"projects/sepal-ui-421413/assets/gwl-modeling/models/RandomForest_{model_name}")

    # The image already holds only the explain_vars bands, so it can be
    # classified directly; model and original timestamp are kept as metadata.
    estimated_image = image.classify(ee_classifier).set({"model": model_name, "date": target_date})

    # create export task
    task = ee.batch.Export.image.toAsset(
        image=estimated_image,
        description=output_image_name,
        assetId=str(get_export_folder(output_folder=f"estimation/{model_name}")/output_image_name),
        region=target_phu,
        scale=scale,
    )
    return task

In [116]:
model_name = "model7"

# Prepare (without starting) one export task per date, first ten dates only
tasks = [
    export_estimation(model_name, date_ms, target_phu)
    for date_ms in s1_dates[:10]
]



In [117]:
# Submit every prepared export task. A plain loop is used because start() is
# called only for its side effect; collecting its results just displays a
# list of None values.
for task in tasks:
    task.start()

[None, None, None, None, None, None, None, None, None, None]

# 2. Estimate GWL using the trained model

If you get a rasterio import error, the package is not installed in the virtual environment. To fix it, open a terminal and:

- Go to the gwl-folder (cd "path_to_gwl-modeling_folder")
- run this command: python3 data/init_venv.py

Or in the terminal:

- type: activate_venv
- find the gwl-modeling virtual environment in the list
- type the number of that virtual environment and press Enter
- type: pip install rasterio

In [None]:
from pathlib import Path
import rasterio as rio
import numpy as np
import joblib
from sklearn.ensemble import RandomForestClassifier
from gee_scripts.directories import explanatory_path, output_estimation_path, model_path

In [None]:
input_image_name = Path("WorkshopIndonesia_test_3.tif")

# `model_path` is imported from gee_scripts.directories (presumably the models
# folder) and rebound here to the model file. The guard keeps the cell
# idempotent: re-running it must not append the file name a second time.
model_file_name = "PHU_136.joblib"
if model_path.name != model_file_name:
    model_path = Path(model_path/model_file_name)

# load the model with joblib
# NOTE: joblib files are pickle-based; only load model files from a trusted source.
estimator = joblib.load(model_path)

In [None]:
# Open the explanatory composite
with rio.open(explanatory_path/input_image_name) as src:
    img_array = src.read()
    meta = src.meta

In [None]:
# Move the band axis last so that every pixel becomes one row of features.
transposed_img = np.transpose(img_array, (1, 2, 0))

# Use the image's own band count instead of a hard-coded 28: a different
# count would otherwise raise or silently mix values of different pixels.
n_bands = transposed_img.shape[-1]
reshaped_img = transposed_img.reshape(-1, n_bands)

predicted_values = estimator.predict(reshaped_img)

# Reshape the predicted values back to original shape
predicted_values = predicted_values.reshape(transposed_img.shape[:2])

In [None]:
# Reshape back to 2D grid
output_array = predicted_values.reshape((meta['height'], meta['width']))

# Update metadata for 1 band output
meta.update({'count': 1})

# Save to disk
with rio.open(output_estimation_path/f"{model_name.stem}_{input_image_name.stem}_estimated.tif", 'w', **meta) as dst:
    dst.write(output_array, 1)

# Estimate GWL over one point in multiple dates

In [None]:
import pandas as pd

In [None]:
# Load the cleaned training table and show how many rows it has
training_csv = "data/9_clean_training_data/clean_training_data.csv"
df = pd.read_csv(training_csv)
len(df)

In [None]:
# Drop all stations with fewer than `min_obs` observations
min_obs = 9

# Number of dated observations per station, largest first. The result used to
# be computed and thrown away; it is now kept under a name so it can be
# inspected in its own cell.
obs_per_station = (
    df.groupby('id')
    .agg({'date': 'count'})
    .sort_values(by='date', ascending=False)
    .reset_index()
)

# Keep only the rows of stations that have at least `min_obs` rows
df = df.groupby('id').filter(lambda group: len(group) >= min_obs)
df