In [None]:
# Enable IPython's autoreload extension in mode 2, so imported modules are
# reloaded before each cell execution and edits to local code (e.g. the
# gee_scripts package) are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
from gee_scripts.get_sources import get_explanatory_composite
from gee_scripts.parameters import explain_vars
import ee
# Start the Earth Engine client session; the ee.* calls in later cells depend on it.
# NOTE(review): presumably requires Earth Engine credentials to already be
# configured on this machine — confirm.
ee.Initialize()

# 1. Get explanatory composite

## 1.1. Set parameters

In [None]:
# --- Composite request -------------------------------------------------------
# Date the composite should be closest to, and the offset (in days) handed to
# get_explanatory_composite as `max_days_offset`.
target_date = "2022-04-21"
max_days_offset = 30

# --- Export settings ---------------------------------------------------------
# `scale` is forwarded to the Drive export; the image name is used both as the
# task description and as the exported file prefix.
scale = 10
output_image_name = "Test_area_2022-04-21_PHU_136_real"

# --- Area of interest --------------------------------------------------------
# Single-ring rectangle given as [x, y] vertex pairs
# (presumably longitude/latitude in degrees — confirm).
ee_region = ee.Geometry.Polygon(
    [
        [
            [113.95619838694482, -1.917946314958298],
            [113.95619838694482, -2.1656677599461855],
            [114.10520046214013, -2.1656677599461855],
            [114.10520046214013, -1.917946314958298],
        ]
    ]
)

In [None]:
# Request the explanatory composite for the target date over the region,
# using the configured day offset.
composite = get_explanatory_composite(
    target_date=target_date,
    ee_region=ee_region,
    max_days_offset=max_days_offset,
)

# Keep only the bands listed in `explain_vars`.
image = composite.select(explain_vars)

In [None]:
# Settings for the Google Drive export. The same name is used for the task
# description and for the exported file prefix.
export_options = {
    "image": image,
    "description": output_image_name,
    "folder": "WorkshopIndonesia",
    "fileNamePrefix": output_image_name,
    "region": ee_region,
    "scale": scale,
    "maxPixels": 1e13,
}

# Build the export task, then submit it.
task = ee.batch.Export.image.toDrive(**export_options)
task.start()

# 2. Estimate GWL using the trained model

If you get a rasterio error, it is because the package is not installed in our virtual environment. To fix it, open a terminal and:

- Go to the gwl-folder
- run this command: python3 data/init_venv.py

Or in the 

In [1]:
from pathlib import Path
import rasterio as rio
import numpy as np
import joblib
from sklearn.ensemble import RandomForestClassifier

In [14]:
# Folder the explanatory composite is read from, and folder the estimated
# raster is written to. Create each one if it is missing (the parent `data`
# folder must already exist).
explanatory_path = Path("data/11_explanatory_composites")
output_estimation_path = Path("data/12_estimated_gwl")
for folder in (explanatory_path, output_estimation_path):
    folder.mkdir(exist_ok=True)

# File name of the composite (relative to `explanatory_path`) and location of
# the serialized model.
input_image_name = Path("WorkshopIndonesia_test_3.tif")
model_name = Path("data/10_model/PHU_136.joblib")

# Deserialize the trained estimator.
# NOTE(review): joblib files are pickle-based — only load models from a
# trusted source.
estimator = joblib.load(model_name)

In [15]:
# Read the explanatory composite: keep the dataset metadata (reused later when
# writing the estimation) and load every band into memory.
with rio.open(explanatory_path / input_image_name) as src:
    meta = src.meta
    # With no arguments, read() returns all bands as one array; the transpose
    # in the next cell treats axis 0 as the band axis.
    img_array = src.read()

In [16]:
# Move the band axis last so that each pixel holds its band values
# contiguously: (bands, rows, cols) -> (rows, cols, bands).
transposed_img = np.transpose(img_array, (1, 2, 0))

# Flatten the grid into a 2-D table with one row per pixel and one column per
# band. The column count is taken from the array itself rather than being
# hard-coded, so a composite with a different number of bands is never silently
# re-chunked into wrongly sized rows; a mismatch with the model is then
# reported by the estimator instead of producing scrambled predictions.
n_bands = transposed_img.shape[-1]
reshaped_img = transposed_img.reshape(-1, n_bands)

# One prediction per pixel row.
predicted_values = estimator.predict(reshaped_img)

# Reshape the predicted values back to the (rows, cols) layout of the raster.
predicted_values = predicted_values.reshape(transposed_img.shape[:2])



In [17]:
# Lay the predictions out on the grid described by the source metadata.
grid_shape = (meta['height'], meta['width'])
output_array = predicted_values.reshape(grid_shape)

# The estimation has a single band; every other metadata entry is reused from
# the input raster.
# NOTE(review): 'dtype' is inherited from the input image — confirm it suits
# the predicted values.
meta.update(count=1)

# Output file is named after the model and the input image.
output_file = output_estimation_path / f"{model_name.stem}_{input_image_name.stem}_estimated.tif"

# Write the predictions as band 1.
with rio.open(output_file, 'w', **meta) as dst:
    dst.write(output_array, 1)

# 3. Estimate GWL over one point in multiple dates

In [None]:
import pandas as pd

In [None]:
# Load the clean training data CSV and display the number of rows read.
training_csv = "data/9_clean_training_data/clean_training_data.csv"
df = pd.read_csv(training_csv)
df.shape[0]

In [None]:
# Drop all stations with fewer than `min_obs` observations.
min_obs = 9

# Keep only the rows whose station (`id`) has at least `min_obs` rows.
# The per-station count table that used to be computed here was neither stored
# nor displayed (only the last expression of a cell is shown), so that wasted
# group-by/sort has been removed.
df = df.groupby('id').filter(lambda group: len(group) >= min_obs)
df