In [None]:
# Enable IPython's autoreload extension in mode 2, so imported modules
# (e.g. the project code under `src`) are reloaded before each cell executes.
%load_ext autoreload
%autoreload 2

In [None]:
import ee
import geemap

from src.utils.gee import init_gee

# Run the project's Earth Engine setup helper before any `ee` call below.
# NOTE(review): presumably wraps authentication / `ee.Initialize` — confirm in src.utils.gee.
init_gee()

In [None]:
from omegaconf import OmegaConf
import pandas as pd

from src.constants import AOIS_TEST
from src.data import UNOSAT_S1TS_Dataset
from src.classification.model_factory import load_model
from src.classification.trainer import S1TSDD_Trainer

def get_features_extractor(reducer_names):
    """Build a function that turns a time-series frame into per-band statistics.

    Args:
        reducer_names: Container of reducer identifiers. Recognised values are
            "mean", "stdDev", "median", "min", "max", "skew" and "kurtosis";
            anything else is ignored.

    Returns:
        A callable ``extract_features(df, start, end, prefix="")``.
    """
    # (reducer identifier, output column suffix, DataFrame method name).
    # The output column order follows this table, not the order of `reducer_names`.
    reducer_table = (
        ("mean", "mean", "mean"),
        ("stdDev", "std", "std"),
        ("median", "median", "median"),
        ("min", "min", "min"),
        ("max", "max", "max"),
        ("skew", "skew", "skew"),
        ("kurtosis", "kurt", "kurt"),
    )

    def extract_features(df, start, end, prefix=""):
        """Compute row-wise statistics over the columns between `start` and `end`.

        Args:
            df: Frame whose columns are datetimes and whose index has a
                "band" level containing "VV" and "VH".
            start: First date of the column slice (label-based, inclusive).
            end: Last date of the column slice (label-based, inclusive).
            prefix: Tag inserted in the output column names (eg pre/post/pre_3x3).

        Returns:
            Frame with columns named ``{band}_{prefix}_{stat}``: all VV columns
            first, then all VH columns.
        """
        # Columns are datetimes, so a label slice selects the time window directly.
        window = df.loc[:, start:end]

        stats = pd.DataFrame(index=window.index)
        for reducer, column, method in reducer_table:
            if reducer in reducer_names:
                stats[column] = getattr(window, method)(axis=1)

        # One sub-frame per band, with band and prefix baked into the column names.
        per_band = []
        for band in ("VV", "VH"):
            band_stats = stats.xs(band, level="band")
            band_stats.columns = [f"{band}_{prefix}_{col}" for col in band_stats.columns]
            per_band.append(band_stats)
        return pd.concat(per_band, axis=1)

    return extract_features

# Experiment configuration for the run below.
cfg = OmegaConf.create(
    {
        "aggregation_method": "mean",
        "model_name": "random_forest",
        "model_kwargs": {
            "n_estimators": 200,
            "min_samples_leaf": 2,
            "n_jobs": 12,
        },
        "data": {
            # UKR5 .. UKR18 (UKR1-UKR4 left out); alternative subset: ["UKR6", "UKR8", "UKR12", "UKR15"]
            "aois_test": [f"UKR{i}" for i in range(5, 19)],
            "damages_to_keep": [1, 2],
            "extract_winds": ["3x3"],  # ['1x1', '3x3', '5x5']
            "random_neg_labels": 0.0,  # percentage of negative labels to add in training set (eg 0.1 for 10%)
            "time_periods": {
                "pre": [("2020-02-24", "2021-02-23")],
                "post": [
                    ("2021-02-24", "2022-02-23"),
                    ("2022-02-24", "2023-02-23"),
                ],
            },
        },
        "reducer_names": ["mean", "stdDev", "median", "min", "max", "skew", "kurtosis"],
        "seed": 123,
        "run_name": None,
    }
)

# Dataset whose features come from the extractor defined above.
ds = UNOSAT_S1TS_Dataset(
    cfg.data,
    extract_features=get_features_extractor(cfg.reducer_names),
)
model = load_model(cfg)
trainer = S1TSDD_Trainer(
    ds,
    model,
    aggregation=cfg.aggregation_method,
    seed=cfg.seed,
    verbose=1,
)
# Last expression of the cell: its return value (if any) is displayed as the cell output.
trainer.train_and_test(threshold_for_metrics=0.5)

In [None]:
# Take the estimator held by the trainer (after `train_and_test` ran above) and display it.
model = trainer.model
model

In [None]:
import numpy as np
from src.gee.export_local_training import rf_to_strings

# Convert the model into `trees` using the short feature names (presumably one string per tree — confirm in rf_to_strings).
# NOTE(review): `new_features_names` is only assigned in a later cell of this notebook,
# so this cell raises NameError on a fresh top-to-bottom run.
trees = rf_to_strings(model, new_features_names, processes=16)


In [None]:
# Build an Earth Engine classifier from the tree strings produced above.
clf = ee.Classifier.decisionTreeEnsemble(trees)

In [None]:
# Display the classifier object.
clf

In [None]:
# Length of the first entry of `trees` (characters, assuming each entry is a str).
len(trees[0])

In [None]:
import joblib
import geemap.ml as geemap_ml

from src.constants import LOGS_PATH

# Load the saved model of run `run_name` from its logs folder and display it.
# NOTE: this rebinds `model`, replacing the estimator taken from the trainer earlier in the notebook.
# NOTE(review): joblib.load unpickles the file — only load artifacts from a trusted source.
run_name = 'sliding_window_random10'
logs_folder = LOGS_PATH / run_name
model = joblib.load(logs_folder / f'model/{run_name}.joblib')
model

In [None]:
from src.gee.classification.model import get_features_names
# Feature names derived from `cfg` by the project helper; displayed for inspection.
# NOTE(review): presumably in the same order as the columns the model was trained on — confirm.
features_names = get_features_names(cfg)
features_names

In [None]:
# rename all features with short alphabetic names, eg 'a', 'b', 'c', ...
def new_name(i):
    """Return a short, unique, purely alphabetic name for feature index `i`.

    Indices 0..25 map to 'a'..'z' and 26..51 to 'A'..'Z'. From 52 onwards the
    name grows to several letters ('aa', 'ab', ..., 'aZ', 'ba', ...), so every
    index gets a distinct name made only of ASCII letters.

    Args:
        i: Zero-based feature index (non-negative integer).

    Returns:
        The short name as a str.

    Raises:
        ValueError: If `i` is negative.
    """
    if i < 0:
        raise ValueError(f"feature index must be non-negative, got {i}")

    alphabet = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
    base = len(alphabet)

    # Bijective base-52 numbering (like spreadsheet column labels): work on
    # i + 1 and subtract 1 at every step so that no "zero digit" is needed.
    remaining = i + 1
    letters = []
    while remaining > 0:
        remaining, digit = divmod(remaining - 1, base)
        letters.append(alphabet[digit])
    return "".join(reversed(letters))
# Map each original feature name to its short replacement, by position in `features_names`; display the mapping.
d_new_names = {k: new_name(i) for i, k in enumerate(features_names)}
d_new_names

In [None]:
# Short names as a list, in the insertion order of `d_new_names` (i.e. the order of `features_names`).
new_features_names = list(d_new_names.values())

In [None]:
# NOT WORKING

# trees = geemap_ml.rf_to_strings(model, new_features_names, processes=16, output_mode="PROBABILITY")
# # save as txt file
# with open(f"classifier_{run_name}.txt", "w") as f:
#     f.write(trees)
# ee_classifier = geemap_ml.strings_to_classifier(trees)

In [None]:
# import numpy as np
# from src.data.gee.local_training import rf_to_strings

# trees = rf_to_strings(model, new_features_names, processes=16)

# import json

# d_trees = {i: t for i, t in enumerate(trees)}
# with open(f"rf_{run_name}.json", "w") as fp:
#     json.dump(d_trees, fp, indent=4)

In [None]:
import json

# Read the trees back from `rf_<run_name>.json` (path relative to the working directory).
# The file holds a JSON object; its values are taken in file order as the list of trees.
with open(f"rf_{run_name}.json", "r") as fp:
    d_trees = json.load(fp)
trees = list(d_trees.values())

In [None]:
# Wrap every tree in an ee.String and build the Earth Engine classifier from them.
ee_strings = [ee.String(t) for t in trees]
classifier = ee.Classifier.decisionTreeEnsemble(ee_strings)

In [None]:
# Size of the first tree once UTF-8 encoded, in megabytes (bytes / 10**6).
len(trees[0].encode('utf-8')) / 10**6

In [None]:
# Take the 'trees' entry of the classifier's explain() dictionary and wrap it as an ee.List.
decision_trees = ee.List(classifier.explain().get('trees'))

In [None]:
# https://gis.stackexchange.com/questions/432355/how-to-save-random-forest-classifier-within-gee-that-can-be-called-later-for-cla

def encode_feature_collection(value):
    """Pack an Earth Engine value into a FeatureCollection of string chunks.

    The value is JSON-encoded with `ee.String.encodeJSON`, the resulting string
    is cut into chunks of `max_length` characters, and the chunks are stored as
    properties of features (at most `max_properties` per feature, with keys
    '1', '2', ...). Every feature uses the dummy geometry Point(0, 0).

    Args:
        value: The Earth Engine object to encode (passed to `ee.String.encodeJSON`).

    Returns:
        An `ee.FeatureCollection` holding the chunks as feature properties.
    """
    string = ee.String.encodeJSON(value)
    string_length = string.length()
    max_length = 100000  # characters per chunk (one chunk = one property value)
    max_properties = 1000  # chunks per feature

    # Chunk start offsets are 0, max_length, 2 * max_length, ... up to string_length.
    # An offset that is not strictly below the string length yields None instead of a slice.
    # NOTE(review): the slice end is not clamped to the string length — presumably
    # String.slice tolerates an end past the string; confirm.
    # NOTE(review): the filter below only removes empty-string items; whether a None
    # item survives map()/filter() is not visible here — check the case where the JSON
    # length is an exact multiple of max_length.
    values = ee.List.sequence(0, string_length, max_length).map(
        lambda start: ee.Algorithms.If(
            ee.Number(start).lt(string_length),
            string.slice(ee.Number(start), ee.Number(start).add(max_length)),
            None
        )
    ).filter(ee.Filter.neq('item', ''))

    number_of_properties = values.size()

    def create_feature(start):
        # Build one feature from the chunks values[start:end], keyed '1'..'k'.
        start = ee.Number(start)
        end = start.add(max_properties).min(number_of_properties)
        property_values = values.slice(start, end)
        property_keys = ee.List.sequence(1, property_values.size()).map(lambda i: ee.Number(i).format('%d'))
        properties = ee.Dictionary.fromLists(property_keys, property_values)
        return ee.Feature(ee.Geometry.Point([0, 0]), properties)

    # One feature per group of max_properties chunks; features without a property '1' are filtered out.
    return ee.FeatureCollection(ee.List.sequence(0, values.size(), max_properties).map(create_feature).filter(ee.Filter.notNull(['1'])))


In [None]:
# Pack the classifier's trees into a FeatureCollection of string chunks so they can be exported as a table.
trees_encoded = encode_feature_collection(decision_trees)

In [None]:
from src.data.gee.constants import ASSETS_PATH

# Start a batch task exporting the encoded trees as a table asset.
# NOTE(review): the classifier export in the next cell uses the same assetId
# (`models_trained/rf_<run_name>`); the two exports presumably cannot both succeed —
# confirm the intended asset name (the task description says 'decision-trees_...').
task = ee.batch.Export.table.toAsset(
    trees_encoded,
    description=f'decision-trees_{run_name}',
    assetId = ASSETS_PATH + f'models_trained/rf_{run_name}'
)
task.start()

In [None]:
# from src.data.gee.constants import ASSETS_PATH

# Start a batch task exporting the classifier itself as an asset.
# Relies on `ASSETS_PATH` imported in the previous cell (the import above is commented out).
# NOTE(review): same assetId as the table export in the previous cell, and `task` is rebound here —
# confirm which of the two exports is meant to own `models_trained/rf_<run_name>`.
task = ee.batch.Export.classifier.toAsset(
    classifier,
    description = f'rf_{run_name}',
    assetId = ASSETS_PATH + f'models_trained/rf_{run_name}',
)
task.start()