In [None]:
%load_ext autoreload
%autoreload 2

# Debug 3-month classification export

In [None]:
import ee
import geemap

from src.utils.gee import init_gee

# init_gee() is called before the remaining imports.
# NOTE(review): presumably this initialises the Earth Engine client so that
# later ee.* calls work — confirm whether the imports below depend on it.
init_gee()

from omegaconf import OmegaConf
from src.gee.constants import ASSETS_PATH
from src.gee.classification.utils import infer_and_compute_metrics
from src.gee.classification.model import get_features_names
import datetime as dt

In [None]:
def get_post_periods(post_periods_3months):
    """Return the post-period date windows used in the config.

    Args:
        post_periods_3months: When truthy, return eight consecutive windows of
            roughly three months each; otherwise return two one-year windows.
            Both variants span 2021-02-24 through 2023-02-23.

    Returns:
        A list of ``(start, end)`` tuples of ``YYYY-MM-DD`` strings.
    """
    if not post_periods_3months:
        return [("2021-02-24", "2022-02-23"), ("2022-02-24", "2023-02-23")]

    # Each window ends the day before the next one starts.
    window_starts = [
        "2021-02-24",
        "2021-05-24",
        "2021-08-24",
        "2021-11-24",
        "2022-02-24",
        "2022-05-24",
        "2022-08-24",
        "2022-11-24",
    ]
    window_ends = [
        "2021-05-23",
        "2021-08-23",
        "2021-11-23",
        "2022-02-23",
        "2022-05-23",
        "2022-08-23",
        "2022-11-23",
        "2023-02-23",
    ]
    return list(zip(window_starts, window_ends))

def get_classifier_id(cfg):
    """Build the asset ID of the tree-exported classifier described by ``cfg``.

    The period tag in the name is derived from the length of the first post
    period: 87-92 days gives "3months", 363-366 days gives "oneyear", and any
    other length gives "other".

    Note: a later cell of this notebook defines another ``get_classifier_id``
    which replaces this one once that cell has run.

    Args:
        cfg: Config exposing ``model_kwargs.n_estimators``, ``train_on_all``,
            ``run_name`` and ``data.time_periods.post`` (a sequence of
            ``(start, end)`` pairs of ``YYYY-MM-DD`` strings).

    Returns:
        The asset ID string.
    """
    n_trees = cfg.model_kwargs.n_estimators

    all_data = ""
    if cfg.train_on_all:
        all_data = "_all_data"

    # Only the first post period is inspected to decide the period tag.
    date_format = "%Y-%m-%d"
    first_post = cfg.data.time_periods.post[0]
    first_day = dt.datetime.strptime(first_post[0], date_format)
    last_day = dt.datetime.strptime(first_post[1], date_format)
    span_days = (last_day - first_day).days

    if 87 <= span_days <= 92:
        period = "3months"
    elif 363 <= span_days <= 366:
        period = "oneyear"
    else:
        period = "other"

    return f"{ASSETS_PATH}s1tsdd_Ukraine/{cfg.run_name}/classifier_{period}_{n_trees}trees{all_data}_export_trees"

def start(post_periods_3months, **kwargs):
    """Train a smile random forest classifier on the exported training features.

    Args:
        post_periods_3months: Forwarded to ``get_post_periods`` to choose the
            post periods stored in the config. The number of post periods in
            turn selects which feature assets ("1year" or "3months") are loaded.
        **kwargs: Stored as ``cfg.model_kwargs`` and forwarded to
            ``ee.Classifier.smileRandomForest``.

    Returns:
        Tuple ``(classifier, fc_train, fc_test)``: the trained classifier and
        the train and test feature collections.
    """
    # UKR5 .. UKR18: AOIs 1-4 are left out of the test list.
    test_aois = [f"UKR{idx}" for idx in range(5, 19)]

    settings = {
        "aggregation_method": "mean",
        "model_name": "random_forest",
        "model_kwargs": dict(kwargs),
        "data": {
            "aois_test": test_aois,
            "damages_to_keep": [1, 2],
            "extract_winds": ["3x3"],  # ['1x1', '3x3', '5x5']
            "time_periods": {
                "pre": [("2020-02-24", "2021-02-23")],  # always only one
                "post": get_post_periods(post_periods_3months),
            },
            "time_periods_inference": {
                "pre": ("2020-02-24", "2021-02-23"),
                "post": ("2022-02-24", "2023-02-23"),
            },
        },
        "reducer_names": ["mean", "stdDev", "median", "min", "max", "skew", "kurtosis"],
        "export_as_trees": True,
        "train_on_all": False,  # train on all damages (train + test split)
        "verbose": 1,
        "seed": 123,
        "run_name": "240301",
    }
    cfg = OmegaConf.create(settings)

    untrained = ee.Classifier.smileRandomForest(**cfg.model_kwargs)

    # Two post periods means the yearly split; anything else is treated as 3-month.
    if len(cfg.data.time_periods.post) == 2:
        period = "1year"
    else:
        period = "3months"

    run_name = cfg.run_name
    collections = {
        split: ee.FeatureCollection(
            ASSETS_PATH + f"s1tsdd_Ukraine/{run_name}/features_ready_{split}_{period}"
        )
        for split in ("train", "test")
    }
    fc_train = collections["train"]
    fc_test = collections["test"]

    print(f"Training on {fc_train.size().getInfo()} samples")
    print(f"Testing on {fc_test.size().getInfo()} samples")

    trained = untrained.train(
        features=fc_train,
        classProperty="label",
        inputProperties=get_features_names(cfg),
    )
    return trained, fc_train, fc_test

# Train the 3-month classifier. maxNodes is written as an integer literal:
# the previous 10e3 is a float (10000.0), while the parameter is an integer
# node count.
clf, fc_train, fc_test = start(
    post_periods_3months=True,
    numberOfTrees=100,
    minLeafPopulation=3,  # alternative tried: minLeafPopulation=5
    maxNodes=10_000,
)

# Evaluate on Test Set

In [None]:
from src.gee.classification.utils import infer_and_compute_metrics

# Evaluate the classifier trained above (`clf`) on the test feature collection.
# NOTE(review): the meaning of the third positional argument (True) is defined
# in src.gee.classification.utils and is not visible here — confirm.
infer_and_compute_metrics(fc_test, clf, True)

## Evaluate a saved classifier on the test set

In [None]:
import ee
from omegaconf import DictConfig, OmegaConf
from typing import List
from tqdm import tqdm

from src.gee.constants import ASSETS_PATH
# NOTE(review): the first import cell of this notebook imports init_gee from
# src.utils.gee, whereas this cell uses src.gee.utils — confirm that both
# module paths exist and refer to the same helper.
from src.gee.utils import init_gee
from src.gee.classification.inference import predict_geo
from src.gee.classification.model import get_classifier_trained
from src.utils.gdrive import get_files_in_folder

# Called before the remaining import and before any ee.* usage in this section.
init_gee()
from src.utils.gdrive import create_drive_folder, create_yaml_file_in_drive_from_config_dict

# Configuration identifying the saved classifier and the test features to load.
# In this notebook it is read by get_classifier_id (n_estimators, train_on_all,
# export_as_trees, run_name, data.time_periods.post) and when building the
# test feature-collection asset path (run_name, data.time_periods.post).
cfg = OmegaConf.create(
    dict(
        aggregation_method="mean",
        model_name="random_forest",
        model_kwargs=dict(
            n_estimators=100,
            min_samples_leaf=1,
            n_jobs=12,
        ),
        data=dict(
            aois_test=[f"UKR{i}" for i in range(1, 19) if i not in [1, 2, 3, 4]],
            damages_to_keep=[1, 2],
            extract_winds=["3x3"],  # ['1x1', '3x3', '5x5']
            time_periods={  # to train
                "pre": ("2020-02-24", "2021-02-23"),  # always only one
                "post": "1year",  # label interpolated into asset names
            },
            time_periods_inference={  # to predict
                "pre": ("2020-02-24", "2021-02-23"),  # always only one
                "post": ("2022-02-24", "2023-02-23"),
            },
        ),
        reducer_names=["mean", "stdDev", "median", "min", "max", "skew", "kurtosis"],
        train_on_all=False,  # train on all damages (train + test split)
        verbose=0,
        export_as_trees=False,
        seed=123,
        # The run name is an identifier interpolated into asset paths, so it is
        # kept as a string (as in the config built inside start()), not an int.
        run_name="240301",
    )
)

# Load classifier
def get_classifier_id(cfg):
    """Return the asset ID of the saved classifier described by ``cfg``.

    Args:
        cfg: Config exposing ``model_kwargs.n_estimators``, ``train_on_all``,
            ``export_as_trees``, ``run_name`` and ``data.time_periods.post``
            (interpolated as-is into the asset name).

    Returns:
        The asset ID string, with "_all_data" and/or "_export_tree" appended
        when the corresponding flags are truthy.
    """
    n_trees = cfg.model_kwargs.n_estimators

    all_data = ""
    if cfg.train_on_all:
        all_data = "_all_data"

    export = ""
    if cfg.export_as_trees:
        # NOTE(review): this suffix is singular ("_export_tree") while the
        # earlier get_classifier_id in this notebook uses "_export_trees" —
        # confirm which one matches the stored asset name.
        export = "_export_tree"

    return f"{ASSETS_PATH}s1tsdd_Ukraine/{cfg.run_name}/classifier_{cfg.data.time_periods.post}_{n_trees}trees{all_data}{export}"

# Resolve the classifier asset ID from cfg and print it so the path can be checked.
asset_id = get_classifier_id(cfg)
print(asset_id)
from src.gee.classification.model import load_classifier

# Load the previously saved classifier from that asset ID; `classifier` is used
# by the evaluation cell further down.
classifier = load_classifier(asset_id)

In [None]:
# Load the test-split features for the run and post-period label given in cfg.
# This rebinds `fc_test`, which an earlier cell also defines.
split = 'test'
asset_path = ASSETS_PATH + f"s1tsdd_Ukraine/{cfg.run_name}/features_ready_{split}_{cfg.data.time_periods.post}"
fc_test = ee.FeatureCollection(asset_path)
# NOTE(review): unlike the print statements in start(), no .getInfo() is called
# here, so presumably the cell displays the Earth Engine object rather than the
# fetched count — confirm this is intended.
fc_test.size()

In [None]:
from src.gee.classification.utils import infer_and_compute_metrics
# Evaluate the loaded classifier on the test features. Unlike the earlier
# evaluation cell, no third argument is passed, so the helper's default applies.
infer_and_compute_metrics(fc_test, classifier)