In [None]:
%load_ext autoreload
%autoreload 2

# Multi-Class Classification

In [None]:
from omegaconf import OmegaConf
import pandas as pd

from src.constants import AOIS_TEST
from src.data import UNOSAT_S1TS_Dataset
from src.classification.model_factory import load_model
from src.classification.trainer import S1TSDD_Trainer

In [None]:
def extract_features(df, start, end, prefix=""):
    """Summarise each row of a time-series frame over a date window, per band.

    Args:
        df: DataFrame whose columns can be label-sliced from ``start`` to
            ``end`` (the notebook uses datetime columns) and whose index has a
            level named ``"band"`` containing the values ``"VV"`` and ``"VH"``.
        start: First column label of the window (passed to ``.loc`` slicing).
        end: Last column label of the window (passed to ``.loc`` slicing).
        prefix: Text inserted between the band and the statistic name in the
            output column labels (e.g. ``"pre"`` gives ``"VV_pre_mean"``).

    Returns:
        DataFrame with the ``"band"`` index level removed and one column per
        (band, statistic) pair, named ``"{band}_{prefix}_{stat}"``; all VV
        columns come first, followed by all VH columns.
    """
    # Restrict to the requested window; columns are sliceable by date labels.
    window = df.loc[:, start:end]

    # Row-wise statistics, in the order they appear in the output columns.
    stat_names = ("mean", "std", "median", "min", "max", "skew", "kurt")
    stats = pd.DataFrame(index=window.index)
    for stat in stat_names:
        stats[stat] = getattr(window, stat)(axis=1)

    # One sub-frame per band, with the band and prefix (eg pre/post/pre_3x3)
    # baked into the column labels, then joined side by side.
    per_band = [
        stats.xs(band, level="band").add_prefix(f"{band}_{prefix}_")
        for band in ("VV", "VH")
    ]
    return pd.concat(per_band, axis=1)


# Experiment configuration: model choice and hyper-parameters, plus the options
# handed to the dataset (test AOIs, damage classes, window sizes, date ranges).
cfg = OmegaConf.create(
    {
        "aggregation_method": "mean",
        "model_name": "random_forest",
        "model_kwargs": {"n_estimators": 100, "n_jobs": 12},
        "data": {
            # UKR5 ... UKR18, i.e. every AOI numbered 1-18 except UKR1-UKR4
            "aois_test": [f"UKR{i}" for i in range(5, 19)],
            "damages_to_keep": [1, 2, 3],
            # window sizes to extract; the author's note lists '1x1', '3x3', '5x5'
            "extract_winds": ["3x3"],
            # percentage of negative labels to add in training set (eg 0.1 for 10%)
            "random_neg_labels": 0.0,
            "time_periods_pos": {
                "pre": ("2021-02-24", "2022-02-24"),
                "post": ("2022-02-24", "2023-02-24"),
            },
            "time_periods_neg": {
                "pre": ("2020-02-24", "2021-02-24"),
                "post": ("2021-02-24", "2022-02-24"),
            },
        },
        "seed": 123,
        "run_name": None,
    }
)

# Build the dataset from the data section of the config, using the
# extract_features function defined earlier in this cell as the extractor.
ds = UNOSAT_S1TS_Dataset(cfg.data, extract_features=extract_features)

In [None]:
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report

# Fit a random forest on the training frame and print per-class metrics on the
# test frame exposed by the dataset.
df = ds.df

# Select the feature columns once, from the training frame, and index both
# frames with this same list. Deriving the list separately for each frame would
# silently feed misaligned features to the model if the two frames ever had a
# different column order or a different set of VV/VH columns; with a shared
# list, a column missing from the test frame raises a KeyError instead.
feature_cols = [c for c in df.columns if c.startswith(("VV", "VH"))]

X = df[feature_cols].values
y = df["damage"].values

# class_weight="balanced" reweights classes inversely to their frequency.
model = RandomForestClassifier(**cfg.model_kwargs, random_state=cfg.seed, class_weight="balanced")

model.fit(X, y)

df_test = ds.df_test
X_test = df_test[feature_cols].values
y_test = df_test["damage"].values
preds = model.predict(X_test)

# NOTE(review): target_names is matched positionally against the sorted unique
# labels found in y_test/preds, and classification_report raises a ValueError
# when the number of classes differs from the number of names. Presumably the
# labels are 0 (no damage) plus the three damage classes kept in
# cfg.data.damages_to_keep -- confirm, and consider passing labels= explicitly.
print(classification_report(y_test, preds, target_names=["no damage", "Destroyed", "severe damage", "moderate damage"]))