# Model Evaluation on Breizhcrops dataset

### Dataset split:

- Train: regions FRH01, FRH02, FRH03
- Test: region FRH04

Only the L1C processing level is used for now.


In [29]:
import os
import numpy as np
import pandas as pd
from tqdm.auto import tqdm

import sys
sys.path.append("..")
from aitlas.datasets import BreizhCropsDataset
from aitlas.utils import get_class

In [30]:

def _breizhcrops_config(regions):
    """Return a fresh BreizhCropsDataset config dict for the given regions.

    Every call creates new containers, so the configs built from it share no
    mutable state with each other.
    """
    return {
        "regions": list(regions),
        "root": "../data/breizhcrops_dataset",
        "year": 2017,
        "level": "L1C",
        "batch_size": 1024,
        "shuffle": False,
        "num_workers": 4,
        "transforms": ["aitlas.transforms.SelectBands"],
    }


# Held-out region used for evaluation.
test_dataset_config = _breizhcrops_config(["frh04"])
# The three remaining regions used for training.
train_dataset_config = _breizhcrops_config(["frh01", "frh02", "frh03"])


## Explore train dataset 

In [None]:
# Build the training dataset from train_dataset_config (regions frh01-frh03).
train_dataset = BreizhCropsDataset(train_dataset_config)

# NOTE(review): show_samples() presumably renders a preview of individual samples - confirm in aitlas.
train_dataset.show_samples()

In [None]:
# Keep the object returned by show_timeseries(100) for the training set.
# NOTE(review): 100 is presumably the index of the sample to plot - TODO confirm.
fig1 = train_dataset.show_timeseries(100)

In [None]:
# Class distribution of the training set, first as a table and then as a bar chart.
# The table call is not the last expression of the cell, so its return value is not displayed automatically.
train_dataset.data_distribution_table()
fig2 = train_dataset.data_distribution_barchart()

In [None]:
# Parcel distribution table of the training set; shown as the cell output.
train_dataset.parcel_distribution_table()

## Explore test dataset

In [None]:
# Build the test dataset from test_dataset_config (region frh04).
test_dataset = BreizhCropsDataset(test_dataset_config)

# NOTE(review): show_samples() presumably renders a preview of individual samples - confirm in aitlas.
test_dataset.show_samples()

In [None]:
# Keep the object returned by show_timeseries(100) for the test set.
# This rebinds fig1, so the training-set figure is no longer reachable under that name.
fig1 = test_dataset.show_timeseries(100)

In [None]:
# Class distribution of the test set, first as a table and then as a bar chart.
# This rebinds fig2, so the training-set bar chart is no longer reachable under that name.
test_dataset.data_distribution_table()
fig2 = test_dataset.data_distribution_barchart()

In [None]:
# Parcel distribution table of the test set; shown as the cell output.
test_dataset.parcel_distribution_table()


# Training the models

The models must be trained before running this notebook. Train each one with its `breizhcrops_train_and_evaluate_<modelname>.json` config file:

```
python -m aitlas.run configs/breizhcrops/breizhcrops_train_and_evaluate_<modelname>.json
```


The following models were included in this evaluation:

```
"OmniScaleCNN", "TempCNN","MSResNet", "InceptionTime", "LSTM","StarRNN","TransformerEncoder"

```

In [None]:
import sklearn.metrics

def calc_metrics(y_true, y_pred):
    """Compute the classification scores reported in this notebook.

    Args:
        y_true: ground-truth labels.
        y_pred: predicted labels, aligned with ``y_true``.

    Returns:
        dict with the keys ``accuracy`` and ``kappa`` followed by
        ``f1_*``, ``recall_*`` and ``precision_*``, each for the
        ``micro``, ``macro`` and ``weighted`` averaging modes.
    """
    scores = {
        "accuracy": sklearn.metrics.accuracy_score(y_true, y_pred),
        "kappa": sklearn.metrics.cohen_kappa_score(y_true, y_pred),
    }

    # Scorers that take an ``average`` argument, in the order their keys appear.
    averaged_scorers = (
        ("f1", sklearn.metrics.f1_score),
        ("recall", sklearn.metrics.recall_score),
        ("precision", sklearn.metrics.precision_score),
    )
    for prefix, scorer in averaged_scorers:
        for averaging in ("micro", "macro", "weighted"):
            scores[f"{prefix}_{averaging}"] = scorer(y_true, y_pred, average=averaging)

    return scores

Set parameters for best models

In [None]:

# Hyperparameters of the selected configuration for each architecture.
# The keys are the run names: load() looks a run's folder name up in this dict,
# so each key must match the name of that model's checkpoint folder.
# Every model is configured with 13 input dimensions and 9 output classes.
model_configs = {
    "inceptiontime": {
        "model": {
            "classname": "aitlas.models.InceptionTime",
            "config": {
                "input_dim": 13,
                "num_classes": 9,
                "learning_rate": 0.00896,
                "weight_decay": 0.00000222,
                "num_layers": 3,
                "hidden_dims": 128,
                "metrics": ["accuracy", "f1_score", "kappa"],
            },
        }
    },
    "lstm": {
        "model": {
            "classname": "aitlas.models.LSTM",
            "config": {
                "input_dim": 13,
                "num_classes": 9,
                "learning_rate": 0.00988,
                "weight_decay": 0.000000526,
                "num_layers": 4,
                "hidden_dims": 128,
                "dropout": 0.5713,
                "metrics": ["accuracy", "f1_score", "kappa"],
            },
        }
    },
    "msresnet": {
        "model": {
            "classname": "aitlas.models.MSResNet",
            "config": {
                "input_dim": 13,
                "num_classes": 9,
                # Python boolean: the JSON spelling `false` is an undefined name here.
                "pretrained": False,
                "learning_rate": 0.000000627,
                "weight_decay": 0.00000475,
                "hidden_dims": 32,
                "metrics": ["accuracy", "f1_score", "kappa"],
            },
        }
    },
    # NOTE(review): these values are identical to the "inceptiontime" entry -
    # confirm they are the tuned StarRNN hyperparameters and not a copy-paste.
    "starrnn": {
        "model": {
            "classname": "aitlas.models.StarRNN",
            "config": {
                "input_dim": 13,
                "num_classes": 9,
                "learning_rate": 0.00896,
                "weight_decay": 0.00000222,
                "num_layers": 3,
                "hidden_dims": 128,
                "metrics": ["accuracy", "f1_score", "kappa"],
            },
        }
    },
    "oscnn": {
        "model": {
            "classname": "aitlas.models.OmniScaleCNN",
            "config": {
                "input_dim": 13,
                "num_classes": 9,
                "learning_rate": 0.001066,
                "weight_decay": 0.000000225,
                "metrics": ["accuracy", "f1_score", "kappa"],
            },
        }
    },
    "transformer": {
        "model": {
            "classname": "aitlas.models.TransformerModel",
            "config": {
                "input_dim": 13,
                "num_classes": 9,
                "learning_rate": 0.00131,
                "d_model": 64,
                "dropout": 0.4,
                "metrics": ["accuracy", "f1_score", "kappa"],
            },
        }
    },
    "tempcnn": {
        "model": {
            "classname": "aitlas.models.TempCNN",
            "config": {
                "input_dim": 13,
                "num_classes": 9,
                "learning_rate": 0.000238,
                "weight_decay": 0.0000518,
                "kernel_size": 7,
                "hidden_dims": 128,
                "dropout": 0.18,
                "metrics": ["accuracy", "f1_score", "kappa"],
            },
        }
    },
}



Expected experiment folder structure (one sub-folder of checkpoints per model):
```
examples/
    experiment/
        breizhcrops/
            <model>/
                checkpoint_<ts>_.pth.tar
            ...
```

In [None]:
def _find_checkpoint(run_dir):
    """Return the path of the checkpoint file stored in ``run_dir``.

    The fixed name ``checkpoint_pth.tar`` is used when it exists. Otherwise the
    lexicographically last file matching ``checkpoint*.tar`` is returned, which
    covers timestamped names such as ``checkpoint_<ts>_.pth.tar``.

    Raises:
        FileNotFoundError: if ``run_dir`` contains no checkpoint file.
    """
    import glob  # local import: only this helper needs it

    exact_path = os.path.join(run_dir, "checkpoint_pth.tar")
    if os.path.isfile(exact_path):
        return exact_path

    pattern = os.path.join(glob.escape(run_dir), "checkpoint*.tar")
    candidates = sorted(glob.glob(pattern))
    if not candidates:
        raise FileNotFoundError(f"no checkpoint file found in {run_dir!r}")
    # NOTE(review): assumes timestamps sort lexicographically, so the last
    # name is the most recent checkpoint - confirm against the naming scheme.
    return candidates[-1]


def load(run, logdir=None, dataset=None):
    """Run the stored model of one run on the test data.

    Args:
        run: name of the run; must be a key of ``model_configs`` and the name
            of a sub-folder of ``logdir`` that holds the checkpoint.
        logdir: folder containing the run sub-folders. Defaults to the
            notebook-level ``logdir`` variable.
        dataset: dataset to predict on. Defaults to a new
            ``BreizhCropsDataset(test_dataset_config)``.

    Returns:
        ``(rs, y_score)`` where ``rs`` is a DataFrame with the columns
        ``y_pred`` and ``y_true`` indexed by ``field_ids``, and ``y_score``
        is the third value returned by ``model.predict``.
    """
    if logdir is None:
        # Fall back to the notebook-level variable of the same name.
        logdir = globals()["logdir"]
    if dataset is None:
        dataset = BreizhCropsDataset(test_dataset_config)

    run_config = model_configs[run]["model"]
    model = get_class(run_config["classname"])(run_config["config"])
    model.prepare()
    model.load_model(_find_checkpoint(os.path.join(logdir, run)))

    # NOTE(review): the third return value is presumably the per-class scores - confirm in aitlas.
    y_true, y_pred, y_score = model.predict(dataset)

    # predict() exposes no parcel identifiers here, so rows are indexed by position.
    # TODO: replace with the real field ids if the dataset provides them.
    field_ids = np.arange(len(y_true))
    rs = pd.DataFrame(
        {"y_pred": y_pred, "y_true": y_true},
        index=pd.Index(field_ids, name="field_ids"),
    )
    return rs, y_score


def load_table(logdir):
    """Evaluate every configured run found in ``logdir``.

    A run is a sub-folder of ``logdir`` whose name is a key of
    ``model_configs``; files and other folders are ignored. Runs are processed
    in sorted order so the column order does not depend on ``os.listdir``.

    Args:
        logdir: folder containing one sub-folder per run.

    Returns:
        DataFrame with one row per reported metric and one column per run.

    Raises:
        ValueError: if ``logdir`` contains no configured run.
    """
    runs = sorted(
        run
        for run in os.listdir(logdir)
        if run in model_configs and os.path.isdir(os.path.join(logdir, run))
    )
    if not runs:
        raise ValueError(f"no configured run folders found in {logdir!r}")

    # Build the test dataset once and share it between all runs.
    dataset = BreizhCropsDataset(test_dataset_config)

    stats = []
    for run in runs:
        rs, _ = load(run, logdir=logdir, dataset=dataset)
        stat = calc_metrics(rs.y_true, rs.y_pred)
        stat["model"] = run
        stats.append(stat)
    stats = pd.DataFrame(stats).set_index("model")

    # Metric key produced by calc_metrics -> row label used in the report.
    row_labels = {
        "accuracy": "overall accuracy",
        "recall_macro": "average accuracy",
        "f1_macro": "class-mean f-score",
        "f1_weighted": "weighted f-score",
        "kappa": "kappa-metric",
    }
    return stats.T.loc[list(row_labels)].rename(index=row_labels)

### Compile L1C Results

In [None]:
# Root folder holding one sub-folder of checkpoints per model.
logdir = "experiment/breizhcrops"

# The L1C evaluation uses a single fixed split (train on FRH01-FRH03, test on
# FRH04), so there is exactly one fold. It is defined here explicitly so the
# cell also runs on a fresh kernel.
fold = 1

l1tables = []
# Transpose so that each row is a model and each column a metric.
table = load_table(logdir).T
table["fold"] = fold
l1tables.append(table)


In [None]:
# Aggregate the per-fold L1C tables into a mean and a standard deviation per model.
l1_results = pd.concat(l1tables)
meantable = l1_results.groupby(["model"]).mean()
# The sample standard deviation is NaN for a model that has a single fold.
stdtable = l1_results.groupby(["model"]).std()

metrics = ["overall accuracy","average accuracy","class-mean f-score","weighted f-score","kappa-metric"]
# Column order of the LaTeX table. The tables are indexed by run name, i.e. by
# the keys of model_configs, so those keys are used here.
models = ["oscnn", "tempcnn", "msresnet", "inceptiontime", "lstm", "starrnn", "transformer"]

# Print one LaTeX table row per metric, each cell formatted as $mean^{\pm std}$.
for metric in metrics:
    entries = list()

    for model in models:
        entries.append("$" + f"{meantable.loc[model,metric]:.2f}"+"^{\\pm "+ f"{stdtable.loc[model,metric]:.2f}" + "}$")
    row = f"{metric} & & " + " & ".join(entries)
    # LaTeX row terminator: two backslashes, written with valid escape sequences.
    row += " \\\\ "
    print(row)

In [None]:
# Show the L1C scores per (model, fold) pair, i.e. the values behind the mean/std table.
pd.concat(l1tables).groupby(["model","fold"]).first()

### Compile L2A Results

In [None]:
# Collect one results table per fold (1-4), each read from <logdir>/L2A/<fold>.
# NOTE(review): load() builds its dataset from test_dataset_config, whose "level" is "L1C";
# confirm that the L2A checkpoints are meant to be evaluated against that data.
l2tables = []
for fold in [1,2,3,4]:
    # Transpose so that each row is a model and each column a metric.
    table = load_table(f"{logdir}/L2A/{fold}").T
    table["fold"] = fold
    l2tables.append(table)

In [None]:

# Aggregate the per-fold L2A tables into a mean and a standard deviation per model.
l2_results = pd.concat(l2tables)
meantable = l2_results.groupby(["model"]).mean()
# The sample standard deviation is NaN for a model that has a single fold.
stdtable = l2_results.groupby(["model"]).std()

metrics = ["overall accuracy","average accuracy","class-mean f-score","weighted f-score","kappa-metric"]
# Column order of the LaTeX table. The tables are indexed by run name, i.e. by
# the keys of model_configs, so those keys are used here.
models = ["oscnn", "tempcnn", "msresnet", "inceptiontime", "lstm", "starrnn", "transformer"]

# Print one LaTeX table row per metric, each cell formatted as $mean^{\pm std}$.
for metric in metrics:
    entries = list()

    for model in models:
        entries.append("$" + f"{meantable.loc[model,metric]:.2f}"+"^{\\pm "+ f"{stdtable.loc[model,metric]:.2f}" + "}$")
    row = f"{metric} & & " + " & ".join(entries)
    # LaTeX row terminator: two backslashes, written with valid escape sequences.
    row += " \\\\ "
    print(row)

In [None]:
# Show the L2A scores per (model, fold) pair, i.e. the values behind the mean/std table.
pd.concat(l2tables).groupby(["model","fold"]).first()