# Train and Evaluate Models


In [1]:
import sys
sys.path.append("..")

import breizhcrops

from breizhcrops.models.LongShortTermMemory import LSTM
from breizhcrops.models.TransformerEncoder import TransformerEncoder
from breizhcrops.models.TempCNN import TempCNN
from breizhcrops.models.MSResNet import MSResNet

from breizhcrops import BreizhCrops
import torch.optim
from tqdm import tqdm
import numpy as np

#### BreizhCrops Pytorch Dataloader

In [2]:
root = "../data"

padded_value = -1
sequencelength = 45

bands = ['B1', 'B10', 'B11', 'B12', 'B2', 'B3', 'B4', 'B5', 'B6', 'B7', 'B8',
   'B8A', 'B9', 'QA10', 'QA20', 'QA60', 'doa']

selected_bands = ['B1', 'B10', 'B11', 'B12', 'B2', 'B3', 'B4', 'B5', 'B6', 'B7', 'B8', 'B8A', 'B9']

selected_band_idxs = np.array([bands.index(b) for b in selected_bands])

device = torch.device("cpu")

def transform(x):
    x = x[x[:, 0] != padded_value, :] # remove padded values

    # choose selected bands
    x = x[:,selected_band_idxs] * 1e-4 # scale reflectances to 0-1

    # choose with replacement if sequencelength smaller als choose_t
    replace = False if x.shape[0] >= sequencelength else True
    idxs = np.random.choice(x.shape[0], sequencelength, replace=replace)
    idxs.sort()

    x = x[idxs]

    return torch.from_numpy(x).type(torch.FloatTensor).to(device)

def target_transform(y):
    y = frh01.mapping.loc[y].id
    return torch.tensor(y, dtype=torch.long, device=device)

frh01 = BreizhCrops(root=root, region="frh01", transform=transform, target_transform=transform, padding_value=padded_value)
frh04 = BreizhCrops(root=root, region="frh04", transform=transform, target_transform=transform, padding_value=padded_value)
#frh02 = BreizhCrops(root=root, region="frh02", transform=transform, target_transform=transform, padded_value=padded_value)
#frh03 = BreizhCrops(root=root, region="frh03", transform=transform, target_transform=transform, padded_value=padded_value)


Initializing BreizhCrops region frh01
Initializing BreizhCrops region frh04


In [6]:
padded_value = -1
sequencelength = 45

bands = ['B1', 'B10', 'B11', 'B12', 'B2', 'B3', 'B4', 'B5', 'B6', 'B7', 'B8',
   'B8A', 'B9', 'QA10', 'QA20', 'QA60', 'doa']

selected_bands = ['B1', 'B10', 'B11', 'B12', 'B2', 'B3', 'B4', 'B5', 'B6', 'B7', 'B8', 'B8A', 'B9']

selected_band_idxs = np.array([bands.index(b) for b in selected_bands])

def transform(x):
    x = x[x[:, 0] != padded_value, :] # remove padded values

    # choose selected bands
    x = x[:,selected_band_idxs] * 1e-4 # scale reflectances to 0-1

    # choose with replacement if sequencelength smaller als choose_t
    replace = False if x.shape[0] >= sequencelength else True
    idxs = np.random.choice(x.shape[0], sequencelength, replace=replace)
    idxs.sort()

    x = x[idxs]

    return torch.from_numpy(x).type(torch.FloatTensor).to(device)

def target_transform(y):
    y = frh01.mapping.loc[y].id
    return torch.tensor(y, dtype=torch.long, device=device)

datapath = "../data" # "/data2/Breizhcrops"

frh04 = breizhcrops.BreizhCrops(region="frh04", root=datapath, transform=transform,
                                target_transform=target_transform, padding_value=padded_value)
frh01 = breizhcrops.BreizhCrops(region="frh01", root=datapath, transform=transform,
                                target_transform=target_transform, padding_value=padded_value)
frh02 = breizhcrops.BreizhCrops(region="frh02", root=datapath, transform=transform,
                                target_transform=target_transform, padding_value=padded_value)
frh03 = breizhcrops.BreizhCrops(region="frh03", root=datapath, transform=transform,
                                target_transform=target_transform, padding_value=padded_value)



Initializing BreizhCrops region frh04
Initializing BreizhCrops region frh01
Initializing BreizhCrops region frh02
Initializing BreizhCrops region frh03


NameError: name 'DataLoader' is not defined

## Torch Dataloaders

In [7]:
from torch.utils.data import DataLoader
frh01frh02 = torch.utils.data.ConcatDataset([frh01,frh02])
traindataloader = DataLoader(frh01frh02, batch_size=256, shuffle=True, num_workers=0)
valdataloader = DataLoader(frh03, batch_size=256, shuffle=False, num_workers=0)
testdataloader = DataLoader(frh04, batch_size=256, shuffle=False, num_workers=0)

In [12]:
class AverageMetric:
    def __init__(self):
        self.values = list()
    
    def add(self, new):
        self.values.append(new)
    
    def get(self):
        return np.array(self.values).mean()
    
def train(model, optimizer, dataloader, epochs):
    model.train()

    if torch.cuda.is_available():
        model = model.cuda()

    for epoch in range(epochs):
        loss_log = AverageMetric()

        for iteration, data in enumerate(dataloader):
            optimizer.zero_grad()

            inputs, targets = data

            if torch.cuda.is_available():
                inputs = inputs.cuda()
                targets = targets.cuda()

            logprobabilities = model.forward(inputs)

            loss = torch.nn.functional.nll_loss(logprobabilities, targets)
            loss_log.add(loss.cpu().detach().numpy())

            loss.backward()
            optimizer.step()

        print("Epoch {}: loss {:.2f}".format(epoch,loss_log.get()))

    return model

## Train the Long Short-Term Memory Network

In [None]:
lstm = LSTM(input_dim=13, hidden_dims=128, num_classes=13, num_layers=3, dropout=0.2, bidirectional=True,
                 use_layernorm=True)

epochs=5

optimizer = torch.optim.Adam(
        filter(lambda x: x.requires_grad, lstm.parameters()),
        betas=(0.9, 0.98), eps=1e-09)

lstm = train(lstm, optimizer, traindataloader, epochs)

## Train the Transformer Encoder

In [5]:
transformer = TransformerEncoder(in_channels=13, len_max_seq=50,
    d_word_vec=128, d_model=128, d_inner=512,
    n_layers=4, n_head=4, d_k=32, d_v=32,
    dropout=0.2, nclasses=13)

workers=1
epochs=5
warmup_steps = 500

optimizer = models.transformer.Optim.ScheduledOptim(
    torch.optim.Adam(
        filter(lambda x: x.requires_grad, transformer.parameters()),
        betas=(0.9, 0.98), eps=1e-09),
    transformer.d_model, warmup_steps)

dataloader = torch.utils.data.DataLoader(dataset=frh01, batch_size=64, num_workers=4)

transformer = train(transformer, optimizer, dataloader, epochs)

Epoch 0: loss 1.30
Epoch 1: loss 0.96
Epoch 2: loss 0.89
Epoch 3: loss 0.85
Epoch 4: loss 0.83


## Train the TempCNN

In [4]:
tempcnn = TempCNN(input_dim=13, nclasses=13, sequence_length=45, kernel_size=5,hidden_dims=64,dropout=0.5)

workers=1
epochs=5

optimizer = torch.optim.Adam(
        filter(lambda x: x.requires_grad, tempcnn.parameters()),
        betas=(0.9, 0.98), lr=1e-3, eps=1e-09, weight_decay=1e-6)

dataloader = torch.utils.data.DataLoader(dataset=frh01, batch_size=64, num_workers=4)

tempcnn = train(tempcnn, optimizer, dataloader, epochs)

Epoch 0: loss 1.41
Epoch 1: loss 1.19
Epoch 2: loss 1.10
Epoch 3: loss 1.06
Epoch 4: loss 1.03


## Train the MSresnet

In [6]:
msresnet = MSResNet(input_channel=13, num_classes=13, hidden_dims=32)

workers=1
epochs=5

optimizer = torch.optim.Adam(
        filter(lambda x: x.requires_grad, msresnet.parameters()),
        betas=(0.9, 0.98), lr=1e-3, eps=1e-09, weight_decay=1e-5)

dataloader = torch.utils.data.DataLoader(dataset=frh01, batch_size=64, num_workers=4)

msresnet = train(msresnet, optimizer, dataloader, epochs)

Epoch 0: loss 1.03
Epoch 1: loss 0.86
Epoch 2: loss 0.81
Epoch 3: loss 0.78
Epoch 4: loss 0.75


## Load pre-trained models

requires downloaded model files
```
cd models
bash download.sh
```

In [6]:
trainedlstm = LSTM(input_dim=13, hidden_dims=128, nclasses=13, num_rnn_layers=3, dropout=0.2, bidirectional=True,
                 use_batchnorm=False, use_layernorm=True)

trainedlstm.load("models/BreizhCrops_rnn/model.pth")

trainedtransformer = TransformerEncoder(in_channels=13, len_max_seq=407,
    d_word_vec=128, d_model=128, d_inner=512,
    n_layers=4, n_head=4, d_k=32, d_v=32,
    dropout=0.2, nclasses=13)

trainedtransformer.load("models/BreizhCrops_transformer/model.pth")


if torch.cuda.is_available():
    trainedlstm = trainedlstm.cuda()
    trainedtransformer = trainedtransformer.cuda()

loading model from models/BreizhCrops_rnn/model.pth
loading model from models/BreizhCrops_transformer/model.pth


## Evaluation

convenience functions

In [7]:
def test(model, dataloader):
    model.eval()

    logprobabilities = list()
    targets_list = list()
    inputs_list = list()

    for iteration, data in tqdm(enumerate(dataloader)):

        inputs, targets = data

        if torch.cuda.is_available():
            inputs = inputs.cuda()
            targets = targets.cuda()

        inputs_list.append(inputs.cpu().detach().numpy())
        targets_list.append(targets[:,0].cpu().detach().numpy())
        logprobabilities.append(model.forward(inputs.transpose(1,2)).cpu().detach().numpy())
        
    return np.vstack(logprobabilities), np.vstack(inputs_list), np.concatenate(targets_list) # np.vstack(targets_list)


def confusion_matrix_to_accuraccies(confusion_matrix):

    confusion_matrix = confusion_matrix.astype(float)
    # sum(0) <- predicted sum(1) ground truth

    total = np.sum(confusion_matrix)
    n_classes, _ = confusion_matrix.shape
    overall_accuracy = np.sum(np.diag(confusion_matrix)) / total

    # calculate Cohen Kappa (https://en.wikipedia.org/wiki/Cohen%27s_kappa)
    N = total
    p0 = np.sum(np.diag(confusion_matrix)) / N
    pc = np.sum(np.sum(confusion_matrix, axis=0) * np.sum(confusion_matrix, axis=1)) / N ** 2
    kappa = (p0 - pc) / (1 - pc)

    recall = np.diag(confusion_matrix) / (np.sum(confusion_matrix, axis=1) + 1e-12)
    precision = np.diag(confusion_matrix) / (np.sum(confusion_matrix, axis=0) + 1e-12)
    f1 = (2 * precision * recall) / ((precision + recall) + 1e-12)

    # Per class accuracy
    cl_acc = np.diag(confusion_matrix) / (confusion_matrix.sum(1) + 1e-12)

    return overall_accuracy, kappa, precision, recall, f1, cl_acc

def build_confusion_matrix(targets, predictions):
    
    nclasses = len(np.unique(targets))
    cm, _, _ = np.histogram2d(targets, predictions, bins=nclasses)
    
    return cm

def print_report(overall_accuracy, kappa, precision, recall, f1, cl_acc):
    
    report="""
    overall accuracy: \t{:.2f}
    kappa \t\t{:.2f}
    precision \t\t{:.2f}
    recall \t\t{:.2f}
    f1 \t\t\t{:.2f}
    """.format(overall_accuracy, kappa, precision.mean(), recall.mean(), f1.mean())

    print(report)
    
def evaluate(model, dataset,batchsize=32, workers=4):
    dataloader = torch.utils.data.DataLoader(dataset=dataset, batch_size=batchsize, num_workers=workers)

    logprobabilites, inputs, targets = test(model, dataloader)
    predictions = logprobabilites.argmax(1)
    
    confusion_matrix = build_confusion_matrix(targets, predictions)
    print_report(*confusion_matrix_to_accuraccies(confusion_matrix))
    
    return confusion_matrix

### Run evaluation on FRH04 region
compare pre-trained LSTM/Transformer model with Table 1

![title](doc/table1.png)

In [8]:
print("This LSTM")
cm = evaluate(lstm, frh04)

print("Pre-trained LSTM")
cm = evaluate(trainedlstm, frh04)

print("This Transformer")
cm = evaluate(transformer, frh04)

print("Pre-trained Transformer")
cm = evaluate(trainedtransformer, frh04)

This LSTM


3937it [00:25, 151.64it/s]



    overall accuracy: 	0.57
    kappa 		0.48
    precision 		0.43
    recall 		0.39
    f1 			0.37
    
Pre-trained LSTM


3937it [00:25, 154.38it/s]



    overall accuracy: 	0.68
    kappa 		0.62
    precision 		0.63
    recall 		0.58
    f1 			0.59
    
This Transformer


3937it [00:28, 137.25it/s]



    overall accuracy: 	0.62
    kappa 		0.53
    precision 		0.51
    recall 		0.48
    f1 			0.45
    
Pre-trained Transformer


3937it [00:29, 134.83it/s]



    overall accuracy: 	0.69
    kappa 		0.63
    precision 		0.60
    recall 		0.56
    f1 			0.57
    
