In [None]:
from sklearn.model_selection import StratifiedKFold
import logging
from skimage.io import imsave
from argparse import Namespace
from collections import defaultdict
from tqdm.auto import tqdm
import pandas as pd
from torch.utils.data import DataLoader, Subset, ConcatDataset
import imagehash
from sklearn.model_selection import train_test_split
from PIL import Image
from copy import copy
import os
from torch.utils.data import Subset, DataLoader
import torch
from torch.utils.data import Subset
from torch.utils.data import ConcatDataset, Subset, DataLoader
import numpy as np

In [None]:
%matplotlib inline

In [None]:

import logging
import sys

# Send records that reach the root logger to the console (StreamHandler with no
# argument writes to stderr), so the `logging.*` calls made by the helper
# functions in this notebook have somewhere to go.
# The guard keeps the cell idempotent: re-running it must not stack a second
# handler, which would print every record twice.
# NOTE(review): the root logger level is left untouched (WARNING by default), so
# `logging.info(...)` records are still filtered out - confirm whether INFO
# output is wanted here.
_root_logger = logging.getLogger()
if not any(type(handler) is logging.StreamHandler for handler in _root_logger.handlers):
    _root_logger.addHandler(logging.StreamHandler())


In [None]:
# Unpack the dataset archive into the notebook's current working directory.
!tar -xf /kaggle/input/cassava-merged/dataset.tar.xz

In [None]:

# Copy the EfficientNet-B0 checkpoint into torch hub's checkpoint cache directory.
# NOTE(review): presumably so that timm.create_model('efficientnet_b0', pretrained=True)
# picks the weights up locally instead of downloading them - confirm.
!ls /kaggle/input/timm-pretrained-efficientnet
!mkdir -p /root/.cache/torch/hub/checkpoints/
!cp /kaggle/input/timm-pretrained-efficientnet/efficientnet/efficientnet_b0_ra-3dd342df.pth /root/.cache/torch/hub/checkpoints/efficientnet_b0_ra-3dd342df.pth


In [None]:

# Install timm 0.1.26 from a local wheel file rather than from a package index.
!pip install /kaggle/input/timm-package/timm-0.1.26-py3-none-any.whl


# Functions

In [None]:
# file transforms.py

import numpy as np
from PIL import Image
import albumentations as A
from albumentations.pytorch import ToTensorV2
from torchvision import transforms


def data_preapre_transform(image):
    """Rotate an image to landscape and center-crop it to an 8:6 aspect ratio.

    Args:
        image: A PIL image, or any object ``np.array`` converts into an array
            that ``Image.fromarray`` accepts.

    Returns:
        A PIL ``Image`` that is at least as wide as it is tall. If the aspect
        ratio (rounded to 3 decimals) already equals 8/6, the image is returned
        without cropping.
    """
    image = Image.fromarray(np.array(image))

    # Portrait inputs are turned on their side so that width >= height below.
    if image.size[0] < image.size[1]:
        image = image.rotate(90, expand=True)

    width, height = image.size
    target_ratio = 8 / 6

    if round(width / height, 3) != round(target_ratio, 3):
        if width / height > target_ratio:
            # Wider than 8:6: keep the full height and trim the sides. Deriving
            # the height from the width here would give a crop box taller than
            # the image, i.e. padding instead of cropping.
            new_width = int(height * target_ratio)
            new_height = height
        else:
            # Taller than 8:6: keep the full width and trim top and bottom.
            new_width = width
            new_height = int(height * (width / height * 6 / 8))

        # Center the crop box inside the image.
        left = (width - new_width) / 2
        top = (height - new_height) / 2
        right = (width + new_width) / 2
        bottom = (height + new_height) / 2

        image = image.crop((left, top, right, bottom))
    return image


def get_wrapper(transforms):
    """Adapt a keyword-style transform into a plain ``img -> img`` callable.

    The returned function converts its argument with ``np.array``, calls
    ``transforms(image=...)`` and returns the ``'image'`` entry of the result.
    """
    def apply_to_image(img):
        as_array = np.array(img)
        result = transforms(image=as_array)
        return result['image']

    return apply_to_image


def get_byol_transforms(width, height):
    """Build the preprocessing callable used for BYOL pre-training inputs.

    The pipeline resizes to ``height`` x ``width``, converts to float and then
    to a tensor via ``ToTensorV2``.

    Args:
        width: Output width in pixels.
        height: Output height in pixels.

    Returns:
        A function ``img -> tensor`` (see ``get_wrapper``).
    """
    byol_transforms = A.Compose([
        # albumentations orders its size arguments as (height, width); keywords
        # keep this function's (width, height) signature from swapping the axes.
        A.Resize(height=height, width=width),
        # NOTE(review): max_value=1.0 looks like a dtype cast without rescaling
        # (pixel values would stay in their original range) - confirm that the
        # downstream augmentations expect that range.
        A.ToFloat(max_value=1.0),
        ToTensorV2(),
    ])

    return get_wrapper(byol_transforms)


def get_prepare_transforms(width, height):
    """Build a callable that only resizes an image to ``height`` x ``width``.

    Args:
        width: Output width in pixels.
        height: Output height in pixels.

    Returns:
        A function ``img -> resized array`` (see ``get_wrapper``).
    """
    prepare_transforms = A.Compose([
        # albumentations orders its size arguments as (height, width); keywords
        # keep this function's (width, height) signature from swapping the axes.
        A.Resize(height=height, width=width),
    ])

    return get_wrapper(prepare_transforms)


def get_train_transforms(width, height):
    """Build the training-time augmentation callable.

    Augmentations run in the order listed below and end with a random resized
    crop of ``height`` x ``width``, flips, normalisation, coarse dropout and
    conversion to a tensor.

    Args:
        width: Width of the crop fed to the model, in pixels.
        height: Height of the crop fed to the model, in pixels.

    Returns:
        A function ``img -> tensor`` (see ``get_wrapper``).
    """
    train_transforms = A.Compose([
        A.JpegCompression(quality_lower=95, quality_upper=100, p=0.5),
        A.ColorJitter(p=0.5),
        A.ToFloat(max_value=1.0),
        A.ShiftScaleRotate(p=0.5),
        # albumentations orders its size arguments as (height, width); keywords
        # keep this function's (width, height) signature from swapping the axes.
        A.RandomResizedCrop(height=height, width=width, scale=(0.1, 0.8)),
        A.HorizontalFlip(p=0.5),
        A.VerticalFlip(p=0.5),
        A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        A.CoarseDropout(p=0.5),
        ToTensorV2(),
    ])

    return get_wrapper(train_transforms)


def get_test_transforms(width, height):
    """Build the deterministic evaluation-time preprocessing callable.

    The pipeline converts to float, takes a center crop of ``height`` x
    ``width``, normalises with the same mean/std as the training pipeline and
    converts to a tensor.

    Args:
        width: Width of the center crop, in pixels.
        height: Height of the center crop, in pixels.

    Returns:
        A function ``img -> tensor`` (see ``get_wrapper``).
    """
    test_transforms = A.Compose([
        A.ToFloat(max_value=1.0),
        # albumentations orders its size arguments as (height, width); keywords
        # keep this function's (width, height) signature from swapping the axes.
        A.CenterCrop(height=height, width=width),
        A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ToTensorV2(),
    ])

    return get_wrapper(test_transforms)



In [None]:
# file utils.py

import numpy as np
import pandas as pd
import seaborn as sns
import os

from PIL import Image
from skimage import io
from torch.utils.data import Dataset
import torch
from matplotlib import pyplot as plt
from mpl_toolkits.axes_grid1 import ImageGrid


class Unnormalize:
    """Undo a per-channel ``(x - mean) / std`` normalisation, in place."""

    def __init__(self, mean, std):
        self.mean = mean
        self.std = std

    def __call__(self, tensor):
        """
        Args:
            tensor (Tensor): Image tensor whose first dimension is iterated
                channel by channel; it is modified in place.
        Returns:
            Tensor: The same tensor object, with ``channel * std + mean``
            applied to each channel.
        """
        stats = zip(self.mean, self.std)
        for channel, (channel_mean, channel_std) in zip(tensor, stats):
            # Inverse of the normalize step t.sub_(m).div_(s).
            channel.mul_(channel_std).add_(channel_mean)
        return tensor


def make_confusion_matrix(cf,
                          group_names=None,
                          categories='auto',
                          count=True,
                          percent=True,
                          cbar=True,
                          xyticks=True,
                          xyplotlabels=True,
                          sum_stats=True,
                          figsize=None,
                          cmap='Blues',
                          title=None):
    """Plot a confusion matrix as an annotated seaborn heatmap.

    Args:
        cf: 2-D numpy array holding the confusion matrix. Rows are treated as
            true labels and columns as predicted labels.
        group_names: Optional sequence with one caption per cell, in row-major
            order. Ignored unless its length equals ``cf.size``.
        categories: Tick labels passed to ``sns.heatmap`` for both axes.
        count: Show the raw count in each cell.
        percent: Show each cell's share of the matrix total.
        cbar: Draw the colour bar.
        xyticks: Show tick labels; when falsy, ``categories`` is ignored.
        xyplotlabels: Label the axes 'True label' / 'Predicted label'.
        sum_stats: Append accuracy (and, for a 2x2 matrix, precision, recall
            and F1) below the x axis.
        figsize: Figure size; defaults to matplotlib's ``figure.figsize``.
        cmap: Colormap name.
        title: Optional plot title.

    Returns:
        None. The heatmap is drawn on a newly created matplotlib figure.
    """
    # Text shown inside each cell: optional caption, count and percentage.
    blanks = [''] * cf.size

    if group_names is not None and len(group_names) == cf.size:
        group_labels = ["{}\n".format(value) for value in group_names]
    else:
        group_labels = blanks

    if count:
        group_counts = ["{0:0.0f}\n".format(value) for value in cf.flatten()]
    else:
        group_counts = blanks

    if percent:
        group_percentages = ["{0:.2%}".format(value) for value in cf.flatten() / np.sum(cf)]
    else:
        group_percentages = blanks

    box_labels = [f"{v1}{v2}{v3}".strip() for v1, v2, v3 in zip(group_labels, group_counts, group_percentages)]
    box_labels = np.asarray(box_labels).reshape(cf.shape[0], cf.shape[1])

    # Summary statistics appended to the x-axis label.
    if sum_stats:
        # Accuracy is the sum of the diagonal divided by the total observations.
        accuracy = np.trace(cf) / float(np.sum(cf))

        if len(cf) == 2:
            # Binary matrix: class 1 is treated as the positive class.
            precision = cf[1, 1] / sum(cf[:, 1])
            recall = cf[1, 1] / sum(cf[1, :])
            f1_score = 2 * precision * recall / (precision + recall)
            stats_text = "\n\nAccuracy={:0.3f}\nPrecision={:0.3f}\nRecall={:0.3f}\nF1 Score={:0.3f}".format(
                accuracy, precision, recall, f1_score)
        else:
            stats_text = "\n\nAccuracy={:0.3f}".format(accuracy)
    else:
        stats_text = ""

    # Figure parameters derived from the remaining arguments.
    if figsize is None:
        figsize = plt.rcParams.get('figure.figsize')

    if not xyticks:
        # Passing False to seaborn hides the tick labels.
        categories = False

    plt.figure(figsize=figsize)
    sns.heatmap(cf, annot=box_labels, fmt="", cmap=cmap, cbar=cbar, xticklabels=categories, yticklabels=categories)

    if xyplotlabels:
        plt.ylabel('True label')
        plt.xlabel('Predicted label' + stats_text)
    else:
        plt.xlabel(stats_text)

    if title:
        plt.title(title)


def plot_image(img, label=None, ax=None):
    """Draw a channels-first image on a matplotlib axis, optionally titled.

    Args:
        img: Image in (C, H, W) layout; anything ``np.array`` can convert.
        label: Optional title. Integer class ids (Python or numpy integers)
            found in the disease map are replaced by the disease name; any
            other value is shown as-is.
        ax: Axis to draw on. Defaults to the current axis (``plt.gca()``).
    """
    new_img = torch.Tensor(np.array(img))
    label_num_to_disease_map = {0: 'Cassava Bacterial Blight (CBB)',
                                1: 'Cassava Brown Streak Disease (CBSD)',
                                2: 'Cassava Green Mottle (CGM)',
                                3: 'Cassava Mosaic Disease (CMD)',
                                4: 'Healthy'}

    if ax is None:
        ax = plt.gca()

    # imshow wants (H, W, C). permute(2, 1, 0) would also swap H and W and
    # therefore draw the picture transposed.
    ax.imshow(new_img.permute(1, 2, 0))
    ax.axis('off')
    if label is not None:
        if isinstance(label, (int, np.integer)):
            # Fall back to the raw id for classes missing from the map.
            label = label_num_to_disease_map.get(int(label), label)
        ax.set_title(f'{label}')


def plot_label_examples(dataset, targets, target_label):
    """Show six randomly drawn samples of one class in a 2x3 image grid.

    Args:
        dataset: Indexable object returning ``(image, label)`` pairs.
        targets: Array of labels aligned with ``dataset``; compared
            element-wise against ``target_label``.
        target_label: The class whose examples are displayed.
    """
    candidate_indices = np.where(targets == target_label)[0]

    # np.random.choice samples with replacement by default, so the same image
    # may appear more than once.
    chosen = np.random.choice(candidate_indices, 6)

    figure = plt.figure(figsize=(20, 10))
    axes_grid = ImageGrid(
        figure,
        111,                 # same meaning as subplot(111)
        nrows_ncols=(2, 3),  # two rows of three axes
        axes_pad=0.1,        # padding between axes, in inches
    )

    for axis, sample_idx in zip(axes_grid, chosen):
        image, sample_label = dataset[sample_idx]
        assert sample_label == target_label
        plot_image(image, ax=axis)

    plt.suptitle(f'Label {target_label}')
    plt.show()


class DatasetFromSubset(Dataset):
    """Dataset view over a ``Subset`` with its own input/target transforms.

    Attributes that are missing on this object are looked up on the subset's
    underlying dataset and indexed with the subset's indices, so per-sample
    arrays of the parent dataset are exposed already restricted to the subset.

    Args:
        subset: Object exposing ``dataset`` and ``indices`` (e.g. a
            ``torch.utils.data.Subset``) that is indexable and sized.
        transform: Optional callable applied to every sample's input.
        target_transform: Optional callable applied to every sample's target.
    """

    def __init__(self, subset, transform=None, target_transform=None):
        self.subset = subset
        self.transform = transform
        self.target_transform = target_transform

    def __getattr__(self, item):
        # __getattr__ only runs after normal lookup has failed. If ``subset``
        # itself is missing (instance created without __init__, as copy and
        # pickle do) delegating would re-enter this method forever, so fail
        # with AttributeError instead. Special-method probes such as
        # ``__setstate__`` are never forwarded to the wrapped dataset.
        if item == 'subset' or (item.startswith('__') and item.endswith('__')):
            raise AttributeError(item)

        return getattr(self.subset.dataset, item)[self.subset.indices]

    def __getitem__(self, index):
        x, y = self.subset[index]
        if self.transform:
            x = self.transform(x)

        if self.target_transform:
            y = self.target_transform(y)
        return x, y

    @property
    def labels(self):
        """Labels of the underlying dataset restricted to the subset indices."""
        return self.subset.dataset.labels[self.subset.indices]

    def __len__(self):
        return len(self.subset)


class CassavaDataset(Dataset):
    """Dataset that loads images from files under ``root`` with their labels.

    Args:
        root: Directory that the entries of ``image_ids`` are relative to.
        image_ids: Indexable collection of image file names.
        labels: Indexable collection of labels aligned with ``image_ids``;
            also exposed under the alias ``targets``.
        sources: Optional per-sample metadata, stored unchanged.
        transform: Optional callable applied to every loaded image.
    """

    def __init__(self, root, image_ids, labels, sources=None, transform=None):
        super().__init__()
        self.root = root
        self.image_ids = image_ids
        # ``targets`` refers to the very same object as ``labels``.
        self.labels = self.targets = labels
        self.sources = sources
        self.transform = transform

    def __len__(self):
        return len(self.image_ids)

    def __getitem__(self, idx):
        label = self.labels[idx]
        image_path = os.path.join(self.root, self.image_ids[idx])
        image = io.imread(image_path)

        if self.transform:
            image = self.transform(image)

        return image, label


In [None]:
# file models/model.py

from argparse import Namespace

import torch
from pytorch_lightning.metrics.functional import accuracy
from torch import nn
import timm
import pytorch_lightning as pl
import torch.nn.functional as F



def dfs_freeze(model, unfreeze=False):
    """Freeze (or unfreeze) every parameter of ``model`` and its submodules.

    Args:
        model: An ``nn.Module``.
        unfreeze: Value assigned to ``requires_grad`` on each parameter; the
            default ``False`` freezes the module.
    """
    # Module.parameters() already yields the parameters of all nested
    # submodules, so one pass is enough and no recursion over children is
    # needed.
    for param in model.parameters():
        param.requires_grad = unfreeze


class LeafDoctorModel(pl.LightningModule):
    """Five-class classifier built on a pretrained timm ``efficientnet_b0``.

    Args:
        hparams: Namespace-like object with the training hyper-parameters.
            ``configure_optimizers`` reads ``lr`` (or ``learning_rate``),
            ``weight_decay``, ``max_epochs`` and ``batch_size``;
            ``training_step`` reads ``bitempered_t1``, ``bitempered_t2`` and
            ``label_smoothing``. Defaults to an empty ``Namespace``.
        only_train_layers: Optional iterable selecting the parts of the trunk
            that stay trainable. Each entry is either an attribute name of the
            trunk or a callable ``trunk -> module``. When given, everything
            else in the trunk is frozen.
    """

    # Sample count used to derive OneCycleLR's ``steps_per_epoch``.
    # NOTE(review): presumably the number of training images - confirm it
    # matches the loader actually passed to the trainer.
    _TRAIN_SET_SIZE = 23712

    def __init__(self, hparams = None, only_train_layers=None):
        super().__init__()
        self.hparams = hparams or Namespace()
        self.only_train_layers = only_train_layers

        self.trunk = timm.create_model('efficientnet_b0', pretrained=True, num_classes=5)

        # Freeze the whole trunk, then re-enable only the requested layers.
        if only_train_layers:
            dfs_freeze(self.trunk)

            for layer_name_or_getter in only_train_layers:
                if isinstance(layer_name_or_getter, str):
                    layer = getattr(self.trunk, layer_name_or_getter)
                else:
                    layer = layer_name_or_getter(self.trunk)
                dfs_freeze(layer, unfreeze=True)

    def forward(self, x):
        """Return the trunk's raw class logits for a batch."""
        return self.trunk(x)

    def predict_proba(self, x):
        """Return softmax class probabilities (over dim 1) for a batch."""
        probabilities = nn.functional.softmax(self.forward(x), dim=1)
        return probabilities

    def predict(self, x):
        """Return the index of the highest logit for every sample."""
        return torch.max(self.forward(x), 1)[1]

    def configure_optimizers(self):
        """Create Adam over the trainable parameters plus a per-step OneCycleLR."""
        trainable_params = [p for p in self.parameters() if p.requires_grad]

        # Resolve the learning rate once so that the optimizer and the
        # scheduler's peak always agree, including when only ``learning_rate``
        # is provided.
        lr = getattr(self.hparams, 'lr', None) or self.hparams.learning_rate

        optimizer = torch.optim.Adam(trainable_params,
                                      lr=lr,
                                      weight_decay=self.hparams.weight_decay)

        lr_scheduler = torch.optim.lr_scheduler.OneCycleLR(
            optimizer,
            max_lr=lr,
            epochs=self.hparams.max_epochs,
            steps_per_epoch=int(self._TRAIN_SET_SIZE / self.hparams.batch_size))
        return (
            [optimizer],
            [
                {
                    'scheduler': lr_scheduler,
                    'interval': 'step',
                    'frequency': 1,
                    'reduce_on_plateau': False,
                    'monitor': 'val_loss',
                }
            ]
        )

    def training_step(self, batch, batch_idx):
        """Compute the bi-tempered logistic loss and log loss and accuracy."""
        x, y = batch
        y_hat = self(x)
        loss = bi_tempered_logistic_loss(y_hat, y,
                                         self.hparams.bitempered_t1,
                                         self.hparams.bitempered_t2,
                                         label_smoothing=self.hparams.label_smoothing)
        acc = accuracy(y_hat, y)
        self.log("train_acc", acc, on_step=False, on_epoch=True, prog_bar=True, logger=True)
        self.log("train_loss", loss, on_step=True, on_epoch=False, prog_bar=False, logger=True)
        return loss

    @torch.no_grad()
    def validation_step(self, batch, batch_idx):
        """Log plain cross-entropy loss and accuracy for a validation batch."""
        x, y = batch
        y_hat = self(x)
        loss = F.cross_entropy(y_hat, y)
        acc = accuracy(y_hat, y)
        self.log("val_acc", acc, prog_bar=True, logger=True)
        self.log("val_loss", loss, prog_bar=True, logger=True)


In [None]:
# file models/byol.py

import numpy as np
from argparse import Namespace
from copy import deepcopy
from itertools import chain
from typing import Dict, List
import pytorch_lightning as pl
from albumentations.pytorch import ToTensorV2
from torch import optim
import torch.nn.functional as f
import random
from typing import Callable, Tuple, Union
from kornia import augmentation as aug
from kornia import filters
from kornia.geometry import transform as tf
import torch
from torch import nn, Tensor
import albumentations as A


def normalized_mse(x: Tensor, y: Tensor) -> Tensor:
    """Squared distance between the L2-normalised rows of ``x`` and ``y``.

    Both inputs are normalised along the last dimension; the result is
    ``2 - 2 * <x_hat, y_hat>`` reduced over that dimension.
    """
    x_unit = f.normalize(x, dim=-1)
    y_unit = f.normalize(y, dim=-1)
    dot = (x_unit * y_unit).sum(dim=-1)
    return 2 - 2 * dot


class RandomApply(nn.Module):
    """Module that applies ``fn`` to its input with probability ``p``."""

    def __init__(self, fn: Callable, p: float):
        super().__init__()
        self.fn = fn
        self.p = p

    def forward(self, x: Tensor) -> Tensor:
        # One draw per call: the input passes through untouched whenever the
        # draw exceeds ``p``.
        skip = random.random() > self.p
        if skip:
            return x
        return self.fn(x)


def default_aug(image_size: Tuple[int, int] = (360, 360)) -> nn.Module:
    """Build the default augmentation pipeline used when none is supplied.

    Args:
        image_size: Output size handed to the random resized crop.

    Returns:
        An ``nn.Sequential`` applying, in order: colour jitter, random vertical
        and horizontal flips, an occasional Gaussian blur, a random resized
        crop and per-channel normalisation.
    """
    occasional_blur = RandomApply(filters.GaussianBlur2d((3, 3), (0.5, 0.5)), p=0.1)

    steps = [
        aug.ColorJitter(contrast=0.1, brightness=0.1, saturation=0.1, p=0.8),
        aug.RandomVerticalFlip(),
        aug.RandomHorizontalFlip(),
        occasional_blur,
        aug.RandomResizedCrop(size=image_size, scale=(0.5, 1)),
        aug.Normalize(
            mean=torch.tensor([0.485, 0.456, 0.406]),
            std=torch.tensor([0.229, 0.224, 0.225]),
        ),
    ]
    return nn.Sequential(*steps)


def mlp(dim: int, projection_size: int = 256, hidden_size: int = 4096) -> nn.Module:
    """Two-layer perceptron: Linear -> BatchNorm1d -> ReLU -> Linear.

    Args:
        dim: Size of the input features.
        projection_size: Size of the output features.
        hidden_size: Width of the hidden layer.
    """
    layers = [
        nn.Linear(dim, hidden_size),
        nn.BatchNorm1d(hidden_size),
        nn.ReLU(inplace=True),
        nn.Linear(hidden_size, projection_size),
    ]
    return nn.Sequential(*layers)


class EncoderWrapper(nn.Module):
    """Wrap a model so that forward returns a projection of a hidden layer.

    A forward hook on the selected layer flattens that layer's output, feeds it
    through a projector MLP and stores the result; ``forward`` runs the wrapped
    model only to trigger the hook and then returns the stored projection.

    Args:
        model: The network to wrap.
        projection_size: Output size of the projector.
        hidden_size: Hidden width of the projector.
        layer: Which layer to hook - a name from ``model.named_modules()`` or
            an index into ``model.children()``.
    """

    def __init__(
        self,
        model: nn.Module,
        projection_size: int = 256,
        hidden_size: int = 4096,
        layer: Union[str, int] = -2,
    ):
        super().__init__()
        self.model = model
        self.projection_size = projection_size
        self.hidden_size = hidden_size
        self.layer = layer

        # The projector is created lazily because its input width is only
        # known once the hooked layer has produced an output.
        self._projector = None
        self._projector_dim = None
        self._encoded = torch.empty(0)
        self._register_hook()

    @property
    def projector(self):
        """The projector MLP, built on first access from the recorded width."""
        if self._projector is not None:
            return self._projector
        self._projector = mlp(
            self._projector_dim, self.projection_size, self.hidden_size
        )
        return self._projector

    def _hook(self, _, __, output):
        """Forward hook: project the flattened layer output and keep it."""
        flat = output.flatten(start_dim=1)
        if self._projector_dim is None:
            self._projector_dim = flat.shape[-1]
        self._encoded = self.projector(flat)

    def _register_hook(self):
        """Attach ``_hook`` to the layer selected by ``self.layer``."""
        if isinstance(self.layer, str):
            hooked_layer = dict(self.model.named_modules())[self.layer]
        else:
            hooked_layer = list(self.model.children())[self.layer]

        hooked_layer.register_forward_hook(self._hook)

    def forward(self, x: Tensor) -> Tensor:
        # The model's own output is discarded; the hook filled ``_encoded``.
        self.model(x)
        return self._encoded


class BYOL(pl.LightningModule):
    """BYOL-style self-supervised trainer wrapped around ``model``.

    An online ``EncoderWrapper`` followed by a linear predictor is trained so
    that its prediction for one augmented view matches the projection that a
    target copy of the encoder produces for another view of the same batch.
    The target is not optimised directly: it is a deep copy of the encoder that
    follows it through an exponential moving average with factor ``beta``.

    Args:
        model: Backbone network to pre-train.
        image_size: Input size used for augmentation and for the dry-run
            forward pass in the constructor.
        hidden_layer: Layer of ``model`` whose output is projected (see
            ``EncoderWrapper``).
        projection_size: Size of the projection and prediction vectors.
        hidden_size: Hidden width of the projector MLP.
        augment_fn: Optional replacement for ``default_aug(image_size)``.
        beta: Moving-average factor; the share of the old target weights kept
            on every update.
        hparams: Namespace-like object; ``lr``, ``weight_decay``,
            ``max_epochs`` and ``limit_train_batches`` are read when the
            optimizer is configured, ``precision`` when augmenting.
    """

    def __init__(
        self,
        model: nn.Module,
        image_size: Tuple[int, int] = (360, 360),
        hidden_layer: Union[str, int] = -2,
        projection_size: int = 256,
        hidden_size: int = 4096,
        augment_fn: Callable = None,
        beta: float = 0.99,
        hparams = None,
    ):
        super().__init__()
        self._augment = default_aug(image_size) if augment_fn is None else augment_fn
        self.beta = beta
        self.encoder = EncoderWrapper(
            model, projection_size, hidden_size, layer=hidden_layer
        )
        # nn.Linear's third positional parameter is ``bias``; passing
        # ``hidden_size`` there merely evaluated as True. The layer is
        # unchanged, the stray argument is gone.
        # NOTE(review): if an MLP predictor was intended, use
        # ``mlp(projection_size, projection_size, hidden_size)`` - confirm.
        self.predictor = nn.Linear(projection_size, projection_size, bias=True)
        self.hparams = hparams or Namespace()
        self._target = None

        # Dry run: triggers the encoder's hook so the lazily built projector
        # exists before optimizers are configured.
        self.encoder(torch.zeros(2, 3, *image_size, device=self.device))

    def augment(self, batch):
        """Apply the augmentation pipeline to a batch.

        When ``hparams.precision`` is 16 the pipeline runs in float64 and the
        result is cast to float16. A missing ``precision`` is treated as 32.
        """
        if getattr(self.hparams, 'precision', 32) == 16:
            return self._augment(batch.double()).to(torch.float16)
        return self._augment(batch)

    def forward(self, x: Tensor) -> Tensor:
        """Return the predictor output for the online encoder's projection."""
        return self.predictor(self.encoder(x))

    @property
    def target(self):
        """Target encoder, created on first use as a deep copy of the encoder."""
        if self._target is None:
            self._target = deepcopy(self.encoder)
        return self._target

    def update_target(self):
        """Move each target weight towards the encoder: ``beta*old + (1-beta)*new``."""
        for p, pt in zip(self.encoder.parameters(), self.target.parameters()):
            pt.data = self.beta * pt.data + (1 - self.beta) * p.data

    def configure_optimizers(self):
        """Create AdamW over the trainable parameters plus a per-step OneCycleLR."""
        trainable_params = [p for p in self.parameters() if p.requires_grad]
        optimizer = optim.AdamW(trainable_params, lr=self.hparams.lr, weight_decay=self.hparams.weight_decay)
        lr_scheduler = torch.optim.lr_scheduler.OneCycleLR(optimizer,
                                                           max_lr=self.hparams.lr,
                                                           epochs=self.hparams.max_epochs,
                                                           steps_per_epoch=self.hparams.limit_train_batches)
        return (
            [optimizer],
            [
                {
                    'scheduler': lr_scheduler,
                    'interval': 'step',
                    'frequency': 1,
                    'reduce_on_plateau': False,
                    'monitor': 'val_loss',
                }
            ]
        )

    def training_step(self, batch, *_) -> Dict[str, Union[Tensor, Dict]]:
        """Symmetric BYOL loss on two augmented views, then refresh the target."""
        x = batch[0]
        with torch.no_grad():
            x1, x2 = self.augment(x), self.augment(x)

        pred1, pred2 = self.forward(x1), self.forward(x2)
        with torch.no_grad():
            targ1, targ2 = self.target(x1), self.target(x2)
        loss = torch.mean(normalized_mse(pred1, targ2) + normalized_mse(pred2, targ1))

        self.log("train_loss", float(loss.detach()), on_step=True)

        # Nothing else in this class calls update_target, so without this call
        # the target would stay frozen at its initial copy and ``beta`` would
        # have no effect.
        self.update_target()
        return {"loss": loss}

    @torch.no_grad()
    def validation_step(self, batch, *_) -> Dict[str, Union[Tensor, Dict]]:
        """Compute the same symmetric loss as training, without gradients."""
        x = batch[0]
        x1, x2 = self.augment(x), self.augment(x)
        pred1, pred2 = self.forward(x1), self.forward(x2)
        targ1, targ2 = self.target(x1), self.target(x2)
        loss = torch.mean(normalized_mse(pred1, targ2) + normalized_mse(pred2, targ1))

        return {"loss": loss}

    @torch.no_grad()
    def validation_epoch_end(self, outputs: List[Dict]) -> Dict:
        """Log the mean validation loss; nothing is logged for an empty epoch."""
        if not outputs:
            return
        val_loss = sum(x["loss"] for x in outputs) / len(outputs)
        self.log("val_loss", float(val_loss.detach()))


In [None]:
# file node_helpers.py

import logging
from argparse import Namespace

import torch
from pytorch_lightning import Trainer
from pytorch_lightning.callbacks import EarlyStopping
from torch.utils.data import Subset, DataLoader
from tqdm.auto import tqdm
from sklearn.metrics import accuracy_score, f1_score
from pytorch_lightning.callbacks import LearningRateMonitor
from pytorch_lightning.loggers import TensorBoardLogger
from matplotlib import pyplot as plt


def score(predictions, labels):
    """Compute accuracy and support-weighted F1 of predictions against labels.

    Args:
        predictions: Predicted class ids.
        labels: Ground-truth class ids, aligned with ``predictions``.

    Returns:
        Dict with the keys ``'accuracy'`` and ``'f1_score'``.
    """
    # scikit-learn metrics take (y_true, y_pred). The order matters for the
    # weighted F1, whose per-class weights come from the true-label support.
    return {
        'accuracy': accuracy_score(labels, predictions),
        'f1_score': f1_score(labels, predictions, average='weighted'),
    }


def predict(model, dataset, indices, batch_size=10, num_workers=4, transform=None):
    """Run ``model`` over a subset of ``dataset`` and collect its predictions.

    The model is switched to eval mode and, when CUDA is available, moved to
    the GPU.

    Args:
        model: Object exposing ``eval()``, ``cuda()`` and
            ``predict_proba(images)``.
        dataset: Source dataset yielding ``(image, label)`` pairs.
        indices: Indices of the samples to predict.
        batch_size: Batch size of the internal ``DataLoader``.
        num_workers: Worker count of the internal ``DataLoader``.
        transform: Optional transform applied to every image.

    Returns:
        ``(predictions, probas)``: a flat list of predicted class ids and a
        list of per-class probability rows, in subset order. Both are empty
        when ``indices`` selects nothing.
    """
    dataset = DatasetFromSubset(
        Subset(dataset, indices=indices),
        transform=transform)

    loader = DataLoader(dataset,
                        batch_size=batch_size,
                        num_workers=num_workers,
                        shuffle=False,
                        drop_last=False,
                        pin_memory=True)

    use_cuda = torch.cuda.is_available()

    predictions = []
    probas = []
    model.eval()
    if use_cuda:
        model = model.cuda()
    with torch.no_grad():
        # Labels are not needed for inference.
        for images, _ in tqdm(loader):
            if use_cuda:
                images = images.cuda()
            batch_probas = model.predict_proba(images)
            batch_preds = torch.max(batch_probas, 1)[1]
            predictions.append(batch_preds)
            probas.append(batch_probas)

    # torch.hstack / torch.vstack reject an empty list, so an empty selection
    # is answered directly.
    if not predictions:
        return [], []

    predictions = torch.hstack(predictions).flatten().tolist()
    probas = torch.vstack(probas).tolist()

    return predictions, probas


def lr_find(trainer, model, train_data_loader, val_data_loader=None, plot=False):
    """Run the trainer's learning-rate finder and return its suggestion.

    Args:
        trainer: Object exposing ``tuner.lr_find(...)``.
        model: Model handed to the finder.
        train_data_loader: Training loader handed to the finder.
        val_data_loader: Optional validation loader.
        plot: When True, display the finder's curve with the suggestion marked.

    Returns:
        The learning rate suggested by the finder.
    """
    # Compare against None explicitly: truth-testing a loader calls its
    # __len__, which would drop an empty loader and raises for loaders that
    # have no length.
    val_dataloaders = [val_data_loader] if val_data_loader is not None else None

    lr_finder = trainer.tuner.lr_find(model,
                                      train_dataloader=train_data_loader,
                                      val_dataloaders=val_dataloaders)
    if plot:
        plt.figure()
        plt.title('LR finder results')
        lr_finder.plot(suggest=True)
        plt.show()

    newlr = lr_finder.suggestion()
    logging.info('LR finder suggestion: %f', newlr)

    return newlr


def train_classifier(model, train_loader, hparams, only_train_layers=None, log_training=True, logger_name='classifier'):
    """Fine-tune a fresh ``LeafDoctorModel`` initialised from ``model``.

    Args:
        model: Model whose ``state_dict`` seeds the new classifier.
        train_loader: Training data loader.
        hparams: Namespace with trainer and model hyper-parameters.
        only_train_layers: Forwarded to ``LeafDoctorModel`` to restrict which
            layers are trained.
        log_training: Log to TensorBoard under ``lightning_logs`` when True.
        logger_name: Name of the TensorBoard run directory.

    Returns:
        The newly created, trained ``LeafDoctorModel``.
    """
    if log_training:
        logger = TensorBoardLogger("lightning_logs", name=logger_name)
    else:
        logger = None

    trainer_overrides = dict(
        reload_dataloaders_every_epoch=True,
        terminate_on_nan=True,
        precision=hparams.precision,
        amp_level=hparams.amp_level,
        callbacks=[LearningRateMonitor(logging_interval='step')],
        log_every_n_steps=hparams.log_every_n_steps,
        flush_logs_every_n_steps=hparams.flush_logs_every_n_steps,
        logger=logger,
    )
    trainer = Trainer.from_argparse_args(hparams, **trainer_overrides)

    # A new module is built so that ``hparams`` and the frozen-layer selection
    # take effect; the weights are copied over from the given model.
    classifier = LeafDoctorModel(hparams, only_train_layers=only_train_layers)
    classifier.load_state_dict(model.state_dict())

    trainer.fit(classifier, train_loader)
    logging.info('Training finished')
    return classifier


def train_byol(model, loader, byol_parameters, log_training=True, logger_name='byol'):
    """Pre-train the trunk of ``model`` with BYOL and return a new classifier.

    Args:
        model: Model whose ``state_dict`` seeds the network to pre-train.
        loader: Data loader used for both the training and validation loops.
        byol_parameters: Dict of hyper-parameters, turned into a ``Namespace``.
        log_training: Log to TensorBoard under ``lightning_logs`` when True.
        logger_name: Name of the TensorBoard run directory.

    Returns:
        A new ``LeafDoctorModel`` whose trunk holds the pre-trained weights.
    """
    # Only the last block and the head of the trunk remain trainable.
    trainable_parts = [
        lambda trunk: trunk.blocks[-1],
        lambda trunk: trunk.conv_head,
        lambda trunk: trunk.bn2,
        lambda trunk: trunk.global_pool,
        lambda trunk: trunk.act2,
        lambda trunk: trunk.classifier,
    ]
    backbone = LeafDoctorModel(only_train_layers=trainable_parts)
    backbone.load_state_dict(model.state_dict())

    hparams = Namespace(**byol_parameters)

    if log_training:
        logger = TensorBoardLogger("lightning_logs", name=logger_name)
    else:
        logger = None

    byol = BYOL(backbone.trunk, hparams=hparams)

    stop_on_plateau = EarlyStopping('train_loss',
                                    mode='min',
                                    patience=hparams.early_stop_patience,
                                    verbose=True)
    lr_monitor = LearningRateMonitor(logging_interval='step')
    trainer_overrides = dict(
        reload_dataloaders_every_epoch=True,
        terminate_on_nan=True,
        callbacks=[stop_on_plateau, lr_monitor],
        precision=hparams.precision,
        amp_level=hparams.amp_level,
        log_every_n_steps=hparams.log_every_n_steps,
        flush_logs_every_n_steps=hparams.flush_logs_every_n_steps,
        logger=logger,
    )
    trainer = Trainer.from_argparse_args(hparams, **trainer_overrides)

    if hparams.auto_lr_find:
        suggested_lr = lr_find(trainer, byol, loader)
        # Update both the local namespace and the one held by the module.
        hparams.lr = suggested_lr
        byol.hparams.lr = suggested_lr

    # The same loader serves as training and validation data.
    trainer.fit(byol, loader, loader)

    pretrained_model = LeafDoctorModel(None)
    pretrained_model.trunk.load_state_dict(byol.encoder.model.state_dict())
    return pretrained_model


In [None]:
# file bitempered_loss.py

# https://github.com/fhopfmueller/bi-tempered-loss-pytorch/blob/master/bi_tempered_loss_pytorch.py

import torch


def log_t(u, t):
    """Tempered logarithm of `u`: ``(u**(1-t) - 1) / (1-t)``, or ``log(u)`` at t == 1."""
    if t != 1.0:
        exponent = 1.0 - t
        return (u.pow(exponent) - 1.0) / exponent
    return u.log()


def exp_t(u, t):
    """Tempered exponential of `u`: ``relu(1 + (1-t)*u) ** (1/(1-t))``, or ``exp(u)`` at t == 1."""
    if t != 1:
        exponent = 1.0 - t
        return (1.0 + exponent * u).relu().pow(1.0 / exponent)
    return u.exp()


def compute_normalization_fixed_point(activations, t, num_iters):
    """Normalization value per example via fixed-point iteration (t > 1.0).

    Args:
      activations: A multi-dimensional tensor with last dimension `num_classes`.
      t: Temperature 2 (> 1.0 for tail heaviness).
      num_iters: Number of fixed-point iterations to run.
    Return: A tensor shaped like `activations` except that the last dimension is 1.
    """
    def partition_of(values):
        # Sum of tempered exponentials over the class dimension.
        return torch.sum(exp_t(values, t), -1, keepdim=True)

    # Shift by the row maximum so that the largest activation becomes 0.
    mu, _ = torch.max(activations, -1, keepdim=True)
    shifted = activations - mu

    current = shifted
    for _ in range(num_iters):
        current = shifted * partition_of(current).pow(1.0 - t)

    logt_partition = partition_of(current)
    return - log_t(1.0 / logt_partition, t) + mu


def compute_normalization_binary_search(activations, t, num_iters):
    """Normalization value per example via bisection (t < 1.0).

    Args:
      activations: A multi-dimensional tensor with last dimension `num_classes`.
      t: Temperature 2 (< 1.0 for finite support).
      num_iters: Number of bisection steps to run.
    Return: A tensor of the same rank as `activations` with last dimension 1.
    """
    # Shift by the row maximum so that the largest activation becomes 0.
    mu, _ = torch.max(activations, -1, keepdim=True)
    shifted = activations - mu

    # Count the entries above -1/(1-t), i.e. those whose tempered exponential
    # is not clipped to zero by the relu inside exp_t.
    above_cutoff = shifted > -1.0 / (1.0 - t)
    effective_dim = torch.sum(
        above_cutoff.to(torch.int32), dim=-1, keepdim=True
    ).to(activations.dtype)

    shape_partition = activations.shape[:-1] + (1,)
    lower = torch.zeros(shape_partition, dtype=activations.dtype, device=activations.device)
    upper = -log_t(1.0 / effective_dim, t) * torch.ones_like(lower)

    for _ in range(num_iters):
        midpoint = (upper + lower) / 2.0
        sum_probs = torch.sum(
            exp_t(shifted - midpoint, t), dim=-1, keepdim=True)
        # update == 1 where the probabilities sum to less than one: there the
        # upper bound moves down to the midpoint; elsewhere the lower bound
        # moves up to it.
        update = (sum_probs < 1.0).to(activations.dtype)
        lower = torch.reshape(
            lower * update + (1.0 - update) * midpoint,
            shape_partition)
        upper = torch.reshape(
            upper * (1.0 - update) + update * midpoint,
            shape_partition)

    return (upper + lower) / 2.0 + mu


class ComputeNormalization(torch.autograd.Function):
    """
    Autograd function computing the tempered-softmax normalization constants
    with a hand-written backward pass. See compute_normalization.
    """

    @staticmethod
    def forward(ctx, activations, t, num_iters):
        # Bisection for t < 1, fixed-point iteration otherwise.
        solver = (compute_normalization_binary_search
                  if t < 1.0
                  else compute_normalization_fixed_point)
        normalization_constants = solver(activations, t, num_iters)

        ctx.save_for_backward(activations, normalization_constants)
        ctx.t = t
        return normalization_constants

    @staticmethod
    def backward(ctx, grad_output):
        activations, normalization_constants = ctx.saved_tensors
        temperature = ctx.t

        probabilities = exp_t(activations - normalization_constants, temperature)
        # Escort distribution: probabilities raised to t, renormalised over classes.
        escorts = probabilities.pow(temperature)
        escorts = escorts / escorts.sum(dim=-1, keepdim=True)

        # Only ``activations`` receives a gradient; t and num_iters do not.
        return escorts * grad_output, None, None


def compute_normalization(activations, t, num_iters=5):
    """Normalization value for each example, with a backward pass implemented.

    Args:
      activations: A multi-dimensional tensor with last dimension `num_classes`.
      t: Temperature 2 (> 1.0 for tail heaviness, < 1.0 for finite support).
      num_iters: Number of iterations to run the method.
    Return: A tensor of the same rank as `activations` with last dimension 1.
    """
    normalization_constants = ComputeNormalization.apply(activations, t, num_iters)
    return normalization_constants


def tempered_sigmoid(activations, t, num_iters=5):
    """Tempered sigmoid, computed as a two-class tempered softmax.

    Args:
      activations: Activations for the positive class for binary classification.
      t: Temperature tensor > 0.0.
      num_iters: Number of iterations to run the method.

    Returns:
      A probabilities tensor for the positive class.
    """
    # Pair every activation with a zero activation for the implicit second class.
    zero_class = torch.zeros_like(activations)
    two_class_activations = torch.stack([activations, zero_class], dim=-1)
    two_class_probabilities = tempered_softmax(two_class_activations, t, num_iters)
    return two_class_probabilities[..., 0]


def tempered_softmax(activations, t, num_iters=5):
    """Tempered softmax over the last dimension.

    Args:
      activations: A multi-dimensional tensor with last dimension `num_classes`.
      t: Temperature > 1.0.
      num_iters: Number of iterations to run the method.
    Returns:
      A probabilities tensor. At t == 1.0 this is the ordinary softmax.
    """
    if t != 1.0:
        normalization_constants = compute_normalization(activations, t, num_iters)
        return exp_t(activations - normalization_constants, t)
    return activations.softmax(dim=-1)


def bi_tempered_binary_logistic_loss(activations,
                                     labels,
                                     t1,
                                     t2,
                                     label_smoothing=0.0,
                                     num_iters=5,
                                     reduction='mean'):
    """Bi-Tempered binary logistic loss.

    The binary problem is expanded into a two-class one (class 1 versus an
    implicit zero-activation class) and handed to bi_tempered_logistic_loss.

    Args:
      activations: A tensor containing activations for class 1.
      labels: A tensor with shape as activations, containing probabilities for class 1
      t1: Temperature 1 (< 1.0 for boundedness).
      t2: Temperature 2 (> 1.0 for tail heaviness, < 1.0 for finite support).
      label_smoothing: Label smoothing
      num_iters: Number of iterations to run the method.
      reduction: Forwarded unchanged to bi_tempered_logistic_loss.

    Returns:
      A loss tensor.
    """
    positive = labels.to(activations.dtype)

    two_class_activations = torch.stack(
        [activations, torch.zeros_like(activations)], dim=-1)
    two_class_labels = torch.stack([positive, 1.0 - positive], dim=-1)

    return bi_tempered_logistic_loss(two_class_activations,
                                     two_class_labels,
                                     t1,
                                     t2,
                                     label_smoothing=label_smoothing,
                                     num_iters=num_iters,
                                     reduction=reduction)


def bi_tempered_logistic_loss(activations,
                              labels,
                              t1,
                              t2,
                              label_smoothing=0.0,
                              num_iters=5,
                              reduction='mean'):
    """Bi-Tempered Logistic Loss.
    Args:
      activations: A multi-dimensional tensor with last dimension `num_classes`.
      labels: A tensor with shape and dtype as activations (onehot),
        or a long tensor of one dimension less than activations (pytorch standard)
      t1: Temperature 1 (< 1.0 for boundedness).
      t2: Temperature 2 (> 1.0 for tail heaviness, < 1.0 for finite support).
      label_smoothing: Label smoothing parameter between [0, 1). Default 0.0.
      num_iters: Number of iterations to run the method. Default 5.
      reduction: ``'none'`` | ``'mean'`` | ``'sum'``. Default ``'mean'``.
        ``'none'``: No reduction is applied, return shape is shape of
        activations without the last dimension.
        ``'mean'``: Loss is averaged over all elements; returns a scalar tensor.
        ``'sum'``: Loss is summed over all elements; returns a scalar tensor.
    Returns:
      A loss tensor.
    Raises:
      ValueError: If ``reduction`` is not one of the three modes above.
    """
    # Fail fast instead of silently returning None for an unknown mode.
    if reduction not in ('none', 'mean', 'sum'):
        raise ValueError(
            "reduction must be 'none', 'mean' or 'sum', got {!r}".format(reduction))

    if len(labels.shape) < len(activations.shape):  # not one-hot
        labels_onehot = torch.zeros_like(activations)
        # Classes live on the last axis, so scatter there. For 2-D activations
        # this equals dim 1; for inputs with extra leading dimensions dim 1
        # would be the wrong axis.
        labels_onehot.scatter_(-1, labels[..., None], 1)
    else:
        labels_onehot = labels

    if label_smoothing > 0:
        num_classes = labels_onehot.shape[-1]
        # The true class keeps 1 - label_smoothing; the smoothing mass is
        # spread evenly over the remaining num_classes - 1 classes.
        labels_onehot = (1 - label_smoothing * num_classes / (num_classes - 1)) \
                        * labels_onehot + \
                        label_smoothing / (num_classes - 1)

    probabilities = tempered_softmax(activations, t2, num_iters)

    # The 1e-10 offset keeps log_t away from an exact zero label.
    loss_values = (labels_onehot * log_t(labels_onehot + 1e-10, t1)
                   - labels_onehot * log_t(probabilities, t1)
                   - labels_onehot.pow(2.0 - t1) / (2.0 - t1)
                   + probabilities.pow(2.0 - t1) / (2.0 - t1))
    loss_values = loss_values.sum(dim=-1)  # sum over classes

    if reduction == 'none':
        return loss_values
    if reduction == 'sum':
        return loss_values.sum()
    return loss_values.mean()


In [None]:
#Pipeline prepare

def obtain_image_hashes(train_images_torch_2020, train_images_torch_2019, test_images_torch_2019, extra_images_torch_2019):
    """Hash every image of the four source datasets.

    Each dataset is read one sample at a time through a ``DataLoader`` and
    every image is converted to a PIL image and passed to ``get_img_hash``.

    Returns:
      A pair ``(image_ids_df, hashes_df)`` with one row per image, in the same
      order. ``image_ids_df`` has the columns ``ds`` (dataset name), ``ix``
      (position within that dataset) and ``label``; the label is ``None`` for
      the ``test_2019`` and ``extra_2019`` datasets. ``hashes_df`` holds the
      hashes cast to integers.
    """
    # Adapted from https://www.kaggle.com/zzy990106/duplicate-images-in-two-competitions
    named_datasets = {
        'train_2020': train_images_torch_2020,
        'train_2019': train_images_torch_2019,
        'test_2019': test_images_torch_2019,
        'extra_2019': extra_images_torch_2019,
    }
    unlabelled_names = ['test_2019', 'extra_2019']

    id_rows = []
    hash_rows = []

    logging.info('Obtaining hashes')

    for dname, ds in tqdm(named_datasets.items()):
        loader = DataLoader(ds, num_workers=6, batch_size=None, pin_memory=True)
        progress = tqdm(enumerate(loader), total=len(loader), desc=dname)
        for ix, (image, label) in progress:
            # Labels of the unlabelled sources are discarded; others become plain ints.
            if dname in unlabelled_names or label is None:
                label = None
            else:
                label = int(label)

            image_hash = get_img_hash(Image.fromarray(np.array(image)))
            id_rows.append((dname, ix, label))
            hash_rows.append(image_hash)

    image_ids_df = pd.DataFrame(id_rows, columns=['ds', 'ix', 'label'])
    hashes_df = pd.DataFrame(np.array(hash_rows).astype(int))

    return image_ids_df, hashes_df


def find_duplicates(image_ids, image_hashes):
    """Pair up images whose hashes agree in almost every position.

    Two images form a duplicate pair when the number of equal hash positions
    is strictly greater than ``int(0.9 * number_of_hash_columns)``.

    Args:
      image_ids: DataFrame whose rows identify the images (three values each).
      image_hashes: DataFrame with one hash row per image, aligned with
        ``image_ids``.

    Returns:
      A DataFrame with columns ``ds1, id1, label1, ds2, id2, label2``; each
      row joins the id rows of the two images of a pair, the image with the
      smaller row position first.
    """
    # Adapted from https://www.kaggle.com/zzy990106/duplicate-images-in-two-competitions
    id_rows = image_ids.values
    hash_bits = torch.Tensor(np.array(image_hashes.values).astype(int))
    n_images, n_bits = hash_bits.shape

    logging.info('Computing similarities and finding duplicates')
    sim_threshold = int(0.9 * n_bits)
    pairs = []
    for i in tqdm(range(n_images)):
        # Number of positions in which image i agrees with every image.
        matching_bits = (hash_bits[i] == hash_bits).sum(dim=1).numpy()
        dupes = np.nonzero((matching_bits > sim_threshold).astype(int))[0]
        if len(dupes) <= 1:
            continue
        # Store each pair in sorted order so (a, b) and (b, a) collapse below.
        pairs.extend(tuple(sorted([i, dup])) for dup in dupes if dup != i)

    unique_pairs = list(set(pairs))

    # ds1 | id1 | label1 | ds2 | id2 | label2
    joined_rows = [(*id_rows[first], *id_rows[second]) for first, second in unique_pairs]

    return pd.DataFrame(list(set(joined_rows)),
                        columns=['ds1', 'id1', 'label1', 'ds2', 'id2', 'label2'])


def prepare_dataset(train_images_torch_2020, train_images_torch_2019, test_images_torch_2019, extra_images_torch_2019, duplicates):
    """Drop duplicate images, apply the prepare transforms and write the result to disk.

    For every dataset, the indices listed in the ``id2`` column of
    ``duplicates`` (grouped by ``ds2``) are excluded. The remaining images are
    saved as ``<position>.jpg`` under ``data/03_primary/train`` (2020 + 2019
    train) and ``data/03_primary/unlabelled`` (2019 test + extra), together
    with a CSV listing ``image_id``, ``label`` and ``source``.

    Side effects: the ``transform`` attribute of the four input datasets is
    set to ``data_preapre_transform``.

    Args:
      train_images_torch_2020, train_images_torch_2019, test_images_torch_2019,
      extra_images_torch_2019: the four source datasets.
      duplicates: DataFrame with at least the columns ``ds2`` and ``id2``.

    Returns:
      A pair of ``CassavaDataset`` objects (train, unlabelled) built from the
      written folders and the CSV columns.

    Raises:
      Exception: If either output folder already exists.
    """
    # Indices to drop per source dataset. Sets make the membership test below O(1).
    blacklist = {ds_name: set(group['id2']) for ds_name, group in duplicates.groupby('ds2')}

    def kept_indices(ds_name, dataset):
        # A dataset that never appears in `ds2` has nothing to drop.
        dropped = blacklist.get(ds_name, set())
        return [i for i in range(len(dataset)) if i not in dropped]

    train_images_torch_2020.transform = data_preapre_transform
    train_images_torch_2019.transform = data_preapre_transform
    test_images_torch_2019.transform = data_preapre_transform
    extra_images_torch_2019.transform = data_preapre_transform

    prepare_transforms = get_prepare_transforms(512, 512)
    train_dataset_2020 = DatasetFromSubset(
        Subset(train_images_torch_2020, indices=kept_indices('train_2020', train_images_torch_2020)),
        transform=prepare_transforms)

    train_dataset_2019 = DatasetFromSubset(
        Subset(train_images_torch_2019, indices=kept_indices('train_2019', train_images_torch_2019)),
        transform=prepare_transforms)

    # The unlabelled sources get the constant target -1.
    test_dataset_2019 = DatasetFromSubset(
        Subset(test_images_torch_2019, indices=kept_indices('test_2019', test_images_torch_2019)),
        transform=prepare_transforms, target_transform=lambda y: -1)

    extra_dataset_2019 = DatasetFromSubset(
        Subset(extra_images_torch_2019, indices=kept_indices('extra_2019', extra_images_torch_2019)),
        transform=prepare_transforms, target_transform=lambda y: -1)

    train_dataset = ConcatDataset([train_dataset_2020, train_dataset_2019])
    train_sources = ['train_2020'] * len(train_dataset_2020) + ['train_2019'] * len(train_dataset_2019)

    unlabelled_dataset = ConcatDataset([test_dataset_2019, extra_dataset_2019])
    unlabelled_sources = ['test_2019'] * len(test_dataset_2019) + ['extra_2019'] * len(extra_dataset_2019)

    train_path = 'data/03_primary/train'
    train_csv_path = 'data/03_primary/train.csv'
    unlabelled_path = 'data/03_primary/unlabelled'
    unlabelled_csv_path = 'data/03_primary/unlabelled.csv'

    # Refuse to overwrite a previously prepared dataset.
    if any([os.path.exists(train_path),
            os.path.exists(unlabelled_path)]):
        raise Exception('Dataset folders already exist, delete manually to overwrite.')

    os.makedirs(train_path, exist_ok=True)
    os.makedirs(unlabelled_path, exist_ok=True)

    def make_image_folder(dataset, sources, path, csv_path):
        # Save every sample as `<position>.jpg` and record it in a CSV.
        loader = DataLoader(dataset, batch_size=None, num_workers=6, collate_fn=lambda x: x)
        rows = []
        for ix, (image, label) in enumerate(tqdm(loader)):
            image_id = f'{ix}.jpg'
            source = sources[ix]
            img_path = os.path.join(path, image_id)
            imsave(img_path, image)
            rows.append((image_id, label, source))

        df = pd.DataFrame(rows, columns=['image_id', 'label', 'source'])
        df.to_csv(csv_path, index=False)
        return df

    train_df = make_image_folder(train_dataset, train_sources, train_path, train_csv_path)
    unlabelled_df = make_image_folder(unlabelled_dataset, unlabelled_sources, unlabelled_path, unlabelled_csv_path)
    return CassavaDataset(train_path, train_df.image_id, train_df.label), CassavaDataset(unlabelled_path, unlabelled_df.image_id, unlabelled_df.label)


In [None]:
#Pipeline pretrain

def pretrain_model(train, unlabelled, parameters):
    """Run ``train_byol`` on a fresh ``LeafDoctorModel`` over train + unlabelled images.

    Both datasets are wrapped in full so that the BYOL transforms apply to
    every sample, concatenated, and served through a shuffling loader.

    Args:
      train: Dataset of training images.
      unlabelled: Dataset of unlabelled images.
      parameters: Configuration dict; reads ``parameters['byol']``,
        ``parameters['data_loader_workers']`` and ``parameters['log_training']``.

    Returns:
      Whatever ``train_byol`` returns for the new model.
    """
    byol_params = parameters['byol']
    byol_transforms = get_byol_transforms(byol_params['width'], byol_params['height'])

    def with_byol_transforms(source):
        every_index = list(range(len(source)))
        return DatasetFromSubset(Subset(source, indices=every_index),
                                 transform=byol_transforms)

    dataset = ConcatDataset([with_byol_transforms(train),
                             with_byol_transforms(unlabelled)])
    loader = DataLoader(dataset,
                        batch_size=byol_params['batch_size'],
                        num_workers=parameters['data_loader_workers'],
                        shuffle=True,
                        pin_memory=True)

    return train_byol(LeafDoctorModel(), loader,
                      byol_parameters=byol_params,
                      log_training=parameters['log_training'],
                      logger_name='byol_train')


In [None]:
#Pipeline train

def train_model(pretrained_model, train, parameters):
    """Train the classifier on ``train``, starting from ``pretrained_model``.

    Args:
      pretrained_model: Model handed to ``train_classifier``.
      train: Dataset of training images; every sample is used.
      parameters: Configuration dict; reads ``parameters['classifier']`` (also
        passed to ``train_classifier`` as a ``Namespace``) and
        ``parameters['data_loader_workers']``.

    Returns:
      Whatever ``train_classifier`` returns.
    """
    classifier_params = parameters['classifier']
    hparams = Namespace(**classifier_params)

    train_transform = get_train_transforms(classifier_params['train_width'],
                                           classifier_params['train_height'])
    every_index = list(range(len(train)))
    train_dataset = DatasetFromSubset(Subset(train, indices=every_index),
                                      transform=train_transform)
    train_loader = DataLoader(train_dataset,
                              batch_size=classifier_params['batch_size'],
                              num_workers=parameters['data_loader_workers'],
                              shuffle=True,
                              pin_memory=True)

    # Train
    logging.info('Training model')
    return train_classifier(pretrained_model, train_loader, hparams=hparams)


In [None]:
#Pipeline predict

def predict_submission(cv_results, train, test_images_torch_2020, sample_submission, parameters):
    """Average the fold models' probabilities on the test images and fill in the labels.

    Args:
      cv_results: Mapping of fold name to a dict with a ``'model_path'`` entry;
        a ``'summary'`` key, if present, is ignored.
      train: Training dataset, passed to ``finetune_cv_model``.
      test_images_torch_2020: Dataset to predict on.
      sample_submission: DataFrame whose ``label`` column is overwritten in
        place with the predicted class indices.
      parameters: Configuration dict; reads ``classifier``, ``eval`` and
        ``data_loader_workers``.

    Returns:
      The same ``sample_submission`` object, now holding the predictions.
    """
    logging.debug('Predicting on test with model')

    fold_model_names = [fold_result['model_path']
                        for fold, fold_result in cv_results.items()
                        if fold != 'summary']

    def fold_probabilities(model_path):
        # Restore one fold model, call finetune_cv_model on it, then predict.
        model = LeafDoctorModel(hparams=Namespace(**parameters['classifier']))
        model.load_state_dict(torch.load(model_path))
        finetune_cv_model(model, train, test_images_torch_2020, parameters)

        _, probas = predict(model,
                            dataset=test_images_torch_2020,
                            indices=list(range(len(test_images_torch_2020))),
                            batch_size=parameters['eval']['batch_size'],
                            num_workers=parameters['data_loader_workers'],
                            transform=get_test_transforms(parameters['classifier']['test_width'],
                                                          parameters['classifier']['test_height']))
        return probas

    all_probas = [fold_probabilities(model_path) for model_path in fold_model_names]

    # Mean over folds, laid out as one row of five class scores per image.
    aggregated_probas = np.mean(all_probas, axis=0).reshape(-1, 5)
    sample_submission.label = np.argmax(aggregated_probas, 1)
    return sample_submission


In [None]:
#Pipeline cv

def obtain_cv_splits(train, parameters):
    """Build stratified CV splits whose test folds contain only 2020 images.

    The ``train_2019`` and ``train_2020`` samples are split separately with
    the same ``StratifiedKFold`` configuration, each stratified on its own
    labels. Fold ``k`` trains on the 2019 and 2020 training parts of split
    ``k`` and tests on the 2020 test part only.

    Args:
      train: Dataset exposing array-like ``labels`` and ``sources`` attributes
        of equal length.
      parameters: Configuration dict; reads ``cv_splits`` (number of folds)
        and ``seed``.

    Returns:
      A list of ``(train_idx, test_idx)`` pairs of integer index arrays into
      ``train``.
    """
    labels = np.asarray(train.labels)
    sources = np.asarray(train.sources)
    indices_2020 = np.flatnonzero(sources == 'train_2020')
    indices_2019 = np.flatnonzero(sources == 'train_2019')

    n_splits = parameters['cv_splits']
    # random_state only takes effect together with shuffle=True; recent
    # scikit-learn releases raise when it is set while shuffling is off.
    cv = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=parameters['seed'])

    # Preserve same class distribution in both train and test
    # Only put 2020 data in test
    if len(indices_2019) > 0:
        # Stratify the 2019 samples on their own labels.
        train_parts_2019 = [indices_2019[train_2019_idx]
                            for train_2019_idx, _ in cv.split(indices_2019, labels[indices_2019])]
    else:
        # No 2019 data: every fold trains on 2020 images only.
        train_parts_2019 = [indices_2019] * n_splits

    splits = []
    splits_2020 = cv.split(indices_2020, labels[indices_2020])
    for train_part_2019, (train_2020_idx, test_2020_idx) in zip(train_parts_2019, splits_2020):
        train_idx = np.concatenate([train_part_2019, indices_2020[train_2020_idx]])
        test_idx = indices_2020[test_2020_idx]
        splits.append((train_idx, test_idx))
    return splits


def cross_validation(train, unlabelled, cv_splits, parameters):
    """Train, save and score one model per CV fold.

    For each ``(train_idx, test_idx)`` split the fold goes through
    ``pretrain_model``, ``train_model``, ``finetune_byol_test`` and
    ``finetune_classifier_resolution``. The resulting state dict is saved as
    ``model_fold_<n>.pt`` inside ``parameters['cv_models_dir']`` and the model
    is scored on the fold's test subset.

    Args:
      train: Labelled dataset that the fold indices refer to.
      unlabelled: Unlabelled dataset passed to ``pretrain_model``.
      cv_splits: Iterable of ``(train_idx, test_idx)`` pairs.
      parameters: Configuration dict.

    Returns:
      A dict with one ``fold_<n>`` entry per fold (model path, test indices,
      test scores, test predictions) and a ``summary`` entry holding the mean
      and standard deviation of every collected score.

    Raises:
      Exception: If the models directory exists and is not empty.
    """
    models_dir = parameters['cv_models_dir']
    if os.path.exists(models_dir) and len(os.listdir(models_dir)) > 0:
        raise Exception('CV models path already exists, please delete it explicitly to overwrite')
    os.makedirs(models_dir, exist_ok=True)

    cv_results = {'summary': {}}
    score_values = {
        'test': defaultdict(list),
        'val': defaultdict(list),
    }

    for fold_num, (train_idx, test_idx) in enumerate(cv_splits):
        logging.info('Fitting CV fold %d', fold_num)
        model_path = os.path.join(models_dir, f'model_fold_{fold_num}.pt')
        fold_parameters = copy(parameters)

        # Split
        fold_train_dataset = DatasetFromSubset(Subset(train, indices=train_idx))
        fold_test_dataset = DatasetFromSubset(Subset(train, indices=test_idx))

        logging.info('Pretraining on train+unlabelled')
        pretrained_model = pretrain_model(fold_train_dataset, unlabelled, fold_parameters)

        logging.info('Training on train')
        model = train_model(pretrained_model, fold_train_dataset, fold_parameters)

        logging.info('Finetuning with BYOL')
        model = finetune_byol_test(model, fold_train_dataset, fold_test_dataset, fold_parameters)

        logging.info('Finetuning for test resolution')
        model = finetune_classifier_resolution(model, fold_train_dataset, fold_parameters)

        logging.info('Done training CV fold')
        torch.save(model.state_dict(), model_path)

        # Score on test
        every_test_index = list(range(len(fold_test_dataset)))
        test_scores, test_predictions = score_model(model, fold_test_dataset, every_test_index, fold_parameters)

        cv_results[f'fold_{fold_num}'] = dict(
            model_path=model_path,
            test_indices=test_idx,
            test_scores=test_scores,
            test_predictions=test_predictions,
        )

        # Collect each score per fold for the summary below.
        for score_name in test_scores:
            score_values['test'][score_name].append(test_scores[score_name])

    summary = cv_results['summary']
    for score_set, scores_by_name in score_values.items():
        for score_name, scores in scores_by_name.items():
            summary[f'{score_set}_{score_name}_mean'] = np.mean(scores)
            summary[f'{score_set}_{score_name}_std'] = np.std(scores)

    logging.info('Cross-validation results %s', cv_results['summary'])
    return cv_results


In [None]:
#Pipeline finetune

def finetune_byol_test(pretrained_model, train, test_images_torch_2020, parameters):
    """Continue ``train_byol`` on the 2020 training images plus the test images.

    Args:
      pretrained_model: Model handed to ``train_byol``.
      train: Dataset exposing a ``sources`` array; only samples whose source
        is ``'train_2020'`` are used.
      test_images_torch_2020: Dataset concatenated after the 2020 subset.
      parameters: Configuration dict; the BYOL settings are
        ``parameters['byol']`` with the entries of ``['byol']['on_test']``
        written on top.

    Returns:
      Whatever ``train_byol`` returns.
    """
    base_byol = parameters['byol']
    byol_transforms = get_byol_transforms(base_byol['width'], base_byol['height'])

    train_2020_indices = np.argwhere(train.sources == 'train_2020').flatten()
    train_2020 = DatasetFromSubset(Subset(train, indices=train_2020_indices))

    combined = ConcatDataset([train_2020, test_images_torch_2020])
    every_index = list(range(len(combined)))
    dataset = DatasetFromSubset(Subset(combined, indices=every_index),
                                transform=byol_transforms)
    loader = DataLoader(dataset,
                        batch_size=base_byol['batch_size'],
                        num_workers=parameters['data_loader_workers'],
                        shuffle=True,
                        pin_memory=True)

    # Copy first so the overrides never leak back into `parameters`.
    byol_params = dict(base_byol)
    byol_params.update(base_byol['on_test'])

    return train_byol(pretrained_model, loader,
                      byol_parameters=byol_params,
                      log_training=parameters['log_training'],
                      logger_name='byol_test')


def finetune_classifier_resolution(model, train, parameters):
    """Fine-tune the classifier at the test image size on 2020 training images.

    Only the 2020 samples of ``train`` are used, with the train transforms
    built for ``test_width`` x ``test_height``. The hyper-parameters are
    ``parameters['classifier']`` with the ``finetune`` entries written on
    top, and ``train_classifier`` receives a list of accessors selecting the
    trunk parts passed as ``only_train_layers``.

    Args:
      model: Model handed to ``train_classifier``.
      train: Dataset exposing a ``sources`` array.
      parameters: Configuration dict.

    Returns:
      Whatever ``train_classifier`` returns.
    """
    logging.info('Finetuning model for test image size')

    classifier_params = parameters['classifier']

    train_2020_indices = np.argwhere(train.sources == 'train_2020').flatten()
    train_2020 = DatasetFromSubset(Subset(train, indices=train_2020_indices))

    train_transform = get_train_transforms(classifier_params['test_width'],
                                           classifier_params['test_height'])
    every_index = list(range(len(train_2020)))
    train_dataset = DatasetFromSubset(Subset(train_2020, indices=every_index),
                                      transform=train_transform)
    train_loader = DataLoader(train_dataset,
                              batch_size=classifier_params['batch_size'],
                              num_workers=parameters['data_loader_workers'],
                              shuffle=True,
                              pin_memory=True)

    # Classifier settings with the fine-tune overrides applied on a copy.
    merged_hparams = dict(classifier_params)
    merged_hparams.update(classifier_params['finetune'])
    hparams = Namespace(**merged_hparams)

    only_train_layers = [
        lambda net: net.blocks[-1],
        lambda net: net.conv_head,
        lambda net: net.bn2,
        lambda net: net.global_pool,
        lambda net: net.act2,
        lambda net: net.classifier,
    ]
    return train_classifier(model, train_loader,
                            hparams=hparams,
                            only_train_layers=only_train_layers,
                            log_training=parameters['log_training'],
                            logger_name='classifier_finetune')


# Parameters

In [None]:
# Configuration shared by the pipeline functions in this notebook.
parameters = {
    "seed": 42,
    "cv_splits": 4,  # number of cross-validation folds
    "cv_models_dir": "data/06_models/cv_folds",  # where fold checkpoints are saved
    "validation_size": 0.15,
    "data_loader_workers": 6,
    "log_training": 1,
    # Settings read through parameters['classifier'].
    "classifier": {
        "train_height": 320,
        "train_width": 320,
        # Image size used when fine-tuning for, and predicting at, test resolution.
        "test_height": 400,
        "test_width": 400,
        "gpus": -1,
        "batch_size": 20,
        "accumulate_grad_batches": 1,
        "max_epochs": 20,
        "max_steps": 0,
        "auto_lr_find": 0,
        "lr": 0.001,
        "weight_decay": 0.0001,
        "bitempered_t1": 0.8,
        "bitempered_t2": 1.2,
        "label_smoothing": 0.1,
        "amp_level": "O2",
        "precision": 16,
        "log_every_n_steps": 10,
        "flush_logs_every_n_steps": 100,
        # Overrides written on top of the classifier settings by
        # finetune_classifier_resolution.
        "finetune": {
            "max_epochs": 10,
            "lr": 0.0001
        }
    },
    # Settings read through parameters['byol'].
    "byol": {
        "width": 400,
        "height": 400,
        "gpus": -1,
        "batch_size": 12,
        "accumulate_grad_batches": 1,
        "max_epochs": 10,
        "max_steps": 0,
        "auto_lr_find": 0,
        "lr": 0.001,
        "weight_decay": 0.0001,
        "limit_train_batches": 100,
        "limit_val_batches": 1,
        "early_stop_patience": 3,
        # Letter "O" followed by 2, matching the classifier section
        # (the previous value used the digit zero).
        "amp_level": "O2",
        "precision": 16,
        "log_every_n_steps": 10,
        "flush_logs_every_n_steps": 100,
        # Overrides written on top of the byol settings by finetune_byol_test.
        "on_test": {
            "lr": 0.0001,
            "auto_lr_find": 0,
            "max_epochs": 10,
            "early_stop_patience": 1
        }
    },
    # Settings read through parameters['eval'].
    "eval": {
        "batch_size": 16
    }
}

In [None]:

# Input locations: competition files, the prepared training set and the fold checkpoints.
DATA_DIR = '/kaggle/input/cassava-leaf-disease-classification'
TRAIN_DATA_DIR = '.'
MODELS_DIR = '/kaggle/input/cassava-models'

sample_submission = pd.read_csv(f'{DATA_DIR}/sample_submission.csv')

# Training set described by train.csv (image id, label and source per row).
train_csv = pd.read_csv(f'{TRAIN_DATA_DIR}/train.csv')
train = CassavaDataset(
    root=f'{TRAIN_DATA_DIR}/train',
    image_ids=train_csv.image_id.values,
    labels=train_csv.label.values,
    sources=train_csv.source.values,
)

# Test images are the ones listed in the sample submission.
test_images_torch_2020 = CassavaDataset(
    root=f'{DATA_DIR}/test_images',
    image_ids=sample_submission.image_id.values,
    labels=sample_submission.label.values,
)

submission = pd.read_csv(f'{DATA_DIR}/sample_submission.csv')

# One checkpoint per fold, in the layout predict_submission reads.
cv_results = {
    f'fold_{fold}': {'model_path': os.path.join(MODELS_DIR, f'model_fold_{fold}.pt')}
    for fold in range(4)
}


# Execution

In [None]:
# Predict the test labels with the fold models listed in cv_results.
submission = predict_submission(
    cv_results=cv_results,
    train=train,
    test_images_torch_2020=test_images_torch_2020,
    sample_submission=sample_submission,
    parameters=parameters,
)

In [None]:
# Write the predictions to submission.csv, leaving out the DataFrame index.
submission.to_csv(path_or_buf='submission.csv', index=False)