In [1]:
!pip install https://github.com/ufoym/imbalanced-dataset-sampler/archive/master.zip

Collecting https://github.com/ufoym/imbalanced-dataset-sampler/archive/master.zip
  Downloading https://github.com/ufoym/imbalanced-dataset-sampler/archive/master.zip
[K     \ 297 kB 819 kB/ss
Building wheels for collected packages: torchsampler
  Building wheel for torchsampler (setup.py) ... [?25ldone
[?25h  Created wheel for torchsampler: filename=torchsampler-0.1.1-py3-none-any.whl size=3799 sha256=ec87a3c0e38daa136b614530cca604ddd440e553248820a393b888291ffe654b
  Stored in directory: /tmp/pip-ephem-wheel-cache-3ml6docl/wheels/52/7b/7d/ce0e0ddbb7864877a0e31a96f883a928791ebfa6eaf7b52f87
Successfully built torchsampler
Installing collected packages: torchsampler
Successfully installed torchsampler-0.1.1


In [2]:
!pip install efficientnet_pytorch

Collecting efficientnet_pytorch
  Downloading efficientnet_pytorch-0.7.1.tar.gz (21 kB)
Building wheels for collected packages: efficientnet-pytorch
  Building wheel for efficientnet-pytorch (setup.py) ... [?25ldone
[?25h  Created wheel for efficientnet-pytorch: filename=efficientnet_pytorch-0.7.1-py3-none-any.whl size=16446 sha256=ab996967eaed022d3bd6c54848223339ecc018b3299ecf4a07392aad348592a4
  Stored in directory: /root/.cache/pip/wheels/0e/cc/b2/49e74588263573ff778da58cc99b9c6349b496636a7e165be6
Successfully built efficientnet-pytorch
Installing collected packages: efficientnet-pytorch
Successfully installed efficientnet-pytorch-0.7.1


In [3]:
import os
import random

import albumentations as A
import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import torch.optim.lr_scheduler as lr
from efficientnet_pytorch import EfficientNet
from torch.nn.utils import clip_grad_norm_
from torch.utils.data import DataLoader, Dataset
from torchsampler import ImbalancedDatasetSampler
from tqdm.notebook import tqdm

In [4]:
def seed_everything(seed):
    """Seed every random number generator this notebook relies on.

    Sets the seed for Python's ``random`` module, the ``PYTHONHASHSEED``
    environment variable, NumPy's global generator and PyTorch (CPU and CUDA),
    then switches cuDNN to deterministic mode with benchmarking disabled.

    Args:
        seed: Integer seed applied to all generators.
    """
    # Interpreter-level randomness.
    os.environ["PYTHONHASHSEED"] = str(seed)
    random.seed(seed)

    # Numerical libraries.
    np.random.seed(seed)
    for seed_fn in (torch.manual_seed, torch.cuda.manual_seed):
        seed_fn(seed)

    # Favour reproducibility over cuDNN autotuning.
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True

In [5]:
# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
DEVICE = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Training pipeline: random geometric and photometric augmentations, each
# applied with probability 0.5, followed by per-channel normalization.
TRAIN_TRANSFORM = A.Compose(
    [
        A.Flip(p=0.5),
        A.RandomRotate90(p=0.5),
        A.ColorJitter(
            brightness=0.1,
            contrast=0.1,
            saturation=0.3,
            hue=0,
            p=0.5,
        ),
        A.Blur(blur_limit=4, p=0.5),
        A.Normalize(
            mean=(0.485, 0.456, 0.406),
            std=(0.229, 0.224, 0.225),
        ),
    ]
)

# Evaluation pipeline: no augmentation, only the same normalization as above.
TEST_TRANSFORM = A.Compose(
    [
        A.Normalize(
            mean=(0.485, 0.456, 0.406),
            std=(0.229, 0.224, 0.225),
        ),
    ]
)

In [6]:
class MyDataset(Dataset):
    """Image dataset backed by a DataFrame of relative file paths and labels.

    Every row must provide three columns:

    * ``img_id`` - image path relative to ``root`` (backslashes are accepted
      and converted to forward slashes),
    * ``healthy`` - the target returned together with the image,
    * ``plant_condition`` - the label exposed through :meth:`get_label`.

    Args:
        df: Source rows. The index is reset so that positions
            ``0 .. len(df) - 1`` can be used as ``.loc`` keys.
        transform: Optional callable invoked as ``transform(image=img)`` that
            returns a mapping with an ``"image"`` entry.
        root: Directory that every ``img_id`` is resolved against. Defaults
            to the location used by this notebook.
    """

    DEFAULT_ROOT = "../input/bfg-dataset/preprocessed_data/"

    def __init__(self, df, transform=None, root=DEFAULT_ROOT):
        self.df = df.reset_index(drop=True)
        self.transform = transform
        self.root = root

    def __len__(self):
        """Return the number of rows (samples)."""
        return len(self.df)

    def __getitem__(self, idx):
        """Load, optionally transform and return sample ``idx``.

        Returns:
            A ``(image, target)`` pair: the image as a tensor with the last
            axis of the decoded array moved to the front, and a one-element
            tensor holding the row's ``healthy`` value.

        Raises:
            FileNotFoundError: The image file does not exist.
            OSError: The file exists but OpenCV could not decode it.
        """
        relative = self.df.loc[idx, "img_id"].replace("\\", "/")
        filepath = os.path.join(self.root, relative.lstrip("/"))

        # cv2.imread returns None instead of raising, which would otherwise
        # surface later as an opaque cvtColor assertion failure.
        if not os.path.isfile(filepath):
            raise FileNotFoundError("Image not found: {}".format(filepath))

        img = cv2.imread(filepath)
        if img is None:
            raise OSError("Could not decode image: {}".format(filepath))

        # OpenCV decodes to BGR; convert to RGB before any transform runs.
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        if self.transform is not None:
            img = self.transform(image=img)["image"]

        # HWC -> CHW layout for the model.
        image = torch.tensor(img.transpose((2, 0, 1)))
        target = torch.tensor([self.df.loc[idx, "healthy"]])
        return image, target

    def get_label(self, idx):
        """Return the ``plant_condition`` value of row ``idx``.

        Presumably consumed by the imbalanced sampler to balance classes -
        verify against the installed torchsampler version.
        """
        return self.df.loc[idx, "plant_condition"]

In [7]:
def get_lr(optimizer):
    """Return the learning rate of the optimizer's first parameter group.

    Returns ``None`` when the optimizer has no parameter groups.
    """
    learning_rates = (group["lr"] for group in optimizer.param_groups)
    return next(learning_rates, None)

In [8]:
# Source: https://github.com/clovaai/CutMix-PyTorch
def rand_bbox(size, lambd):
    """Sample a random CutMix box inside a 4-D batch.

    Args:
        size: Shape of the batch. Entries 2 and 3 are used as the two spatial
            extents; they are called ``W`` and ``H`` here, following the
            upstream code, and the returned bounds index those same axes.
        lambd: Mixing coefficient in ``[0, 1]``. Before clipping, the box
            has side lengths ``sqrt(1 - lambd)`` times the spatial extents.

    Returns:
        ``(bbx1, bby1, bbx2, bby2)``: start/stop bounds along axis 2
        (``bbx*``) and axis 3 (``bby*``), clipped to the batch extents.
    """
    W = size[2]
    H = size[3]

    cut_rat = np.sqrt(1 - lambd)
    # ``np.int`` was just an alias of the builtin and no longer exists in
    # recent NumPy releases; the builtin truncates identically.
    cut_w = int(W * cut_rat)
    cut_h = int(H * cut_rat)

    # Uniformly sampled box centre.
    cx = np.random.randint(W)
    cy = np.random.randint(H)

    bbx1 = np.clip(cx - cut_w // 2, 0, W)
    bby1 = np.clip(cy - cut_h // 2, 0, H)
    bbx2 = np.clip(cx + cut_w // 2, 0, W)
    bby2 = np.clip(cy + cut_h // 2, 0, H)

    return bbx1, bby1, bbx2, bby2


def cutmix(x, y, model, criterion):
    """Apply CutMix to a batch and return the mixed loss.

    A random box is pasted from a shuffled copy of the batch into ``x``, the
    model is evaluated on the result, and the loss is the area-weighted blend
    of the losses against the original and the shuffled targets.

    Note:
        ``x`` is modified in place; the caller's tensor holds the mixed
        images after this call.

    Args:
        x: 4-D input batch; the box is cut along dimensions 2 and 3.
        y: Targets, indexable along dimension 0 in step with ``x``.
        model: Callable mapping the mixed batch to predictions.
        criterion: Callable ``criterion(pred, target)`` returning the loss.

    Returns:
        The blended loss ``criterion(pred, y) * lam +
        criterion(pred, y_shuffled) * (1 - lam)``.
    """
    lambd = np.random.beta(1.0, 1.0)

    # Keep the permutation on the same device as the batch instead of relying
    # on a module-level device constant; it is still drawn on the CPU first so
    # the random stream is unchanged.
    rand_index = torch.randperm(x.size()[0]).to(x.device)

    y_a = y
    y_b = y[rand_index]

    bbx1, bby1, bbx2, bby2 = rand_bbox(x.size(), lambd)
    # The advanced-indexed right-hand side is materialised as a copy before
    # the assignment, so the in-place write does not read its own output.
    x[:, :, bbx1:bbx2, bby1:bby2] = x[rand_index, :, bbx1:bbx2, bby1:bby2]
    # Recompute the coefficient from the clipped box so that it matches the
    # fraction of the image area that was actually kept.
    lambd = 1 - ((bbx2 - bbx1) * (bby2 - bby1) / (x.size()[-1] * x.size()[-2]))

    y_pred = model(x)

    return criterion(y_pred, y_a) * lambd + criterion(y_pred, y_b) * (1 - lambd)

In [9]:
def main(args):
    """Fine-tune an EfficientNet-B4 binary classifier and log each epoch.

    Reads the split table from ``../input/bfg-dataset/data_additional2.csv``,
    trains on the ``train`` split (with CutMix applied to roughly half of the
    batches) and evaluates on the ``dev`` split after every epoch.

    Files written to the working directory:

    * ``best.pth`` - model weights of the epoch with the lowest dev loss,
    * ``log.csv`` - one row per epoch (rewritten after every epoch),
    * ``checkpoint.pth`` - latest model/optimizer/scheduler state.

    Args:
        args: Mapping with the keys ``"seed"``, ``"epochs"``,
            ``"batch_size"``, ``"lr"``, ``"min_lr"`` and ``"weight_decay"``.
    """
    seed_everything(args["seed"])

    df = pd.read_csv("../input/bfg-dataset/data_additional2.csv")

    dataset = {
        "train": MyDataset(df[df["split"] == "train"], transform=TRAIN_TRANSFORM),
        "dev": MyDataset(df[df["split"] == "dev"], transform=TEST_TRANSFORM)
    }

    # Source: https://github.com/ufoym/imbalanced-dataset-sampler
    dataloader = {
        "train": DataLoader(dataset["train"], sampler=ImbalancedDatasetSampler(dataset["train"]), batch_size=args["batch_size"]),
        "dev": DataLoader(dataset["dev"], batch_size=args["batch_size"])
    }

    model = EfficientNet.from_pretrained("efficientnet-b4", num_classes=1).to(DEVICE)
    # map_location lets a checkpoint whose tensors were saved from a GPU load
    # on whatever device this run uses (including CPU-only hosts).
    model.load_state_dict(
        torch.load("../input/bayer-2021-b4-pretrained-v2/best.pth", map_location=DEVICE)
    )
    optimizer = optim.AdamW(model.parameters(), lr=args["lr"], weight_decay=args["weight_decay"])
    scheduler = lr.CosineAnnealingLR(optimizer, T_max=args["epochs"] + 1, eta_min=args["min_lr"])
    # NOTE(review): hard-coded counts, presumably negatives/positives of the
    # training data - confirm they still match the CSV being loaded.
    neg = 62632
    pos = 19011
    criterion = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([neg/pos]).to(DEVICE))

    # Rows are collected in a list and turned into a frame when written:
    # DataFrame.append is gone from current pandas and copied the whole frame
    # on every call.
    log_columns = ["epoch", "train loss", "dev loss", "learning rate"]
    log_rows = []

    pbar_epoch = tqdm(range(args["epochs"]))

    best = {
        "dev": {
            "epoch": 0,
            "loss": np.inf,
        }
    }

    for epoch in pbar_epoch:
        stats = {
            "train": {
                "loss": 0,
            },
            "dev": {
                "loss": 0,
            }
        }

        model.train()

        for x, y in tqdm(dataloader["train"], desc="train", leave=False):
            x = x.to(DEVICE)
            y = y.to(DEVICE, torch.float)

            # CutMix on about half of the batches, plain loss on the rest.
            if np.random.rand() > 0.5:
                loss = cutmix(x, y, model, criterion)
            else:
                loss = criterion(model(x), y)

            # Running mean over the number of batches in the epoch.
            stats["train"]["loss"] += loss.item() / len(dataloader["train"])

            loss.backward()
            clip_grad_norm_(model.parameters(), 1.0)
            optimizer.step()
            optimizer.zero_grad()

        model.eval()

        with torch.no_grad():
            for x, y in tqdm(dataloader["dev"], desc="dev", leave=False):
                x = x.to(DEVICE)
                y = y.to(DEVICE, torch.float)

                loss = criterion(model(x), y)
                stats["dev"]["loss"] += loss.item() / len(dataloader["dev"])

        # Keep the weights of the best epoch seen so far.
        if stats["dev"]["loss"] < best["dev"]["loss"]:
            best["dev"]["epoch"] = epoch
            best["dev"]["loss"] = stats["dev"]["loss"]

            torch.save(model.state_dict(), "best.pth")

        # Learning rate used during this epoch (read before the scheduler steps).
        current_lr = get_lr(optimizer)

        pbar_epoch.set_description("best: {:.4f} curr: {:.4f} lr: {}".format(best["dev"]["loss"], stats["dev"]["loss"], current_lr))

        log_rows.append({
            "epoch": epoch,
            "train loss": stats["train"]["loss"],
            "dev loss": stats["dev"]["loss"],
            "learning rate": current_lr
        })

        # Rewrite the full log every epoch so an interrupted run keeps its history.
        pd.DataFrame(log_rows, columns=log_columns).to_csv("log.csv", index=False)

        scheduler.step()

        torch.save({
            "epoch": epoch,
            "model": model.state_dict(),
            "optimizer": optimizer.state_dict(),
            "scheduler": scheduler.state_dict(),
        }, "checkpoint.pth")

In [10]:
if __name__ == "__main__":
    # Hyper-parameters for this run; `main` looks each one up by key.
    main(dict(
        seed=42,
        epochs=5,
        batch_size=32,
        lr=1e-4,
        min_lr=1e-5,
        weight_decay=1e-5,
    ))

Downloading: "https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b4-6ed6700e.pth" to /root/.cache/torch/hub/checkpoints/efficientnet-b4-6ed6700e.pth


  0%|          | 0.00/74.4M [00:00<?, ?B/s]

Loaded pretrained weights for efficientnet-b4


  0%|          | 0/5 [00:00<?, ?it/s]

train:   0%|          | 0/1531 [00:00<?, ?it/s]

dev:   0%|          | 0/511 [00:00<?, ?it/s]

train:   0%|          | 0/1531 [00:00<?, ?it/s]

dev:   0%|          | 0/511 [00:00<?, ?it/s]

train:   0%|          | 0/1531 [00:00<?, ?it/s]

dev:   0%|          | 0/511 [00:00<?, ?it/s]

train:   0%|          | 0/1531 [00:00<?, ?it/s]

dev:   0%|          | 0/511 [00:00<?, ?it/s]

train:   0%|          | 0/1531 [00:00<?, ?it/s]

dev:   0%|          | 0/511 [00:00<?, ?it/s]