In [1]:
# `os` is imported here because this cell runs before the notebook's import
# cells; without it a fresh kernel fails with NameError on the first line.
import os

# Directory from which the pretrained checkpoint file is loaded.
PRETRAINED_MODELS_DIR = os.path.join("/content", "pretrained")
# Location of the efficientdet-pytorch checkout that gets added to sys.path.
EFFICIENTDET_PYTORCH_DIR = os.path.join("/content", "efficientdet-pytorch")
# Repository cloned by init_efficientdet().
EFFICIENTDET_PYTORCH_GIT_LINK = "https://github.com/rwightman/efficientdet-pytorch.git"

In [2]:
import sys, os, subprocess
# Make the parent directory and the efficientdet-pytorch checkout importable.
# Each path is checked on its own so that re-running the cell, or starting with
# only one of the two already present, never adds a duplicate sys.path entry.
# The checkout is inserted last so it ends up first on sys.path.
for extra_path in (os.path.abspath(os.pardir), EFFICIENTDET_PYTORCH_DIR):
    if extra_path not in sys.path:
        sys.path.insert(0, extra_path)
import CONFIG

%reload_ext autoreload
%autoreload 2

In [3]:
def init_efficientdet():
    """Prepare the on-disk prerequisites for EfficientDet.

    Creates ``PRETRAINED_MODELS_DIR`` when it is missing and clones the
    efficientdet-pytorch repository into ``EFFICIENTDET_PYTORCH_DIR`` when that
    directory does not exist yet. The two steps are independent, so an existing
    weights directory no longer triggers a redundant clone (and vice versa).

    The clone destination is passed explicitly so the checkout always lands in
    the same directory that is checked here and added to ``sys.path``.

    Raises:
        subprocess.CalledProcessError: if ``git clone`` exits with a non-zero
            status, instead of silently continuing without the package.
    """
    os.makedirs(PRETRAINED_MODELS_DIR, exist_ok=True)

    if not os.path.exists(EFFICIENTDET_PYTORCH_DIR):
        # Argument list instead of a shell string: no quoting/injection issues.
        subprocess.run(
            ["git", "clone", EFFICIENTDET_PYTORCH_GIT_LINK, EFFICIENTDET_PYTORCH_DIR],
            check=True,
        )

In [4]:
# Create the pretrained-weights directory and fetch the efficientdet-pytorch
# checkout before the cells that import `effdet` run.
init_efficientdet()

In [5]:
import sys
import torch
import os
from datetime import datetime
import time
import random
import cv2
import pandas as pd
import numpy as np
import albumentations as A
import matplotlib.pyplot as plt
from albumentations.pytorch.transforms import ToTensorV2
from sklearn.model_selection import StratifiedKFold, KFold
from torch.utils.data import Dataset, DataLoader
from torch.utils.data.sampler import SequentialSampler, RandomSampler
from glob import glob

SEED = 42


def seed_everything(seed):
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random``, NumPy's global generator and PyTorch (CPU and
    all CUDA devices), and configures cuDNN for deterministic behaviour.

    Args:
        seed: Integer seed applied to all generators.
    """
    random.seed(seed)
    # Only takes effect for Python processes launched after this point.
    os.environ["PYTHONHASHSEED"] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Seed every visible GPU, not just the current one; safe without CUDA.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # The cuDNN auto-tuner may pick different kernels from run to run, which
    # defeats the deterministic flag above, so it has to be switched off.
    torch.backends.cudnn.benchmark = False


seed_everything(SEED)

In [6]:
# Input data and model output locations come from the project CONFIG module.
DATA_DIR = CONFIG.CFG.DATA.BASE
MODELS_OUT_DIR = CONFIG.CFG.DATA.MODELS_OUT

# Use the GPU whenever one is available, otherwise fall back to the CPU.
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
ON_CPU = DEVICE.type == 'cpu'

# Batch sizes and epoch count are smaller for CPU runs.
if ON_CPU:
    TRAIN_BATCH_SIZE, VALID_BATCH_SIZE, NUM_EPOCHS = 4, 4, 5
else:
    TRAIN_BATCH_SIZE, VALID_BATCH_SIZE, NUM_EPOCHS = 5, 6, 10

LEARNING_RATE = 4e-5
# Names of the numeric columns the `bbox` column is split into.
NEW_COLUMNS = ['x', 'y', 'w', 'h']
# Side length (pixels) that images are resized to by the transforms.
IMAGE_SIZE = 512

In [7]:
# Read the training annotations and the sample submission from DATA_DIR.
train_df, test_df = (
    pd.read_csv(os.path.join(DATA_DIR, csv_name))
    for csv_name in ("train.csv", "sample_submission.csv")
)

# Distinct image ids present in the training annotations.
UNIQ_TRAIN_IMAGE_IDS = train_df["image_id"].unique()

(train_df.shape, test_df.shape)

((147793, 5), (10, 2))

In [8]:
# Separate the `bbox` string column into the numeric columns in NEW_COLUMNS.
# The guard keeps the cell re-runnable: once `bbox` has been consumed, running
# the cell again is a no-op instead of a KeyError.
if 'bbox' in train_df.columns:
    # x[1:-1] drops the first and last character (presumably the surrounding
    # brackets) before the comma-separated numbers are parsed.
    sep_bboxes = np.stack(train_df['bbox'].apply(lambda x: np.fromstring(x[1:-1], sep=',')))
    for i, new_column in enumerate(NEW_COLUMNS):
        train_df[new_column] = sep_bboxes[:, i]
    # Rebind instead of mutating in place so the step has explicit lineage.
    train_df = train_df.drop(columns=['bbox'])
train_df.head()

Unnamed: 0,image_id,width,height,source,x,y,w,h
0,b6ab77fd7,1024,1024,usask_1,834.0,222.0,56.0,36.0
1,b6ab77fd7,1024,1024,usask_1,226.0,548.0,130.0,58.0
2,b6ab77fd7,1024,1024,usask_1,377.0,504.0,74.0,160.0
3,b6ab77fd7,1024,1024,usask_1,834.0,95.0,109.0,107.0
4,b6ab77fd7,1024,1024,usask_1,26.0,144.0,124.0,117.0


In [9]:
# Assign every annotation row to a validation fold. The split is done over
# unique image ids so all boxes of one image share the same fold.
train_df["fold"] = -1
kf = KFold(shuffle=True, random_state=42)
for fold, (_, valid_positions) in enumerate(kf.split(UNIQ_TRAIN_IMAGE_IDS)):
    fold_image_ids = UNIQ_TRAIN_IMAGE_IDS[valid_positions]
    rows_in_fold = train_df["image_id"].isin(fold_image_ids)
    train_df.loc[rows_in_fold, "fold"] = fold

In [10]:
# Spot-check ten random rows to confirm the x/y/w/h and fold columns are filled in.
train_df.sample(10)

Unnamed: 0,image_id,width,height,source,x,y,w,h,fold
55997,1cf002747,1024,1024,ethz_1,436.0,524.0,115.0,53.0,2
22445,b61e3461d,1024,1024,arvalis_1,475.0,402.0,81.0,97.0,2
51867,e2018867c,1024,1024,inrae_1,422.0,180.0,106.0,163.0,2
93085,302fd5e69,1024,1024,ethz_1,368.0,279.0,62.0,44.0,3
103557,14da8934d,1024,1024,ethz_1,780.0,595.0,54.0,54.0,2
96795,27f0a8188,1024,1024,ethz_1,507.0,909.0,62.0,24.0,4
67884,e891a6cff,1024,1024,ethz_1,0.0,449.0,129.0,80.0,3
10723,b95fd89e3,1024,1024,arvalis_1,572.0,291.0,77.0,78.0,0
35637,e4b256788,1024,1024,arvalis_1,240.0,114.0,96.0,100.0,4
17247,b0a32bf24,1024,1024,arvalis_1,967.0,524.0,56.0,47.0,2


In [11]:
def load_image(image_path):
    """Read an image from disk and return it in RGB channel order.

    Args:
        image_path: Path of the image file to read.

    Returns:
        The array produced by ``cv2.imread`` (``IMREAD_COLOR``) after a
        BGR -> RGB conversion.

    Raises:
        FileNotFoundError: if OpenCV could not read the file.
    """
    image = cv2.imread(image_path, cv2.IMREAD_COLOR)
    # cv2.imread reports failure by returning None rather than raising, so the
    # check must happen before cvtColor; otherwise cvtColor fails first with an
    # unrelated OpenCV error and the helpful message is never shown.
    if image is None:
        raise FileNotFoundError(f"IMAGE NOT FOUND AT {image_path}")
    return cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

In [12]:
def get_train_transforms():
    """Build the augmentation pipeline used for training samples.

    Returns:
        An ``A.Compose`` that randomly crops, colour-jitters, flips, resizes to
        ``IMAGE_SIZE`` x ``IMAGE_SIZE``, applies cutout and finally converts
        the image to a tensor. Boxes are handled in ``pascal_voc`` format with
        their classes supplied through the ``labels`` field.
    """
    random_crop = A.RandomSizedCrop(min_max_height=(800, 800), height=1024, width=1024, p=0.5)

    # Exactly one of the two colour perturbations is picked when this fires.
    colour_jitter = A.OneOf(
        [
            A.HueSaturationValue(hue_shift_limit=0.2, sat_shift_limit=0.2, val_shift_limit=0.2, p=0.9),
            A.RandomBrightnessContrast(brightness_limit=0.2, contrast_limit=0.2, p=0.9),
        ],
        p=0.9,
    )

    steps = [
        random_crop,
        colour_jitter,
        A.ToGray(p=0.01),
        A.HorizontalFlip(p=0.5),
        A.VerticalFlip(p=0.5),
        A.Resize(height=IMAGE_SIZE, width=IMAGE_SIZE, p=1),
        A.Cutout(num_holes=8, max_h_size=64, max_w_size=64, fill_value=0, p=0.5),
        ToTensorV2(p=1.0),
    ]

    box_settings = A.BboxParams(
        format='pascal_voc',
        min_area=0,
        min_visibility=0,
        label_fields=['labels'],
    )
    return A.Compose(steps, p=1.0, bbox_params=box_settings)

def get_valid_transforms():
    """Build the deterministic pipeline used for validation samples.

    Returns:
        An ``A.Compose`` that only resizes to ``IMAGE_SIZE`` x ``IMAGE_SIZE``
        and converts the image to a tensor. Boxes are handled in
        ``pascal_voc`` format with their classes supplied through ``labels``.
    """
    steps = [
        A.Resize(height=IMAGE_SIZE, width=IMAGE_SIZE, p=1.0),
        ToTensorV2(p=1.0),
    ]
    box_settings = A.BboxParams(
        format='pascal_voc',
        min_area=0,
        min_visibility=0,
        label_fields=['labels'],
    )
    return A.Compose(steps, p=1.0, bbox_params=box_settings)

In [13]:
class AverageMeter:
    """Tracks the most recent value and the weighted running mean of all values."""

    def __init__(self):
        self.reset()

    def reset(self):
        """Clear the latest value, the running sum, the count and the mean."""
        self.val = self.avg = self.sum = self.count = 0

    def update(self, val, n=1):
        """Record ``val`` as observed ``n`` times and refresh the mean.

        Args:
            val: The new value (e.g. a batch-averaged loss).
            n: Weight of the value, typically the batch size.
        """
        self.count += n
        self.sum += val * n
        self.val = val
        self.avg = self.sum / self.count

In [14]:
class WheatDataset(Dataset):
    """Detection dataset yielding one ``(image, target)`` pair per image id.

    Args:
        df: Annotation frame with an ``image_id`` column and the box columns
            listed in ``NEW_COLUMNS`` (x, y, w, h), one row per box.
        image_dir: Directory containing ``<image_id>.jpg`` files.
        transforms: Optional callable invoked as
            ``transforms(image=..., bboxes=..., labels=...)`` and returning a
            dict with ``image``, ``bboxes`` and ``labels``.
        max_aug_attempts: How many times the transform is tried when it returns
            no boxes before giving up. The original loop was unbounded and
            could hang a worker forever on a transform that always drops every
            box.
    """

    def __init__(self, df, image_dir, transforms=None, max_aug_attempts=10):
        super().__init__()

        self.df = df
        self.image_dir = image_dir
        self.transforms = transforms
        self.max_aug_attempts = max(1, int(max_aug_attempts))
        self.image_ids = df['image_id'].unique()
        # Row positions per image id, computed once so __getitem__ does not
        # have to scan the whole frame for every sample.
        self._rows_by_image = df.groupby('image_id').indices

    def __len__(self) -> int:
        return len(self.image_ids)

    def _augment(self, image, bboxes, labels):
        """Apply the transform, retrying while it returns no boxes.

        Raises:
            RuntimeError: if every attempt produced an empty box list.
        """
        for _ in range(self.max_aug_attempts):
            aug = self.transforms(image=image, bboxes=bboxes, labels=labels)
            if len(aug['bboxes']) > 0:
                return aug
        raise RuntimeError(
            f"transforms returned no bounding boxes after {self.max_aug_attempts} attempts"
        )

    def __getitem__(self, idx: int):
        """Return the image and its target dict for position ``idx``.

        Without transforms the image is a float32 array scaled to [0, 1] and
        ``target['boxes']`` is in ``[x_min, y_min, x_max, y_max]`` order. With
        transforms the image is whatever the transform returns and the boxes
        are reordered to ``[y_min, x_min, y_max, x_max]``.
        """
        image_id = self.image_ids[idx]
        image = load_image(os.path.join(self.image_dir, f"{image_id}.jpg")).astype(np.float32)
        image /= 255.0

        records = self.df.iloc[self._rows_by_image[image_id]]

        # astype always copies, so the frame is never modified below.
        xywh = records[NEW_COLUMNS].values.astype(np.float64)
        # Area is taken from the original (pre-augmentation) width and height.
        area = torch.as_tensor(xywh[:, 2] * xywh[:, 3], dtype=torch.float32)

        # change the co-ordinates into expected [x, y, x+w, y+h] format
        xyxy = xywh.copy()
        xyxy[:, 2:] += xyxy[:, :2]
        boxes = torch.as_tensor(xyxy, dtype=torch.float32)

        # since all the boxes are wheat, it's all 1s
        labels = torch.ones((boxes.shape[0],), dtype=torch.int64)

        target = {}
        target["boxes"] = boxes
        target["labels"] = labels
        target["image_id"] = torch.tensor([idx])
        target["area"] = area

        if self.transforms:
            # Hand plain Python lists to the transform rather than torch
            # tensors, and convert whatever comes back (lists, tuples or
            # arrays) with as_tensor instead of relying on tensor elements.
            aug = self._augment(image, xyxy.tolist(), labels.tolist())
            image = aug['image']
            aug_boxes = torch.as_tensor(aug['bboxes'], dtype=torch.float32).reshape(-1, 4)

            # format yxyx
            target['boxes'] = aug_boxes[:, [1, 0, 3, 2]]
            target['labels'] = torch.as_tensor(aug['labels'], dtype=torch.int64)

        return image, target

In [15]:
fold = 0

# Both datasets read from the same image directory; they differ only in which
# annotation rows they receive and which transform pipeline they apply.
images_dir = os.path.join(DATA_DIR, "train")
held_out = train_df['fold'] == fold

train_dataset = WheatDataset(train_df[~held_out], images_dir, transforms=get_train_transforms())
validation_dataset = WheatDataset(train_df[held_out], images_dir, transforms=get_valid_transforms())

In [16]:
def collate_fn(batch):
    """Regroup a list of samples into one tuple per sample field.

    For ``[(image_0, target_0), (image_1, target_1)]`` this returns
    ``((image_0, image_1), (target_0, target_1))``. Nothing is stacked, so
    samples may carry differing numbers of boxes.
    """
    per_field = zip(*batch)
    return tuple(per_field)

# Settings shared by the training and validation loaders.
shared_loader_kwargs = dict(num_workers=1, collate_fn=collate_fn)

# Training batches are drawn in random order.
train_data_loader = DataLoader(
    train_dataset,
    batch_size=TRAIN_BATCH_SIZE,
    sampler=RandomSampler(train_dataset),
    **shared_loader_kwargs,
)

# Validation batches are always visited in dataset order.
valid_data_loader = DataLoader(
    validation_dataset,
    batch_size=VALID_BATCH_SIZE,
    sampler=SequentialSampler(validation_dataset),
    shuffle=False,
    **shared_loader_kwargs,
)

In [17]:
from effdet import get_efficientdet_config, EfficientDet, DetBenchTrain
from effdet.efficientdet import HeadNet

def get_model(architecture='tf_efficientdet_d5', checkpoint_name='tf_efficientdet_d5-ef44aea8.pth'):
    """Build an EfficientDet training bench with a fresh single-class head.

    Args:
        architecture: Config name passed to ``get_efficientdet_config``.
        checkpoint_name: File name of the matching pretrained weights inside
            ``PRETRAINED_MODELS_DIR``.

    Returns:
        A ``DetBenchTrain`` wrapping the model. It is left on the CPU; the
        caller is responsible for moving it to the training device.
    """
    model_config = get_efficientdet_config(architecture)
    model = EfficientDet(model_config, pretrained_backbone=False)

    # map_location='cpu' lets a checkpoint that was saved from a GPU load on a
    # CPU-only machine and avoids placing the raw checkpoint on the GPU.
    checkpoint = torch.load(os.path.join(PRETRAINED_MODELS_DIR, checkpoint_name), map_location='cpu')
    model.load_state_dict(checkpoint)

    # The config is changed only after the pretrained weights are loaded, then
    # the classification head is replaced by a new one-class head.
    model_config.num_classes = 1
    model_config.image_size = IMAGE_SIZE
    model.class_net = HeadNet(model_config, num_outputs=model_config.num_classes, norm_kwargs=dict(eps=.001, momentum=.01))
    return DetBenchTrain(model, model_config)

In [18]:
%%capture
# Build the training bench and place it on the selected device
# (Module.to returns the module itself, so the result can be bound directly).
model = get_model().to(DEVICE)

In [19]:
# Adam over every parameter of the bench, at a fixed learning rate.
optimizer = torch.optim.Adam(params=model.parameters(), lr=LEARNING_RATE)
# No learning-rate schedule is configured; the training loop skips stepping it.
lr_scheduler = None

In [20]:
# Make sure the model is in training mode before optimising.
model.train()

for epoch in range(NUM_EPOCHS):
    for b_idx, (images, targets) in enumerate(train_data_loader):
        images = torch.stack(images).to(DEVICE).float()

        # One box tensor and one label tensor per image, moved to the same
        # device as the images so the loss is not computed across devices.
        target = {
            'bbox': [d['boxes'].to(DEVICE) for d in targets],
            'cls': [d['labels'].to(DEVICE) for d in targets],
        }

        # Gradients accumulate across backward() calls; without clearing them
        # every step would apply the sum of all previous batches' gradients.
        optimizer.zero_grad()

        output = model(images, target)
        loss = output['loss']

        print(loss.item())

        loss.backward()
        optimizer.step()
        # The scheduler, when configured, is advanced once per batch.
        if lr_scheduler is not None:
            lr_scheduler.step()


RuntimeError: CUDA out of memory. Tried to allocate 76.00 MiB (GPU 0; 15.90 GiB total capacity; 3.94 GiB already allocated; 54.88 MiB free; 3.97 GiB reserved in total by PyTorch)