<div style="text-align: center;">
Version Control Log
</div>

|  Version  |  Date  |  Description of change  |
| ---- | ---- |--------|
|  2  |  27/11/2021  | Initial release|
|  3  |  28/11/2021  | Add weights for d0~d7 and table and correct typo|
|  8  |  29/11/2021  | Add log plot|

# Message
This is my first public notebook.  
If you find anything wrong or strange in this notebook, please tell me!  
I spent a lot of time creating it😅  
I worked on it from the evening until midnight😪 So if you find the notebook useful, please upvote👍  
Then I will be very HAPPY😆  
Let's fight together to help our Great Barrier Reef💪  

# Other Resources
I have also prepared [a discussion](https://www.kaggle.com/c/tensorflow-great-barrier-reef/discussion/290992) about EfficientDet.  
Please check it out.
An **inference** notebook is also under preparation.

# Install

In [None]:
!pip install --no-deps '../input/timm-package/timm-0.4.12-py3-none-any.whl' > /dev/null
!pip install --no-deps '../input/pycocotools/pycocotools-2.0-cp37-cp37m-linux_x86_64.whl' > /dev/null

# Imports

In [None]:
import sys
sys.path.insert(0, "../input/timm-efficientdet-pytorch-fix-v3/timm_efficientdet_pytorch_fix_v3")
sys.path.insert(0, "../input/omegaconf")
sys.path.insert(0, "../input/albumentations-fix-v1/albumentations_fix_v1")

import torch
import os
from datetime import datetime
import time
import random
import cv2
import pandas as pd
import numpy as np
import albumentations as A
import matplotlib.pyplot as plt
from albumentations.pytorch.transforms import ToTensorV2
from sklearn.model_selection import StratifiedKFold
from torch.utils.data import Dataset,DataLoader
from torch.utils.data.sampler import SequentialSampler, RandomSampler
from glob import glob

SEED = 42


def seed_everything(seed):
    """Seed every random number generator used by this notebook.

    Seeds Python's ``random`` module, NumPy and PyTorch (CPU and every CUDA
    device), exports ``PYTHONHASHSEED`` and configures cuDNN for
    reproducible runs.

    Args:
        seed: Integer seed shared by all generators.
    """
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Seed all CUDA devices, not only the current one; the call is a no-op
    # when CUDA is unavailable.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # The cuDNN auto-tuner may pick a different algorithm on every run, so it
    # has to be off for the deterministic flag above to be meaningful.
    torch.backends.cudnn.benchmark = False


seed_everything(SEED)

In [None]:
# Load the training table and keep only the frames whose `annotations`
# cell is not the literal string '[]' (i.e. frames with at least one box).
# reset_index() keeps the previous row labels as an extra `index` column.
data_df = (
    pd.read_csv('../input/tensorflow-great-barrier-reef/train.csv')
    .loc[lambda frame: frame['annotations'] != '[]']
    .reset_index()
)

In [None]:
import ast

# Flatten the per-frame annotation lists into one row per bounding box.
cols = ['image_id', 'video_id', 'x', 'y', 'w', 'h']

# Rows are gathered in a plain list and converted to a DataFrame once.
# DataFrame.append copied the whole frame on every call (quadratic time) and
# no longer exists in pandas 2.x; building the frame in one go also gives the
# numeric columns a numeric dtype instead of `object`.
box_rows = []
for image_id, video_id, raw_annotations in zip(
    data_df['image_id'], data_df['video_id'], data_df['annotations']
):
    # literal_eval parses the list-of-dicts literal without executing
    # arbitrary code, unlike eval().
    for annotation in ast.literal_eval(raw_annotations):
        box_rows.append({
            'image_id': image_id,
            'video_id': video_id,
            'x': annotation['x'],
            'y': annotation['y'],
            'w': annotation['width'],
            'h': annotation['height'],
        })

new_data_df = pd.DataFrame(box_rows, columns=cols)

In [None]:
# From here on `data_df` refers to the flattened per-box table
# (columns: image_id, video_id, x, y, w, h) instead of the raw CSV rows.
data_df = new_data_df

In [None]:
# Assign every annotated image to one of three folds, stratified by the
# source video combined with the number of boxes in the image (in bins of 15).
skf = StratifiedKFold(n_splits=3, shuffle=True, random_state=42)

# One row per image_id: how many boxes it has and which video it comes from.
df_folds = data_df[['image_id']].assign(bbox_count=1).groupby('image_id').count()
df_folds['video_id'] = data_df.groupby('image_id')['video_id'].min()

# Stratification key of the form "<video_id>_<bbox_count // 15>".
df_folds['stratify_group'] = (
    df_folds['video_id'].astype(str)
    + '_'
    + (df_folds['bbox_count'] // 15).astype(str)
)

# The fold number of an image is the split in which it is a validation sample.
df_folds['fold'] = 0
for fold_number, (train_index, val_index) in enumerate(
    skf.split(X=df_folds.index, y=df_folds['stratify_group'])
):
    df_folds.iloc[val_index, df_folds.columns.get_loc('fold')] = fold_number

In [None]:
def get_train_transforms():
    """Build the augmentation pipeline used for training samples.

    The pipeline optionally crops, jitters colour, flips, resizes to
    512x512, cuts out random holes and finally converts to a tensor.
    Bounding boxes are expected in ``pascal_voc`` format with their class
    ids passed under the ``labels`` key.

    Returns:
        The composed albumentations transform.
    """
    random_crop = A.RandomSizedCrop(
        min_max_height=(600, 600), height=720, width=1280, p=0.5
    )
    # Exactly one of the two colour perturbations is picked when OneOf fires.
    colour_jitter = A.OneOf(
        [
            A.HueSaturationValue(
                hue_shift_limit=0.2,
                sat_shift_limit=0.2,
                val_shift_limit=0.2,
                p=0.9,
            ),
            A.RandomBrightnessContrast(
                brightness_limit=0.2,
                contrast_limit=0.2,
                p=0.9,
            ),
        ],
        p=0.9,
    )
    steps = [
        random_crop,
        colour_jitter,
        A.ToGray(p=0.01),
        A.HorizontalFlip(p=0.5),
        A.VerticalFlip(p=0.5),
        A.Resize(height=512, width=512, p=1),
        A.Cutout(num_holes=8, max_h_size=64, max_w_size=64, fill_value=0, p=0.5),
        ToTensorV2(p=1.0),
    ]
    box_settings = A.BboxParams(
        format='pascal_voc',
        min_area=0,
        min_visibility=0,
        label_fields=['labels'],
    )
    return A.Compose(steps, p=1.0, bbox_params=box_settings)

def get_valid_transforms():
    """Build the deterministic pipeline used for validation samples.

    Only resizes to 512x512 and converts to a tensor. Bounding boxes are
    expected in ``pascal_voc`` format with their class ids passed under the
    ``labels`` key.

    Returns:
        The composed albumentations transform.
    """
    steps = [
        A.Resize(height=512, width=512, p=1.0),
        ToTensorV2(p=1.0),
    ]
    box_settings = A.BboxParams(
        format='pascal_voc',
        min_area=0,
        min_visibility=0,
        label_fields=['labels'],
    )
    return A.Compose(steps, p=1.0, bbox_params=box_settings)

# Datasets

In [None]:
TRAIN_ROOT_PATH = '../input/tensorflow-great-barrier-reef/train_images'


class DatasetRetriever(Dataset):
    """Dataset that yields one annotated video frame per item.

    Every entry of ``image_ids`` is a string ``"<video>-<frame>"``. The frame
    is read from ``TRAIN_ROOT_PATH/video_<video>/<frame>.jpg`` and its boxes
    are looked up in ``marking``.

    Args:
        marking: DataFrame with one row per box and the columns ``image_id``,
            ``x``, ``y``, ``w`` and ``h`` (corner plus width/height).
        image_ids: Array of image ids; must support integer indexing and
            ``.shape``.
        transforms: Optional callable invoked as
            ``transforms(image=..., bboxes=..., labels=...)`` that returns a
            mapping with at least ``'image'`` and ``'bboxes'``.
        test: Stored on the instance; not used by this class itself.
    """

    # How often a (random) transform is retried when it removes every box.
    MAX_TRANSFORM_ATTEMPTS = 10

    def __init__(self, marking, image_ids, transforms=None, test=False):
        super().__init__()

        self.image_ids = image_ids
        self.marking = marking
        self.transforms = transforms
        self.test = test

    def __getitem__(self, index: int):
        """Return ``(image, target, image_id, video_id)`` for ``index``.

        ``target`` holds ``'boxes'``, ``'labels'`` and ``'image_id'``. When a
        transform is configured the boxes are returned as a float tensor in
        (y_min, x_min, y_max, x_max) order; without a transform the raw
        (x_min, y_min, x_max, y_max) numpy array is returned unchanged.

        Raises:
            RuntimeError: If the transform removed every box on each of the
                ``MAX_TRANSFORM_ATTEMPTS`` attempts.
        """
        id_parts = self.image_ids[index].split('-')
        video_id = id_parts[0]
        image_id = id_parts[1]

        image, boxes = self.load_image_and_boxes(index)

        # there is only one class
        labels = torch.ones((boxes.shape[0],), dtype=torch.int64)

        target = {
            'boxes': boxes,
            'labels': labels,
            'image_id': torch.tensor([index]),
        }

        if self.transforms:
            for _ in range(self.MAX_TRANSFORM_ATTEMPTS):
                sample = self.transforms(
                    image=image, bboxes=target['boxes'], labels=labels
                )
                if len(sample['bboxes']) > 0:
                    break
            else:
                # Previously the untransformed numpy sample slipped through
                # here and failed much later when the batch was stacked.
                raise RuntimeError(
                    f'Transforms removed every box of {self.image_ids[index]} '
                    f'in {self.MAX_TRANSFORM_ATTEMPTS} attempts'
                )

            image = sample['image']
            boxes_tensor = torch.tensor(
                np.asarray(sample['bboxes'], dtype=np.float32)
            )
            # xyxy -> yxyx; the right-hand side is a copy, so the columns
            # can be swapped in one assignment.
            boxes_tensor[:, [0, 1, 2, 3]] = boxes_tensor[:, [1, 0, 3, 2]]
            target['boxes'] = boxes_tensor
            # A transform may drop some boxes; rebuild the labels so their
            # count always matches the boxes that survived.
            target['labels'] = torch.ones(
                (boxes_tensor.shape[0],), dtype=torch.int64
            )

        return image, target, image_id, video_id

    def __len__(self) -> int:
        return self.image_ids.shape[0]

    def load_image_and_boxes(self, index):
        """Read the frame for ``index`` and collect its boxes.

        Returns:
            ``(image, boxes)`` where ``image`` is a float32 RGB array scaled
            to [0, 1] and ``boxes`` is a float32 array with one
            (x_min, y_min, x_max, y_max) row per box.

        Raises:
            FileNotFoundError: If the image file does not exist.
            OSError: If the file exists but could not be decoded.
        """
        video_id_image_id = self.image_ids[index]
        id_parts = video_id_image_id.split('-')
        img_path = f'{TRAIN_ROOT_PATH}/video_{id_parts[0]}/{id_parts[1]}.jpg'

        # Explicit checks instead of `assert`, which is stripped under -O.
        if not os.path.isfile(img_path):
            raise FileNotFoundError(f'Training image not found: {img_path}')

        image = cv2.imread(img_path, cv2.IMREAD_COLOR)
        if image is None:
            raise OSError(f'Could not decode image: {img_path}')
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB).astype(np.float32)
        image /= 255.0

        records = self.marking[self.marking['image_id'] == video_id_image_id]
        # A fresh float32 copy: safe to modify in place whatever dtype or
        # memory layout the DataFrame columns have.
        boxes = records[['x', 'y', 'w', 'h']].to_numpy(dtype=np.float32, copy=True)
        # (x, y, w, h) -> (x_min, y_min, x_max, y_max)
        boxes[:, 2] += boxes[:, 0]
        boxes[:, 3] += boxes[:, 1]
        return image, boxes

In [None]:
fold_number = 0

# Images assigned to `fold_number` are held out for validation; the images of
# all remaining folds are used for training.
train_dataset = DatasetRetriever(
    marking=data_df,
    image_ids=df_folds.loc[df_folds['fold'] != fold_number].index.values,
    transforms=get_train_transforms(),
    test=False,
)

validation_dataset = DatasetRetriever(
    marking=data_df,
    image_ids=df_folds.loc[df_folds['fold'] == fold_number].index.values,
    transforms=get_valid_transforms(),
    test=True,
)

# Example

In [None]:
# Show the first training sample with its boxes drawn on top.
# NOTE: the image may look reddish on some runs; re-running the cell a few
# times helps. The author suspects an R/B channel swap in the visualisation
# (unconfirmed) and plans to fix it in a later version.
image, target, image_id, video_id = train_dataset[0]
boxes = target['boxes'].cpu().numpy().astype(np.int32)
numpy_image = image.permute(1, 2, 0).cpu().numpy()

# Boxes are stored as (y_min, x_min, y_max, x_max); OpenCV takes (x, y) points.
for y_min, x_min, y_max, x_max in boxes:
    cv2.rectangle(numpy_image, (x_min, y_min), (x_max, y_max), (0, 1, 0), 2)

fig, ax = plt.subplots(1, 1, figsize=(16, 8))
ax.set_axis_off()
ax.imshow(numpy_image);

In [None]:
class AverageMeter(object):
    """Track the most recent value and the running weighted mean of a metric."""

    def __init__(self):
        self.reset()

    def reset(self):
        """Set the last value, mean, weighted sum and count back to zero."""
        self.val = self.avg = self.sum = self.count = 0

    def update(self, val, n=1):
        """Record ``val`` as observed ``n`` times and refresh the mean."""
        self.val = val
        self.count += n
        self.sum += val * n
        self.avg = self.sum / self.count

In [None]:
import warnings

# Suppress every warning from here on (applies process-wide, not only to the
# training code below).
warnings.filterwarnings("ignore")

class Fitter:
    """Training / validation loop with checkpointing and CSV logging.

    Args:
        model: Module called as ``model(images, boxes, labels)`` and returning
            a 3-tuple whose first element is the loss. Its ``model``
            attribute is the network whose weights are checkpointed.
        device: Device the batches are moved to.
        config: Object providing ``folder``, ``lr``, ``n_epochs``,
            ``verbose``, ``verbose_step``, ``step_scheduler``,
            ``validation_scheduler``, ``SchedulerClass`` and
            ``scheduler_params``.
    """

    def __init__(self, model, device, config):
        self.config = config
        self.epoch = 0

        self.base_dir = f'./{config.folder}'
        os.makedirs(self.base_dir, exist_ok=True)

        self.log_path = f'{self.base_dir}/log.csv'
        self.best_summary_loss = 10**5

        self.model = model
        self.device = device

        # Parameters whose name contains one of these markers are excluded
        # from weight decay; all others decay with 0.001.
        param_optimizer = list(self.model.named_parameters())
        no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
        optimizer_grouped_parameters = [
            {
                'params': [p for n, p in param_optimizer
                           if not any(nd in n for nd in no_decay)],
                'weight_decay': 0.001,
            },
            {
                'params': [p for n, p in param_optimizer
                           if any(nd in n for nd in no_decay)],
                'weight_decay': 0.0,
            },
        ]
        # Skip groups that ended up empty (e.g. a model without biases).
        optimizer_grouped_parameters = [
            group for group in optimizer_grouped_parameters if group['params']
        ]

        # The groups above used to be built and then ignored; they are now
        # actually handed to the optimizer.
        self.optimizer = torch.optim.AdamW(optimizer_grouped_parameters, lr=config.lr)
        self.scheduler = config.SchedulerClass(self.optimizer, **config.scheduler_params)
        print(f'Fitter prepared. Device is {self.device}')
        self.log(f'status,epoch,loss,time')

    def fit(self, train_loader, validation_loader):
        """Run ``config.n_epochs`` epochs of training followed by validation.

        After every epoch the last checkpoint is overwritten. Whenever the
        validation loss improves, a ``best-checkpoint-XXXepoch.bin`` file is
        written and only the three newest such files are kept.
        """
        for _ in range(self.config.n_epochs):
            if self.config.verbose:
                lr = self.optimizer.param_groups[0]['lr']
                timestamp = datetime.utcnow().isoformat()
                print(f'\n{timestamp}\nLR: {lr}')

            t = time.time()
            summary_loss = self.train_one_epoch(train_loader)

            self.log(f'train,{self.epoch},{summary_loss.avg:.5f},{(time.time() - t):.5f}')
            self.save(f'{self.base_dir}/last-checkpoint.bin')

            t = time.time()
            summary_loss = self.validation(validation_loader)

            self.log(f'val,{self.epoch},{summary_loss.avg:.5f},{(time.time() - t):.5f}')
            if summary_loss.avg < self.best_summary_loss:
                self.best_summary_loss = summary_loss.avg
                self.model.eval()
                self.save(f'{self.base_dir}/best-checkpoint-{str(self.epoch).zfill(3)}epoch.bin')
                # Zero-padded epoch numbers sort chronologically, so
                # everything but the last three entries is stale.
                for path in sorted(glob(f'{self.base_dir}/best-checkpoint-*epoch.bin'))[:-3]:
                    os.remove(path)

            if self.config.validation_scheduler:
                self.scheduler.step(metrics=summary_loss.avg)

            self.epoch += 1

    def validation(self, val_loader):
        """Evaluate the model on ``val_loader`` without gradients.

        Returns:
            AverageMeter holding the batch-size-weighted mean loss.
        """
        self.model.eval()
        summary_loss = AverageMeter()
        t = time.time()
        for step, sample in enumerate(val_loader):
            self._print_progress('Val', step, len(val_loader), summary_loss, t)
            with torch.no_grad():
                images, boxes, labels = self._prepare_batch(sample)
                batch_size = images.shape[0]

                loss, _, _ = self.model(images, boxes, labels)
                summary_loss.update(loss.detach().item(), batch_size)

        return summary_loss

    def train_one_epoch(self, train_loader):
        """Train the model for one pass over ``train_loader``.

        Returns:
            AverageMeter holding the batch-size-weighted mean loss.
        """
        self.model.train()
        summary_loss = AverageMeter()
        t = time.time()

        for step, sample in enumerate(train_loader):
            self._print_progress('Train', step, len(train_loader), summary_loss, t)

            images, boxes, labels = self._prepare_batch(sample)
            batch_size = images.shape[0]

            self.optimizer.zero_grad()

            loss, _, _ = self.model(images, boxes, labels)

            loss.backward()

            summary_loss.update(loss.detach().item(), batch_size)

            self.optimizer.step()

            if self.config.step_scheduler:
                self.scheduler.step()

        return summary_loss

    def _prepare_batch(self, sample):
        """Stack the images of a collated batch and move everything to the device.

        ``sample[0]`` is the sequence of image tensors and ``sample[1]`` the
        sequence of target dicts with ``'boxes'`` and ``'labels'``.
        """
        images = torch.stack(sample[0]).to(self.device).float()
        targets = sample[1]
        boxes = [target['boxes'].to(self.device).float() for target in targets]
        labels = [target['labels'].to(self.device).float() for target in targets]
        return images, boxes, labels

    def _print_progress(self, stage, step, total, summary_loss, started):
        """Print a one-line progress report every ``config.verbose_step`` steps."""
        if self.config.verbose and step % self.config.verbose_step == 0:
            print(
                f'{stage} Step {step}/{total}, '
                f'summary_loss: {summary_loss.avg:.5f}, '
                f'time: {(time.time() - started):.5f}', end='\r'
            )

    def save(self, path):
        """Write model, optimizer and scheduler state plus progress to ``path``.

        Note that this leaves the model in eval mode.
        """
        self.model.eval()
        torch.save({
            'model_state_dict': self.model.model.state_dict(),
            'optimizer_state_dict': self.optimizer.state_dict(),
            'scheduler_state_dict': self.scheduler.state_dict(),
            'best_summary_loss': self.best_summary_loss,
            'epoch': self.epoch,
        }, path)

    def load(self, path):
        """Restore the state written by :meth:`save` and advance to the next epoch."""
        # map_location lets a checkpoint saved on another device (e.g. a GPU)
        # be restored onto the device this fitter runs on.
        checkpoint = torch.load(path, map_location=self.device)
        self.model.model.load_state_dict(checkpoint['model_state_dict'])
        self.optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        self.scheduler.load_state_dict(checkpoint['scheduler_state_dict'])
        self.best_summary_loss = checkpoint['best_summary_loss']
        self.epoch = checkpoint['epoch'] + 1

    def log(self, message):
        """Append ``message`` to the CSV log and echo it when verbose."""
        if self.config.verbose:
            print(message)
        with open(self.log_path, 'a+') as logger:
            logger.write(f'{message}\n')

In [None]:
class TrainGlobalConfig:
    """Hyper-parameters and switches read by the training cells."""

    # Data loading and optimisation.
    num_workers = 2
    batch_size = 16
    n_epochs = 40  # 4
    lr = 2e-4

    # Folder name handed to Fitter via `config.folder`.
    folder = 'effdet'

    # Console output.
    verbose = True
    verbose_step = 1

    # When the learning-rate scheduler is stepped.
    step_scheduler = False  # do scheduler.step after optimizer.step
    validation_scheduler = True  # do scheduler.step after validation stage loss

    # Scheduler class and the keyword arguments it is constructed with.
    SchedulerClass = torch.optim.lr_scheduler.ReduceLROnPlateau
    scheduler_params = {
        'mode': 'min',
        'factor': 0.5,
        'patience': 1,
        'verbose': False,
        'threshold': 1e-4,
        'threshold_mode': 'abs',
        'cooldown': 0,
        'min_lr': 1e-8,
        'eps': 1e-8,
    }
    # --------------------

In [None]:
def collate_fn(batch):
    """Transpose a batch of per-sample tuples into a tuple of per-field tuples."""
    fields = zip(*batch)
    return tuple(fields)

def run_training():
    """Train the global ``net`` on ``train_dataset`` / ``validation_dataset``.

    Builds the two data loaders from ``TrainGlobalConfig`` and hands them to a
    ``Fitter``. The first CUDA device is used when one is available;
    otherwise training falls back to the CPU instead of failing.
    """
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    net.to(device)

    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=TrainGlobalConfig.batch_size,
        sampler=RandomSampler(train_dataset),
        pin_memory=False,
        # Incomplete final batches are dropped during training only.
        drop_last=True,
        num_workers=TrainGlobalConfig.num_workers,
        collate_fn=collate_fn,
    )
    val_loader = torch.utils.data.DataLoader(
        validation_dataset,
        batch_size=TrainGlobalConfig.batch_size,
        num_workers=TrainGlobalConfig.num_workers,
        shuffle=False,
        sampler=SequentialSampler(validation_dataset),
        pin_memory=False,
        collate_fn=collate_fn,
    )

    fitter = Fitter(model=net, device=device, config=TrainGlobalConfig)
    fitter.fit(train_loader, val_loader)

In [None]:
from effdet import get_efficientdet_config, EfficientDet, DetBenchTrain
from effdet.efficientdet import HeadNet

def get_net():
    """Create the EfficientDet-D0 training wrapper from the local weights.

    The network is configured for 512-pixel inputs, loaded from the bundled
    ``efficientdet_d0`` checkpoint and then given a newly constructed
    classification head.

    Returns:
        The network wrapped in ``DetBenchTrain``.
    """
    det_config = get_efficientdet_config('tf_efficientdet_d0')
    det_config.image_size = 512
    det_config.norm_kwargs = dict(eps=.001, momentum=.01)

    model = EfficientDet(det_config, pretrained_backbone=False)
    weights_path = '../input/efficientdet-init-weights/efficientdet_d0-d92fd44f.pth'
    model.load_state_dict(torch.load(weights_path))

    # The head is replaced after the checkpoint has been loaded.
    # NOTE(review): num_classes is left at the config's default although the
    # dataset code treats the task as single-class - confirm this is intended.
    model.class_net = HeadNet(det_config, num_outputs=det_config.num_classes)

    return DetBenchTrain(model, det_config)


net = get_net()

In [None]:
# Start training with the datasets, network and configuration defined above.
run_training()

# Training time and loss summary
I tried one epoch of training for each of d0 through d7.  
Here is a summary of the training time and loss.  
The results depend on the situation, so please use them only as a rough indication.  
I don't know whether comparing the loss between different models is meaningful, but I added this section for them.

|  D#  |  Time of one epoch[sec] <br>total/train/val  |Loss of one epoch <br> train/val |
| :----: | :----: |:----: |
|  0  |  384/319/65 | 20491/3643 |
|  1  |  418/351/67 | 22103/4575 |
|  2  |  451/381/70 | 18487/2568 |
|  3  |  536/461/75 | 13545/1556 |
|  4  |  632/554/78 | 12094/1421 |
|  5  | 739/661/78  | 7951/839 |
|  6  |  779/691/88 | 7504/589 |
|  7  | 763/676/87  | 12749/901 |

<div style="text-align: center;">
Note: these data were created with version 2 on 28/11/2021.
</div>

In [None]:
# Bar chart of the total time of one epoch for each EfficientDet variant.
# The array is deliberately not named `time`: that would rebind the global
# name of the `time` module, which the Fitter class calls (`time.time()`),
# and break any training run started after this cell.
num_d = np.array([0, 1, 2, 3, 4, 5, 6, 7])
epoch_time = np.array([384, 418, 451, 536, 632, 739, 779, 763])
label = ["D0", "D1", "D2", "D3", "D4", "D5", "D6", "D7"]
plt.bar(num_d, epoch_time, tick_label=label, align="center")
plt.title("Relationship between D# and time for one epoch  ")
plt.xlabel("D#")
plt.ylabel("Time [sec]")

# Plot log

In [None]:
# Read the CSV log from the `effdet` folder and split it by the `status`
# column into training rows and validation rows.
log_df = pd.read_csv('./effdet/log.csv')
train_log_df = log_df.loc[log_df['status'] == 'train']
val_log_df = log_df.loc[log_df['status'] == 'val']

In [None]:
# Training loss per epoch.
train_log_df.plot(x='epoch', y='loss')
plt.ylabel('loss')
plt.title('train loss', fontsize=16)

In [None]:
# Validation loss per epoch.
val_log_df.plot(x='epoch', y='loss')
plt.ylabel('loss')
plt.title('validation loss', fontsize=16)