In [1]:
import cv2
import pandas as pd
from skimage import io
import matplotlib.pyplot as plt

In [None]:
train_df = pd.read_csv('data/train.csv')
train_df.head()

In [4]:
def plot_bboxes(df: pd.DataFrame, upd_path=True):
    for _, row in df.iterrows():
        img = io.imread(('data/bear_images/bear_images/' if upd_path else '') + row['file_name'])
        cv2.rectangle(img, (row['x1'], row['y1']), (row['x2'], row['y2']), (255, 0, 0), 5)
        font = cv2.FONT_HERSHEY_SIMPLEX
        cv2.putText(img, str(row['confidence']), (row['x1'], row['y1'] - 10), font, 1, (0, 255, 0),
                    5)
        plt.figure(figsize=(15, 10))
        plt.title(row['file_name'])
        plt.imshow(img)
        plt.axis('off')
        plt.show()

In [None]:
plot_bboxes(train_df[train_df.confidence == 1.0][:5])

In [14]:
sample_submission = pd.read_csv('data/sample_submission.csv')

In [None]:
plot_bboxes(sample_submission[sample_submission.confidence > 0])

# Solution

In [4]:
import numpy as np
import pandas as pd
import torch
from skimage import io
from torch.utils.data import DataLoader, Dataset

import torchvision
from sklearn.model_selection import train_test_split

import torchvision.transforms as T
from tqdm import tqdm

import os
import datetime as dt

In [5]:
SEED = 42
NUM_WORKERS = 4
NUM_EPOCH = 50

In [6]:
import random


def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True

seed_everything(SEED)

In [7]:
class BearDataset(Dataset):
    def __init__(self, root_dir: str, csv_path, transform=None):
        self.root_dir = root_dir
        self.transform = transform
        df = pd.read_csv(csv_path)
        self.files = list(df["file_name"].map(lambda p: os.path.join(root_dir, p)))
        self.targets = df.drop(columns=['file_name'])

    def __getitem__(self, index):
        img = io.imread(self.files[index])
        img = img[:, :, :3]
        target = self.targets.iloc[index, :]
        if self.transform is not None:
            img = self.transform(img)

        is_bear = target['confidence'] > 0
        boxes = [target["x1"], target["y1"], target["x2"] if is_bear else img.shape[2], target["y2"] if is_bear else img.shape[1]]
        boxes = torch.as_tensor(boxes, dtype=torch.float32)
        return img, {"boxes": boxes, "labels": torch.as_tensor(1 if is_bear else 0, dtype=torch.int64),
                     "scores": torch.as_tensor(1, dtype=torch.float32)}

    def __len__(self):
        return len(self.files)

In [8]:
class BearTestDataset(Dataset):
    def __init__(self, root_dir: str, csv_path, transform=None):
        self.root_dir = root_dir
        self.transform = transform
        df = pd.read_csv(csv_path)
        self.files = list(df["file_name"].map(lambda p: os.path.join(root_dir, p)))

    def __getitem__(self, index):
        img = io.imread(self.files[index])
        img = img[:, :, :3]
        if self.transform is not None:
            img = self.transform(img)

        return img, {}

    def __len__(self):
        return len(self.files)

In [11]:
def get_model():
    model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)

    return model


def get_transform(train):
    transforms = [
        T.ToTensor()
    ]
    if train:
        transforms.append(T.RandomHorizontalFlip(0.5))
    return T.Compose(transforms)

In [12]:
dataset_train_raw = BearDataset('./data/bear_images/bear_images', './data/train.csv',
                                get_transform(train=True))
dataset_test_raw = BearDataset('./data/bear_images/bear_images', './data/train.csv',
                               get_transform(train=False))

In [13]:
labels = [c > 0 for c in dataset_train_raw.targets.confidence]
indices = list(range(len(dataset_train_raw)))
ind_train, ind_test, _, _ = train_test_split(indices, labels, test_size=0.2, random_state=SEED,
                                             stratify=labels)

In [14]:
dataset_train = torch.utils.data.Subset(dataset_train_raw, ind_train)
dataset_test = torch.utils.data.Subset(dataset_test_raw, ind_test)

data_loader_train = DataLoader(
    dataset_train, shuffle=True, num_workers=NUM_WORKERS)

data_loader_test = DataLoader(
    dataset_test, shuffle=False, num_workers=NUM_WORKERS)

In [21]:
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
model = get_model()

model.to(device)



In [38]:
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.001,
                            momentum=0.9, weight_decay=0.0005)

In [16]:
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                   step_size=1,
                                                   gamma=0.1)

In [17]:
!export PYTORCH_CUDA_ALLOC_CONF=garbage_collection_threshold:0.7

In [18]:
def sanitize_pred(yhat_i):
    if yhat_i['scores'].dim() > 0 and len(yhat_i['scores']) == 0:
        yhat_i['scores'] = torch.as_tensor([0], dtype=torch.float32)
        yhat_i['boxes'] = torch.as_tensor([[0, 0, 0, 0]], dtype=torch.float32)
        yhat_i['labels'] = torch.as_tensor([0])

In [19]:
from sklearn.metrics import mean_squared_error


def mcrmse(yhat, y):
    sanitize_pred(yhat[0])

    best_box_idx = max(range(len(yhat[0]['scores'])), key=lambda i: yhat[0]['scores'][i])
    score = 0
    for i in range(4):
        score += mean_squared_error([yhat[0]['boxes'][best_box_idx].cpu()[i] if yhat[0]['labels'].cpu()[best_box_idx] == 1 else 0], [y[0]['boxes'].cpu()[0][i] if y[0]['labels'].cpu().item() == 1 else 0],
                                    squared=False)

    return score / 4

In [22]:
from copy import deepcopy

best_score = None
best_model = None

In [None]:
train_losses = []
test_losses = []
for i in range(NUM_EPOCH):
    train_losses_batch = []
    test_losses_batch = []

    model.train()
    for images, targets in tqdm(data_loader_train, desc='train'):
        images = list(image.to(device) for image in images)
        targets = [{k: v.to(device) for k, v in t.items()} for t in [targets]]
        loss_dict = model(images, targets)
        losses = sum(loss for loss in loss_dict.values())

        optimizer.zero_grad()
        losses.backward()
        optimizer.step()

        train_losses_batch.append(losses.item())

    # lr_scheduler.step()
    model.eval()
    for images, targets in tqdm(data_loader_test, desc='test'):
        images = list(image.to(device) for image in images)
        targets = [{k: v.to(device) for k, v in t.items()} for t in [targets]]
        with torch.set_grad_enabled(False):
            output = model(images)
            loss = mcrmse(output, targets)
            test_losses_batch.append(loss)

    train_loss = np.mean(train_losses_batch)
    test_loss = np.mean(test_losses_batch)

    if best_score is None or test_loss < best_score:
        best_score = test_loss
        best_model = deepcopy(model)

    train_losses.append(train_loss)
    test_losses.append(test_loss)

    print(f'Epoch {i + 1}/{NUM_EPOCH}\t{train_loss=}\t{test_loss=}')

In [43]:
print(best_score)

10.272993592654958


In [25]:
torch.save(best_model.state_dict(), f'weights/model_{dt.datetime.now().strftime("%Y%m%d_%H%M%S")}.pt')

In [26]:
def pred_to_df(yhat, file_names, use_best=False):
    def get_best_idx(yhat_i):
        return max(range(len(yhat_i['scores'].cpu())), key=lambda j: yhat_i['scores'].cpu()[j])

    def get_p(idx):
        pts = []
        for yhat_i in yhat:
            if use_best:
                best_idx = get_best_idx(yhat_i)
                if yhat_i['labels'][best_idx] != 1:
                    pt = 0
                else:
                    pt = yhat_i['boxes'].cpu()[best_idx][idx]
            else:
                pt = yhat_i['boxes'].cpu()[idx]
            pts.append(pt)
        return np.array(pts, dtype=np.int64)

    [sanitize_pred(yi) for yi in yhat]
    df = pd.DataFrame(columns=train_df.columns)
    df['file_name'] = file_names
    df['x1'] = get_p(0)
    df['y1'] = get_p(1)
    df['x2'] = get_p(2)
    df['y2'] = get_p(3)
    df['confidence'] = [1.0 * (yi['labels'].cpu()[get_best_idx(yi)].item() if use_best else yi['labels'].cpu().item()) for yi in yhat]
    return df

In [None]:
result = pred_to_df([dataset_test_raw[0][1]], [dataset_test_raw.files[0]])
plot_bboxes(result, False)

In [48]:
dataset_test[0]

(tensor([[[0.3216, 0.3255, 0.3255,  ..., 0.1529, 0.1490, 0.1216],
          [0.3255, 0.3255, 0.3255,  ..., 0.1686, 0.1608, 0.1373],
          [0.3294, 0.3294, 0.3294,  ..., 0.1804, 0.1608, 0.1412],
          ...,
          [0.7333, 0.7333, 0.7333,  ..., 0.6000, 0.6000, 0.5961],
          [0.7333, 0.7333, 0.7333,  ..., 0.6000, 0.5961, 0.5922],
          [0.7333, 0.7333, 0.7373,  ..., 0.5961, 0.5922, 0.5922]],
 
         [[0.3373, 0.3412, 0.3451,  ..., 0.0941, 0.0980, 0.0784],
          [0.3373, 0.3412, 0.3451,  ..., 0.1059, 0.1059, 0.0863],
          [0.3412, 0.3412, 0.3412,  ..., 0.1176, 0.1020, 0.0863],
          ...,
          [0.8039, 0.8039, 0.8039,  ..., 0.6667, 0.6667, 0.6627],
          [0.8039, 0.8039, 0.8039,  ..., 0.6706, 0.6667, 0.6627],
          [0.8039, 0.8039, 0.8078,  ..., 0.6667, 0.6627, 0.6627]],
 
         [[0.3373, 0.3373, 0.3373,  ..., 0.1059, 0.1020, 0.0745],
          [0.3373, 0.3373, 0.3373,  ..., 0.1137, 0.1059, 0.0784],
          [0.3412, 0.3373, 0.3373,  ...,

In [None]:
from random import randint
with torch.set_grad_enabled(False):
    idx = randint(0, len(dataset_test_raw))
    pred = model([dataset_test_raw[idx][0].to(device)])
    result_pred = pred_to_df(pred, [dataset_test_raw.files[idx]], use_best=True)
    plot_bboxes(pred_to_df([dataset_test_raw[idx][1]], [dataset_test_raw.files[idx]]), False)
    plot_bboxes(result_pred, False)

In [37]:
model.load_state_dict(torch.load('./weights/model_20230319_154804.pt'))

<All keys matched successfully>

In [27]:
test_data = BearTestDataset('data/bear_images/bear_images', 'data/test.csv', get_transform(False))

In [None]:
from random import randint

model.eval()
with torch.set_grad_enabled(False):
    idx = randint(0, len(test_data))
    pred = model([test_data[idx][0].to(device)])
    result_pred = pred_to_df(pred, [test_data.files[idx]], use_best=True)
    plot_bboxes(result_pred, False)

In [32]:
y_pred = []
for img, _ in tqdm(test_data):
    with torch.set_grad_enabled(False):
        pred = best_model([img.to(device)])
        y_pred.append(pred)

100%|██████████| 149/149 [00:23<00:00,  6.26it/s]


In [None]:
submission = pred_to_df(sum(y_pred, []), test_data.files, True)
submission.file_name = submission.file_name.apply(lambda p: os.path.basename(p))
submission.head()

In [35]:
submission.to_csv(f'./data/submission_{dt.datetime.now().strftime("%Y%m%d_%H%M%S")}.csv', index=False)

In [None]:
plot_bboxes(submission[submission.confidence == 0])

# Blend

In [5]:
sus_images = ['image_116.jpeg', 'image_166.jpeg', 'image_170.png', 'image_346.jpeg', 'image_348.jpeg', 'image_375.png', 'image_391.jpeg', 'image_396.jpeg', 'image_437.jpeg', 'image_485.jpeg']

In [4]:
best_submission = pd.read_csv('data/submission_20230319_004813.csv')
prev_submission = pd.read_csv('data/submission_20230319_154945.csv')
last_submission = pd.read_csv('data/submission_20230319_155311.csv')

In [None]:
best_submission[best_submission.file_name.isin(sus_images)]

In [None]:
plot_bboxes(best_submission[best_submission.file_name.isin(sus_images)])

In [None]:
plot_bboxes(prev_submission[prev_submission.file_name.isin(sus_images)])

In [11]:
blended = last_submission.set_index('file_name')
blended.update(prev_submission[prev_submission.file_name.isin(sus_images) & prev_submission.confidence > 0].set_index('file_name'))
blended.update(best_submission[best_submission.file_name.isin(sus_images) & best_submission.confidence > 0].set_index('file_name'))

In [13]:
blended = blended.reset_index()

In [18]:
import numpy as np

In [19]:
blended[['x1', 'y1', 'x2', 'y2']] = blended[['x1', 'y1', 'x2', 'y2']].astype(np.int64)

In [None]:
blended.head()

In [None]:
plot_bboxes(blended[blended.file_name.isin(sus_images)])

In [23]:
import datetime as dt

In [24]:
blended.to_csv(f'./data/submission_{dt.datetime.now().strftime("%Y%m%d_%H%M%S")}.csv', index=False)

# Blend 2

In [3]:
best_submission = pd.read_csv('data/submission_20230319_173121.csv')

In [None]:
plot_bboxes(best_submission[best_submission.confidence == 1])

In [6]:
images = ['image_292.png', 'image_286.png', 'image_235.png', 'image_224.png', 'image_218.png', 'image_179.jpeg', 'image_166.jpeg']

In [8]:
import os

In [None]:
for submission in filter(lambda p: p.startswith('submission'), os.listdir('data')):
    print(submission)
    df = pd.read_csv('data/' + submission)
    plot_bboxes(df[df['file_name'].isin(images[4:])])

In [13]:
to_replace = {
    'image_286.png': 'submission_20230319_004813.csv',
    'image_235.png': 'submission_20230319_154945.csv',
    'image_224.png': 'submission_20230319_154945.csv',
    'image_179.jpeg': 'submission_20230319_154945.csv',
    'image_218.png': 'submission_20230319_004813.csv',
}

In [None]:
blended = best_submission.set_index('file_name')
for img, take_from in to_replace.items():
    other = pd.read_csv('data/' + take_from)
    blended.update(other[other.file_name == img].set_index('file_name'))

blended = blended.reset_index()
blended.head()

In [None]:
import numpy as np
blended[['x1', 'y1', 'x2', 'y2']] = blended[['x1', 'y1', 'x2', 'y2']].astype(np.int64)
blended.head()

In [None]:
plot_bboxes(blended[blended.confidence == 1])

In [18]:
import datetime as dt
blended.to_csv(f'./data/submission_{dt.datetime.now().strftime("%Y%m%d_%H%M%S")}.csv', index=False)