In [6]:
import os
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
import torch
import shutil
from ultralytics import YOLO

In [7]:
np.random.seed(42)
torch.set_num_threads(2)
INPUT_DIR = './data/icebergs'
device = torch.cuda.is_available()

# Load data to DataFeame, make train/val split

In [8]:
train = pd.read_json(f'{INPUT_DIR}/train.json')
train['inc_angle'] = pd.to_numeric(train['inc_angle'],errors='coerce')
test = pd.read_json(f'{INPUT_DIR}/test.json')
test['inc_angle'] = pd.to_numeric(test['inc_angle'],errors='coerce')

In [9]:
new_imgs = []
for i, row in train.iterrows():
    new_imgs.append((row.id, f'./data/icebergs/train_imgs/{row.id}_1.jpg', row.is_iceberg, row.inc_angle))
    new_imgs.append((row.id, f'./data/icebergs/train_imgs/{row.id}_2.jpg', row.is_iceberg, row.inc_angle))
train_data = pd.DataFrame(new_imgs, columns = ['id', 'path', 'label', 'angle'])
new_imgs_test = []
for i, row in test.iterrows():
    new_imgs_test.append((row.id, f'./data/icebergs/test_imgs/{row.id}_1.jpg', row.inc_angle))
    new_imgs_test.append((row.id, f'./data/icebergs/test_imgs/{row.id}_2.jpg', row.inc_angle))
test_data = pd.DataFrame(new_imgs_test, columns = ['id', 'path', 'angle'])

In [158]:
train_ids, val_ids = train_test_split(train_data.id.unique(), test_size=.1, random_state=42)
val_data = train_data[train_data.id.isin(val_ids)]
train_data = train_data[train_data.id.isin(train_ids)]

In [182]:
ROOT_YOLO = './data/icebergs/yolo_dataset'
os.makedirs(f'{ROOT_YOLO}/train/0')
os.makedirs(f'{ROOT_YOLO}/train/1')
os.makedirs(f'{ROOT_YOLO}/val/0')
os.makedirs(f'{ROOT_YOLO}/val/1')

In [183]:
for i, row in train_data.iterrows():
    shutil.copy(row.path, f'{ROOT_YOLO}/train/{row.label}')

In [185]:
for i, row in val_data.iterrows():
    shutil.copy(row.path, f'{ROOT_YOLO}/val/{row.label}')

## Now go run yolo_train.py

When you have it ready, paste path to the weights to the cell below

In [15]:
model = YOLO('./icebergs_yolo/train8/weights/best.pt')

In [33]:
probas = []
for i, row in test_data.iterrows():
    probas.append(model(row.path)[0].probs.data[1].cpu().numpy().item())


image 1/1 e:\github\automl-itmo\data\icebergs\test_imgs\5941774d_1.jpg: 64x64 0 0.97, 1 0.03, 32.0ms
Speed: 1.1ms preprocess, 32.0ms inference, 0.0ms postprocess per image at shape (1, 3, 64, 64)

image 1/1 e:\github\automl-itmo\data\icebergs\test_imgs\5941774d_2.jpg: 64x64 1 0.69, 0 0.31, 10.0ms
Speed: 1.0ms preprocess, 10.0ms inference, 0.0ms postprocess per image at shape (1, 3, 64, 64)

image 1/1 e:\github\automl-itmo\data\icebergs\test_imgs\4023181e_1.jpg: 64x64 0 0.66, 1 0.34, 11.0ms
Speed: 1.0ms preprocess, 11.0ms inference, 0.0ms postprocess per image at shape (1, 3, 64, 64)

image 1/1 e:\github\automl-itmo\data\icebergs\test_imgs\4023181e_2.jpg: 64x64 0 0.61, 1 0.39, 10.0ms
Speed: 1.0ms preprocess, 10.0ms inference, 0.0ms postprocess per image at shape (1, 3, 64, 64)

image 1/1 e:\github\automl-itmo\data\icebergs\test_imgs\b20200e4_1.jpg: 64x64 0 1.00, 1 0.00, 12.0ms
Speed: 1.0ms preprocess, 12.0ms inference, 0.0ms postprocess per image at shape (1, 3, 64, 64)

image 1/1 e:\g

In [37]:
test_data['is_iceberg'] = probas

In [39]:
s = test_data.groupby(['id']).mean().reset_index()

In [42]:
s = s.drop(['angle'], axis=1)

In [44]:
s.to_csv('./data/submissions/iceber_yolo_submisssion.csv', index=False)