# Installation 

We recommend running this code in an isolated Python environment (for example, one created with conda).




In [1]:
!pip install torch==1.13.1 torchvision==0.14.1 opencv-python==4.7.0.68 pandas==1.3.5
!pip install ultralytics==8.0.19 
!pip install scikit-learn tqdm



## Train/Validation Split
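- 하나의 블록 구조만 있는 이미지(TRAIN_00263.jpg 까지)는 모두 Train Set에 포함하고, 나머지 이미지는 전체 Train/Validation 비율이 9:1이 되도록 split합니다. (Images up to TRAIN_00263.jpg, which contain a single block structure, always go to the train set; the remaining images are split so that the overall train/validation ratio is 9:1.)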

In [2]:
import pathlib 
from sklearn.model_selection import train_test_split
import shutil

# Number of leading images (in sorted filename order) that always go to the
# train set.
# NOTE(review): assumes filenames sort lexicographically in the intended order
# (e.g. zero-padded TRAIN_00000.jpg ...) — confirm against the raw data.
NUM_BASIC_IMAGES = 264
# Target fraction of ALL annotated images that should end up in the train set.
TRAIN_FRACTION = 0.9

dataset_dir = pathlib.Path('_dataset')


def copy_split(images, split):
    """Copy images and their sibling `.txt` labels into the folders of one split.

    Args:
        images: iterable of `pathlib.Path` objects pointing at the image files;
            each image must have a label file with the same stem and a `.txt`
            suffix next to it.
        split: name of the split sub-folder, e.g. 'train' or 'val'.

    Files are written to `_dataset/images/<split>` and `_dataset/labels/<split>`;
    both folders are created when missing.
    NOTE(review): existing files in these folders are not removed, so files
    left over from a previous run with a different split stay in place.
    """
    image_dir = dataset_dir / 'images' / split
    label_dir = dataset_dir / 'labels' / split
    image_dir.mkdir(exist_ok=True, parents=True)
    label_dir.mkdir(exist_ok=True, parents=True)
    for image in images:
        label = image.with_suffix('.txt')
        shutil.copy(image, image_dir / image.name)
        shutil.copy(label, label_dir / label.name)


# Keep only the images that have a non-empty annotation file next to them.
# The listing is sorted because Path.glob() yields entries in arbitrary
# (filesystem) order, while the slicing below relies on filename order and
# train_test_split(random_state=0) is only reproducible for a fixed input order.
images_with_annotation = sorted(
    image
    for image in (dataset_dir / 'data').glob('*')
    if image.suffix != '.txt'
    and image.with_suffix('.txt').exists()
    and image.with_suffix('.txt').stat().st_size > 0
)

images_num = len(images_with_annotation)
print(f'total image number : {images_num}')

basic_images = images_with_annotation[:NUM_BASIC_IMAGES]
normal_images = images_with_annotation[NUM_BASIC_IMAGES:]

if not normal_images:
    # Without this guard the ratio below fails with an opaque ZeroDivisionError.
    raise ValueError(
        f'expected more than {NUM_BASIC_IMAGES} annotated images in '
        f"'{dataset_dir / 'data'}', found {images_num}"
    )

# Fraction of the remaining images that must go to train so that, together with
# the basic images, the train set holds TRAIN_FRACTION of all images.
train_size = (TRAIN_FRACTION * images_num - len(basic_images)) / len(normal_images)
train, val = train_test_split(normal_images, train_size=train_size, random_state=0)

train += basic_images
print(f'train size : {len(train)}, val size : {len(val)}')

copy_split(train, 'train')
copy_split(val, 'val')

total image number : 9234
train size : 8310, val size : 924


## Offline Augmentation Code

In [None]:
#%% 
import cv2 
import numpy as np 
import pathlib
from matplotlib import pyplot as plt 
from tqdm import tqdm
import shutil
# %%

# Candidate background files (every file with an extension under
# Background_Augmentation, searched recursively). Sorted so the list does not
# depend on filesystem enumeration order.
img_back_paths = sorted(str(path) for path in pathlib.Path('Background_Augmentation').glob('**/*.*'))
print(len(img_back_paths))
if not img_back_paths:
    # The original retry loop would spin forever on an empty candidate list.
    raise FileNotFoundError("no background images found under 'Background_Augmentation'")
#%%
in_path = pathlib.Path('_dataset/images')
out_path = pathlib.Path('dataset/images')
out_path.mkdir(exist_ok=True, parents=True)
(out_path / 'train').mkdir(exist_ok=True)
(out_path / 'val').mkdir(exist_ok=True)
paths = [str(path) for path in in_path.glob('**/*.jpg')]

# Upper bound on attempts to draw a readable background for one image.
MAX_BACKGROUND_TRIES = 100
# Grey levels above this value are treated as background and replaced.
WHITE_THRESHOLD = 200


def load_random_background(width, height):
    """Return a randomly chosen background image resized to `width` x `height`.

    Candidates that OpenCV cannot decode (`cv2.imread` returns None) are
    skipped. Raises RuntimeError when no readable candidate is found within
    MAX_BACKGROUND_TRIES draws instead of retrying forever.
    NOTE(review): the draw uses the unseeded global NumPy RNG, so the generated
    dataset differs between runs.
    """
    for _ in range(MAX_BACKGROUND_TRIES):
        candidate = cv2.imread(np.random.choice(img_back_paths, 1)[0])
        if candidate is None:
            continue
        return cv2.resize(candidate, (width, height), interpolation=cv2.INTER_LINEAR)
    raise RuntimeError(
        f'could not read any background image after {MAX_BACKGROUND_TRIES} attempts'
    )


for path in tqdm(paths):
    img = cv2.imread(path)
    if img is None:
        raise IOError('cannot read image: ' + path)

    # cv2.imread returns channels in BGR order, so BGR2GRAY is the matching
    # conversion (RGB2GRAY would swap the red and blue weights).
    img_grey = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # Match the background to the foreground size so the masks below line up
    # for any input resolution, not only 400x400.
    height, width = img.shape[:2]
    img_back = load_random_background(width, height)

    # mask: 255 where the pixel is brighter than the threshold.
    # mask_blur: median-filtered and inverted, i.e. 255 on the pixels to keep.
    _, mask = cv2.threshold(img_grey, WHITE_THRESHOLD, 255, cv2.THRESH_BINARY)
    mask_blur = cv2.bitwise_not(cv2.medianBlur(mask, 5))
    img_fore = cv2.bitwise_and(img, img, mask=mask_blur)
    img_back = cv2.bitwise_and(img_back, img_back, mask=cv2.bitwise_not(mask_blur))

    # The two parts are non-zero on complementary masks, so OR merges them.
    image_final = cv2.bitwise_or(img_fore, img_back)

    # Mirror the relative location of the input under the output root.
    out_file = out_path / pathlib.Path(path).relative_to(in_path)
    out_file.parent.mkdir(parents=True, exist_ok=True)
    res = cv2.imwrite(str(out_file), image_final)
    if not res:
        print('error: ' + str(out_file))



In [None]:
# Copy the label tree from _dataset/labels to dataset/labels so that the
# dataset/ folder holds both the augmented images and their labels.
# Files are copied one by one instead of using shutil.copytree, because
# copytree raises FileExistsError when dataset/labels already exists, which
# made this cell fail on every re-run.
import pathlib
import shutil

labels_src = pathlib.Path('_dataset/labels')
labels_dst = pathlib.Path('dataset/labels')

if not labels_src.is_dir():
    raise FileNotFoundError(f"label directory '{labels_src}' does not exist")

labels_dst.mkdir(parents=True, exist_ok=True)
for src in sorted(labels_src.glob('**/*')):
    dst = labels_dst / src.relative_to(labels_src)
    if src.is_dir():
        dst.mkdir(parents=True, exist_ok=True)
    else:
        dst.parent.mkdir(parents=True, exist_ok=True)
        shutil.copy(src, dst)

# Train

In [None]:
# to setup dataset.yaml
import os 
import yaml

# Build the dataset description first, then serialise it to dataset.yaml:
#   nc     - number of classes
#   names  - class id -> class name (0 -> 'A' ... 9 -> 'J')
#   path   - dataset root: the `dataset` folder inside the current working directory
#   train / val - image folders of the two splits, relative to `path`
class_labels = 'ABCDEFGHIJ'
dataset = {
    'path': os.getcwd() + '/' + 'dataset',
    'train': 'images/train',
    'val': 'images/val',
    'nc': 10,
    'names': dict(enumerate(class_labels)),
}

with open("dataset.yaml", "w") as f:
    yaml.dump(dataset, f)

In [None]:
# Run the `yolo` command-line tool in detection training mode, starting from
# the yolov8x.pt weights and reading the dataset description from dataset.yaml.
# NOTE(review): device='0,1' presumably selects two GPUs and epochs=3000 is a
# very long schedule — confirm both match the machine this is run on.
!yolo task=detect mode=train epochs=3000 batch=48 model=yolov8x.pt data='dataset.yaml' pretrained=True max_det=20 optimizer=AdamW warmup_epochs=10 fl_gamma=1.5 label_smoothing=0.1 cos_lr=True lr0=0.001 device=\'0,1\' plots=True cache=True

## Submission CSV Generation

In [None]:
#%%
from ultralytics import YOLO

import pathlib 
from tqdm import tqdm
import pandas as pd
import cv2
import numpy as np 
# Load a model

# Directory holding the test images.
# NOTE(review): machine-specific absolute path — point this at the local copy
# of the test set before running.
TEST_DIR = pathlib.Path('/home/ai/DATASET/230108_4dblock/test')

# Class index predicted by the model -> submission column name.
class2name = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J']

# Post-processing rules as (rule id, trigger label, labels to add). A rule
# fires when its trigger label occurs more than once in the label list.
# Order matters: rules are evaluated top to bottom on the growing list, so the
# 'B' appended by the first rule can make the second rule fire.
CO_OCCURRENCE_RULES = [
    (1, 'A', ['B']),
    (1, 'B', ['A']),
    (2, 'I', ['J']),
    (3, 'E', ['A', 'B']),
]


def apply_rules(names, file_name):
    """Apply CO_OCCURRENCE_RULES to the detected labels of one image.

    Args:
        names: list of detected class names (duplicates allowed).
        file_name: image path, recorded in the log entries.

    Returns:
        (extended_names, log_entries): a new list with the added labels
        appended, and one dict {'name', 'add', 'rule'} per added label.
    """
    extended = list(names)
    entries = []
    for rule, trigger, additions in CO_OCCURRENCE_RULES:
        if extended.count(trigger) > 1:
            for label in additions:
                extended.append(label)
                entries.append({'name': file_name, 'add': label, 'rule': rule})
    return extended, entries


model = YOLO('submission.pt')  # to use our trained model
# model = YOLO("runs/detect/train/weights/best.pt")  # !!! Modify train# number in Context! !!!

# Template submission; predictions are written into a copy of it.
submission = pd.read_csv('sample_submission.csv')
submission_df = submission.copy()

files = sorted(str(f) for f in TEST_DIR.glob('*'))

# Rows are matched to files by position (i-th sorted file -> i-th row). With a
# different file count, `.loc[i, ...]` would silently append new rows instead
# of failing, so check the alignment up front.
if len(files) != len(submission_df):
    raise ValueError(
        f'found {len(files)} files in {TEST_DIR} but sample_submission.csv '
        f'has {len(submission_df)} rows'
    )

log = []
# Predict and post-process one image at a time, so the per-image result
# objects do not have to be kept in memory for the whole test set.
for i, file in enumerate(tqdm(files)):
    result = model(file)
    classes = result[0].cpu().boxes.cls.tolist()
    names = [class2name[int(c)] for c in classes]

    names, entries = apply_rules(names, file)
    log.extend(entries)

    uniq_names = sorted(set(names))
    if uniq_names:  # nothing to mark when the image has no detections
        submission_df.loc[i, uniq_names] = 1

log_df = pd.DataFrame(log)
log_df.to_csv('log.csv')

submission_df.to_csv('submission.csv', index=False)
print(log_df)
