# FiftyOne

In [None]:
import fiftyone as fo
import fiftyone.zoo as foz

import pandas as pd
import numpy as np
import json
from tqdm import tqdm

# 훈련 셋 선언

In [None]:
# Class names; other cells look a name up by its integer position in this list.
classes = [
    "General trash",
    "Paper",
    "Paper pack",
    "Metal",
    "Glass",
    "Plastic",
    "Styrofoam",
    "Plastic bag",
    "Battery",
    "Clothing",
]

# Import the COCO-format training annotations as a FiftyOne dataset.
trainset = fo.Dataset.from_dir(
    name="trainset-full",
    dataset_type=fo.types.COCODetectionDataset,
    data_path="/opt/ml/dataset",
    labels_path="/opt/ml/dataset/train.json",
)

# Store the number of boxes of every sample in a `bbox_num` field (as a float
# wrapped in a NumericAttribute) and persist each sample.
with fo.ProgressBar() as progress:
    for sample in progress(trainset):
        box_count = len(sample.detections.detections)
        sample["bbox_num"] = fo.core.labels.NumericAttribute(
            value=float(box_count)
        )
        sample.save()

# 테스트 셋

In [None]:
# Import the images listed in test.json as a separate FiftyOne dataset
# named "testset".
testset = fo.Dataset.from_dir(
    name="testset",
    labels_path="/opt/ml/dataset/test.json",
    data_path="/opt/ml/dataset",
    dataset_type=fo.types.COCODetectionDataset,
)

# Bounding Box 예측값 불러오기

In [None]:
# Load the submission CSV that holds the predicted bounding boxes.
csv_path = '/opt/ml/notebooks/sub.csv'

df = pd.read_csv(csv_path)
# 'Unnamed: 0' only exists when the CSV was written together with its index;
# tolerate its absence instead of failing with KeyError.
df.drop(columns='Unnamed: 0', errors='ignore', inplace=True)

# Fail fast when any row has no prediction string (read back as NaN).
# One vectorized check replaces the per-row loop, and an explicit exception
# is used because `assert` statements are stripped under `python -O`.
missing = df[df['PredictionString'].isna()]
if not missing.empty:
    raise ValueError(
        f'PredictionString is missing at rows: {missing.index.tolist()}'
    )

df.head()

# 예측 bbox 삽입

In [None]:
# Insert the predicted boxes into the test set.
# Takes about 3 minutes (measured with roughly 380,000 boxes).

# Pixel size the absolute box coordinates are divided by to obtain the
# relative coordinates stored on each detection.
IMAGE_SIZE = 1024.


def _parse_prediction_string(preds, image_size=IMAGE_SIZE):
    """Parse one prediction string into ``(class_index, score, box)`` tuples.

    Args:
        preds: whitespace-separated values in groups of six:
            ``label score xmin ymin xmax ymax``.
        image_size: divisor applied to the box coordinates.

    Returns:
        A list of ``(int, float, [x, y, w, h])`` tuples made of native Python
        values, where ``x, y`` is the top-left corner and ``w, h`` the size,
        all divided by ``image_size``.

    Raises:
        ValueError: if a value is not numeric or the number of values is not
            a multiple of six.
    """
    values = np.array(preds.split(), dtype=float).reshape(-1, 6)
    # Going through float first also accepts labels written as "3.0".
    labels = values[:, 0].astype(int)
    scores = values[:, 1]

    # Corner format (xmin, ymin, xmax, ymax) => (x, y, width, height).
    xmin, ymin, xmax, ymax = values[:, 2:].T
    boxes = np.stack([xmin, ymin, xmax - xmin, ymax - ymin], axis=1) / image_size

    # .tolist() yields plain ints/floats/lists instead of NumPy objects.
    return list(zip(labels.tolist(), scores.tolist(), boxes.tolist()))


# NOTE(review): rows of `df` are paired with samples purely by position, so
# the CSV is assumed to be in the same order as `testset` - confirm.
pred_strings = df['PredictionString'].tolist()

with fo.ProgressBar() as pb:
    for i, s in enumerate(pb(testset)):
        preds = pred_strings[i]

        if isinstance(preds, str):
            detections = [
                fo.Detection(
                    label=classes[label],
                    bounding_box=box,
                    confidence=score,
                )
                for label, score, box in _parse_prediction_string(preds)
            ]
        else:
            # A missing prediction string is read from the CSV as NaN;
            # report it and store an empty detection list for the sample.
            detections = []
            print(f'Wrong value at {i}, "{preds}"')

        s['predictions'] = fo.Detections(detections=detections)
        s['pred_bbox_num'] = len(detections)
        s.save()

# 주목할 이미지들 태그 추가

In [None]:
# Tag name -> IDs (file name stems) of the training images that get the tag.
train_tags = {
    # Images that contain boxes with an IoU above 80
    'overlap 80': [
        '0096', '0414', '0811', '0895', '1389', '1477', '1530', '1618', '1975',
        '1980', '1986', '1992', '2075', '2327', '2369', '2850', '2956', '3042',
        '3195', '3370', '3427', '3654', '4041', '4047', '4446', '4537', '0956',
        '1576', '3898', '1722', '2522', '3778', '3358', '4261',
    ],
}

for tag, image_ids in train_tags.items():
    for image_id in image_ids:
        # Samples are looked up by the full path of their image file.
        sample = trainset[f'/opt/ml/dataset/train/{image_id}.jpg']
        # Skip samples that already carry the tag so that re-running this
        # cell does not append the same tag a second time.
        if tag not in sample.tags:
            sample.tags.append(tag)
            sample.save()

In [None]:
# Tag name -> IDs (file name stems) of the test images that get the tag.
tags = {
    # Battery
    'battery': [
        '0096', '0209', '0598', '0693', '0738', '0842', '1057', '1251', '1298',
        '1495', '1501', '1978', '2077', '2336', '2565', '2810',
        '2872', '2890', '2957', '3017', '3157', '3373', '3544', '4011', '4513',
    ],

    # Hard samples
    'hard': [
        '0096',  # small battery
        '0019', '0027', '0616', '0873', '2570', '4011', '4114', '4513',
    ],

    # Images without any bbox (in the UniverseNet 5-fold ensemble)
    'no-bbox': [
        '0017', '0022', '0474', '1195', '1208', '1291', '1328', '1329', '1474', '1497',
        '1528', '1772', '2150', '2352', '2414', '2668', '2816', '3436', '3551', '3741',
        '4362', '4605',
    ],
}

for tag, image_ids in tags.items():
    for image_id in image_ids:
        # Samples are looked up by the full path of their image file.
        sample = testset[f'/opt/ml/dataset/test/{image_id}.jpg']
        # Skip samples that already carry the tag so that re-running this
        # cell does not append the same tag a second time.
        if tag not in sample.tags:
            sample.tags.append(tag)
            sample.save()

# 세션 실행

브라우저에서 `http://<서버 IP>:30001` 에 접속합니다.

In [None]:
%%capture
session = fo.launch_app(testset, port=30001);