<h2 style="font-family:verdana;"> <center>FathomNet 2023</center></h2>
<h3 style="font-family:verdana;"> <center>Shifting seas, shifting species: Out-of-sample detection in the deep ocean</center></h3>

<center>
<div>
    <img src="https://storage.googleapis.com/kaggle-competitions/kaggle/46149/logos/header.png" style="width:850px;height:300px">
</div>
</center>

# Table of Contents<a class='anchor' id='top'></a>

- [1. Imports and Setups](#import)
- [2. Data Loading](#loading)
- [3. Create YOLO Dataset](#dataset)
- [4. Train Model](#train)
- [5. Model Result](#result)
- [6. Export](#export)

___
# Imports and Setups <a class='anchor' id='import'></a> [↑](#top)

In [None]:
import warnings
warnings.filterwarnings("ignore")

In [None]:
import os
import ast
import shutil
import numpy as np
import pandas as pd
import json
from PIL import Image, ImageDraw

import urllib.request, io
from tqdm import tqdm
from IPython.display import Image
from pathlib import Path
from sklearn.model_selection import train_test_split

In [None]:
import torch
# Show which PyTorch build this notebook is running against.
print(torch.__version__)

import ultralytics
from ultralytics import YOLO
# Run the ultralytics environment check and print its report.
ultralytics.checks()

In [None]:
class Cfg:
    """Notebook-wide settings: file locations, model name and run parameters."""

    # Base folders. The two relative roots both resolve to the working directory.
    INPUT_ROOT = Path('./')
    DATA_EXPLORING_ROOT = Path('./')
    OUTPUT_ROOT = Path('/data/')

    # Image folders are plain strings (not Path objects), built by concatenation.
    IMAGES_ROOT = '/data/'
    TRAIN_IMAGES_ROOT = f'{IMAGES_ROOT}train/'
    EVAL_IMAGES_ROOT = f'{IMAGES_ROOT}eval/'

    # Folder that receives the generated YOLO dataset.
    DATASET_ROOT = OUTPUT_ROOT.joinpath('dataset/')

    # Input tables.
    TRAIN_IMAGE_DATA = DATA_EXPLORING_ROOT.joinpath('train_image_data.json')
    EVAL_IMAGE_DATA = DATA_EXPLORING_ROOT.joinpath('eval_image_data.json')
    ANNOTATION_FILE = DATA_EXPLORING_ROOT.joinpath('annotation.json')
    CATEGORY_KEY_FILE = INPUT_ROOT.joinpath('category_key.json')
    SAMPLE_SUBMISSION_FILE = INPUT_ROOT.joinpath('sample_submission.csv')

    # Dataset description handed to the trainer, and the name of the training run.
    DATASET_CONFIG = OUTPUT_ROOT.joinpath('dataset.yaml')
    MODEL_NAME = 'FathomNet-YOLOv8'

    # Training length and batch size.
    N_EPOCHS = 50
    N_BATCH = 8

    # Seed, fraction of images to keep, and validation share of the split.
    RANDOM_STATE = 2023
    SAMPLE_SIZE = 1.0
    TEST_SIZE = .2

    # Column used as the DataFrame index when loading the JSON tables.
    INDEX = 'id'

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib as mpl

# seaborn / matplotlib look-and-feel for the notebook
custom_params = {'lines.linewidth': 1}

# Colour palettes used by the plots.
blues_palette = sns.color_palette("Blues_r", n_colors=20)
reds_palette = sns.color_palette("Reds_r", n_colors=20)
# `palette` ends up bound to the reds palette, the last one assigned to it.
palette = reds_palette
greys_palette = sns.color_palette("Greys", n_colors=10)

# One accent colour from each palette.
blue, red = blues_palette[1], reds_palette[1]
two_colors = [blue, red]

# Apply the defaults, then the customised theme on top.
sns.set()
sns.set_theme(style="whitegrid", palette=blues_palette, rc=custom_params)

___
# Data Loading <a class='anchor' id='loading'></a> [↑](#top)

In [None]:
def read_category_keys(file=Cfg.CATEGORY_KEY_FILE, index_col=Cfg.INDEX):
    """Load the category key JSON file into a DataFrame indexed by ``index_col``.

    Args:
        file: JSON file to read; defaults to ``Cfg.CATEGORY_KEY_FILE``.
        index_col: column that becomes the index; defaults to ``Cfg.INDEX``.

    Returns:
        The frame parsed by ``pd.read_json`` with ``index_col`` as its index.
    """
    # Index by the requested column instead of always using Cfg.INDEX.
    return pd.read_json(file).set_index(index_col)

# def read_sample_submission(file=Cfg.SAMPLE_SUBMISSION_FILE, index_col=Cfg.INDEX):
#     return pd.read_csv(file).set_index(Cfg.INDEX)

def read_train_image_data(file=Cfg.TRAIN_IMAGE_DATA, index_col=Cfg.INDEX):
    """Load the training image table from JSON, indexed by ``index_col``.

    Args:
        file: JSON file to read; defaults to ``Cfg.TRAIN_IMAGE_DATA``.
        index_col: column that becomes the index; defaults to ``Cfg.INDEX``.

    Returns:
        The frame parsed by ``pd.read_json`` with ``index_col`` as its index.
    """
    # Index by the requested column instead of always using Cfg.INDEX.
    return pd.read_json(file).set_index(index_col)

def read_eval_image_data(file=Cfg.EVAL_IMAGE_DATA, index_col=Cfg.INDEX):
    """Load the evaluation image table from JSON, indexed by ``index_col``.

    Args:
        file: JSON file to read; defaults to ``Cfg.EVAL_IMAGE_DATA``.
        index_col: column that becomes the index; defaults to ``Cfg.INDEX``.

    Returns:
        The frame parsed by ``pd.read_json`` with ``index_col`` as its index.
    """
    # Index by the requested column instead of always using Cfg.INDEX.
    return pd.read_json(file).set_index(index_col)

def read_annotation_data(file=Cfg.ANNOTATION_FILE, index_col=Cfg.INDEX):
    """Load the annotation table from JSON, indexed by ``index_col``.

    Args:
        file: JSON file to read; defaults to ``Cfg.ANNOTATION_FILE``.
        index_col: column that becomes the index; defaults to ``Cfg.INDEX``.

    Returns:
        The frame parsed by ``pd.read_json`` with ``index_col`` as its index.
    """
    # Index by the requested column instead of always using Cfg.INDEX.
    return pd.read_json(file).set_index(index_col)

In [None]:
# def download_image_by_url(image_url):
#     with urllib.request.urlopen(image_url) as url:
#         img = Image.open(url)
#         url.close()
#     return img

# def load_image(path):
#     return Image.open(path)

# def get_image_path_by_id(root, image_id):
#     path = Path(root) / data.loc[image_id]['file_name']
#     path.with_suffix('.jpg')
    
# def load_image_by_id(data, image_id, compressed=True):
#     file_name = data.loc[image_id]['file_name']
#     if compressed:
#         file_name = change_file_ext(file_name)
    
#     return load_image(file_name, compressed)

# def draw_rectangle(image, xy, color='red'):
#     img = ImageDraw.Draw(image) 
#     img.rectangle(xy, outline=color, width=3)
#     return image

In [None]:
# Load each input table with its default file path; every frame is indexed by Cfg.INDEX.
category_data = read_category_keys()
train_image_data = read_train_image_data()
eval_image_data = read_eval_image_data()
annotation_data = read_annotation_data()

In [None]:
# category_data.sample(frac=Cfg.SAMPLE_SIZE, random_state=Cfg.RANDOM_STATE)

___
# Create YOLO Dataset<a class='anchor' id='dataset'></a> [↑](#top)

In [None]:
def create_bboxes(annotation_data):
    '''
    Collapse the annotation table to one row per image.

    Args:
        annotation_data: dataframe of annotations with keys: image_id, category_id, bbox

    Returns:
        dataframe indexed by image_id with two list-valued columns:
        bboxes (all bbox values of the image) and category_ids (the matching
        category_id values, in the same order).
    '''
    per_image = annotation_data.groupby(by='image_id')

    # Gather the boxes of each image into a single list-valued column.
    collected = per_image['bbox'].apply(list).rename('bboxes').to_frame()

    # Same grouping for the categories; the assignment aligns on image_id.
    collected['category_ids'] = per_image['category_id'].apply(list)
    return collected

def create_yolo_dataset(image_data, annotation_data, data_type='train', image_root=Cfg.IMAGES_ROOT, dataset_root=Cfg.DATASET_ROOT):
    '''
    Build one split of a YOLO-format dataset: a label file plus a copied image
    for every row of image_data.

    Each bbox is read as [x, y, w, h] in pixels, with (x, y) treated as the
    top-left corner. It is written as "category x_center y_center w h", where
    the four box values are divided by the image width/height.

    Args:
        image_data: dataframe with keys: image_id(index), file_name(.jpg), width, height
        annotation_data: dataframe of annotations with keys: image_id, category_id, bbox
        data_type: name of the split sub-folder, e.g. 'train' or 'val'
        image_root: root path to images.
            NOTE(review): currently unused -- source images are always read
            from Cfg.TRAIN_IMAGES_ROOT; confirm before relying on this argument.
        dataset_root: path to output dataset i.e. /data/dataset/ (str or Path)

    Notes:
        * The images/<data_type> and labels/<data_type> folders are created
          when they do not exist yet.
        * An image without any annotation gets an empty label file instead of
          raising KeyError.
        * NOTE(review): category_id is written unchanged as the class index;
          confirm the ids are zero-based and match the names in dataset.yaml.
    '''

    # Accept plain strings as well as Path objects.
    dataset_root = Path(dataset_root)
    images_dir = dataset_root / f'images/{data_type}'
    labels_dir = dataset_root / f'labels/{data_type}'
    images_dir.mkdir(parents=True, exist_ok=True)
    labels_dir.mkdir(parents=True, exist_ok=True)

    bboxes_data = create_bboxes(annotation_data)
    image_ids = image_data.index

    for image_id in tqdm(image_ids, total=len(image_ids)):
        if image_id in bboxes_data.index:
            bounding_bboxes = bboxes_data['bboxes'].loc[image_id]
            category_ids = bboxes_data['category_ids'].loc[image_id]
        else:
            # No annotation for this image: keep it with an empty label file.
            bounding_bboxes, category_ids = [], []

        image_row = image_data.loc[image_id]
        image_width = image_row['width']
        image_height = image_row['height']

        # The images on disk are expected with a .png suffix, whatever file_name says.
        file_name = Path(image_row['file_name']).with_suffix('.png')
        source_image_path = Cfg.TRAIN_IMAGES_ROOT / file_name
        target_image_path = images_dir / file_name
        label_path = (labels_dir / file_name).with_suffix('.txt')

        yolo_data = []
        for bbox, category in zip(bounding_bboxes, category_ids):
            x, y, w, h = bbox[:4]
            yolo_data.append([
                category,
                (x + w / 2) / image_width,   # box centre, relative to image width
                (y + h / 2) / image_height,  # box centre, relative to image height
                w / image_width,
                h / image_height,
            ])

        # reshape keeps an empty list two-dimensional (0 x 5) so that savetxt
        # accepts the five-column format and simply writes an empty file.
        yolo_data = np.array(yolo_data, dtype=float).reshape(-1, 5)

        # Create YOLO label file
        np.savetxt(label_path, yolo_data, fmt=["%d", "%f", "%f", "%f", "%f"])

        # Copy image file
        shutil.copy(source_image_path, target_image_path)

In [1]:
!rm -rf /data/dataset

!mkdir /data/dataset -p
!mkdir /data/dataset/images -p
!mkdir /data/dataset/images/train -p
!mkdir /data/dataset/images/val -p
!mkdir /data/dataset/labels -p
!mkdir /data/dataset/labels/train -p
!mkdir /data/dataset/labels/val -p

!tree /data/dataset

[01;34m/data/dataset[00m
├── [01;34mimages[00m
│   ├── [01;34mtrain[00m
│   └── [01;34mval[00m
└── [01;34mlabels[00m
    ├── [01;34mtrain[00m
    └── [01;34mval[00m

6 directories, 0 files


In [None]:
# Draw Cfg.SAMPLE_SIZE of the training images (a fraction of 1.0 keeps all rows, in shuffled order).
data = train_image_data.sample(frac=Cfg.SAMPLE_SIZE, random_state=Cfg.RANDOM_STATE)
# Hold out Cfg.TEST_SIZE of the sampled images for validation; the fixed seed makes the split repeatable.
X_train, X_val = train_test_split(
    data, 
    test_size=Cfg.TEST_SIZE, 
    random_state=Cfg.RANDOM_STATE)

In [None]:
# Write labels and copy images for the training split (images/train, labels/train).
create_yolo_dataset(X_train, annotation_data, data_type='train')

In [None]:
# Write labels and copy images for the validation split (images/val, labels/val).
create_yolo_dataset(X_val, annotation_data, data_type='val')

In [None]:
%%writefile dataset.yaml
# NOTE(review): a later cell writes dataset.yaml again, so this 40-class
# variant is replaced whenever both cells are run in order.
path: /data/dataset
train: images/train
val: images/val
test: #images/eval  # test images (optional)

# Classes
names:
    0: Actinernus
    1: Metridium farcimen
    2: Actinopterygii
    3: Agonidae
    4: Antimora microlepis
    5: Moridae
    6: Sebastolobus
    7: Spectrunculus grandis
    8: Bothrocara brunneum
    9: Lycodes diapterus
    10: Lycodes pacificus
    11: Neptunea-Buccinum Complex
    12: Asthenactis
    13: Mediaster
    14: Mediaster aequalis
    15: Mediaster tenellus
    16: Myxoderma sacculatum
    17: Pycnopodia helianthoides
    18: Stylasterias forreri
    19: Thrissacanthias penicillatus
    20: Bathycrinidae
    21: Benthodytes
    22: Peniagone
    23: Psychronaetes
    24: Tromikosoma
    25: Hyalonema
    26: Swiftia simplex
    27: Corallium
    28: Paragorgia arborea
    29: Clavularia
    30: Anthoptilum grandiflorum
    31: Funiculina-Halipteris complex
    32: Halipteris californica
    33: Protoptilum
    34: Virgulariidae
    35: Chorilia longipes
    36: Eualus macrophthalmus
    37: Pasiphaea
    38: Plesionika
    39: Cirripedia


In [None]:
%%writefile dataset.yaml
# Full class list (indices 0-289). Writing to dataset.yaml replaces any
# dataset.yaml already present in the working directory.
# NOTE(review): `path` is relative -- confirm it resolves to the generated dataset folder.
path: ../dataset
train: images/train
val: images/val
test: # test images (optional)

# Classes
names:
    0: Actiniaria
    1: Actinernus
    2: Actiniidae
    3: Actinoscyphia
    4: Bolocera
    5: Dofleinia
    6: Hormathiidae
    7: Isosicyonis
    8: Liponema brevicorne
    9: Metridium farcimen
    10: Actinopterygii
    11: Agonidae
    12: Albatrossia pectoralis
    13: Alepocephalus tenebrosus
    14: Anarrhichthys ocellatus
    15: Anoplopoma fimbria
    16: Antimora microlepis
    17: Bathypterois
    18: Bathysaurus mollis
    19: Careproctus
    20: Careproctus kamikawai
    21: Careproctus melanurus
    22: Careproctus ovigerus
    23: Cataetyx
    24: Chaunacops coloratus
    25: Chilara taylori
    26: Coryphaenoides
    27: Gobiidae
    28: Icelinus
    29: Icelinus filamentosus
    30: Lepidion
    31: Liparidae
    32: Lophiiformes
    33: Luciobrotula
    34: Lumpenus sagitta
    35: Macrouridae
    36: Merluccius productus
    37: Moridae
    38: Myctophidae
    39: Nezumia liolepis
    40: Nezumia stelgidolepis
    41: Ophidiidae
    42: Ophiodon elongatus
    43: Paralepididae
    44: Paraliparis
    45: Plectobranchus evides
    46: Porichthys mimeticus
    47: Psychrolutes phrictus
    48: Psychrolutidae
    49: Scorpaeniformes
    50: Sebastes
    51: Sebastolobus
    52: Spectrunculus grandis
    53: Xeneretmus
    54: Zaniolepis frenata
    55: Zaniolepis latipinnis
    56: Anguilliformes
    57: Nettastoma parviceps
    58: Ophichthus frontalis
    59: Synaphobranchidae
    60: Eptatretus
    61: Bothrocara brunneum
    62: Eucryphycus californicus
    63: Lycenchelys
    64: Lycenchelys crotalinus
    65: Lycodapus
    66: Lycodes
    67: Lycodes brevipes
    68: Lycodes cortezianus
    69: Lycodes diapterus
    70: Lycodes pacificus
    71: Pachycara bulbiceps
    72: Zoarcidae
    73: Aeolidiidae sp. 1
    74: Akoya platinum
    75: Bathybembix
    76: Bathydoris aioca
    77: Buccinidae
    78: Caenogastropoda
    79: Dendronotus patricki
    80: Gastropoda
    81: Neptunea-Buccinum Complex
    82: Nudibranchia
    83: Patellogastropoda
    84: Pleurobranchaea californica
    85: Tritonia tetraquetra
    86: Ziminella vrijenhoeki
    87: Asteroidea
    88: Asthenactis
    89: Astropecten
    90: Benthopecten
    91: Ceramaster
    92: Crossaster
    93: Dipsacaster eximius
    94: Dytaster gilberti
    95: Forcipulatida
    96: Goniasteridae
    97: Henricia
    98: Solasteridae
    99: Hippasteria
    100: Hymenaster
    101: Lophaster
    102: Luidia foliolata
    103: Mediaster
    104: Mediaster aequalis
    105: Mediaster tenellus
    106: Myxoderma
    107: Myxoderma platyacanthum
    108: Myxoderma sacculatum
    109: Patiria miniata
    110: Paulasterias mcclaini
    111: Paxillosida
    112: Peribolaster biserialis
    113: Poraniopsis
    114: Poraniopsis inflata
    115: Pterasteridae
    116: Pycnopodia helianthoides
    117: Pythonaster pacificus
    118: Rathbunaster californicus
    119: Stylasterias forreri
    120: Thrissacanthias penicillatus
    121: Valvatida
    122: Zoroasteridae
    123: Antedonidae
    124: Crinoidea
    125: Bathycrinidae
    126: Bathymetrinae
    127: Hyocrinidae
    128: Pentametrocrinus paucispinulus
    129: Abyssocucumis abyssorum
    130: Apostichopus
    131: Apostichopus californicus
    132: Apostichopus leukothele
    133: Benthodytes
    134: Benthothuria
    135: Elpidia
    136: Holothuria (Vaneyothuria) zacae
    137: Holothuroidea
    138: Laetmogone
    139: Oneirophanta mutabilis complex
    140: Paelopatides confundens
    141: Pannychia
    142: Peniagone
    143: Pseudostichopus mollis
    144: Psolidae
    145: Psolus squamatus
    146: Psychronaetes
    147: Psychropotes depressa
    148: Psychropotidae
    149: Scotoplanes
    150: Synallactes
    151: Aporocidaris milleri
    152: Brisaster
    153: Cystechinus giganteus
    154: Cystechinus loveni
    155: Cystocrepis setigera
    156: Echinoidea
    157: Echinocrepis rostrata
    158: Echinothuriidae
    159: Strongylocentrotus fragilis
    160: Tromikosoma
    161: Acanthascinae
    162: Bathydorus
    163: Bolosominae
    164: Caulophacus
    165: Chonelasma
    166: Corbitellinae
    167: Dictyocalyx
    168: Docosaccus maculatus
    169: Euplectellidae
    170: Farrea
    171: Farrea truncata complex
    172: Heterochone calyx
    173: Hexactinellida
    174: Hyalonema
    175: Hyalonema (Corynonema) populiferum
    176: Hyalonema (Oonema) bianchoratum
    177: Lyssacinosida sp. 1
    178: Regadrella
    179: Rossellidae
    180: Sclerothamnopsis
    181: Staurocalyptus
    182: Staurocalyptus solidus
    183: yellow ruffled sponge
    184: Calyptrophora
    185: Chrysogorgia
    186: Chrysogorgia monticola
    187: Chrysogorgia pinnata
    188: Iridogorgia
    189: Isidella
    190: Isidella tentaculum
    191: Isididae
    192: Keratoisis
    193: Lepidisis
    194: Parastenella
    195: Primnoidae
    196: Acanthogorgia
    197: Gorgoniidae
    198: Leptogorgia
    199: Plexauridae
    200: Swiftia
    201: Swiftia kofoidi
    202: Swiftia simplex
    203: Corallium
    204: Paragorgiidae
    205: Paragorgia arborea
    206: Alcyoniidae
    207: Bathyalcyon robustum
    208: Gersemia juliepackardae
    209: Heteropolypus
    210: Heteropolypus ritteri
    211: Clavularia
    212: Stolonifera
    213: Acanthoptilum
    214: Anthoptilum grandiflorum
    215: Anthoptilum lithophilum
    216: Distichoptilum gracile
    217: Funiculina
    218: Funiculina-Halipteris complex
    219: Halipteris californica
    220: Kophobelemnidae
    221: Pennatula
    222: Pennatula phosphorea
    223: Pennatulacea
    224: Protoptilum
    225: Ptilosarcus gurneyi
    226: Stylatula
    227: Umbellula
    228: Virgulariidae
    229: Desmophyllum dianthus
    230: Fungiacyathus (Bathyactis) marenzelleri
    231: Scleractinia
    232: Alternatipathes
    233: Antipatharia
    234: Heteropathes
    235: Lillipathes
    236: Parantipathes
    237: Schizopathidae
    238: Umbellapathes
    239: Brachyura
    240: Cancridae
    241: Chionoecetes tanneri
    242: Chorilia longipes
    243: Macroregonia macrochira
    244: Majidae
    245: Metacarcinus magister
    246: Lithodes couesi
    247: Lithodidae
    248: Neolithodes diomedeae
    249: Paralithodes rathbuni
    250: Paralomis
    251: Paralomis cf. papillata
    252: Paralomis multispina
    253: Paralomis verrilli
    254: Caridea
    255: Eualus macrophthalmus
    256: Pandalus
    257: Pandalus ampla
    258: Pandalus platyceros
    259: Pasiphaea
    260: Plesionika
    261: Calocarides quinqueseriatus
    262: Chirostylidae
    263: Galatheoidea
    264: Munida
    265: Munida bapensis
    266: Munida quadrispina
    267: Munidopsis
    268: Munidopsis depressa
    269: Munidopsis kensmithi
    270: Munidopsis lignaria
    271: Munidopsis recta
    272: Munidopsis scotti
    273: Pleuroncodes planipes
    274: Cirripedia
    275: Scalpellidae
    276: Verum proximum
    277: Pycnogonida
    278: Annelida
    279: Canalipalpata
    280: Echiura
    281: Harmothoe
    282: Hirudinea
    283: Paradiopatra
    284: Peinaleopolynoe orphanae
    285: Polychaeta
    286: Polynoidae
    287: Sabellidae
    288: Serpulidae
    289: Terebellidae

In [None]:
# Create the YOLO model from the 'yolov8m.pt' weights file.
model = YOLO('yolov8m.pt')

___
# Train Model<a class='anchor' id='train'></a> [↑](#top)

In [None]:
# Release cached, currently unused GPU memory held by PyTorch before training.
torch.cuda.empty_cache()

In [None]:
# Train on the dataset described by Cfg.DATASET_CONFIG for Cfg.N_EPOCHS epochs
# with batch size Cfg.N_BATCH at image size 640; the run is named Cfg.MODEL_NAME.
# The config path is converted to str before it is passed in.
results = model.train(
   data=str(Cfg.DATASET_CONFIG),
   epochs=Cfg.N_EPOCHS,
   imgsz=640,
   batch=Cfg.N_BATCH,
   save=True, 
   verbose=False,
   name=Cfg.MODEL_NAME)

___
# Model Result<a class='anchor' id='result'></a> [↑](#top)

In [None]:
# Folder the result files below are read from: runs/detect/<MODEL_NAME> under INPUT_ROOT.
# NOTE(review): assumes the training run wrote its output exactly there -- confirm
# against the tree listing printed below.
result_root = Cfg.INPUT_ROOT / 'runs/detect' / Cfg.MODEL_NAME
!tree runs

In [None]:
# Show the results.csv table stored in the run folder.
pd.read_csv(result_root / 'results.csv')

In [None]:
# Display results.png from the run folder.
Image(filename=result_root / 'results.png', width=1000)

In [None]:
# Display labels.jpg from the run folder.
Image(filename=result_root / 'labels.jpg', width=600)

In [None]:
# Display labels_correlogram.jpg from the run folder.
Image(filename=result_root / 'labels_correlogram.jpg', width=600)

In [None]:
# Display confusion_matrix.png from the run folder.
Image(filename=result_root / 'confusion_matrix.png', width=800)

In [None]:
# Display train_batch0.jpg from the run folder.
Image(filename=result_root / 'train_batch0.jpg', height=800, width=1200)

In [None]:
# Display train_batch2.jpg from the run folder.
Image(filename=result_root / 'train_batch2.jpg', height=800, width=1200)

In [None]:
# Display val_batch0_labels.jpg from the run folder.
Image(filename=result_root / 'val_batch0_labels.jpg', height=800, width=1200)

In [None]:
# Display val_batch0_pred.jpg from the run folder, for comparison with the label image above.
Image(filename=result_root / 'val_batch0_pred.jpg', height=800, width=1200)

___
# Predict<a class='anchor' id='predict'></a> [↑](#top)

In [None]:
# Run the model on the evaluation image folder (Cfg.EVAL_IMAGES_ROOT), asking it
# to save its output, including text results with confidences (save_txt / save_conf).
preds = model.predict(source=Cfg.EVAL_IMAGES_ROOT,
                      save=True,
                      save_txt=True,
                      save_conf=True)

___
# Export<a class='anchor' id='export'></a> [↑](#top)

In [None]:
# Export the model in ONNX format.
model.export(format='onnx')

___