### Example from "A gentle introduction to computer vision-based specimen classification in ecological datasets" by Jarrett Blair
Based on data from: https://github.com/Jarrett-Blair/Intro-to-CV-for-Ecologists/tree/main

In [5]:
# Fetch and unpack the example images into blair/train and blair/test.
# The whole step is skipped when both folders already exist, and stale leftovers are
# removed before unpacking, so re-running the cell can never nest Images/training inside
# an existing train/ folder (which `mv` does when the target directory already exists).
# `unzip -q` suppresses the per-file listing that would otherwise flood the cell output.
![ -d blair/train ] && [ -d blair/test ] \
    || ( mkdir -p blair \
    && cd blair \
    && rm -rf Images Images.zip train test \
    && wget https://github.com/Jarrett-Blair/Intro-to-CV-for-Ecologists/raw/refs/heads/main/Data/Images.zip \
    && unzip -q Images.zip \
    && rm Images.zip \
    && mv Images/training train \
    && mv Images/testing test \
    && rm -rf Images )

--2025-04-11 16:48:35--  https://github.com/Jarrett-Blair/Intro-to-CV-for-Ecologists/raw/refs/heads/main/Data/Images.zip
Resolving github.com (github.com)... 140.82.121.4
Connecting to github.com (github.com)|140.82.121.4|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://raw.githubusercontent.com/Jarrett-Blair/Intro-to-CV-for-Ecologists/refs/heads/main/Data/Images.zip [following]
--2025-04-11 16:48:35--  https://raw.githubusercontent.com/Jarrett-Blair/Intro-to-CV-for-Ecologists/refs/heads/main/Data/Images.zip
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.111.133, 185.199.108.133, 185.199.109.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.111.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 32191966 (31M) [application/zip]
Saving to: ‘Images.zip’


2025-04-11 16:48:36 (30.4 MB/s) - ‘Images.zip’ saved [32191966/32191966]

Archive:  Images.zip
   creating: I

In [7]:
# Baseline run: call the mt_train command-line tool from inside blair/ on the train folder
# (EfficientNet-B0, batch size 32, 5 epochs, 1 warm-up epoch, TensorBoard logging enabled).
!cd blair \
    && mt_train -i train --model efficientnet_b0 --batch_size 32 --epochs 5 --warmup_epochs 1 --tensorboard
# Disabled follow-up step (prediction on the test folder); to enable it, append the line below to the command above.
#    && mt_predict -i test -o . --class_index class_index.json --model efficientnet_b0 --weights efficientnet_b0_full_e5.pt --training_format --verbose

Building datasets with image size (256, 256)
Preprocessing training images...: 100%|███| 4151/4151 [00:03<00:00, 1103.52it/s]
Preprocessing validation images...: 100%|███| 465/465 [00:00<00:00, 1081.29it/s]
Start training
acc1= 88.2 | acc5=100.0 | loss= 1.22                                            
acc1= 76.5 | acc5=100.0 | loss= 1.20                                            
acc1=100.0 | acc5=100.0 | loss=0.801                                            
acc1=100.0 | acc5=100.0 | loss=0.704                                            
acc1=100.0 | acc5=100.0 | loss=0.691                                            
Training time 0:00:52


In [None]:
# Test that all species can be resolved properly (they can)
import os
from hierarchical.guillaume.gbif import resolve_id, name_to_id

# Map every entry name in blair/train (the class folder names) to the first element of
# the "species" field returned by `resolve_id` — presumably the species-level taxon ID.
sp2id = dict()

for species in os.listdir("blair/train"):
    # Named `taxon_id` rather than `id`: a top-level `id` would shadow the builtin `id()`
    # in every cell executed afterwards. The other two lookup fields are not needed here.
    taxon_id, _rank, _confidence = name_to_id(species)
    result = resolve_id(taxon_id)
    # Uncomment to inspect how each folder name was matched:
    # print("Resolved", species, f"to {_rank} ({_confidence}%):")
    # print(result)
    # print()
    sp2id[species] = result["species"][0]

In [None]:
import os

from hierarchical.base.integration import HierarchicalBuilder
from hierarchical.guillaume.gbif import name_to_id
from hierarchical.guillaume.setup import ids_to_combinations


def folder_to_combinations(dir : str):
    """Turn the entry names of a class-folder directory into taxon combinations.

    Every entry name found in `dir` is looked up with `name_to_id`; the first
    element of each lookup result is collected, and the resulting list is
    handed to `ids_to_combinations` with `verbose=False`.

    Args:
        dir: Directory whose entry names are the class names to look up.

    Returns:
        Whatever `ids_to_combinations` returns for the collected IDs.
    """
    taxon_ids = []
    for entry in os.listdir(dir):
        class_name = os.path.basename(entry)
        lookup = name_to_id(class_name)
        taxon_ids.append(lookup[0])
    return ids_to_combinations(taxon_ids, verbose=False)

# Call the builder's `spec_model_dataloader` with the class-index path, the training
# directory, and `folder_to_combinations` as the directory -> combinations callback.
# NOTE(review): presumably this writes the hierarchical class index to `path` (the same
# path is later passed to mt_train as `class_index`) — confirm against HierarchicalBuilder.
HierarchicalBuilder.spec_model_dataloader(
    path="blair/hierarchical_class_index.json",
    dir="blair/train",
    dir2comb_fn=folder_to_combinations 
)
# Ending the cell on a statement rather than an expression keeps the call's return value from being echoed.
pass

In [11]:
def tensorboard_logger_kwargs(name : str, output : str):
    """Assemble the logger keyword arguments that attach a TensorBoard writer to a run.

    A "tensorboard" folder path is formed under `output`, `increment_name_dir` is asked
    for the run name inside it (presumably a non-clashing variant of `name` — TODO confirm),
    and a `SummaryWriter` is opened on that run folder with a 30 second flush interval.

    Args:
        name: Base run name passed to `increment_name_dir`.
        output: Output directory; the writer's folder lives below `<output>/tensorboard`.

    Returns:
        A dict with the keys "verbose", "logger_cls" and "logger_cls_extra_kwargs".
        The two lists appear to be index-aligned, so the writer is the extra keyword
        argument for `TensorboardLogger`.
    """
    # Imported lazily so that defining the helper does not require these packages.
    from torch.utils.tensorboard.writer import SummaryWriter

    from mini_trainer.utils import increment_name_dir
    from mini_trainer.utils.logging import MetricLoggerWrapper
    from mini_trainer.utils.tensorboard import TensorboardLogger

    tensorboard_dir = os.path.join(output, "tensorboard")
    run_name = increment_name_dir(name, tensorboard_dir)
    run_dir = os.path.join(tensorboard_dir, run_name)
    writer = SummaryWriter(run_dir, flush_secs=30)

    logger_classes = [MetricLoggerWrapper, TensorboardLogger]
    per_logger_kwargs = [{}, {"writer" : writer}]
    return dict(
        verbose=True,
        logger_cls=logger_classes,
        logger_cls_extra_kwargs=per_logger_kwargs,
    )

In [None]:
from mini_trainer.train import main as mt_train
from hierarchical.guillaume.hierarchical import HierarchicalPathParser, DEFAULT_HIERARCHY_LEVELS

# Run identity. `name` and `output` are also used to set up the TensorBoard writer below.
# NOTE(review): the run name ends in "e5" while `epochs=50` is passed to mt_train — confirm which is intended.
name = "hierarchical_efficientnet_b0_full_e5"
output = "blair"
class_index_path = "blair/hierarchical_class_index.json"

# Settings for the path parser; `sp2id` is the folder-name -> ID mapping built in the
# species-resolution cell earlier in the notebook.
path_parser_kwargs = {
    "class_index" : class_index_path,
    "levels" : DEFAULT_HIERARCHY_LEVELS,
    "as_tensor" : True,
    "name2cls" : sp2id,
}

dataloader_kwargs = {
    "data_index" : None,
    "batch_size" : 32,
    "resize_size": 256,
    # "train_proportion": 0.9,
    "path2cls2idx_builder" : HierarchicalPathParser,
    "path2cls2idx_builder_kwargs" : path_parser_kwargs,
}

# Loss weights (presumably one per hierarchy level — TODO confirm). Alternatives noted by the author:
#   non-hierarchical: [1, 0, 0] | different hierarchical weightings: [0.1, 0.25, 0.65], [0.65, 0.25, 0.1], [1, 1, 1]
criterion_kwargs = {"label_smoothing" : 0.01, "weights" : [1, 1, 0]}

mt_train(
    input="blair/train",
    output=output,
    class_index=class_index_path,
    name=name,
    # weights="hierarchical/gmo_traits_2.pt",
    epochs=50,
    dtype="bfloat16",
    device="cuda:0",
    builder=HierarchicalBuilder,
    model_builder_kwargs={"model_type" : "efficientnet_b0"},
    dataloader_builder_kwargs=dataloader_kwargs,
    optimizer_builder_kwargs={"lr" : 0.001},
    criterion_builder_kwargs=criterion_kwargs,
    lr_schedule_builder_kwargs={"warmup_epochs" : 2},
    logger_builder_kwargs=tensorboard_logger_kwargs(name, output),
)

Building datasets with image size 256
Start training


                                                                                

acc1= 20.0 | acc5= 85.0 | loss=  3.3


                                                                                

acc1= 95.0 | acc5=100.0 | loss=0.716


                                                                                

acc1= 30.0 | acc5= 85.0 | loss=  3.3


                                                                                

acc1= 80.0 | acc5= 95.0 | loss= 1.60


                                                                                

acc1= 95.0 | acc5=100.0 | loss=0.650


                                                                                

acc1= 80.0 | acc5=100.0 | loss=0.882


                                                                                

acc1= 95.0 | acc5=100.0 | loss=0.641


                                                                                

acc1= 75.0 | acc5= 95.0 | loss= 1.15


                                                                                

acc1= 80.0 | acc5=100.0 | loss= 1.43


                                                                                

acc1=100.0 | acc5=100.0 | loss=0.575


                                                                                

acc1= 85.0 | acc5= 90.0 | loss= 1.37


                                                                                

acc1= 70.0 | acc5=100.0 | loss= 1.74


                                                                                

acc1= 20.0 | acc5= 95.0 | loss= 3.05


                                                                                

acc1= 90.0 | acc5=100.0 | loss=0.806


                                                                                

acc1= 85.0 | acc5=100.0 | loss=0.790


                                                                                

acc1= 85.0 | acc5=100.0 | loss=0.802


                                                                                

acc1= 95.0 | acc5=100.0 | loss=0.386


                                                                                

acc1= 80.0 | acc5=100.0 | loss= 1.15


                                                                                

acc1= 95.0 | acc5=100.0 | loss=0.329


                                                                                

acc1= 95.0 | acc5=100.0 | loss=0.448


                                                                                

acc1= 90.0 | acc5=100.0 | loss=0.722


                                                                                

acc1= 80.0 | acc5=100.0 | loss=0.766


                                                                                

acc1= 60.0 | acc5= 95.0 | loss= 1.88


                                                                                

acc1= 90.0 | acc5=100.0 | loss=0.632


                                                                                

acc1= 90.0 | acc5=100.0 | loss=0.810


                                                                                

acc1= 90.0 | acc5=100.0 | loss=0.534


                                                                                

acc1= 95.0 | acc5=100.0 | loss=0.334


                                                                                

acc1=100.0 | acc5=100.0 | loss=0.242


                                                                                

acc1= 90.0 | acc5=100.0 | loss=0.577


                                                                                

acc1=100.0 | acc5=100.0 | loss=0.294


                                                                                

acc1= 95.0 | acc5=100.0 | loss=0.560


                                                                                

acc1= 85.0 | acc5=100.0 | loss=0.628


                                                                                

acc1=100.0 | acc5=100.0 | loss=0.278


                                                                                

acc1=100.0 | acc5=100.0 | loss=0.260


                                                                                

acc1= 95.0 | acc5=100.0 | loss=0.349


                                                                                

acc1=100.0 | acc5=100.0 | loss=0.276


                                                                                

acc1= 95.0 | acc5=100.0 | loss=0.362


                                                                                

acc1= 95.0 | acc5=100.0 | loss=0.344


                                                                                

acc1= 95.0 | acc5=100.0 | loss=0.345


                                                                                

acc1= 95.0 | acc5=100.0 | loss=0.408


                                                                                

acc1=100.0 | acc5=100.0 | loss=0.281


                                                                                

acc1= 95.0 | acc5=100.0 | loss=0.374


                                                                                

acc1=100.0 | acc5=100.0 | loss=0.303


                                                                                

acc1= 95.0 | acc5=100.0 | loss=0.318


                                                                                

acc1=100.0 | acc5=100.0 | loss=0.269


                                                                                

acc1=100.0 | acc5=100.0 | loss=0.297


                                                                                

acc1= 95.0 | acc5=100.0 | loss=0.355


                                                                                

acc1=100.0 | acc5=100.0 | loss=0.263


                                                                                

acc1= 95.0 | acc5=100.0 | loss=0.394


                                                                                

acc1= 95.0 | acc5=100.0 | loss=0.320
Training time 0:10:33
