# Build a Classifier from GBIF

Fill out the search parameters using the keys listed in GBIF's documentation: https://pygbif.readthedocs.io/en/latest/modules/
occurrence.html#pygbif.occurrences.search

Select which group to train on (currently only `scientificName` is supported).

In [1]:
import os
import bplusplus
from typing import Any

names = [
    "Nabis rugosus", 
    "Forficula auricularia",
    "Calosoma inquisitor",
    "Bombus veteranus",
    "Glyphotaelius pellucidus",
    "Notoxus monoceros",
    "Cacoxenus indagator",
    "Chorthippus mollis",
    "Trioza remota"
]

search: dict[str, Any] = {
    "scientificName": names,
    "country": ["US", "NL"]
}

bplusplus.build_model(
    group_by_key=bplusplus.Group.scientificName,
    search_parameters=search, images_per_group=150,
    model_output_folder=os.path.join('model')
)

Temporary directory path: /var/folders/_c/x01r40x56pd9c88tgbz2hdqr0000gn/T/tmpkg6mk555
Creating folders for images...
Beginning to collect images from GBIF...
Collecting images for Nabis rugosus...
Nabis rugosus : 280 parseable occurrences fetched, will sample for 150
Downloading 150 images into the Nabis rugosus folder...
Collecting images for Forficula auricularia...
Forficula auricularia : 2980 parseable occurrences fetched, will sample for 150
Downloading 150 images into the Forficula auricularia folder...
Collecting images for Calosoma inquisitor...
Calosoma inquisitor : 2629 parseable occurrences fetched, will sample for 150
Downloading 150 images into the Calosoma inquisitor folder...
Collecting images for Bombus veteranus...
Bombus veteranus : 1075 parseable occurrences fetched, will sample for 150
Downloading 150 images into the Bombus veteranus folder...
Collecting images for Glyphotaelius pellucidus...
Glyphotaelius pellucidus : 5696 parseable occurrences fetched, will sampl

Downloading https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n-cls.pt to 'model/yolov8n-cls.pt'...


Finished collecting images.
Dataset splitting completed successfully.


100%|██████████| 5.28M/5.28M [00:00<00:00, 16.8MB/s]
  return torch.load(file, map_location='cpu'), file  # load
New https://pypi.org/project/ultralytics/8.2.75 available 😃 Update with 'pip install -U ultralytics'
Ultralytics YOLOv8.0.195 🚀 Python-3.12.4 torch-2.4.0 CPU (Apple M1 Max)
[34m[1mengine/trainer: [0mtask=classify, mode=train, model=model/yolov8n-cls.pt, data=model, epochs=5, patience=50, batch=16, imgsz=224, save=True, save_period=-1, cache=False, device=None, workers=8, project=model, name=None, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, show=False, save_txt=False, save_conf=False, save_crop=False, show_labels=True, sh

Cleaned up temporary directory: /var/folders/_c/x01r40x56pd9c88tgbz2hdqr0000gn/T/tmpkg6mk555


# Run Model

Now you can run the model

In [15]:
from ultralytics import YOLO

# Load a model
path_to_model = "model/train2/weights/best.pt"
model = YOLO(path_to_model)  # load a custom model

# Predict with the model
path_to_image1 = "nabis_test.jpg"
path_to_image2 = "forficula_test.jpeg"

results1 = model(path_to_image1, save_txt = False)  # predict on an image
results2 = model(path_to_image2, save_txt = False)  # predict on an image

print(results1)
print(results2)




  return torch.load(file, map_location='cpu'), file  # load

image 1/1 /Users/deniz/Build/scl/Bplusplus/notebooks/nabis_test.jpg: 224x224 Nabis rugosus 0.91, Chorthippus mollis 0.05, Forficula auricularia 0.03, Glyphotaelius pellucidus 0.01, Calosoma inquisitor 0.00, 7.2ms
Speed: 0.6ms preprocess, 7.2ms inference, 0.0ms postprocess per image at shape (1, 3, 224, 224)

image 1/1 /Users/deniz/Build/scl/Bplusplus/notebooks/forficula_test.jpeg: 224x224 Forficula auricularia 0.91, Glyphotaelius pellucidus 0.07, Chorthippus mollis 0.01, Nabis rugosus 0.01, Trioza remota 0.00, 4.2ms
Speed: 1.4ms preprocess, 4.2ms inference, 0.0ms postprocess per image at shape (1, 3, 224, 224)


[ultralytics.engine.results.Results object with attributes:

boxes: None
keypoints: None
masks: None
names: {0: 'Bombus veteranus', 1: 'Cacoxenus indagator', 2: 'Calosoma inquisitor', 3: 'Chorthippus mollis', 4: 'Forficula auricularia', 5: 'Glyphotaelius pellucidus', 6: 'Nabis rugosus', 7: 'Notoxus monoceros', 8: 'Trioza remota'}
orig_img: array([[[ 16, 105,  56],
        [ 15, 104,  55],
        [ 14, 103,  54],
        ...,
        [  2,  19,  10],
        [  3,  20,  11],
        [  3,  20,  11]],

       [[ 16, 105,  56],
        [ 15, 104,  55],
        [ 15, 104,  55],
        ...,
        [  2,  19,  10],
        [  2,  19,  10],
        [  2,  19,  10]],

       [[ 15, 104,  55],
        [ 15, 104,  55],
        [ 15, 104,  55],
        ...,
        [  1,  18,   9],
        [  1,  18,   9],
        [  1,  18,   9]],

       ...,

       [[  1,   9,   2],
        [  1,   9,   2],
        [  1,   9,   2],
        ...,
        [ 32, 102,  55],
        [ 34, 103,  53],
        [ 33

# More Advanced Uses

## Collect images from GBIF into folders

Download the images directly using the search parameters, and images will be grouped into folders according to the `group_by_key`

In [None]:
import os
from typing import Any

directory_path = os.path.join('dataset')
names = [
    "Nabis rugosus", 
    "Forficula auricularia",
    "Calosoma inquisitor",
    "Bombus veteranus",
    "Glyphotaelius pellucidus",
    "Notoxus monoceros",
    "Cacoxenus indagator",
    "Chorthippus mollis",
    "Trioza remota"
]

import bplusplus

search: dict[str, Any] = { 
    "scientificName":  names, 
    "country": ["US", "NL"]
}

bplusplus.collect_images(group_by_key=bplusplus.Group.scientificName, search_parameters=search, images_per_group=150, output_directory=directory_path)

Creating folders for images...
Beginning to collect images from GBIF...
Collecting images for Nabis rugosus...
Nabis rugosus : 280 parseable occurrences fetched, will sample for 150
Downloading 150 images into the Nabis rugosus folder...
Collecting images for Forficula auricularia...
Forficula auricularia : 2980 parseable occurrences fetched, will sample for 150
Downloading 150 images into the Forficula auricularia folder...
Collecting images for Calosoma inquisitor...
Calosoma inquisitor : 2629 parseable occurrences fetched, will sample for 150
Downloading 150 images into the Calosoma inquisitor folder...
Collecting images for Bombus veteranus...
Bombus veteranus : 1075 parseable occurrences fetched, will sample for 150
Downloading 150 images into the Bombus veteranus folder...
Collecting images for Glyphotaelius pellucidus...
Glyphotaelius pellucidus : 5696 parseable occurrences fetched, will sample for 150
Downloading 150 images into the Glyphotaelius pellucidus folder...
Collecting

## Train and Validate the Model

Use the extracted images to train and validate a pytorch model

In [None]:
from bplusplus.train_validate import train_validate
import os

dataset_directory = os.path.join('data', 'dataset')
names = ["Nabis rugosus", "Forficula auricularia"]
output_path = os.path.join('data', 'output')

train_validate(groups=names, dataset_path=dataset_directory, output_directory=output_path)

FileNotFoundError: [Errno 2] No such file or directory: 'data/dataset/Nabis rugosus'