# Build a Classifier from GBIF

Fill out the search parameters using the keys listed in GBIF's documentation: https://pygbif.readthedocs.io/en/latest/modules/
occurrence.html#pygbif.occurrences.search

Select which group to train on (currently only `scientificName` is supported).

In [1]:
import os
import bplusplus
from typing import Any

names = [
    "Nabis rugosus", 
    "Forficula auricularia",
    "Calosoma inquisitor",
    "Bombus veteranus",
    "Glyphotaelius pellucidus",
    "Notoxus monoceros",
    "Cacoxenus indagator",
    "Chorthippus mollis",
    "Trioza remota"
]

search: dict[str, Any] = {
    "scientificName": names,
    "country": ["US", "NL"]
}

bplusplus.build_model(
    group_by_key=bplusplus.Group.scientificName,
    search_parameters=search, images_per_group=150,
    model_output_folder=os.path.join('model')
)

Temporary directory path: /var/folders/_c/x01r40x56pd9c88tgbz2hdqr0000gn/T/tmpsht614mw
Creating folders for images...
Beginning to collect images from GBIF...
Collecting images for Nabis rugosus...
Nabis rugosus : 280 parseable occurrences fetched, will sample for 150
Downloading 150 images into the Nabis rugosus folder...
Collecting images for Forficula auricularia...
Forficula auricularia : 2980 parseable occurrences fetched, will sample for 150
Downloading 150 images into the Forficula auricularia folder...
Cleaned up temporary directory: /var/folders/_c/x01r40x56pd9c88tgbz2hdqr0000gn/T/tmpsht614mw


KeyboardInterrupt: 

# Run Model

Now you can run the model

In [None]:
from ultralytics import YOLO

# Load a model
path_to_model = "../runs/classify/train5/weights/best.pt"
model = YOLO(path_to_model)  # load a custom model

# Predict with the model
path_to_image1 = "data/nabis_test.jpg"
path_to_image2 = "data/forficula_test.jpeg"
results = model(path_to_image1, save_txt = True)  # predict on an image
print(results)




  return torch.load(file, map_location='cpu'), file  # load

image 1/1 /Users/deniz/Build/scl/Bplusplus/notebooks/data/nabis_test.jpg: 224x224 Forficula auricularia 0.65, Nabis rugosus 0.35, 8.9ms
Speed: 0.6ms preprocess, 8.9ms inference, 0.0ms postprocess per image at shape (1, 3, 224, 224)
Results saved to [1m/Users/deniz/Build/scl/Bplusplus/runs/classify/predict8[0m
1 label saved to /Users/deniz/Build/scl/Bplusplus/runs/classify/predict8/labels


[ultralytics.engine.results.Results object with attributes:

boxes: None
keypoints: None
masks: None
names: {0: 'Forficula auricularia', 1: 'Nabis rugosus'}
orig_img: array([[[ 16, 105,  56],
        [ 15, 104,  55],
        [ 14, 103,  54],
        ...,
        [  2,  19,  10],
        [  3,  20,  11],
        [  3,  20,  11]],

       [[ 16, 105,  56],
        [ 15, 104,  55],
        [ 15, 104,  55],
        ...,
        [  2,  19,  10],
        [  2,  19,  10],
        [  2,  19,  10]],

       [[ 15, 104,  55],
        [ 15, 104,  55],
        [ 15, 104,  55],
        ...,
        [  1,  18,   9],
        [  1,  18,   9],
        [  1,  18,   9]],

       ...,

       [[  1,   9,   2],
        [  1,   9,   2],
        [  1,   9,   2],
        ...,
        [ 32, 102,  55],
        [ 34, 103,  53],
        [ 33, 102,  52]],

       [[  2,  10,   3],
        [  2,  10,   3],
        [  2,  10,   3],
        ...,
        [ 34, 102,  55],
        [ 35, 101,  52],
        [ 35, 101,  52

# More Advanced Uses

## Collect images from GBIF into folders

Download the images directly using the search parameters, and images will be grouped into folders according to the `group_by_key`

In [None]:
import os
from typing import Any

directory_path = os.path.join('dataset')
names = [
    "Nabis rugosus", 
    "Forficula auricularia",
    "Calosoma inquisitor",
    "Bombus veteranus",
    "Glyphotaelius pellucidus",
    "Notoxus monoceros",
    "Cacoxenus indagator",
    "Chorthippus mollis",
    "Trioza remota"
]

import bplusplus

search: dict[str, Any] = { 
    "scientificName":  names, 
    "country": ["US", "NL"]
}

bplusplus.collect_images(group_by_key=bplusplus.Group.scientificName, search_parameters=search, images_per_group=150, output_directory=directory_path)

Creating folders for images...
Beginning to collect images from GBIF...
Collecting images for Nabis rugosus...
Nabis rugosus : 280 parseable occurrences fetched, will sample for 150
Downloading 150 images into the Nabis rugosus folder...
Collecting images for Forficula auricularia...
Forficula auricularia : 2980 parseable occurrences fetched, will sample for 150
Downloading 150 images into the Forficula auricularia folder...
Collecting images for Calosoma inquisitor...
Calosoma inquisitor : 2629 parseable occurrences fetched, will sample for 150
Downloading 150 images into the Calosoma inquisitor folder...
Collecting images for Bombus veteranus...
Bombus veteranus : 1075 parseable occurrences fetched, will sample for 150
Downloading 150 images into the Bombus veteranus folder...
Collecting images for Glyphotaelius pellucidus...
Glyphotaelius pellucidus : 5696 parseable occurrences fetched, will sample for 150
Downloading 150 images into the Glyphotaelius pellucidus folder...
Collecting

## Train and Validate the Model

Use the extracted images to train and validate a pytorch model

In [None]:
from bplusplus.train_validate import train_validate
import os

dataset_directory = os.path.join('data', 'dataset')
names = ["Nabis rugosus", "Forficula auricularia"]
output_path = os.path.join('data', 'output')

train_validate(groups=names, dataset_path=dataset_directory, output_directory=output_path)

FileNotFoundError: [Errno 2] No such file or directory: 'data/dataset/Nabis rugosus'