In [1]:
import os
from datetime import datetime
import fiftyone as fo
import fiftyone.utils.random as four
from ultralytics import YOLO

In [None]:

# Load the dataset from Hugging Face if it's your first time using it
# import fiftyone.utils.huggingface as fouh

# train_dataset = fouh.load_from_hub(
# "Voxel51/Coursera_lecture_dataset_train", 
# dataset_name="lecture_dataset_train", 
# persistent=True)


# test_dataset = fouh.load_from_hub(
# "Voxel51/Coursera_lecture_dataset_test", 
# dataset_name="lecture_dataset_test", 
# persistent=True)

In [None]:
# The datasets are already saved in the local FiftyOne database, so load them by name.
# train_dataset = fo.load_dataset("lecture_dataset_train")
# NOTE(review): "lectrure-train-clone" looks like a typo of "lecture-train-clone";
# it is left unchanged because it must match the name stored in the local database.
train_dataset = fo.load_dataset(name="lectrure-train-clone")

# Work on a clone of the test set. Cloning under a name that already exists
# raises an error, so reuse the existing clone when this cell is re-run.
if fo.dataset_exists("lecture-test-clone"):
    test_dataset = fo.load_dataset(name="lecture-test-clone")
else:
    test_dataset = fo.load_dataset(name="lecture_dataset_test").clone(
        name="lecture-test-clone"
    )

We'll first train a model. 

The code here isn't the star of the show, but I'll briefly describe what we're doing. After this lesson, we'll simply import this code from some helper file. This code defines a pipeline for training and evaluating a YOLO object detection model using the FiftyOne library. The main steps are:

1. Export dataset to YOLO format
2. Train YOLO model on the formatted dataset
3. Run inference on evaluation set
4. Evaluate model performance

These steps are implemented by the following functions:

1. `export_to_yolo_format()`: Converts a FiftyOne dataset to YOLO format, handling multiple data splits if specified.

2. `train_model()`: Splits the dataset, exports it to YOLO format, trains a YOLO model with the given configuration, and returns the best model.

3. `run_inference_on_eval_set()`: Applies the trained model to an evaluation dataset and saves the predictions.

4. `eval_model()`: Evaluates the model's performance on a dataset by computing detection metrics, including mean average precision (mAP).

5. `run()`: Orchestrates the entire process by training the model, running inference on the test set, and evaluating the results.

The `run()` function ties everything together, taking a training dataset, test dataset, and training configuration as inputs. It trains the model, runs inference on the test set, and returns the evaluation results.

In [None]:
def export_to_yolo_format(
    samples,
    classes,
    label_field="ground_truth",
    export_dir="./yolo_formatted",
    splits=("train", "val"),
):
    """
    Export samples to YOLOv5 format, issuing one export call per data split.

    Args:
        samples (fiftyone.core.collections.SampleCollection): The dataset or samples to export.
        classes (list): A list of class names for the YOLO format.
        label_field (str, optional): The field in the samples that contains the labels.
            Defaults to "ground_truth".
        export_dir (str, optional): The directory where the exported data will be saved.
            Defaults to "./yolo_formatted".
        splits (str, iterable of str, optional): The split(s) to export. Can be a single
            split name (str) or an iterable of split names. For each split, only the
            samples tagged with that split name are exported. As a special case, when
            the only split is "val" (or ``splits`` is None), all samples are exported
            as the "val" split regardless of their tags.
            Defaults to ("train", "val").

    Returns:
        None

    """
    # Normalize to a list so that a string, tuple, or list all behave the same.
    if splits is None:
        split_names = ["val"]
    elif isinstance(splits, str):
        split_names = [splits]
    else:
        split_names = list(splits)

    # A lone "val" split means "export everything as val" (no tag filtering).
    export_everything = split_names == ["val"]

    for split in split_names:
        split_view = samples if export_everything else samples.match_tags(split)

        split_view.export(
            export_dir=export_dir,
            dataset_type=fo.types.YOLOv5Dataset,
            label_field=label_field,
            classes=classes,
            split=split,
        )

You can learn more about converting dataset formats [here](https://docs.voxel51.com/recipes/convert_datasets.html).

### Train model


You can learn more about the hyperparameters for the Ultralytics model [here](https://docs.ultralytics.com/modes/train/#train-settings).

In [None]:
def train_model(training_dataset, training_config):
    """
    Train the YOLO model on the given dataset using the provided configuration.

    The dataset is randomly tagged into "train" and "val" splits, exported to
    YOLO format, and used to fine-tune the "yolov8m.pt" checkpoint.

    Args:
        training_dataset: The FiftyOne dataset to split, export, and train on.
            Its ``default_classes`` are used as the class list for the export.
        training_config (dict): Must contain "train_split" and "val_split"
            (split fractions) and "train_params" (keyword arguments forwarded to
            ``YOLO.train``). May contain "seed" to make the random split
            reproducible; when absent the split is unseeded, as before.

    Returns:
        YOLO: A model loaded from the "weights/best.pt" file in the training
        run's save directory.
    """
    split_fractions = {
        "train": training_config["train_split"],
        "val": training_config["val_split"],
    }

    # random_split() only adds tags, so clear split tags left by a previous run;
    # otherwise a sample could end up tagged (and exported) as both train and val.
    training_dataset.untag_samples(list(split_fractions))
    four.random_split(
        training_dataset,
        split_fractions,
        seed=training_config.get("seed"),
    )

    # Single source of truth for the export location and the YAML path below.
    # NOTE(review): files from earlier exports in this directory are not removed
    # here — confirm whether stale images can leak between splits across runs.
    export_dir = "./yolo_formatted"

    export_to_yolo_format(
        samples=training_dataset,
        classes=training_dataset.default_classes,
        export_dir=export_dir,
    )

    model = YOLO("yolov8m.pt")

    results = model.train(
        data=f"{export_dir}/dataset.yaml",
        **training_config["train_params"]
    )

    best_model_path = str(results.save_dir / "weights/best.pt")
    best_model = YOLO(best_model_path)

    return best_model

### Here are some recommendations for training YOLOv8m on images with small detections, similar looking objects, possibly mixed up labels, and a large number of detections per image:

1. Image size: Use a larger input image size to help with small object detection. Consider using `imgsz=1280` or even `1536` if your GPU memory allows.

2. Mosaic and scale augmentations: Enable strong mosaic and scale augmentations to help with small object detection and similar looking objects.

   ```python
   model.train(data=dataset.yaml, imgsz=1280, epochs=30, batch=16, 
               mosaic=1.0, scale=0.9)
   ```

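3. Anchor optimization: YOLOv8 is anchor-free, so there are no anchors to tune. Note that `overlap_mask` and `mask_ratio` below are segmentation-mask options in Ultralytics, so they mainly matter when training segmentation models: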

   ```python
   model.train(data=dataset.yaml, imgsz=1280, epochs=30, batch=16,
               overlap_mask=True, mask_ratio=4)
   ```

4. Learning rate: Use a lower initial learning rate (`lr0`) and a small final learning-rate fraction (`lrf`); add `cos_lr=True` if you want the cosine learning rate scheduler:

   ```python
   model.train(data=dataset.yaml, imgsz=1280, epochs=30, batch=16,
               lr0=0.001, lrf=0.01)
   ```

5. Regularization: To help with possibly mixed up labels, use label smoothing and increased weight decay:

   ```python
   model.train(data=dataset.yaml, imgsz=1280, epochs=30, batch=16,
               label_smoothing=0.1, weight_decay=0.0005)
   ```

6. Data augmentation: Use strong augmentations to help with similar looking objects:

   ```python
   model.train(data=dataset.yaml, imgsz=1280, epochs=30, batch=16,
               degrees=45, translate=0.2, scale=0.9, shear=10, 
               perspective=0.001, flipud=0.5, fliplr=0.5)
   ```

7. Focal loss: Consider using focal loss to help with class imbalance due to many detections per image:

   ```python
   model.train(data=dataset.yaml, imgsz=1280, epochs=30, batch=16,
               fl_gamma=1.5)
   ```

8. Mixed precision training: Enable AMP for faster training:

   ```python
   model.train(data=dataset.yaml, imgsz=1280, epochs=30, batch=16, amp=True)
   ```

9. Patience and epochs: Train for a longer time with patience for early stopping (note that early stopping can only trigger when `patience` is smaller than `epochs`):

   ```python
   model.train(data=dataset.yaml, imgsz=1280, epochs=30, batch=16,
               patience=50)
   ```

10. Multi-scale training: Enable multi-scale training to help with varying object sizes:

    ```python
    model.train(data=dataset.yaml, imgsz=1280, epochs=30, batch=16,
                multi_scale=True)
    ```

### I'm just going to combine these settings into a single training config, and I'll use the same settings throughout the course.

In [None]:
training_config = dict(
    # Fractions used to split the training dataset into train/val.
    train_split=0.9,
    val_split=0.1,

    # Keyword arguments forwarded to the model's train() call.
    train_params=dict(
        # Run length, batch size and input image size.
        # A single epoch keeps the demo quick; 30 was the alternative value.
        epochs=1,
        batch=16,
        imgsz=1280,
        # Learning rate and regularization settings.
        lr0=0.001,
        lrf=0.01,
        momentum=0.937,
        weight_decay=0.0005,
        warmup_epochs=3.0,
        warmup_momentum=0.8,
        warmup_bias_lr=0.1,
        box=7.5,
        cls=0.5,
        dfl=1.5,
        # Focal loss and label smoothing (recommendations 7 and 5 above).
        fl_gamma=1.5,
        label_smoothing=0.1,
        nbs=64,
        # Augmentation settings.
        hsv_h=0.015,
        hsv_s=0.7,
        hsv_v=0.4,
        degrees=45,
        translate=0.2,
        scale=0.9,
        shear=10,
        perspective=0.001,
        flipud=0.5,
        fliplr=0.5,
        mosaic=1.0,
        mixup=0.1,
        copy_paste=0.1,
        # Mixed precision and multi-scale training.
        amp=True,
        multi_scale=True,
        overlap_mask=True,
        mask_ratio=4,
        # Early-stopping patience.
        patience=50,
    ),
)

Visit [the docs](https://docs.voxel51.com/api/fiftyone.core.dataset.html#fiftyone.core.dataset.Dataset.apply_model) for more detail on the `apply_model` function.

In [None]:
def run_inference_on_eval_set(eval_dataset, best_model, label_field="predictions"):
    """
    Run inference on the evaluation set using the best trained model.

    Args:
        eval_dataset (fiftyone.core.dataset.Dataset): The evaluation dataset.
            It is modified in place: predictions are written to ``label_field``.
        best_model (YOLO): The best trained YOLO model.
        label_field (str, optional): The name of the field in which the model's
            predictions are stored. Defaults to "predictions".

    Returns:
        The same ``eval_dataset`` object, now holding the predictions.
    """
    eval_dataset.apply_model(best_model, label_field=label_field)
    eval_dataset.save()
    return eval_dataset

You can read more about the `evaluate_detections` method [in the docs](https://docs.voxel51.com/api/fiftyone.core.dataset.html#fiftyone.core.dataset.Dataset.evaluate_detections), and check out [this tutorial](https://docs.voxel51.com/tutorials/evaluate_detections.html) for a different perspective on evaluations.

In [None]:
def eval_model(dataset_to_evaluate, gt_field="ground_truth", pred_field="predictions"):
    """
    Evaluate the model's predictions against the ground truth of a dataset.

    Args:
        dataset_to_evaluate (fiftyone.core.dataset.Dataset): The evaluation dataset,
            already populated with predictions.
        gt_field (str, optional): The field holding the ground-truth labels.
            Defaults to "ground_truth".
        pred_field (str, optional): The field holding the predicted labels.
            Defaults to "predictions".

    Returns:
        The results object returned by ``evaluate_detections`` (computed with
        ``compute_mAP=True``), not a bare mAP number.
    """
    # Timestamped key so every evaluation run is stored under its own eval_key.
    current_datetime = datetime.now().strftime("%Y%m%d_%H%M%S")

    detection_results = dataset_to_evaluate.evaluate_detections(
        gt_field=gt_field,
        pred_field=pred_field,
        eval_key=f"evalrun_{current_datetime}",
        compute_mAP=True,
    )

    return detection_results

In [3]:
def run(train_dataset, test_dataset, training_config):
    """
    Train a model, run it on the test set, and evaluate the predictions.

    Args:
        train_dataset: The dataset handed to ``train_model``.
        test_dataset: The dataset handed to ``run_inference_on_eval_set``.
        training_config: The configuration handed to ``train_model``.

    Returns:
        The evaluation results produced by ``eval_model`` for the test
        dataset with predictions.
    """
    # Step 1: fit the model on the training data.
    trained_model = train_model(
        training_dataset=train_dataset,
        training_config=training_config,
    )

    # Step 2: attach the trained model's predictions to the test data.
    dataset_with_predictions = run_inference_on_eval_set(
        eval_dataset=test_dataset,
        best_model=trained_model,
    )

    # Step 3: score the predictions and hand the results back to the caller.
    return eval_model(dataset_to_evaluate=dataset_with_predictions)