# create_new_dataset.ipynb

This notebook implements my workflow for fine tuning a YOLOv8 object detection model which
detects coconut rhinoceros beetle damage in coconut palms.

# Installation

Clone the repo
```
git clone https://github.com/aubreymoore/CRB-Damage-Dataset-Improvement
```

Move to the new folder
```
cd CRB-Damage-Dataset-Improvement
```

Create a virtual environment
```
python3 -m venv .venv
```

Activate the new virtual environment
```
source .venv/bin/activate
```

Install required python modules
```
pip install -r code/requirements.txt
```

Create a .gitignore file and add .venv to the list of files and folders to be ignored.
Adding a virtual environment to a repository is bad practice.
```
echo ".venv" >> .gitignore
```

# References

https://pybit.es/articles/a-better-place-to-put-your-python-virtual-environments/

[Image Deduplication](https://github.com/voxel51/fiftyone-examples/blob/master/examples/image_deduplication.ipynb)

[CVAT <> FiftyOne: Data-Centric Machine Learning with Two Open Source Tools](https://www.cvat.ai/post/data-centric)

[FiftyOne - Ultralytics Integration](https://docs.voxel51.com/integrations/ultralytics.html)

[Finding Detection Mistakes with FiftyOne](https://docs.voxel51.com/tutorials/detection_mistakes.html)

[Fine-tune YOLOv8 models for custom use cases with the help of FiftyOne](https://docs.voxel51.com/tutorials/yolov8.html)

[FiftyOne Brain](https://docs.voxel51.com/brain.html)

[Tracking Datasets in FiftyOne](https://voxel51.com/blog/tracking-datasets-in-fiftyone/)

In [1]:
import os
import shutil
import glob
import fiftyone as fo
import fiftyone.brain as fob
import fiftyone.zoo as foz
from fiftyone import ViewField as F
import logging
import sys
from icecream import ic
from datetime import datetime
import numpy as np
from numpy.linalg import norm
from ultralytics import YOLO
import glob
import ipywidgets as widgets
from time import sleep

In [2]:
def create_timestamp_for_filename():
    """
    Build a timestamp string suitable for use in a new file or folder name.

    The current local time is rendered with the pattern '%Y%m%d_%H%M',
    for example '20241129_1648'.
    """
    timestamp_format = '%Y%m%d_%H%M'
    now = datetime.now()
    return now.strftime(timestamp_format)

# create_timestamp_for_filename()

In [3]:
def add_timestamp_field():
    """
    Add a 'timestamp' DateTimeField to every sample of the global ``dataset``.

    The timestamp is parsed from each sample's file name: the first four and
    the last four characters of the base name are dropped and the remainder
    is parsed with '%Y%m%d_%H%M%S' (presumably names look like
    'IMG_20241129_164800.jpg' - confirm against the image files).

    After all samples are updated, a saved view named 'sorted_by_timestamp'
    is created (or overwritten) with the samples ordered by that field.

    Raises:
        ValueError: from ``datetime.strptime`` when a file name does not
            contain a timestamp in the expected position and format.
    """
    dataset.add_sample_field("timestamp", fo.DateTimeField)

    for sample in dataset:
        timestamp_str = os.path.basename(sample.filepath)[4:-4]
        sample['timestamp'] = datetime.strptime(timestamp_str, '%Y%m%d_%H%M%S')
        sample.save()

    # Sort by field name. The previous spelling, F'timestamp', was an f-string
    # literal that evaluated to the same plain string but read like a call to
    # the ViewField alias ``F``.
    view = dataset.sort_by('timestamp')
    dataset.save_view('sorted_by_timestamp', view, overwrite=True)

In [4]:
def update_requirements_file():
    """
    Write the currently installed packages to 'requirements.txt'.

    Runs ``pip list --format=freeze`` through the shell and redirects its
    output to 'requirements.txt' in the current working directory,
    overwriting any existing file.
    """
    freeze_command = 'pip list --format=freeze > requirements.txt'
    os.system(freeze_command)

# update_requirements_file()

In [5]:
def create_new_dataset(NONSTANDARD_DATASET_PATH, YOLO_DATASET_PATH):
    """
    Copy a dataset with images and labels side by side into a YOLO layout.

    Arguments:
        NONSTANDARD_DATASET_PATH - source folder containing 'train' and 'val'
            subfolders, each holding *.jpg images and *.txt label files
        YOLO_DATASET_PATH - destination folder; it must not exist yet and its
            parent must exist, because every level is made with os.mkdir

    The destination receives images/{train,val}, labels/{train,val} and a
    'dataset.yaml' file listing the classes live, dead and vcut.
    """
    # Parents are created before children: os.mkdir makes one level at a time.
    os.mkdir(YOLO_DATASET_PATH)
    for kind in ('images', 'labels'):
        os.mkdir(f'{YOLO_DATASET_PATH}/{kind}')
        for split in ('train', 'val'):
            os.mkdir(f'{YOLO_DATASET_PATH}/{kind}/{split}')

    # Route each file type of each split to its matching destination folder.
    for split in ('train', 'val'):
        for extension, kind in (('jpg', 'images'), ('txt', 'labels')):
            destination = f'{YOLO_DATASET_PATH}/{kind}/{split}'
            pattern = f'{NONSTANDARD_DATASET_PATH}/{split}/*.{extension}'
            for source_file in glob.glob(pattern):
                shutil.copy2(source_file, destination)

    yaml_lines = [
        f'path: {YOLO_DATASET_PATH} \n',
        'train: ./images/train/ \n',
        'val: ./images/val/ \n',
        'names: \n',
        '  0: live \n',
        '  1: dead \n',
        '  2: vcut \n',
    ]
    with open(f'{YOLO_DATASET_PATH}/dataset.yaml', 'w') as yaml_file:
        yaml_file.write(''.join(yaml_lines))

In [6]:
def yolo2fiftyone(name, dataset_dir, splits=("train", "val")):
    """ 
    Imports a dataset in YOLO5 format into FiftyOne, using tags to mark the samples in each split
    
    Arguments:
        name - name of FiftyOne dataset to be created 
        dataset_dir - path to dataset in YOLOv5 format
        splits - iterable of split names to be included; each split is
            imported with add_dir() and its samples are tagged with the
            split name

    Returns:
        the newly created persistent FiftyOne dataset
    """ 
    # The default for ``splits`` is a tuple rather than a list so that the
    # default object shared between calls cannot be mutated.
    dataset = fo.Dataset(name, persistent=True)
    for split in splits:
        dataset.add_dir(
            dataset_dir=dataset_dir,
            dataset_type=fo.types.YOLOv5Dataset,
            split=split,
            tags=split,
        )
    return dataset

# yolo2fiftyone(FO_DATASET_NAME, YOLO_DATASET_PATH)

In [7]:
def add_embeddings_field():
    """
    Compute an embedding for every sample of the global ``dataset``.

    The 'mobilenet-v2-imagenet-torch' model is loaded from the FiftyOne zoo
    and its output is stored in the 'embeddings' field.
    """
    embedding_model = foz.load_zoo_model("mobilenet-v2-imagenet-torch")
    dataset.compute_embeddings(
        model=embedding_model,
        embeddings_field='embeddings',
    )
    
# add_embeddings_field()

In [8]:
def cosine_similarity(a, b):
    """
    Return the cosine similarity of vectors ``a`` and ``b``.

    Computed as the dot product of the two vectors divided by the product
    of their norms.
    """
    dot_product = np.dot(a, b)
    norm_product = norm(a) * norm(b)
    return dot_product / norm_product
 
# a = np.array([2,1,2,3,2,9])
# b = np.array([3,4,2,4,5,5])
# cosine_similarity(a, b)

In [9]:
def add_similarity_with_prev_img_field():
    """
    Store, on each sample, how similar it is to the previous image.

    Samples of the global ``dataset`` are visited through the saved view
    'sorted_by_timestamp'. For every sample the cosine similarity between
    its 'embeddings' and those of the preceding sample is written to the
    'similarity_with_prev_img' field. The first sample has no predecessor
    and is given 0.0.
    """
    view = dataset.load_saved_view('sorted_by_timestamp')
    previous_embeddings = None
    for position, sample in enumerate(view):
        current_embeddings = sample.embeddings
        if position == 0:
            similarity = 0.0
        else:
            similarity = cosine_similarity(previous_embeddings, current_embeddings)
        sample['similarity_with_prev_img'] = similarity
        sample.save()
        # Remember this sample's embeddings for the next comparison.
        previous_embeddings = current_embeddings

In [10]:
def add_predictions_field():
    """
    Apply a YOLO model to the global ``dataset``.

    The model is loaded from the weights file named by the global
    ``WEIGHTS`` and its predictions are stored in the 'yolov8' label field.
    """
    detector = YOLO(WEIGHTS)
    dataset.apply_model(detector, label_field="yolov8")
    
# add_predictions_field()

In [11]:
def add_mistakenness_field():
    """
    Run FiftyOne Brain's mistakenness computation on the global ``dataset``.

    Predictions are read from the 'yolov8' field and compared with the
    'ground_truth' labels. According to the docs at
    https://docs.voxel51.com/brain.html#label-mistakes this adds the
    mistakenness, possible_missing and possible_spurious fields.
    """
    prediction_field = "yolov8"
    fob.compute_mistakenness(dataset, prediction_field, label_field="ground_truth")
    
# add_mistakenness_field() 

In [12]:
def add_field(fieldname, func):
    """
    Add a field to the global ``dataset`` only if it is not there yet.

    Arguments:
        fieldname - name of the field to look up with dataset.get_field()
        func - zero-argument callable that creates the field; it is called
            only when the lookup returns a falsy value

    Either outcome is reported through the global ``logger``.
    """
    if not dataset.get_field(fieldname):
        logger.info(f'Adding "{fieldname}" field')
        func()
        return
    logger.info(f'"{fieldname}" field already exists')

# def add_new_field():
#     """ 
#     Code for adding a field named 'new' should be inserted in this function.
#     """
#     pass
    
# add_field('new', add_new_field)

In [13]:
def create_autocorrelated_images_view(threshold, delete=False):
    """
    Save a view of images that are very similar to the image before them.

    Arguments:
        threshold - samples whose 'similarity_with_prev_img' value is greater
            than this are selected
        delete - when True, the selected samples are deleted from the dataset

    The dataset named by the global ``FO_DATASET_NAME`` is loaded, the
    selection is taken from its saved view 'sorted_by_timestamp' and stored
    as the saved view 'autocorrelated_images_view'.

    Returns:
        the value of view.count() for the selection, taken before any deletion
    """
    dataset = fo.load_dataset(FO_DATASET_NAME)
    time_ordered = dataset.load_saved_view('sorted_by_timestamp')
    too_similar = F('similarity_with_prev_img') > threshold
    view = time_ordered.match(too_similar)
    dataset.save_view("autocorrelated_images_view", view, overwrite=True)
    count = view.count()

    if not delete:
        return count

    dataset.delete_samples(view)
    dataset.save()
    return count
  
# create_autocorrelated_images_view(0.98, True)

In [14]:
def count_ground_truth_bbs(dataset):
    """
    Count the ground truth bounding boxes in a dataset.

    Arguments:
        dataset - iterable of samples, each with a ``ground_truth`` attribute
            that is either None or an object holding a ``detections`` list

    Returns:
        total number of detections over all samples; samples without a
        ground truth label contribute zero
    """
    total_detections = 0
    for sample in dataset:
        ground_truth = sample.ground_truth
        # A sample with no label has ground_truth set to None; reading
        # .detections on it would raise AttributeError.
        if ground_truth is None:
            continue
        total_detections += len(ground_truth.detections or [])
    return total_detections

# count_ground_truth_bbs()

In [15]:
def create_bb_touching_edge_view(delete=False):
    """ 
    Find, tag and optionally delete ground truth bounding boxes which are touching
    or almost touching the left, top or right edge of the image.    

    The dataset named by the global ``FO_DATASET_NAME`` is loaded. Matching
    labels are tagged 'bb touching edge' and the filtered view is stored as
    the saved view 'bb_touching_edge'.

    Arguments:
        delete - when True, all labels tagged 'bb touching edge' are deleted

    Returns:
        the value of view.count() for the filtered view.
        NOTE(review): with no field argument this presumably counts matching
        samples (images) rather than individual boxes - confirm before
        reporting it as a bounding box count.

    Reference:
        https://docs.voxel51.com/recipes/remove_duplicate_annos.html
    """
    dataset = fo.load_dataset(FO_DATASET_NAME)
    # FiftyOne bounding boxes are [top-left-x, top-left-y, width, height] in
    # relative coordinates, so the right edge is x + width (index 2). The
    # earlier version added index 3 (the height) by mistake.
    view = dataset.filter_labels('ground_truth', 
        (F('bounding_box')[0] <= 0.001) | # left
        (F('bounding_box')[1] <= 0.001) | # top
        ((F('bounding_box')[0] + F('bounding_box')[2]) >= 0.999) # right
    )
    count = view.count()
    view.tag_labels('bb touching edge')   
    dataset.save_view('bb_touching_edge', view, overwrite=True) 
            
    if delete:
        dataset.delete_labels(tags="bb touching edge")
         
    dataset.save()
            
    return count

# create_bb_touching_edge_view(delete=True)

In [16]:
def remove_unannotated_images(yolo5_dataset_path: str) -> int:
    """ 
    Removes unannotated images from a YOLO5 data set.

    Every empty *.txt file found anywhere below the dataset folder is treated
    as the label file of an unannotated image. The label file is deleted
    together with the matching *.jpg, which is looked up in the same relative
    location with each 'labels' directory replaced by 'images'.

    Arguments:
        yolo5_dataset_path -- absolute path for the YOLO5 dataset
    Returns:
        count -- number of empty annotation files removed (each together with
                 its *.jpg image when that image exists)
    """ 
    search_str = f'{yolo5_dataset_path}/**/*.txt'
    count = 0
    for txt_path in glob.glob(search_str, recursive=True):
        if os.path.getsize(txt_path) != 0:
            continue

        # Only path components below the dataset root are rewritten. A plain
        # str.replace() on the whole path would also alter a dataset folder
        # or file name that merely contains 'labels' or '.txt'.
        label_dir, label_name = os.path.split(txt_path)
        relative_dir = os.path.relpath(label_dir, yolo5_dataset_path)
        image_dir = os.sep.join(
            'images' if part == 'labels' else part
            for part in relative_dir.split(os.sep)
        )
        image_name = os.path.splitext(label_name)[0] + '.jpg'
        img_path = os.path.join(yolo5_dataset_path, image_dir, image_name)

        os.remove(txt_path)
        # A missing image must not abort the clean-up half way through.
        if os.path.exists(img_path):
            os.remove(img_path)
        count += 1
    return count

# remove_unannotated_images(
#     yolo5_dataset_path='/home/aubrey/myexport')

In [17]:
def export_51_to_YOLO(dataset_name: str, 
                      export_dir: str, 
                      remove_unannotated: bool) -> int:
    """
    Export a FiftyOne (51) dataset to YOLO5 format.

    Samples are expected to carry a 'train' or 'val' sample tag; each tag is
    exported as the split of the same name, using the 'ground_truth' label
    field and the class list live, dead, vcut.

    Arguments:
    dataset_name -- a saved (persistent) 51 dataset (the dataset to be exported)
    export_dir -- absolute destination path for the YOLO5 dataset. Should be given a new dataset name.
    remove_unannotated -- if True, unannotated images (images containing no detected objects) are removed from the new YOLO5 dataset

    Returns:
    the count reported by remove_unannotated_images(), or 0 when
    remove_unannotated is False

    Reference https://docs.voxel51.com/user_guide/export_datasets.html#yolov5dataset
    """
    # Every split is exported with the same class list.
    classes = ["live", "dead", "vcut"]
    source_dataset = fo.load_dataset(dataset_name)

    for split in ["train", "val"]:
        # Sample tags encode which split a sample belongs to.
        tagged_samples = source_dataset.match_tags(split)
        tagged_samples.export(
            export_dir=export_dir,
            dataset_type=fo.types.YOLOv5Dataset,
            label_field="ground_truth",
            split=split,
            classes=classes,
        )

    if not remove_unannotated:
        return 0
    return remove_unannotated_images(yolo5_dataset_path=export_dir)

# export_51_to_YOLO(
#     dataset_name='Guam07v4', 
#     export_dir='/home/aubrey/crb_damage_detector_data/datasets/Guam07v5', 
#     remove_unannotated=True)

In [18]:
def train_model(yaml_path):
    """
    Train a YOLO model on the dataset described by ``yaml_path``.

    A model is created with YOLO() (no weights argument) and trained with a
    fixed set of options. The training results are not returned.

    Arguments:
        yaml_path - path to the dataset YAML file, passed to train() as data

    NOTE(review): 'yolo11n.pt' is passed as ``name``, which in Ultralytics is
    presumably the run name rather than a weights file - confirm this is
    what was intended.
    """
    training_options = {
        'data': yaml_path,
        'imgsz': 1920,
        'rect': True,
        'epochs': 1000,
        'batch': -1,
        'patience': 50,
        'name': 'yolo11n.pt',
    }
    YOLO().train(**training_options)

# train_model()

In [19]:
def launch_cvat(anno_key_suffix: str, view) -> str:
    """
    Send a FiftyOne view for annotation and open the annotation editor.

    Arguments:
    anno_key_suffix - text placed at the start of the annotation key
    view - the view whose 'ground_truth' labels are to be annotated

    Result:
    anno_key - the key passed to view.annotate(), built as
               '<anno_key_suffix>_<YYYYmmddHHMM>', e.g. 'myview_202411271657'
    """
    created_at = datetime.now().strftime('%Y%m%d%H%M')
    anno_key = '_'.join([anno_key_suffix, created_at])
    annotate_options = {
        'anno_key': anno_key,
        'label_field': "ground_truth",
        'launch_editor': True,
    }
    view.annotate(**annotate_options)
    return anno_key
    
# random_dozen_view = dataset.take(12)
# launch_cvat('random_dozen', random_dozen_view)


In [20]:
def configure_logger(LOGFILE):
    """
    Set up the root logger to write to both the notebook and a log file.

    Existing root handlers are discarded first so that running this again
    does not produce duplicate output. Messages at INFO level and above are
    formatted as '<YYYY-mm-dd HH:MM:SS> <message>'.

    Arguments:
        LOGFILE - path of the log file

    Returns:
        the root logger
    """
    logging.root.handlers = []
    file_handler = logging.FileHandler(filename=LOGFILE)
    notebook_handler = logging.StreamHandler(sys.stdout)
    logging.basicConfig(
        handlers=[file_handler, notebook_handler],
        datefmt='%Y-%m-%d %H:%M:%S',
        format='%(asctime)s %(message)s',
        level=logging.INFO,
    )
    return logging.getLogger()

In [21]:
def get_latest_weights_path():
    """
    Return the path of 'weights/best.pt' inside the most recent run folder.

    Run folders are looked up under '<DATAPATH>/runs' and must be named with
    a timestamp of eight digits, an underscore and four digits. The name
    that sorts last is taken as the latest run.

    Raises IndexError when no such folder exists.
    """
    timestamp_glob = '[0-9]' * 8 + '_' + '[0-9]' * 4
    run_dirs = glob.glob(f'{DATAPATH}/runs/{timestamp_glob}')
    run_dirs.sort(reverse=True)
    newest_run = run_dirs[0]
    return f'{newest_run}/weights/best.pt'

In [22]:
def get_latest_yaml_path():
    """
    Return the path of 'dataset.yaml' inside the most recent dataset folder.

    Dataset folders are looked up under '<DATAPATH>/datasets' and must be
    named with a timestamp of eight digits, an underscore and four digits.
    The name that sorts last is taken as the latest dataset.

    Raises IndexError when no such folder exists.
    """
    timestamp_glob = '[0-9]' * 8 + '_' + '[0-9]' * 4
    dataset_dirs = glob.glob(f'{DATAPATH}/datasets/{timestamp_glob}')
    dataset_dirs.sort(reverse=True)
    newest_dataset = dataset_dirs[0]
    return f'{newest_dataset}/dataset.yaml'

In [23]:
def get_latest_51_dataset():
    """ 
    Returns latest FiftyOne dataset which is named with a time stamp like '20241203_1148'

    Only names consisting of exactly eight digits, an underscore and four
    digits are considered; the name that sorts last is returned.

    Raises:
        IndexError: when no FiftyOne dataset has a time stamp name
    """
    import re

    # The earlier character-slice test looked at only 7 and 3 characters,
    # never called .isnumeric on the second part and raised IndexError on
    # names shorter than nine characters.
    timestamp_name = re.compile(r'[0-9]{8}_[0-9]{4}')
    datasets = [ds for ds in fo.list_datasets() if timestamp_name.fullmatch(ds)]
    if not datasets:
        raise IndexError('no FiftyOne dataset with a time stamp name was found')
    return max(datasets)

# get_latest_51_dataset()

In [24]:
def retrain_model():
    """ 
    Continue training from the latest weights on the latest dataset.

    The weights file and dataset YAML are located with
    get_latest_weights_path() and get_latest_yaml_path(). The earlier version
    read the names ``latest_weights_path`` and ``latest_yaml_path``, which
    are not defined at module level.

    The run is named with a new time stamp from
    create_timestamp_for_filename().

    NOTE(review): ``save_dir`` is passed to train() unchanged from the
    earlier version; confirm that the installed Ultralytics release accepts
    this argument.
    """
    weights_path = get_latest_weights_path()
    yaml_path = get_latest_yaml_path()
    model = YOLO(weights_path)
    model.train(
        data=yaml_path,
        imgsz=1920,
        rect=True,
        epochs=1000,
        batch=-1,
        patience=200,
        save_dir=f'{DATAPATH}/runs',
        name=create_timestamp_for_filename()
    )

In [25]:
def delete_tagged_bounding_boxes(dataset_name):
    """
    Delete every label (bounding box) carrying the tag 'delete'.

    Arguments:
        dataset_name - name of the FiftyOne dataset to load and modify

    Returns:
        a (before, after) tuple holding the results of
        dataset.count_label_tags() before and after the deletion
    """
    target = fo.load_dataset(dataset_name)
    tags_before = target.count_label_tags()
    target.delete_labels(tags='delete')
    tags_after = target.count_label_tags()
    target.save()
    return (tags_before, tags_after)

# Runs whenever this cell is executed: labels tagged 'delete' are removed from
# the most recent time-stamp-named FiftyOne dataset, and the label tag counts
# before and after the deletion are shown as the cell output.
dataset_name = get_latest_51_dataset()    
delete_tagged_bounding_boxes(dataset_name)

({}, {})

```
if YOLO_DATASET_PATH exists:
    continue
else:
    create_new_dataset(NONSTANDARD_DATASET_PATH, YOLO_DATASET_PATH)

if FO_DATASET_NAME exists:
    continue
else:
    yolo2fiftyone(name=FO_DATASET_NAME, dataset_dir=YOLO_DATASET_PATH)
    
dataset = fo.load_dataset(FO_DATASET_NAME)

# Add sample fields if they don't already exist
add_field('timestamp', add_timestamp_field)
add_field('embeddings', add_embeddings_field)
add_field('similarity_with_prev_img', add_similarity_with_prev_img_field)
add_field('yolov8', add_predictions_field)
add_field('mistakenness', add_mistakenness_field)

if 'bb_touching_edge' in dataset.list_saved_views():
    continue
else:
    create_bb_touching_edge_view(DELETE_BBS_TOUCHING_EDGES)

if 'autocorrelated_images_view' in dataset.list_saved_views():
    continue
else:
    create_autocorrelated_images_view(AUTOCORRELATED_IMAGES_THRESHOLD, DELETE_AUTOCORRELATED_IMAGES)

if RETRAIN_MODEL:
    export_51_to_YOLO()
    train_model()
else:
    continue

if LAUNCH_51:
    dataset = fo.load_dataset(FO_DATASET_NAME)
    session = fo.launch_app(dataset, auto=False)
```

In [26]:
# # MAIN

# # Start of constants #############################################################################

# DATAPATH = '/home/aubrey/crb_damage_detector_data'

# # path to dataset nonstandard format (images and labels in same folders). Not normally used.
# NONSTANDARD_DATASET_PATH = None

# # path to latest dataset in YOLOv5 format
# YOLO_DATASET_PATH = '/home/aubrey/crb_damage_detector_data/datasets/20241128_1648'

# # path to latest weights file
# WEIGHTS = '/home/aubrey/crb_damage_detector_data/runs/20241128_1657/weights/best.pt'

# # name of FiftyOne dataset
# FO_DATASET_NAME = '20241128_1648'

# # file name for log file saved in the same folder as this notebook
# LOGFILE = f'{FO_DATASET_NAME}.log'

# # Arguments for create_autocorrelated_images_view function.
# AUTOCORRELATED_IMAGES_THRESHOLD = 0.98
# DELETE_AUTOCORRELATED_IMAGES = True

# # Argument for create_bb_touching_edge_view function
# DELETE_BBS_TOUCHING_EDGES = True

# # Option to retrain model. Usually FALSE.
# RETRAIN_MODEL = False

# # Option to launch the FiftyOne app in a browser at end of workflow. Usually True.
# LAUNCH_51 = True

# # End of constants ########################################################################

# #configure logger
# logger = configure_logger(LOGFILE)

# # update requirements.txt
# logger.info('Updating "requirements.txt"')
# update_requirements_file()

# # # wrangle dataset into YOLOv5 format
# # if os.path.exists(YOLO_DATASET_PATH):
# #     logger.info(f'"{YOLO_DATASET_PATH}" already exists in YOLOv5 format')
# # else:
# #     logger.info(f'creating dataset "{YOLO_DATASET_PATH}" in YOLOv5 format')
# #     create_new_dataset(NONSTANDARD_DATASET_PATH, YOLO_DATASET_PATH)

# # Create new FiftyOne dataset
# if FO_DATASET_NAME in fo.list_datasets():
#     logger.info(f'FiftyOne dataset "{FO_DATASET_NAME}" already exists') 
# else:
#     logger.info(f'Creating FiftyOne dataset "{FO_DATASET_NAME}"')
#     dataset = yolo2fiftyone(name=FO_DATASET_NAME, dataset_dir=YOLO_DATASET_PATH)
    
# # Load dataset
# logger.info(f'Loading FiftyOne dataset "{FO_DATASET_NAME}"')
# dataset = fo.load_dataset(FO_DATASET_NAME)
# logger.info(f'    Ground truth bounding boxes: {count_ground_truth_bbs(dataset)}')

# # Add fields if they don't already exist
# add_field('timestamp', add_timestamp_field)
# add_field('embeddings', add_embeddings_field)
# add_field('similarity_with_prev_img', add_similarity_with_prev_img_field)
# add_field('yolov8', add_predictions_field)
# add_field('mistakenness', add_mistakenness_field)

# # Find bounding boxes touching left, top or right edges of images
# if 'bb_touching_edge' in dataset.list_saved_views():
#     logger.info('"bb_touching_edge_view" already exists')
# else:
#     logger.info('Creating "bb_touching_edge_view"')
#     if DELETE_BBS_TOUCHING_EDGES:
#         logger.info('    "DELETE_BBS_TOUCHING_EDGES" is True; bbs will be deleted')
#     else:
#         logger.info('    "DELETE_BBS_TOUCHING_EDGES" is False; bbs will not be deleted')
#     bb_touching_edge_count = create_bb_touching_edge_view(DELETE_BBS_TOUCHING_EDGES)
#     logger.info(f'    {bb_touching_edge_count} ground truth bounding boxes touching image edges were found')

# # Find autocorrelated images
# if 'autocorrelated_images_view' in dataset.list_saved_views():
#     logger.info('"autocorrelated_images_view" already exists')
# else:
#     logger.info('Creating "autocorrelated_images_view"')
#     if DELETE_AUTOCORRELATED_IMAGES:
#         logger.info('    "DELETE_AUTOCORRELATED_IMAGES" is True; samples will be deleted')
#     else:
#         logger.info('    "DELETE_AUTOCORRELATED_IMAGES" is False; samples will not be deleted')
#     autocorrelated_image_count = create_autocorrelated_images_view(
#         threshold=AUTOCORRELATED_IMAGES_THRESHOLD, delete=DELETE_AUTOCORRELATED_IMAGES)
#     logger.info(f'    With a threshold of {AUTOCORRELATED_IMAGES_THRESHOLD}, {autocorrelated_image_count} autocorrelated images were found')

# if RETRAIN_MODEL:
#     export_51_to_YOLO(
#         dataset_name='Guam07v3', 
#         export_dir='/home/aubrey/myexport', 
#         remove_unannotated=True)
#     train_model()

# if LAUNCH_51:
    
#     # Reload dataset
#     logger.info(f'Loading FiftyOne dataset "{FO_DATASET_NAME}"')
#     dataset = fo.load_dataset(FO_DATASET_NAME)
#     logger.info(f'    Ground truth bounding boxes: {count_ground_truth_bbs(dataset)}')

#     # Launch FiftyOne app in browser
#     logger.info(f'Launching FifyOne app in browser')
#     session = fo.launch_app(dataset, auto=False)
#     logger.info(session)

# logger.info('FINISHED')

In [27]:
def annotate_with_cvat(saved_view: str):
    """ 
    Create annotation tasks for images in view
    Reference: https://docs.voxel51.com/user_guide/annotation.html
    The CVAT app should appear, loaded with images to be annotated.
    When finished with annotations, merge back into the 51 dataset using 
    merge_cvat_annotations().

    Arguments:
        saved_view - name of a saved view of the global ``dataset``; only
            samples in this view are sent for annotation

    Within the saved view, only 'ground_truth' labels whose mistakenness is
    greater than 0.338 are kept. The annotation run is stored under the fixed
    key "x", the same key merge_cvat_annotations() reads.
    """
    anno_key = "x"
    view = dataset.load_saved_view(saved_view)
    # Filter the loaded view rather than the whole dataset. Previously the
    # loaded view was overwritten straight away, so ``saved_view`` had no
    # effect on what was annotated.
    view = view.filter_labels("ground_truth", F("mistakenness") > 0.338)
    view.annotate(anno_key, label_field="ground_truth", launch_editor=True)
    # print(dataset.get_annotation_info(anno_key))
    # print(dataset.get_annotation_info(anno_key))
  
# annotate_with_cvat('mistakenness > 0.338')

In [28]:
def merge_cvat_annotations():
    """
    Bring finished CVAT annotations back into the global ``dataset``.
    Reference: https://docs.voxel51.com/user_guide/annotation.html

    Works on the annotation run stored under the fixed key "x": the
    annotations are loaded, the annotated view is opened in the FiftyOne
    app, and then the run is cleaned up.
    """
    anno_key = "x"

    # Step 1: merge the annotations into the dataset.
    dataset.load_annotations(anno_key)

    # Step 2: show the view that was sent for annotation.
    annotated_view = dataset.load_annotation_view(anno_key)
    session = fo.launch_app(view=annotated_view, auto=False)

    # Step 3: clean up. The results object removes the tasks from CVAT.
    annotation_results = dataset.load_annotation_results(anno_key)
    annotation_results.cleanup()

    # Step 4: drop the run record from FiftyOne (the labels are kept).
    dataset.delete_annotation_run(anno_key)
    
# merge_cvat_annotations()

In [29]:
def create_mistakenness_338_view():
    """
    Save a view of samples whose mistakenness is greater than 0.338.

    The view is taken from the global ``dataset`` and stored, overwriting any
    earlier version, under the name 'mistakenness > 0.338'.
    """
    above_threshold = F("mistakenness") > 0.338
    dataset.save_view(
        'mistakenness > 0.338',
        dataset.match(above_threshold),
        overwrite=True,
    )

# create_mistakenness_338_view()

In [30]:
# MAIN
# Settings read as globals by the functions defined in the cells above,
# followed by logger set-up.

## CONSTANTS

# Root folder; get_latest_weights_path() and get_latest_yaml_path() search
# its 'runs' and 'datasets' subfolders.
DATAPATH = '/home/aubrey/crb_damage_detector_data'

# path to dataset nonstandard format (images and labels in same folders). Not normally used.
NONSTANDARD_DATASET_PATH = None

# path to latest dataset in YOLOv5 format
YOLO_DATASET_PATH = '/home/aubrey/crb_damage_detector_data/datasets/20241128_1648'

# path to latest weights file (read by add_predictions_field)
WEIGHTS = '/home/aubrey/crb_damage_detector_data/runs/20241128_1657/weights/best.pt'

# name of FiftyOne dataset
FO_DATASET_NAME = '20241128_1648'

# file name for log file saved in the same folder as this notebook
LOGFILE = f'{FO_DATASET_NAME}.log'

# Arguments for create_autocorrelated_images_view function.
AUTOCORRELATED_IMAGES_THRESHOLD = 0.98
DELETE_AUTOCORRELATED_IMAGES = True

# Argument for create_bb_touching_edge_view function
DELETE_BBS_TOUCHING_EDGES = True

# Option to retrain model. Usually False.
RETRAIN_MODEL = False

# Option to launch the FiftyOne app in a browser at end of workflow. Usually True.
LAUNCH_51 = True

## CONFIGURE LOGGER
# ``logger`` is read as a global by add_field() and on_button_clicked().
logger = configure_logger(LOGFILE)

## update requirements.txt
logger.info('Updating "requirements.txt"')
update_requirements_file()

# CONTROL PANEL

## Buttons
# One button is created per entry. on_button_clicked() selects the action by
# matching the button description, so each string here must be spelled
# exactly like the corresponding ``case`` label in that function.
btn_descriptions = [
    'import latest yolo dataset into 51', 
    'Launch 51 in browser', 
    'add embeddings',
    'export_51_to_YOLO',
    'train_model'
]

## Callback function
def on_button_clicked(b):
    """ 
    To provide a visual indication of status, button sttyle is changed to 'primary' when clicked and 'success' when execution ends.
    After a button is clicked, all buttons are disabled. All buttons are enabled when execution ends.
    """
    for button in buttons:
        button.disabled = True
    b.button_style = 'primary' 
    match b.description:
        case 'import latest yolo dataset into 51':
            latest_yaml_path = get_latest_yaml_path()
            name = create_timestamp_for_filename()
            basename = os.path.basename(latest_yaml_path)
            dataset_dir = latest_yaml_path.replace(f'/{basename}', '')
            logger.info(f'Executing yolo2fiftyone() with the following parameters:')
            logger.info(f'   name={name}')
            logger.info(f'   dataset_dir={dataset_dir}')
            logger.info(f'   splits=["train", "val"])')
            yolo2fiftyone(name=name, dataset_dir=dataset_dir, splits=["train", "val"])
        case 'Launch 51 in browser':     
            logger.info(f'Launching FifyOne app in browser')
            dataset = fo.load_dataset(name = get_latest_51_dataset())
            session = fo.launch_app(dataset=dataset, auto=False)
            logger.info(session)
        case 'add embeddings': 
            logger.info('add_embeddings_field()')
            add_embeddings_field()
        case 'export_51_to_YOLO':
            dataset_name = get_latest_51_dataset()
            export_dir = f'{DATAPATH}/dataset/{create_timestamp_for_filename()}'
            remove_unannotated = True
            logger.info(f'Executing export_51_to_yolo() with the following parameters:')
            logger.info(f'   dataset_name={dataset_name}')
            logger.info(f'   export_dir={export_dir}')
            logger.info(f'   remove_unannotated={remove_unannotated}')
            export_51_to_YOLO(
                dataset_name=dataset_name, 
                export_dir=export_dir, 
                remove_unannotated=remove_unannotated) 
        case 'train_model':
            yaml_path = latest_yaml_path()
            train_model(yaml_path=yaml_path) 
            logger.info(f'Executing train_model() with the following parameter:')
            logger.info(f'   yaml_path={yaml_path}')         
        case _: 
            print('--- not a valid b.description')           
    b.button_style = 'success'
    for button in buttons:
        button.disabled = False

# Widget layout
# Build one button per description, attach the shared click handler to each,
# and show the buttons side by side as the cell output.
buttons = [widgets.Button(description=label) for label in btn_descriptions]
for button in buttons:
    button.on_click(on_button_clicked)
widgets.HBox(buttons)


2024-12-03 17:34:47 Updating "requirements.txt"


HBox(children=(Button(description='import latest yolo dataset into 51', style=ButtonStyle()), Button(descripti…