<a id="configuration"></a>
<h2 style="padding: 12px 12px; background-color: #65fff1; font-family: Sans-Serif; color:black">
Object Detection on Waste
</h2>

In this notebook you will:
- Train
- Validate
- Test

a custom YOLOv8 model.

In [None]:
# Mount Google Drive at /content/drive; the dataset and output paths used later
# in this notebook all live under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Source of dataset: https://www.kaggle.com/datasets/parohod/warp-waste-recycling-plant-dataset?select=.idea

#### Check access to GPU

Let's make sure that we have access to GPU.

In [3]:
# Shell out to nvidia-smi to confirm a GPU is visible to this runtime.
# The command only exists where the NVIDIA tooling is installed; elsewhere the
# shell reports that 'nvidia-smi' is not recognized.
!nvidia-smi

'nvidia-smi' is not recognized as an internal or external command,
operable program or batch file.


In [None]:
!pip install ultralytics

In [None]:
import os
import yaml
import shutil
import random
import ultralytics
from tqdm.notebook import tqdm

Check environment

In [None]:
# Run Ultralytics' built-in environment check before doing any training.
ultralytics.checks()

In [None]:
def log(msg):
    """Print ``msg`` to stdout wrapped in the notebook's bell-prefixed banner."""
    banner = '🔔---| {} '
    print(banner.format(msg))

#### Installation

Import libraries for training and splitting data

Split data into train and validation set

In [None]:
# Absolute Google Drive locations of the image/label folders for each split.
# NOTE: train_path_img intentionally keeps its original form WITHOUT a trailing
# slash, unlike the other five paths; code that concatenates onto these strings
# relies on the exact values below.
_waste_data_root = "/content/drive/MyDrive/yolo_projects/waste_data"
train_path_img = _waste_data_root + "/train/images"
train_path_label = _waste_data_root + "/train/labels/"
val_path_img = _waste_data_root + "/validation/images/"
val_path_label = _waste_data_root + "/validation/labels/"
test_path_img = _waste_data_root + "/test/images/"
test_path_label = _waste_data_root + "/test/labels/"

def get_random(validation_path, k=10):
    """Move a random sample of validation images and their labels to the test set.

    Images are taken from ``validation_path``; the matching ``<stem>.txt`` label
    is looked up in the module-level ``val_path_label`` folder. Files are moved
    into the module-level ``test_path_img`` / ``test_path_label`` folders, which
    are created if they do not exist yet.

    Args:
        validation_path: Folder containing the validation images.
        k: Number of images to move. Clamped to the number of files available.

    Returns:
        The list of image file names that were moved.
    """
    # Snapshot the folder once. Re-listing it after every move shifts the
    # positions of the remaining files, so pre-sampled indices would point at
    # the wrong file or past the end of the list.
    images = sorted(os.listdir(validation_path))

    sample_size = max(0, min(k, len(images)))
    if sample_size < k:
        log(f"Only {len(images)} files in {validation_path}; moving {sample_size} instead of {k}")
    chosen = random.sample(images, sample_size)

    # Create the folders the files are actually moved into.
    os.makedirs(test_path_img, exist_ok=True)
    os.makedirs(test_path_label, exist_ok=True)

    moved = []
    for image in chosen:
        # Swap only the extension; str.replace would also rewrite ".jpg"
        # occurring in the middle of a file name.
        label = os.path.splitext(image)[0] + ".txt"
        label_src = os.path.join(val_path_label, label)

        shutil.move(os.path.join(validation_path, image), os.path.join(test_path_img, image))
        if os.path.isfile(label_src):
            shutil.move(label_src, os.path.join(test_path_label, label))
            log(f"Moved {image} and {label} to test folder")
        else:
            log(f"Moved {image} to test folder (no label file found at {label_src})")
        moved.append(image)

    return moved


In [None]:
# Report how many files each split folder currently holds.
train_files, val_files = [os.listdir(folder) for folder in (train_path_img, val_path_img)]
for split_name, file_names in (("training", train_files), ("validation", val_files)):
    log(f"Number of {split_name} images: {len(file_names)}")
log(f"Number of test images: {len(os.listdir(test_path_img))}")


In [None]:
# Define a function for splitting training data to train and validation
def _jpg_stems(folder):
    """Return the sorted, de-duplicated base names of the ``.jpg`` files in ``folder``."""
    suffix = '.jpg'
    return sorted({name[:-len(suffix)] for name in os.listdir(folder) if name.endswith(suffix)})


def _copy_image_label_pairs(stems, image_src, label_src, image_dst, label_dst):
    """Copy ``<stem>.jpg`` and its ``<stem>.txt`` label for every stem in ``stems``."""
    for stem in tqdm(stems):
        shutil.copy2(os.path.join(image_src, stem + '.jpg'), os.path.join(image_dst, stem + '.jpg'))
        shutil.copy2(os.path.join(label_src, stem + '.txt'), os.path.join(label_dst, stem + '.txt'))


def train_test_split(path, neg_path=None, split=0.2, label_path=None):
    """Copy a labelled image folder into the train and validation folders.

    The ``.jpg`` images found in ``path`` are shuffled with a fixed seed and
    split; each image is copied together with its ``.txt`` label into the
    module-level ``train_path_img`` / ``train_path_label`` or
    ``val_path_img`` / ``val_path_label`` folders (created when missing).

    Args:
        path: Folder containing the source ``.jpg`` images.
        neg_path: Optional folder of unlabelled (negative) ``.jpg`` images that
            are all added to the training images.
        split: Fraction of the images that goes to the validation set (0..1).
        label_path: Folder containing the source ``.txt`` labels. When omitted,
            the legacy location ``path[0:43] + 'labels/'`` is used, which is
            only correct for the directory layout this notebook was originally
            written against.

    Raises:
        ValueError: If ``split`` is outside ``[0, 1]``.
        FileNotFoundError: If an image has no matching label file.
    """
    if not 0 <= split <= 1:
        raise ValueError(f"split must be between 0 and 1, got {split!r}")

    log("------ PROCESS STARTED -------")

    if label_path is None:
        # Backward-compatible fallback; prefer passing label_path explicitly.
        label_path = path[0:43] + 'labels/'

    # Sorting matters: iterating a set of strings yields a different order in
    # every interpreter session, which would make the seeded shuffle below
    # non-reproducible. 'classes' is the class-list file, not a sample, so it is
    # excluded before the split sizes are computed.
    files = [stem for stem in _jpg_stems(path) if stem != 'classes']

    log(f"--- This folder has a total number of {len(files)} images---")
    random.seed(42)
    random.shuffle(files)

    # split the data
    test_size = int(len(files) * split)
    train_size = len(files) - test_size
    train_files, val_files = files[:train_size], files[train_size:]

    # create the required directories for the images and their labels
    for folder in (train_path_img, train_path_label, val_path_img, val_path_label):
        os.makedirs(folder, exist_ok=True)

    # copy images and labels to the train folder
    _copy_image_label_pairs(train_files, path, label_path, train_path_img, train_path_label)
    log(f"------ Training data created with {1 - split:.0%} split {len(train_files)} images -------")

    if neg_path:
        neg_images = _jpg_stems(neg_path)
        for stem in tqdm(neg_images):
            shutil.copy2(os.path.join(neg_path, stem + '.jpg'), os.path.join(train_path_img, stem + '.jpg'))

        log(f"------ Total  {len(neg_images)} negative images added to the training data -------")

        log(f"------ TOTAL Training data created with {len(train_files) + len(neg_images)} images -------")

    # copy images and labels to the validation folder
    _copy_image_label_pairs(val_files, path, label_path, val_path_img, val_path_label)
    log(f"------ Validation data created with a total of {len(val_files)} images ----------")

    log("------ TASK COMPLETED -------")

# Create yaml file for training

In [None]:
# Class names of the waste dataset; the list position is the YOLO class id.
classes = [
    'bottle-blue',
    'bottle-green',
    'bottle-dark',
    'bottle-milk',
    'bottle-transp',
    'bottle-multicolor',
    'bottle-yogurt',
    'bottle-oil',
    'cans',
    'juice-cardboard',
    'milk-cardboard',
    'detergent-color',
    'detergent-transparent',
    'detergent-box',
    'canister',
    'bottle-blue-full',
    'bottle-transp-full',
    'bottle-dark-full',
    'bottle-green-full',
    'bottle-multicolor-full',
    'bottle-milk-full',
    'bottle-oil-full',
    'detergent-white',
    'bottle-blue5l',
    'bottle-blue5l-full',
    'glass-transp',
    'glass-dark',
    'glass-green',
]
# Show one class name per line.
print('\n'.join(classes))

In [None]:
# Information for the dataset YAML document read by the training run.
base_dir = os.getcwd()
log(base_dir)

# Reuse the absolute split folders defined earlier in the notebook. Rebuilding
# them from the current working directory only works while the kernel's cwd is
# /content, and silently produces wrong paths after a %cd or on another machine.
train_data = train_path_img
val_data = val_path_img
test_data = test_path_img
data = {
    'train': train_data,
    'val': val_data,
    'test': test_data,
    'nc': len(classes),  # number of classes, derived so it cannot drift from `names`
    'names': classes,    # class names
}

# create yaml file
with open("/content/drive/MyDrive/yolo_projects/dataset.yaml", "w") as f:
    yaml.dump(data, f, default_flow_style=False)
log("dataset.yaml created!")

In [None]:
# Location of the dataset YAML written in the previous cell; passed to model.train() below.
data_path = '/content/drive/MyDrive/yolo_projects/dataset.yaml'

Train model

In [None]:
import torch

# Report which PyTorch build is installed.
log("Pytorch version: {}".format(torch.__version__))

# Pick the GPU when CUDA is usable, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
log("Device: {}".format(device))

In [None]:
# Allow the process to continue when more than one OpenMP runtime gets loaded
# (the "libiomp5 ... already initialized" abort); it is unrelated to transformer models.
# NOTE(review): this suppresses the check rather than removing the duplicate
# runtime — presumably only needed on local (non-Colab) setups; confirm.
os.environ['KMP_DUPLICATE_LIB_OK']='True'

In [None]:
# Load the 'yolov8m.pt' checkpoint; this is the model that model.train() fine-tunes below.
model = ultralytics.YOLO('yolov8m.pt')

In [None]:
# Train on the first GPU when CUDA is available and fall back to the CPU
# otherwise; a hard-coded device='0' aborts on machines without a GPU.
train_device = 0 if torch.cuda.is_available() else 'cpu'

model.train(
    data=data_path,             # dataset.yaml file
    task='detect',              # e.g. 'detect', 'segment', 'classify'
    imgsz=640,                  # image size
    epochs=50,                  # number of epochs
    # workers=0,                  # number of workers
    batch=16,                   # batch size
    patience=20,                # early stopping patience
    project='/content/drive/MyDrive/yolo_projects/waste_detection',  # save results to project/name
    mode='train',               # this call runs a training session
    name='yolo',                # save results to project/name
    save=True,                  # save results to project/name/weights/last.pt
    device=train_device,        # GPU index (0), list of indices, or 'cpu'
)

In [None]:
# !yolo task=detect mode=train model=yolov8m.pt data=/content/drive/MyDrive/yolo_projects/dataset.yaml epochs=5 imgsz=640 batch=16 patience=20 project=/content/drive/MyDrive/yolo_projects/waste_detection name=yolo

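# To resume training after the notebook is interrupted, comment out the command above and uncomment the one below.
# Note: you will need to re-run all the previous cells first.
# NOTE: the paths below point to a different project folder (Garbage_Detection). The training cell in this notebook
# saves to /content/drive/MyDrive/yolo_projects/waste_detection/yolo/weights/last.pt and reads
# /content/drive/MyDrive/yolo_projects/dataset.yaml, so adjust both paths before resuming.
# !yolo task=detect mode=train resume data=/content/drive/MyDrive/Garbage_Detection/dataset.yaml model=/content/drive/MyDrive/Garbage_Detection/training_results/weights/last.pt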

## Testing

Test using the 10 test images

In [None]:
# create a test