## YOLOv8

In [1]:
# Mount Google Drive at /content/gdrive; the other cells read and write below this path

from google.colab import drive

drive_mount_point = '/content/gdrive'
drive.mount(drive_mount_point)

Mounted at /content/gdrive


In [None]:
# Sanity check: count the files in the image folder and in the label folder.
# Later cells pair images with label files, so the two counts are expected to match.
import os

# NOTE: despite their names, these two variables hold folder paths, not counts.
total_images = "/content/gdrive/MyDrive/3dPrinterFault/dataset-before-split/images"
num_images = "/content/gdrive/MyDrive/3dPrinterFault/dataset-before-split/labels"

image_count = len(os.listdir(total_images))
label_count = len(os.listdir(num_images))
print(image_count)
print(label_count)

# Make a mismatch visible instead of leaving it to be spotted by eye.
if image_count != label_count:
    print(f"WARNING: {image_count} images vs {label_count} labels - some files are unpaired")

In [None]:
# !pip install ultralytics
!git clone https://github.com/ultralytics/ultralytics
# Navigate to the cloned directory
%cd ultralytics
# Install the package in editable mode for development
%pip install -e .

In [None]:
### 4. Train model ###

import os

from ultralytics import YOLO

# Load a model
model = YOLO("yolov8n.pt")  # load pre trained model

# Defining args
data= /content/gdrive/MyDrive/3dPrinterFault/3d_printer_fault_v8.yaml
optimizer= AdamW
epochs=100
imgsz=640
batch=16
weight_decay=0.0005
cls=0.7
lr0=0.0001
lrf=0.0001

project = 'kfold_demo'

# Use the model
!yolo task=detect mode=train model=yolov8n.pt optimizer=optimizer data=data epochs=epochs imgsz=imgsz batch=batch weight_decay=weight_decay save=True cls=cls lr0=lr0 lrf=lrf


In [None]:
!python detect.py --weights yolov8n.pt --img 640 --conf 0.15 -- source /content/gdrive/MyDrive/3dPrinterFault/3d_printer_fault_v8.yaml

In [None]:
import matplotlib.pyplot as plt
import matplotlib.image as mpimg

# Display the confusion-matrix image stored under /content/runs/detect/train, without axes
img = mpimg.imread('/content/runs/detect/train/confusion_matrix.png')
fig, ax = plt.subplots()
ax.imshow(img)
ax.set_axis_off()
plt.show()

In [9]:
import locale


def getpreferredencoding(do_setlocale=True):
    """Return "UTF-8" unconditionally; `do_setlocale` is accepted but ignored."""
    return "UTF-8"


# Replace the stdlib lookup so any caller asking for the preferred encoding gets UTF-8.
# NOTE(review): presumably a workaround for Colab shell commands failing with
# "A UTF-8 locale is required" after training - confirm it is still needed.
locale.getpreferredencoding = getpreferredencoding

In [None]:
# Copy results to drive

!scp -r /content/runs '/content/gdrive/My Drive/3dPrinterFault/3rd_Runs'

# K-Fold Cross-Validation with YOLOv8

In [None]:
# Provided by https://docs.ultralytics.com/guides/kfold-cross-validation/#generating-feature-vectors-for-object-detection-dataset

import datetime
import shutil
from pathlib import Path
from collections import Counter

import yaml
import numpy as np
import pandas as pd
from ultralytics import YOLO
from sklearn.model_selection import KFold

savedrivepath = Path('/content/gdrive/MyDrive/3dPrinterFault/dataset-after-split')
dataset_path = Path('/content/gdrive/MyDrive/3dPrinterFault/dataset-before-split') # replace with 'path/to/dataset' for your custom data
labels = sorted(dataset_path.rglob("*labels/*.txt")) # all data in 'labels'

yaml_file = '/content/gdrive/MyDrive/3dPrinterFault/3DPrintFaults_yolov8.yaml'  # your data YAML with data directories and names dictionary
with open(yaml_file, 'r', encoding="utf8") as y:
    classes = yaml.safe_load(y)['names']
# `names` may be written as a plain list in the YAML; normalise it to {index: name}
# so that `.keys()` below works for both spellings.
if isinstance(classes, list):
    classes = dict(enumerate(classes))
cls_idx = sorted(classes.keys())

indx = [l.stem for l in labels] # uses base filename as ID (no extension)

# Count the instances of each class in every label file (one Counter per file,
# kept in the same order as `labels` / `indx`).
label_counts = []
for label in labels:
    lbl_counter = Counter()

    with open(label, 'r') as lf:
        for line in lf:
            # split() without arguments tolerates tabs / repeated spaces, and blank
            # lines give an empty list instead of crashing int('').
            fields = line.split()
            if not fields:
                continue
            # classes for YOLO label uses integer at first position of each line
            lbl_counter[int(fields[0])] += 1

    label_counts.append(lbl_counter)

# Build the table once: one row per label file, one column per class id.
# Classes that do not occur in a file come out as NaN and are replaced with 0.0.
labels_df = pd.DataFrame(label_counts, index=indx, columns=cls_idx).fillna(0.0)

# Show a small preview rather than dumping the entire table
print(labels_df.shape)
print(labels_df.head())

ksplit = 5
kf = KFold(n_splits=ksplit, shuffle=True, random_state=20)   # setting random_state for repeatable results

# Each element is a (train_positions, val_positions) pair of row positions in labels_df
kfolds = list(kf.split(labels_df))
folds = [f'split_{n}' for n in range(1, ksplit + 1)]

# One row per label file, one column per split; cells end up as 'train' or 'val'
folds_df = pd.DataFrame(index=indx, columns=folds)

for idx, (train, val) in enumerate(kfolds, start=1):
    # Assign through a single .loc[rows, column] call. The chained form
    # folds_df[col].loc[rows] = ... writes to a temporary object under pandas
    # Copy-on-Write and would leave folds_df unchanged.
    folds_df.loc[labels_df.iloc[train].index, f'split_{idx}'] = 'train'
    folds_df.loc[labels_df.iloc[val].index, f'split_{idx}'] = 'val'

# Preview of the assignment (after it has been filled in)
print(folds_df.head())

# Per split: ratio of validation instances to training instances for every class
fold_lbl_distrb = pd.DataFrame(index=folds, columns=cls_idx)

for n, (train_indices, val_indices) in enumerate(kfolds, start=1):
    train_totals = labels_df.iloc[train_indices].sum()
    val_totals = labels_df.iloc[val_indices].sum()

    # To avoid division by zero, we add a small value (1E-7) to the denominator
    ratio = val_totals / (train_totals + 1E-7)
    fold_lbl_distrb.loc[f'split_{n}'] = ratio



In [4]:
supported_extensions = ['.jpg', '.jpeg', '.png','.JPG']

# Extensions are compared case-insensitively, so '.JPG', '.PNG', '.JPEG', ... all match.
_image_suffixes = {ext.lower() for ext in supported_extensions}

# Collect every image below <dataset_path>/images into ONE globally sorted list.
# Sorting per extension and concatenating would order the list by extension first,
# while `labels` is sorted by path only.
# NOTE: positional pairing with `labels` is still only reliable when every image has
# exactly one label file; matching by file stem is the safe way to pair them.
images = sorted(
    path
    for path in (dataset_path / 'images').rglob('*')
    if path.suffix.lower() in _image_suffixes
)

# Output root for this run: <savedrivepath>/<today's ISO date>_<ksplit>-Fold_Cross-val
save_path = savedrivepath / f'{datetime.date.today().isoformat()}_{ksplit}-Fold_Cross-val'
save_path.mkdir(parents=True, exist_ok=True)

# Collects the path of every per-split dataset YAML, in column order of folds_df
ds_yamls = []

for split in folds_df.columns:
    # Folder layout per split: <split>/{train,val}/{images,labels}
    split_dir = save_path / split
    split_dir.mkdir(parents=True, exist_ok=True)
    for subset in ('train', 'val'):
        for content in ('images', 'labels'):
            (split_dir / subset / content).mkdir(parents=True, exist_ok=True)

    # Dataset YAML for this split, stored inside the split folder
    dataset_yaml = split_dir / f'{split}_dataset.yaml'
    ds_yamls.append(dataset_yaml)

    yaml_content = {
        'path': split_dir.as_posix(),
        'train': 'train',
        'val': 'val',
        'names': classes
    }
    with open(dataset_yaml, 'w') as ds_y:
        yaml.safe_dump(yaml_content, ds_y)

In [6]:
import os

# Folder that receives every file that has no partner (image without label, label without image)
missing_files_folder = save_path / "missing_files"
os.makedirs(missing_files_folder, exist_ok=True)

# Pair files by stem rather than by list position: zip(images, labels) stops at the
# shorter list and would attach an unrelated label to each unmatched image.
image_stems = {image.stem for image in images}

# Images whose stem is not in folds_df.index were not assigned to any split
for image in images:
    if image.stem not in folds_df.index:
        shutil.copy(image, missing_files_folder / image.name)

# Label files for which no image with the same stem was collected
for label in labels:
    if label.stem not in image_stems:
        shutil.copy(label, missing_files_folder / label.name)

In [5]:
# Copy every image together with the label file of the SAME stem into the
# train/val folder of each split, as recorded in folds_df.
# Labels are looked up by stem; pairing by position with zip(images, labels) copies
# the wrong label as soon as the two lists differ in length or order.
label_by_stem = {lbl.stem: lbl for lbl in labels}

for image in images:
    label = label_by_stem.get(image.stem)

    # Validate BEFORE indexing folds_df; folds_df.loc[...] would otherwise fail first
    # with a bare KeyError and this message would never be shown.
    if label is None or image.stem not in folds_df.index:
        raise ValueError(f"Stem {image.stem} not found in folds_df index")

    for split, k_split in folds_df.loc[image.stem].items():
        # Destination directory: <save_path>/<split>/<train|val>/{images,labels}
        img_to_path = save_path / split / k_split / 'images'
        lbl_to_path = save_path / split / k_split / 'labels'

        # Copy image and label files to new directory (an existing destination file is
        # overwritten; SameFileError is raised only if source and destination are the same file)
        shutil.copy(image, img_to_path / image.name)
        shutil.copy(label, lbl_to_path / label.name)

In [6]:
# Write the fold-assignment table and the per-split class-ratio table as CSV into save_path
for frame, csv_name in (
    (folds_df, "kfold_datasplit.csv"),
    (fold_lbl_distrb, "kfold_label_distribution.csv"),
):
    frame.to_csv(save_path / csv_name)

In [None]:
# Train one model per fold and collect the metrics of each run in `results`.
weights_path = "yolov8n.pt"  # pre trained weights every fold starts from

results = {}  # fold number (0-based) -> metrics of that fold's training run

batch = 16
project = 'kfold_demo'
epochs = 100
weight_decay = 0.0005
cls = 0.7
lr0 = 0.0001
lrf = 0.0001
optimizer = 'AdamW'

for k, dataset_yaml in enumerate(ds_yamls):
    # Reload the pre trained weights for every fold. Re-using one model object would
    # continue training from the previous fold, whose training set contains this
    # fold's validation images, so the folds would no longer be independent.
    model = YOLO(weights_path, task='detect')
    model.train(data=dataset_yaml, epochs=epochs, batch=batch, project=project, cls=cls, lr0=lr0, lrf=lrf, optimizer=optimizer, weight_decay=weight_decay)
    results[k] = model.metrics

In [11]:
!scp -r /content/ultralytics/kfold_demo '/content/gdrive/My Drive/3dPrinterFault/4_Runs'