<a href="https://colab.research.google.com/github/Yumax-panda/YOLO-research/blob/main/main.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Install required packages and import modules

In [None]:
!pip install ultralytics fastai

In [2]:
from ultralytics import YOLO
from fastai.vision.all import untar_data, URLs, set_seed, parent_label, get_image_files
import numpy as np
import pandas as pd
import os
import shutil

## Funcs

https://github.com/fastai/imagenette/blob/master/noisy_labels/generate_labels.ipynb

In [3]:
def get_labels(files):
    labels = []
    for file in files: labels.append(parent_label(file))
    return labels

def generate_noisy_labels(labels,unique_labels,pct_noise):
    noisy_labels = labels.copy() #copy labels list, this is will be the new list with noisy labels
    num_labels = len(labels) #number of labels
    num_classes = len(unique_labels) #number of unique labels
    noisy_idxs = [] #this is the list of indices where the labels will be switched
    indices = np.random.permutation(num_labels) #randomly permute the indices
    for i, idx in enumerate(indices):
        if i < pct_noise * num_labels: # only change the first pct_noise% of the permuted labels
            noisy_idxs.append(idx) #append to noisy_idxs
            before_label = noisy_labels[idx]
            while noisy_labels[idx] == before_label: #ensure that the new label isn't the same
                new_label = unique_labels[np.random.randint(num_classes)] #randomly select a new label
                noisy_labels[idx] = new_label  #assign new label
    return noisy_labels, noisy_idxs

def get_imagenette_relative_path(files):
    _files = []
    for i in range(len(files)): _files.append(os.path.join(*str(files[i]).split('/')[-3:]))
    return _files

## load imagewoof data

In [4]:
__datasets_path__ = "/content/datasets"
src = untar_data(URLs.IMAGEWOOF_160, data=__datasets_path__)
print(src)
train_files = get_image_files(src/"train")

/content/datasets/imagewoof2-160


In [5]:
labels = get_labels(train_files)
unique_labels = list(set(labels))
print(unique_labels)

['n02086240', 'n02105641', 'n02093754', 'n02087394', 'n02111889', 'n02115641', 'n02089973', 'n02088364', 'n02096294', 'n02099601']


## create noisy labels for imagewoof

In [6]:
noisy_labels_1, noisy_idxs_1 = generate_noisy_labels(labels, unique_labels, 0.01)
print(f'percentage noise: {100*len(noisy_idxs_1)/len(noisy_labels_1)}%')

example_idx = np.random.randint(len(noisy_idxs_1))
print(noisy_labels_1[noisy_idxs_1[example_idx]], labels[noisy_idxs_1[example_idx]])

percentage noise: 1.0083102493074791%
n02086240 n02089973


In [7]:
noisy_labels_5, noisy_idxs_5 = generate_noisy_labels(labels, unique_labels, 0.05)
noisy_labels_25, noisy_idxs_25 = generate_noisy_labels(labels, unique_labels, 0.25)
noisy_labels_50, noisy_idxs_50 = generate_noisy_labels(labels, unique_labels, 0.50)

In [8]:
_files = get_imagenette_relative_path(train_files)
train_df = pd.DataFrame({'path': _files,
              'noisy_labels_1': noisy_labels_1,
              'noisy_labels_5': noisy_labels_5,
              'noisy_labels_25': noisy_labels_25,
              'noisy_labels_50': noisy_labels_50,
              'is_valid': [False]*len(_files)
             })

In [9]:
val_files = get_image_files(src/'val')
labels = get_labels(val_files)
_files = get_imagenette_relative_path(val_files)
val_df = pd.DataFrame({'path': _files,
              'noisy_labels_1': labels,
              'noisy_labels_5': labels,
              'noisy_labels_25': labels,
              'noisy_labels_50': labels,
              'is_valid': [True]*len(_files)
             })

In [10]:
df = pd.concat([train_df,val_df])

In [11]:
df.head()
print(f"train datasize: {len(train_df)}")
print(f"val datasize: {len(val_df)}")
print(f"total: {len(df)}")

train datasize: 9025
val datasize: 3929
total: 12954


In [12]:
df.to_csv('noisy_imagewoof.csv', index=False)

## create datasets

In [13]:
def create_noisy_datasets(df: pd.DataFrame, name:str, column_idx: int) -> None:
  labels = ['n02111889', 'n02088364', 'n02096294', 'n02086240', 'n02089973', 'n02099601', 'n02105641', 'n02087394', 'n02115641', 'n02093754']

  try:
      os.mkdir(f"{__datasets_path__}/{name}")
      os.mkdir(f"{__datasets_path__}/{name}/train")
      os.mkdir(f"{__datasets_path__}/{name}/val")

      for label in labels:
        os.mkdir(f"{__datasets_path__}/{name}/train/{label}")
        os.mkdir(f"{__datasets_path__}/{name}/val/{label}")

  except FileExistsError:
    print("already exists")

  _col_idx_mapping = {}

  for idx, col in enumerate(df.columns, 0):
    _col_idx_mapping[col] = idx

  for row in df.values:
    path, noisy_label = row[0], row[column_idx]
    segments = path.split("/", 2)
    new_path = f"{segments[0]}/{noisy_label}/{segments[-1]}"

    shutil.copyfile(f"{__datasets_path__}/imagewoof2-160/{path}", f"{__datasets_path__}/{name}/{new_path}")


In [14]:
def run():
  # noisy_labels = ["noisy_labels_1",	"noisy_labels_5", "noisy_labels_25", "noisy_labels_50"]
  noisy_labels = ["noisy_labels_50"] # to prevent memory error, we use only 50% noised datasets

  # no noise datasets
  # model = YOLO('yolov8n-cls.pt')
  # result = model.train(data="imagewoof160", epochs=1, imgsz=224)
  # model.export(format='onnx')
  # model("https://image.peppynet.com/rv/renewal/archive/golden-retriever/images/main-img.png", save=True)

  for idx, noisy_label in enumerate(noisy_labels):
    print(f"\n\nStarting: {noisy_label}")
    create_noisy_datasets(df, noisy_label, idx+1)
    model = YOLO('yolov8n-cls.pt')
    result = model.train(data=f"{__datasets_path__}/{noisy_label}", epochs=1, imgsz=224)
    model.export(format='onnx')
    model("https://image.peppynet.com/rv/renewal/archive/golden-retriever/images/main-img.png", save=True)


## Run

In [15]:
# run()

## Validate

In [20]:
# After session is deleted, this util func must be called before using datasets
def setup():
  for idx, col in enumerate(["noisy_labels_1",	"noisy_labels_5", "noisy_labels_25", "noisy_labels_50"]):
    create_noisy_datasets(df, col, idx+1)

# setup()

In [18]:
!yolo val task=classify model=/content/noisy_0.onnx imgsz=224 data=/content/datasets/imagewoof2-160

Ultralytics YOLOv8.0.215 🚀 Python-3.10.12 torch-2.1.0+cu118 CPU (Intel Xeon 2.20GHz)
Loading /content/noisy_0.onnx for ONNX Runtime inference...
[31m[1mrequirements:[0m Ultralytics requirements ['onnx', 'onnxruntime'] not found, attempting AutoUpdate...
Collecting onnx
  Downloading onnx-1.15.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (15.7 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 15.7/15.7 MB 116.9 MB/s eta 0:00:00
Collecting onnxruntime
  Downloading onnxruntime-1.16.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (6.4 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 6.4/6.4 MB 221.1 MB/s eta 0:00:00
Collecting coloredlogs (from onnxruntime)
  Downloading coloredlogs-15.0.1-py2.py3-none-any.whl (46 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 46.0/46.0 kB 216.3 MB/s eta 0:00:00
Collecting humanfriendly>=9.1 (from coloredlogs->onnxruntime)
  Downloading humanfriendly-10.0-py2.py3-none-any.whl (86 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [21]:
!yolo val task=classify model=/content/noisy_1.onnx imgsz=224 data=/content/datasets/noisy_labels_1

Ultralytics YOLOv8.0.215 🚀 Python-3.10.12 torch-2.1.0+cu118 CPU (Intel Xeon 2.20GHz)
Loading /content/noisy_1.onnx for ONNX Runtime inference...
Forcing batch=1 square inference (1,3,224,224) for non-PyTorch models
[34m[1mtrain:[0m /content/datasets/noisy_labels_1/train... found 9025 images in 10 classes ✅ 
[34m[1mval:[0m /content/datasets/noisy_labels_1/val... found 3929 images in 10 classes ✅ 
[34m[1mtest:[0m None...
[34m[1mval: [0mScanning /content/datasets/noisy_labels_1/val... 3929 images, 0 corrupt: 100% 3929/3929 [00:00<00:00, 4920.83it/s]
[34m[1mval: [0mNew cache created: /content/datasets/noisy_labels_1/val.cache
               classes   top1_acc   top5_acc: 100% 3929/3929 [00:59<00:00, 66.04it/s]
                   all      0.845      0.989
Speed: 0.0ms preprocess, 12.6ms inference, 0.0ms loss, 0.0ms postprocess per image
Results saved to [1mruns/classify/val2[0m
💡 Learn more at https://docs.ultralytics.com/modes/val


In [22]:
!yolo val task=classify model=/content/noisy_5.onnx imgsz=224 data=/content/datasets/noisy_labels_5

Ultralytics YOLOv8.0.215 🚀 Python-3.10.12 torch-2.1.0+cu118 CPU (Intel Xeon 2.20GHz)
Loading /content/noisy_5.onnx for ONNX Runtime inference...
Forcing batch=1 square inference (1,3,224,224) for non-PyTorch models
[34m[1mtrain:[0m /content/datasets/noisy_labels_5/train... found 9025 images in 10 classes ✅ 
[34m[1mval:[0m /content/datasets/noisy_labels_5/val... found 3929 images in 10 classes ✅ 
[34m[1mtest:[0m None...
[34m[1mval: [0mScanning /content/datasets/noisy_labels_5/val... 3929 images, 0 corrupt: 100% 3929/3929 [00:00<00:00, 4161.13it/s]
[34m[1mval: [0mNew cache created: /content/datasets/noisy_labels_5/val.cache
               classes   top1_acc   top5_acc: 100% 3929/3929 [00:59<00:00, 66.58it/s]
                   all      0.833       0.99
Speed: 0.0ms preprocess, 12.5ms inference, 0.0ms loss, 0.0ms postprocess per image
Results saved to [1mruns/classify/val3[0m
💡 Learn more at https://docs.ultralytics.com/modes/val


In [23]:
!yolo val task=classify model=/content/noisy_25.onnx imgsz=224 data=/content/datasets/noisy_labels_25

Ultralytics YOLOv8.0.215 🚀 Python-3.10.12 torch-2.1.0+cu118 CPU (Intel Xeon 2.20GHz)
Loading /content/noisy_25.onnx for ONNX Runtime inference...
Forcing batch=1 square inference (1,3,224,224) for non-PyTorch models
[34m[1mtrain:[0m /content/datasets/noisy_labels_25/train... found 9025 images in 10 classes ✅ 
[34m[1mval:[0m /content/datasets/noisy_labels_25/val... found 3929 images in 10 classes ✅ 
[34m[1mtest:[0m None...
[34m[1mval: [0mScanning /content/datasets/noisy_labels_25/val... 3929 images, 0 corrupt: 100% 3929/3929 [00:01<00:00, 3899.18it/s]
[34m[1mval: [0mNew cache created: /content/datasets/noisy_labels_25/val.cache
               classes   top1_acc   top5_acc: 100% 3929/3929 [00:56<00:00, 69.10it/s]
                   all      0.793      0.984
Speed: 0.0ms preprocess, 12.0ms inference, 0.0ms loss, 0.0ms postprocess per image
Results saved to [1mruns/classify/val4[0m
💡 Learn more at https://docs.ultralytics.com/modes/val


In [24]:
!yolo val task=classify model=/content/noisy_50.onnx imgsz=224 data=/content/datasets/noisy_labels_50

Ultralytics YOLOv8.0.215 🚀 Python-3.10.12 torch-2.1.0+cu118 CPU (Intel Xeon 2.20GHz)
Loading /content/noisy_50.onnx for ONNX Runtime inference...
Forcing batch=1 square inference (1,3,224,224) for non-PyTorch models
[34m[1mtrain:[0m /content/datasets/noisy_labels_50/train... found 9025 images in 10 classes ✅ 
[34m[1mval:[0m /content/datasets/noisy_labels_50/val... found 3929 images in 10 classes ✅ 
[34m[1mtest:[0m None...
[34m[1mval: [0mScanning /content/datasets/noisy_labels_50/val... 3929 images, 0 corrupt: 100% 3929/3929 [00:00<00:00, 4889.37it/s]
[34m[1mval: [0mNew cache created: /content/datasets/noisy_labels_50/val.cache
               classes   top1_acc   top5_acc: 100% 3929/3929 [00:52<00:00, 74.28it/s]
                   all      0.848      0.992
Speed: 0.0ms preprocess, 11.2ms inference, 0.0ms loss, 0.0ms postprocess per image
Results saved to [1mruns/classify/val5[0m
💡 Learn more at https://docs.ultralytics.com/modes/val
