# Training YOLOv5 custom dataset with ease

Check the full explanation on Medium:

https://medium.com/mlearning-ai/training-yolov5-custom-dataset-with-ease-e4f6272148ad

In [1]:
# Fetch the YOLOv5 sources, move into the checkout and install its requirements.
# NOTE(review): the clone is not pinned to a tag or commit, so a later run may get
# a different YOLOv5 revision than the one recorded in the output below.
!git clone https://github.com/ultralytics/yolov5  # clone
%cd yolov5
%pip install -qr requirements.txt  # install

import torch
from yolov5 import utils
# notebook_init() prints the environment summary captured in the cell output;
# its return value is kept under the name `display`.
display = utils.notebook_init() 

YOLOv5 🚀 v7.0-108-g4db6757 Python-3.8.10 torch-1.13.1+cu116 CUDA:0 (Tesla T4, 15110MiB)


Setup complete ✅ (2 CPUs, 12.7 GB RAM, 24.5/78.2 GB disk)


In [2]:
# Step back out of the yolov5 checkout (to /content in the recorded output) so the
# relative paths used by the following cells ("archive.zip", "data", "dataset.yaml")
# resolve against the parent directory.
%cd ..

/content


In [4]:
import shutil
import os, sys

# Archive that holds the dataset; looked up in the current working directory.
zip_file = "archive.zip"

# Report a missing archive instead of raising; otherwise extract it into ./data.
if not os.path.isfile(zip_file):
  print(zip_file + " not found")
else:
  shutil.unpack_archive(zip_file, "data")

In [5]:
import os, shutil, random

# preparing the folder structure

full_data_path = 'data/obj/'
extension_allowed = '.png'
split_percentage = 90
split_seed = 0  # fixed seed: re-running the cell reproduces the same split

images_path = 'data/images/'
labels_path = 'data/labels/'

training_images_path = images_path + 'training/'
validation_images_path = images_path + 'validation/'
training_labels_path = labels_path + 'training/'
validation_labels_path = labels_path + 'validation/'


def _find_image_stems(data_dir, extension):
    """Return the sorted, extension-less paths (relative to data_dir) of all matching files."""
    stems = []
    for root, _dirs, names in os.walk(data_dir):
        rel_dir = os.path.relpath(root, data_dir)
        for name in names:
            if not name.endswith(extension):
                continue
            stem = name[:len(name) - len(extension)]
            # Keep the sub-folder so that nested files can be located again when copying.
            stems.append(stem if rel_dir == os.curdir else os.path.join(rel_dir, stem))
    # os.walk order depends on the file system; sorting makes the seeded shuffle repeatable.
    stems.sort()
    return stems


def _copy_pairs(stems, data_dir, extension, images_dir, labels_dir):
    """Copy every image plus its .txt annotation (when present); return the stems without one."""
    os.makedirs(images_dir, exist_ok=True)
    os.makedirs(labels_dir, exist_ok=True)
    unlabeled = []
    for stem in stems:
        shutil.copy(os.path.join(data_dir, stem + extension), images_dir)
        label = os.path.join(data_dir, stem + '.txt')
        if os.path.isfile(label):
            shutil.copy(label, labels_dir)
        else:
            unlabeled.append(stem)
    return unlabeled


def split_dataset(data_dir, train_images_dir, train_labels_dir,
                  val_images_dir, val_labels_dir,
                  extension='.png', train_percentage=90, seed=0):
    """Randomly split the image/annotation pairs of data_dir into training and validation folders.

    Parameters:
        data_dir: folder searched recursively for files ending in `extension`; the
            annotation of an image is the file with the same name and a '.txt' extension.
        train_images_dir, train_labels_dir: destinations of the training images / annotations.
        val_images_dir, val_labels_dir: destinations of the validation images / annotations.
        extension: suffix a file must end with to count as an image.
        train_percentage: share (0-100) of the images that goes to the training set.
        seed: seed of the shuffle; pass None for a different split on every call.

    Returns:
        (training_stems, validation_stems): two lists of extension-less paths relative
        to data_dir.

    Files are copied flat into the destination folders, so two images with the same
    file name in different sub-folders overwrite each other. Images without an
    annotation file are still copied and reported in a warning instead of aborting
    the copy half-way.
    """
    stems = _find_image_stems(data_dir, extension)
    if not stems:
        print("warning: no '" + extension + "' files found in " + data_dir)

    # A private Random instance keeps the global random state untouched.
    random.Random(seed).shuffle(stems)
    cut = int(train_percentage * len(stems) / 100)
    training_stems, validation_stems = stems[:cut], stems[cut:]

    print("copying training data")
    unlabeled = _copy_pairs(training_stems, data_dir, extension,
                            train_images_dir, train_labels_dir)

    print("copying validation data")
    unlabeled += _copy_pairs(validation_stems, data_dir, extension,
                             val_images_dir, val_labels_dir)

    if unlabeled:
        print("warning: %d image(s) have no .txt annotation" % len(unlabeled))
    print("finished")
    return training_stems, validation_stems


training_files, validation_files = [], []

if os.path.isdir(full_data_path):
    # Start from empty output folders so files of a previous split cannot leak into this one.
    for stale_dir in (images_path, labels_path):
        if os.path.exists(stale_dir):
            shutil.rmtree(stale_dir)

    training_files, validation_files = split_dataset(
        full_data_path,
        training_images_path, training_labels_path,
        validation_images_path, validation_labels_path,
        extension=extension_allowed,
        train_percentage=split_percentage,
        seed=split_seed,
    )
else:
    print(full_data_path + " not found")

files = training_files + validation_files
size = len(files)
split = len(training_files)

copying training data
copying validation data
finished


In [6]:
def write_dataset_yaml(path="dataset.yaml",
                       train="../data/images/training/",
                       val="../data/images/validation/",
                       names=("lymphocyte",)):
    """Write the dataset description file passed to train.py via --data.

    Parameters:
        path: file to create; an existing file is replaced.
        train, val: image folders written under the `train:` and `val:` keys.
        names: class names; the `nc:` entry is derived from their number.

    The file is opened in "w" mode on purpose: with append mode every re-run of
    the cell added a second copy of all four keys to the same file.
    """
    quoted_names = ", ".join("'" + name + "'" for name in names)
    lines = [
        "train: " + train,
        "val: " + val,
        "nc: " + str(len(names)),
        "names: [" + quoted_names + "]",
    ]
    with open(path, "w") as f:
        f.write("\n".join(lines) + "\n")


write_dataset_yaml()

In [7]:
# Enter the YOLOv5 checkout; train.py and the later detect.py calls are run from here.
%cd yolov5
# Train on the dataset described by ../dataset.yaml: 640 px images, batch size 16,
# 20 epochs, starting from the yolov5s.pt weights.
# NOTE(review): the run is logged with project=runs/train, name=exp, exist_ok=False
# (see the output below). Presumably a second run is written to a new folder while
# the detection cells keep reading runs/train/exp/weights/best.pt - confirm before
# re-running this cell.
!python train.py --img 640 --batch 16 --epochs 20 --data ../dataset.yaml --weights yolov5s.pt

/content/yolov5
[34m[1mtrain: [0mweights=yolov5s.pt, cfg=, data=../dataset.yaml, hyp=data/hyps/hyp.scratch-low.yaml, epochs=20, batch_size=16, imgsz=640, rect=False, resume=False, nosave=False, noval=False, noautoanchor=False, noplots=False, evolve=None, bucket=, cache=None, image_weights=False, device=, multi_scale=False, single_cls=False, optimizer=SGD, sync_bn=False, workers=8, project=runs/train, name=exp, exist_ok=False, quad=False, cos_lr=False, label_smoothing=0.0, patience=100, freeze=[0], save_period=-1, seed=0, local_rank=-1, entity=None, upload_dataset=False, bbox_interval=-1, artifact_alias=latest
[34m[1mgithub: [0mup to date with https://github.com/ultralytics/yolov5 ✅
YOLOv5 🚀 v7.0-108-g4db6757 Python-3.8.10 torch-1.13.1+cu116 CUDA:0 (Tesla T4, 15110MiB)

[34m[1mhyperparameters: [0mlr0=0.01, lrf=0.01, momentum=0.937, weight_decay=0.0005, warmup_epochs=3.0, warmup_momentum=0.8, warmup_bias_lr=0.1, box=0.05, cls=0.5, cls_pw=1.0, obj=1.0, obj_pw=1.0, iou_t=0.2, anch

In [11]:
!python detect.py --weights runs/train/exp/weights/best.pt --img 640 --conf 0.3 --source /content/data/images/training --save-txt --exist-ok --save-conf --project /content/output --name predictions_train


[34m[1mdetect: [0mweights=['runs/train/exp/weights/best.pt'], source=/content/data/images/training, data=data/coco128.yaml, imgsz=[640, 640], conf_thres=0.3, iou_thres=0.45, max_det=1000, device=, view_img=False, save_txt=True, save_conf=True, save_crop=False, nosave=False, classes=None, agnostic_nms=False, augment=False, visualize=False, update=False, project=/content/output, name=predictions_train, exist_ok=True, line_thickness=3, hide_labels=False, hide_conf=False, half=False, dnn=False, vid_stride=1
YOLOv5 🚀 v7.0-108-g4db6757 Python-3.8.10 torch-1.13.1+cu116 CUDA:0 (Tesla T4, 15110MiB)

Fusing layers... 
Model summary: 157 layers, 7012822 parameters, 0 gradients, 15.8 GFLOPs
image 1/450 /content/data/images/training/image_0.png: 640x640 1 lymphocyte, 12.6ms
image 2/450 /content/data/images/training/image_1.png: 640x640 2 lymphocytes, 12.6ms
image 3/450 /content/data/images/training/image_10.png: 640x640 2 lymphocytes, 12.6ms
image 4/450 /content/data/images/training/image_100.pn

In [12]:
!python detect.py --weights runs/train/exp/weights/best.pt --img 640 --conf 0.3 --source /content/data/images/validation --save-txt --exist-ok --save-conf --project /content/output --name predictions_val


[34m[1mdetect: [0mweights=['runs/train/exp/weights/best.pt'], source=/content/data/images/validation, data=data/coco128.yaml, imgsz=[640, 640], conf_thres=0.3, iou_thres=0.45, max_det=1000, device=, view_img=False, save_txt=True, save_conf=True, save_crop=False, nosave=False, classes=None, agnostic_nms=False, augment=False, visualize=False, update=False, project=/content/output, name=predictions_val, exist_ok=True, line_thickness=3, hide_labels=False, hide_conf=False, half=False, dnn=False, vid_stride=1
YOLOv5 🚀 v7.0-108-g4db6757 Python-3.8.10 torch-1.13.1+cu116 CUDA:0 (Tesla T4, 15110MiB)

Fusing layers... 
Model summary: 157 layers, 7012822 parameters, 0 gradients, 15.8 GFLOPs
image 1/50 /content/data/images/validation/image_104.png: 640x640 (no detections), 12.6ms
image 2/50 /content/data/images/validation/image_131.png: 640x640 1 lymphocyte, 12.6ms
image 3/50 /content/data/images/validation/image_135.png: 640x640 1 lymphocyte, 12.6ms
image 4/50 /content/data/images/validation/im

In [13]:
import os
import csv

label_folder = "/content/output/predictions_val/labels"
image_folder = "/content/data/images/validation"
csv_file = "/content/output/predictions_val/label_counts.csv"


def count_label_lines(label_folder, csv_file, image_folder=None):
    """Write a CSV with the number of detections stored in every label file.

    Parameters:
        label_folder: folder holding one '.txt' file per image, one detection per line.
        csv_file: CSV file to create, with the columns "File Name" and "Line Count".
        image_folder: optional folder with the images that were processed. Every file
            in it gets a row, with count 0 when no label file of the same name exists;
            without it, images that produced no label file are missing from the CSV.

    Returns:
        The data rows as a list of (file name without extension, count) tuples,
        sorted by file name.
    """
    counts = {}
    if image_folder is not None:
        for name in os.listdir(image_folder):
            if os.path.isfile(os.path.join(image_folder, name)):
                counts[os.path.splitext(name)[0]] = 0

    for name in os.listdir(label_folder):
        if name.endswith(".txt"):
            with open(os.path.join(label_folder, name), "r") as file:
                # Count non-blank lines, so a last line without a trailing newline
                # is counted as well.
                counts[os.path.splitext(name)[0]] = sum(1 for line in file if line.strip())

    # os.listdir order is arbitrary; sorting gives a stable CSV.
    rows = sorted(counts.items())
    with open(csv_file, 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(["File Name", "Line Count"])
        writer.writerows(rows)
    return rows


if os.path.isdir(label_folder):
    count_label_lines(
        label_folder,
        csv_file,
        image_folder if os.path.isdir(image_folder) else None,
    )
else:
    print(label_folder + " not found")
