# Training YOLOv3-tiny on Custom Dataset

This notebook will guide you through the process of training a YOLOv3-tiny model on your custom dataset.

## 1. Setup and Prerequisites

First, we'll use the provided setup script to compile Darknet. We need to ensure we're in the correct directory:

In [None]:
import os
import subprocess

# Locate the project root (the parent of the directory the notebook runs in)
# without changing the kernel's working directory, so a failure part-way
# through cannot leave later cells running from the wrong folder.
setup_root = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
print(f"Project root directory: {setup_root}")

# Check if darknet directory exists
if not os.path.exists(os.path.join(setup_root, 'darknet')):
    print("Darknet directory not found. Setting up Darknet...")
    setup_script = os.path.join(setup_root, 'setup_darknet.sh')
    # Make the script executable; check=True surfaces a missing script immediately
    subprocess.run(['chmod', '+x', setup_script], check=True)
    # Run the setup script from the project root; check=True raises
    # CalledProcessError if the script exits with a non-zero status instead of
    # silently continuing with a broken build.
    subprocess.run([setup_script], cwd=setup_root, check=True)
else:
    print("Darknet directory already exists. Skipping setup.")

# The working directory was never changed, so there is nothing to restore
print(f"Current working directory: {os.getcwd()}")

In [2]:
# Root of the project checkout. Set the PLANT_ID_PROJECT_ROOT environment
# variable to point the notebook at a different checkout; when it is unset the
# original hardcoded location is used.
PROJECT_ROOT = os.environ.get(
    "PLANT_ID_PROJECT_ROOT",
    "/home/akash/My_Projects/Optical-Plant-Identification-for-Farming-Vehicles-",
)
# Darknet checkout and processed dataset, both resolved relative to the root
DARKNET_PATH = os.path.join(PROJECT_ROOT, "darknet")
DATA_PATH = os.path.join(PROJECT_ROOT, "data", "processed_data")

## 2. Prepare Training and Validation Lists

Create two list files in `darknet/data`: `augmented_train.txt` and `val.txt`.

In [None]:
def create_yolo_list_file(set_type, data_path=None, darknet_path=None):
    """Write `<darknet_path>/data/<set_type>.txt` listing every labelled image.

    Each output line is `<absolute image path> <absolute label path>`, one line
    per image in `<data_path>/<set_type>/images` that has a same-named `.txt`
    file in `<data_path>/<set_type>/labels`. Images without a label file are
    skipped with a printed warning.

    Args:
        set_type: Name of the dataset split sub-folder (e.g. "val").
        data_path: Dataset root; defaults to the notebook-level DATA_PATH.
        darknet_path: Darknet root; defaults to the notebook-level DARKNET_PATH.

    Raises:
        OSError: If the image folder cannot be listed or the output file
            cannot be opened for writing.
    """
    if data_path is None:
        data_path = DATA_PATH
    if darknet_path is None:
        darknet_path = DARKNET_PATH

    image_dir = os.path.join(data_path, set_type, "images")
    label_dir = os.path.join(data_path, set_type, "labels")
    output_file = os.path.join(darknet_path, "data", f"{set_type}.txt")

    # List the images before opening the output so that a missing image folder
    # does not truncate an existing list file; sort so that the generated file
    # is identical from run to run (os.listdir order is arbitrary).
    image_names = sorted(os.listdir(image_dir))

    # NOTE(review): every line holds the image path AND the label path, which
    # is the format the checker cells in this notebook parse. Darknet list
    # files conventionally hold one image path per line -- confirm that the
    # trainer in use accepts this two-column format.
    with open(output_file, "w") as f:
        for image in image_names:
            if image.lower().endswith(('.png', '.jpg', '.jpeg')):
                image_path = os.path.abspath(os.path.join(image_dir, image))
                label_path = os.path.abspath(os.path.join(label_dir, os.path.splitext(image)[0] + '.txt'))

                if os.path.exists(label_path):
                    f.write(f"{image_path} {label_path}\n")
                else:
                    print(f"Warning: No label file for {image}")

    print(f"Created {output_file}")

# Write darknet/data/augmented_train.txt and darknet/data/val.txt
for split_name in ("augmented_train", "val"):
    create_yolo_list_file(split_name)

# Show the class names stored next to the training labels, when present
classes_file = os.path.join(DATA_PATH, 'augmented_train', 'labels', 'classes.txt')
if not os.path.exists(classes_file):
    print("Warning: classes.txt file not found in the expected location.")
else:
    with open(classes_file, 'r') as names_fh:
        classes = names_fh.read().splitlines()
    print(f"Classes found: {classes}")

print("Data file creation completed. Please verify the contents of the created files.")

## 3. Prepare Configuration Files

We need to create three files: `obj.names`, `obj.data`, and `yolov3-tiny-obj.cfg` (a copy of `yolov3-tiny.cfg` that we then edit).

In [4]:
# Create obj.names (one class name per line; this project has a single class)
with open(os.path.join(DARKNET_PATH, "data", "obj.names"), "w") as f:
    f.write("rumex_acetosa\n")

# Create obj.data. The paths are relative, so they resolve against the
# directory the darknet binary is launched from.
obj_data_content = """classes = 1
train = data/augmented_train.txt
valid = data/val.txt
names = data/obj.names
backup = backup/
"""
with open(os.path.join(DARKNET_PATH, "data", "obj.data"), "w") as f:
    f.write(obj_data_content)

# obj.data points the checkpoints at backup/, so make sure that folder exists.
# os.mkdir (not makedirs) is used on purpose: it will not conjure up a missing
# darknet directory, and it runs only after the writes above have succeeded.
backup_dir = os.path.join(DARKNET_PATH, "backup")
if not os.path.isdir(backup_dir):
    os.mkdir(backup_dir)

In [None]:
# Copy and print yolov3-tiny.cfg
!cp ../darknet/cfg/yolov3-tiny.cfg ../darknet/cfg/yolov3-tiny-obj.cfg

print("Original configuration:")
!head -n 20 ../darknet/cfg/yolov3-tiny-obj.cfg

In [None]:
cfg_path = os.path.join(DARKNET_PATH, "cfg", "yolov3-tiny-obj.cfg")

# Modify yolov3-tiny-obj.cfg
!sed -i 's/# Training/Training/' {cfg_path}
!sed -i 's/batch=1/# batch=1/' {cfg_path}
!sed -i 's/subdivisions=1/# subdivisions=1/' {cfg_path}
!sed -i 's/# batch=64/batch=32/' {cfg_path}
!sed -i 's/# subdivisions=2/subdivisions=32/' {cfg_path}
!sed -i 's/width=416/width=416/' {cfg_path}
!sed -i 's/height=416/height=416/' {cfg_path}
!sed -i 's/max_batches = 500200/max_batches = 2000/' {cfg_path}
!sed -i 's/learning_rate=0.001/learning_rate=0.001/' {cfg_path}
!sed -i '/steps=/ c\steps=1600,1800' {cfg_path}
!sed -i 's/classes=80/classes=1/g' {cfg_path}
!sed -i 's/filters=255/filters=18/g' {cfg_path}

print("Updated configuration:")
!head -n 20 {cfg_path}

print("\nYOLO layer changes:")
!grep -A 3 "\[yolo\]" {cfg_path}

## 4. Download Pre-trained Weights

In [None]:
import os
import urllib.request

# Define the weights file path
weights_path = os.path.join(DARKNET_PATH, 'yolov3-tiny.weights')

# Check if weights file exists
if not os.path.exists(weights_path):
    print("Downloading YOLOv3-tiny weights...")
    weights_url = 'https://pjreddie.com/media/files/yolov3-tiny.weights'
    # Download to a temporary name and rename only once the transfer has
    # finished. Otherwise an interrupted download would leave a truncated file
    # at weights_path, and the exists-check above would skip the download on
    # every later run.
    partial_path = weights_path + '.part'
    try:
        urllib.request.urlretrieve(weights_url, partial_path)
        os.replace(partial_path, weights_path)
    finally:
        # After a successful rename the partial file is gone; after a failure
        # or interrupt this removes the leftover fragment.
        if os.path.exists(partial_path):
            os.remove(partial_path)
    print("Weights downloaded successfully.")
else:
    print("YOLOv3-tiny weights file already exists. Skipping download.")

## 5. Start Training

In [None]:
# Start training
!cd ../darknet && ./darknet detector train data/obj.data cfg/yolov3-tiny-obj.cfg yolov3-tiny.weights -dont_show -map

## 6. Test the Trained Model

Once training is complete, you can test your model on new images:

In [None]:
!cd ../darknet && ./darknet detector test data/obj.data cfg/yolov3-tiny-obj.cfg backup/yolov3-tiny-obj_final.weights path_to_test_image.jpg

In [None]:
import random
import cv2

def check_random_images(data_file, num_images=5):
    """Try to load a random handful of images named in a list file.

    Every line of `data_file` must hold exactly two whitespace-separated
    fields; the first is treated as the image path. Up to `num_images` lines
    are picked at random and, for each, a success message with the image shape
    or a failure message is printed.
    """
    with open(data_file, 'r') as listing:
        entries = listing.readlines()

    how_many = min(num_images, len(entries))

    for entry in random.sample(entries, how_many):
        image_path, _label_path = entry.strip().split()
        loaded = cv2.imread(image_path)
        if loaded is not None:
            print(f"Successfully loaded image: {image_path}, Shape: {loaded.shape}")
            continue
        print(f"Failed to load image: {image_path}")

# Spot-check image loading for the training list, then the validation list
for heading, list_name in (
    ("Checking random training images:", "augmented_train.txt"),
    ("\nChecking random validation images:", "val.txt"),
):
    print(heading)
    check_random_images(os.path.join(DARKNET_PATH, "data", list_name))

In [None]:
import os

def check_file_paths(list_file):
    """Report every image or label path in a list file that does not exist.

    Every line of `list_file` must hold exactly two whitespace-separated
    fields: an image path followed by a label path. A message is printed for
    each path that is missing on disk; nothing is printed for paths that exist.
    """
    with open(list_file, 'r') as listing:
        entries = listing.readlines()

    for entry in entries:
        image_path, label_path = entry.strip().split()
        image_found = os.path.exists(image_path)
        if not image_found:
            print(f"Image not found: {image_path}")
        label_found = os.path.exists(label_path)
        if not label_found:
            print(f"Label not found: {label_path}")

# Verify that every listed path exists, first for training, then validation
for heading, list_name in (
    ("Checking training file paths:", "augmented_train.txt"),
    ("\nChecking validation file paths:", "val.txt"),
):
    print(heading)
    check_file_paths(os.path.join(DARKNET_PATH, "data", list_name))

In [None]:
def print_file_head(file_path, num_lines=5):
    """Print the first `num_lines` lines of a text file, whitespace-stripped."""
    with open(file_path, 'r') as handle:
        for index, row in enumerate(handle):
            # Stop as soon as the requested number of lines has been shown
            if index >= num_lines:
                break
            print(row.strip())

# Preview the beginning of both generated list files
for heading, list_name in (
    ("First 5 lines of training list file:", "augmented_train.txt"),
    ("\nFirst 5 lines of validation list file:", "val.txt"),
):
    print(heading)
    print_file_head(os.path.join(DARKNET_PATH, "data", list_name))

In [None]:
# Dump obj.data so its entries can be checked by eye
obj_data_path = os.path.join(DARKNET_PATH, "data", "obj.data")
print("Content of obj.data file:")
with open(obj_data_path, 'r') as obj_data_fh:
    obj_data_text = obj_data_fh.read()
print(obj_data_text)

In [None]:
def print_yolo_layers(config_file):
    """Print each `[yolo]` section header of a cfg file with the lines after it.

    For every line whose stripped text starts with `[yolo]`, prints its
    1-based line number, then that line and up to nine following lines
    (whitespace-stripped), then an empty line.
    """
    with open(config_file, 'r') as cfg:
        rows = cfg.readlines()

    for index, row in enumerate(rows):
        if not row.strip().startswith('[yolo]'):
            continue
        print(f"YOLO layer found at line {index + 1}:")
        # Slicing clamps at the end of the file, so short tails are fine
        for shown in rows[index:index + 10]:
            print(shown.strip())
        print()

# Show the [yolo] sections of the edited config
yolo_cfg_file = os.path.join(DARKNET_PATH, "cfg", "yolov3-tiny-obj.cfg")
print("YOLO layers in the configuration file:")
print_yolo_layers(yolo_cfg_file)

In [None]:
import random

def check_random_labels(data_file, num_labels=5):
    """Print the contents of a random handful of label files from a list file.

    Every line of `data_file` must hold exactly two whitespace-separated
    fields; the second is treated as the label path. Up to `num_labels` lines
    are picked at random and each label file is printed in full, followed by
    an empty line.
    """
    with open(data_file, 'r') as listing:
        entries = listing.readlines()

    how_many = min(num_labels, len(entries))

    for entry in random.sample(entries, how_many):
        _image_path, label_path = entry.strip().split()
        print(f"Content of {label_path}:")
        with open(label_path, 'r') as label_fh:
            label_text = label_fh.read()
        print(label_text)
        print()

# Print a few label files from the training list, then the validation list
for heading, list_name in (
    ("Checking random training labels:", "augmented_train.txt"),
    ("\nChecking random validation labels:", "val.txt"),
):
    print(heading)
    check_random_labels(os.path.join(DARKNET_PATH, "data", list_name))

In [None]:
# Dump obj.names so the class list can be checked by eye
obj_names_path = os.path.join(DARKNET_PATH, "data", "obj.names")
print("Content of obj.names file:")
with open(obj_names_path, 'r') as obj_names_fh:
    obj_names_text = obj_names_fh.read()
print(obj_names_text)

In [None]:
import cv2

def check_random_image_dimensions(data_file, num_images=5):
    """Print the shape and dtype of a random handful of images from a list file.

    Every line of `data_file` must hold exactly two whitespace-separated
    fields; the first is treated as the image path. Up to `num_images` lines
    are picked at random. An image that cannot be loaded is reported with a
    message instead of aborting the remaining checks.
    """
    with open(data_file, 'r') as f:
        lines = f.readlines()

    sample = random.sample(lines, min(num_images, len(lines)))

    for line in sample:
        image_path, _ = line.strip().split()
        img = cv2.imread(image_path)
        print(f"Image: {image_path}")
        if img is None:
            # cv2.imread signals an unreadable or missing file by returning
            # None; accessing .shape on it would raise AttributeError.
            print("Failed to load image")
            print()
            continue
        print(f"Dimensions: {img.shape}")
        print(f"Data type: {img.dtype}")
        print()

# Report image sizes for the training list, then the validation list
for heading, list_name in (
    ("Checking random training image dimensions:", "augmented_train.txt"),
    ("\nChecking random validation image dimensions:", "val.txt"),
):
    print(heading)
    check_random_image_dimensions(os.path.join(DARKNET_PATH, "data", list_name))