In [1]:
import os
import glob
import shutil
from sklearn.model_selection import train_test_split
from tqdm import tqdm # For a nice progress bar

# --- 1. Define Paths ---
# Source images/labels from the Kaggle input dataset, plus the folder where the
# split copy of the dataset is written.
image_dir = "/kaggle/input/acne04-yolov8/Images"
label_dir = "/kaggle/input/acne04-yolov8/labels/content/labels"
output_dir = "/kaggle/working/acne_dataset"

# Split fractions: 70% train / 20% validation / 10% test
train_ratio, val_ratio, test_ratio = 0.7, 0.2, 0.1

# --- 2. Find and Match Files ---
print("Starting to find and match image/label pairs...")

# Supported image extensions (the comparison below is case-insensitive)
extensions = ['.jpg', '.jpeg', '.png']

# Parallel lists: image_paths[i] is annotated by label_paths[i]
image_paths, label_paths = [], []

# Sorted so that pairs are collected in a stable order.
for img_path in sorted(glob.glob(os.path.join(image_dir, "*.*"))):
    if img_path.lower().endswith(tuple(extensions)):
        # The label is expected in label_dir under the image's base name + ".txt"
        base_name, _ = os.path.splitext(os.path.basename(img_path))
        label_path = os.path.join(label_dir, base_name + ".txt")

        if not os.path.exists(label_path):
            # Unlabelled images are reported and left out of the dataset
            print(f"Warning: Missing label for {img_path}")
        else:
            image_paths.append(img_path)
            label_paths.append(label_path)

print(f"Found {len(image_paths)} matching image/label pairs.")
if len(image_paths) == 0:
    print("Error: No matching pairs found. Check your paths and file names.")
else:
    # The two-stage split below only yields the intended proportions when the
    # three ratios add up to 1, so fail early on a misconfiguration.
    ratio_sum = train_ratio + val_ratio + test_ratio
    if abs(ratio_sum - 1.0) > 1e-9:
        raise ValueError(
            f"train/val/test ratios must sum to 1.0, got {ratio_sum}"
        )

    # --- 3. Create New Directory Structure ---
    sets = ['train', 'val', 'test']
    for s in sets:
        for kind in ('images', 'labels'):
            split_dir = os.path.join(output_dir, kind, s)
            # Start from an empty folder: files left over from an earlier run
            # (different ratios, seed or data) would otherwise stay in their old
            # split and leak samples between train/val/test.
            shutil.rmtree(split_dir, ignore_errors=True)
            os.makedirs(split_dir, exist_ok=True)
    print(f"Created directory structure at {output_dir}")

    # --- 4. Split the Data ---
    # First split: (train + val) and (test)
    train_val_img, test_img, train_val_lbl, test_lbl = train_test_split(
        image_paths, label_paths, test_size=test_ratio, random_state=42
    )

    # Second split: (train) and (val) from the (train + val) set
    # Note: We calculate the validation size relative to the train_val set
    val_size_relative = val_ratio / (train_ratio + val_ratio)
    train_img, val_img, train_lbl, val_lbl = train_test_split(
        train_val_img, train_val_lbl, test_size=val_size_relative, random_state=42
    )

    # Sanity check: every pair landed in exactly one split
    assert len(train_img) + len(val_img) + len(test_img) == len(image_paths)
    assert not (set(train_img) & set(val_img))
    assert not (set(train_img) & set(test_img))
    assert not (set(val_img) & set(test_img))

    print(f"Total pairs: {len(image_paths)}")
    print(f"Training set: {len(train_img)} pairs")
    print(f"Validation set: {len(val_img)} pairs")
    print(f"Test set: {len(test_img)} pairs")

    # --- 5. Copy Files to New Structure ---
    def copy_files(image_list, label_list, set_name):
        """Copy one split's image/label pairs into the output dataset.

        Args:
            image_list: Paths of the image files to copy.
            label_list: Paths of the label files, in the same order as
                ``image_list``.
            set_name: Name of the split ('train', 'val' or 'test'); selects the
                ``images/<set_name>`` and ``labels/<set_name>`` folders under
                ``output_dir``.
        """
        img_dest = os.path.join(output_dir, 'images', set_name)
        lbl_dest = os.path.join(output_dir, 'labels', set_name)

        print(f"\nCopying {set_name} files...")
        for img_path, lbl_path in tqdm(zip(image_list, label_list), total=len(image_list)):
            shutil.copy(img_path, img_dest)
            shutil.copy(lbl_path, lbl_dest)

    # Copy files for each set
    copy_files(train_img, train_lbl, 'train')
    copy_files(val_img, val_lbl, 'val')
    copy_files(test_img, test_lbl, 'test')

    print("\nData splitting and copying complete!")

Starting to find and match image/label pairs...
Found 1457 matching image/label pairs.
Created directory structure at /kaggle/working/acne_dataset
Total pairs: 1457
Training set: 1019 pairs
Validation set: 292 pairs
Test set: 146 pairs

Copying train files...


100%|██████████| 1019/1019 [00:30<00:00, 33.79it/s]



Copying val files...


100%|██████████| 292/292 [00:09<00:00, 31.31it/s]



Copying test files...


100%|██████████| 146/146 [00:05<00:00, 28.84it/s]


Data splitting and copying complete!





In [2]:
# --- 6. Create the data.yaml file ---

# PLEASE EDIT THIS LIST
# The position of a name in this list is the class index written to data.yaml.
class_names = [
    'pimple'
] # e.g., ['pimple', 'blackhead', 'whitehead']

# Build the "names:" mapping with the SAME indentation on every entry.
# (Indenting only the first entry differently produces invalid YAML as soon as
# there is more than one class.) "\n" is used because the file is written in
# text mode, which already translates newlines for the platform.
names_block = "\n".join(f"  {i}: {name}" for i, name in enumerate(class_names))

yaml_content = f"""
path: {output_dir}  # dataset root directory
train: images/train  # train images (relative to 'path')
val: images/val      # val images (relative to 'path')
test: images/test    # test images (relative to 'path')

# Classes
names:
{names_block}
"""

yaml_path = os.path.join(output_dir, 'data.yaml')
with open(yaml_path, 'w') as f:
    f.write(yaml_content)

print(f"Created data.yaml at {yaml_path}")
print("\n--- PLEASE READ ---")
print(f"Classes written to the 'names' section: {class_names}")
print("If these are not your actual class names, edit `class_names` above and re-run this cell.")

Created data.yaml at /kaggle/working/acne_dataset/data.yaml

--- PLEASE READ ---
Before you train, you MUST edit the 'names' section in /kaggle/working/acne_dataset/data.yaml
Replace 'class_0_name', etc., with your actual class names.
You can edit it using the Kaggle file editor in the right-hand sidebar.


In [3]:
# Install the ultralytics library
!pip install ultralytics -q

from ultralytics import YOLO

# --- 7. Train the Model ---

# Load a pretrained model (e.g., yolov8n.pt for small, yolov8m.pt for medium)
model = YOLO('yolov8s.pt') 

# Define the path to your newly created YAML file
data_yaml_path = '/kaggle/working/acne_dataset/data.yaml'

# Start training!
print("Starting model training...")
results = model.train(
    data=data_yaml_path,
    epochs=150,       # Start with 50-100 epochs, increase later
    imgsz=640,       # Image size (640 is common)
    batch=16,        # Adjust based on your GPU memory
    name='yolov8n_acne_custom' # Name for the results folder
)

print("Training complete!")
print(f"Results saved to: {results.save_dir}")

[2K     [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m62.0/62.0 kB[0m [31m3.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m1.1/1.1 MB[0m [31m21.5 MB/s[0m eta [36m0:00:00[0m00:01[0m
[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m16.8/16.8 MB[0m [31m67.7 MB/s[0m eta [36m0:00:00[0m:00:01[0m00:01[0m
[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m363.4/363.4 MB[0m [31m4.0 MB/s[0m eta [36m0:00:00[0m:00:01[0m00:01[0m
[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m13.8/13.8 MB[0

  xa[xa < 0] = -1
  xa[xa < 0] = -1


                   all        292       3874      0.437      0.398      0.326     0.0855
Speed: 0.2ms preprocess, 5.0ms inference, 0.0ms loss, 2.3ms postprocess per image
Results saved to [1m/kaggle/working/runs/detect/yolov8n_acne_custom[0m
Training complete!
Results saved to: /kaggle/working/runs/detect/yolov8n_acne_custom


In [4]:
import shutil
from IPython.display import FileLink

# Define paths
image_dir = '/kaggle/working/acne_dataset/images/test'
label_dir = '/kaggle/working/acne_dataset/labels/test'
zip_path = '/kaggle/working/acne_test.zip'

# Temporary staging folder that holds images/ and labels/ side by side
combined_dir = '/kaggle/working/acne_test_combined'

# Remove any staging folder left by a previous run: shutil.copytree raises
# FileExistsError when its destination already exists, which made this cell
# fail the second time it was executed.
shutil.rmtree(combined_dir, ignore_errors=True)

# Copy both folders into one combined directory
shutil.copytree(image_dir, f'{combined_dir}/images')
shutil.copytree(label_dir, f'{combined_dir}/labels')

# Zip the combined directory (make_archive appends ".zip" to the base name,
# so the result is written to zip_path)
shutil.make_archive('/kaggle/working/acne_test', 'zip', combined_dir)

# Provide download link
FileLink(zip_path)


In [5]:
import shutil
from IPython.display import FileLink

# Define paths
image_dir = '/kaggle/working/acne_dataset/images/train'
label_dir = '/kaggle/working/acne_dataset/labels/train'
zip_path = '/kaggle/working/acne_train.zip'

# Temporary staging folder that holds images/ and labels/ side by side
combined_dir = '/kaggle/working/acne_train_combined'

# Remove any staging folder left by a previous run: shutil.copytree raises
# FileExistsError when its destination already exists, which made this cell
# fail the second time it was executed.
shutil.rmtree(combined_dir, ignore_errors=True)

# Copy both folders into one combined directory
shutil.copytree(image_dir, f'{combined_dir}/images')
shutil.copytree(label_dir, f'{combined_dir}/labels')

# Zip the combined directory (make_archive appends ".zip" to the base name,
# so the result is written to zip_path)
shutil.make_archive('/kaggle/working/acne_train', 'zip', combined_dir)

# Provide download link
FileLink(zip_path)


In [6]:
import shutil
from IPython.display import FileLink

# Define paths
image_dir = '/kaggle/working/acne_dataset/images/val'
label_dir = '/kaggle/working/acne_dataset/labels/val'
zip_path = '/kaggle/working/acne_valid.zip'

# Temporary staging folder that holds images/ and labels/ side by side
combined_dir = '/kaggle/working/acne_valid_combined'

# Remove any staging folder left by a previous run: shutil.copytree raises
# FileExistsError when its destination already exists, which made this cell
# fail the second time it was executed.
shutil.rmtree(combined_dir, ignore_errors=True)

# Copy both folders into one combined directory
shutil.copytree(image_dir, f'{combined_dir}/images')
shutil.copytree(label_dir, f'{combined_dir}/labels')

# Zip the combined directory (make_archive appends ".zip" to the base name,
# so the result is written to zip_path)
shutil.make_archive('/kaggle/working/acne_valid', 'zip', combined_dir)

# Provide download link
FileLink(zip_path)


In [1]:
from ultralytics import YOLO

# Load your BEST model (weights written by the training run)
model = YOLO('/kaggle/working/runs/detect/yolov8n_acne_custom/weights/best.pt')

# Run prediction on your test images and save the annotated results
results = model.predict(
    source='/kaggle/working/acne_dataset/images/test/',
    save=True
)

# Report the folder that was actually used: Ultralytics auto-increments the
# output directory (predict, predict2, ...) on repeated runs, so a hard-coded
# ".../predict/" path is only correct the first time.
predict_dir = model.predictor.save_dir
print(f"Predictions saved! Check the {predict_dir} folder.")

[2K     [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m62.0/62.0 kB[0m [31m2.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m1.1/1.1 MB[0m [31m23.3 MB/s[0m eta [36m0:00:00[0m00:01[0m
[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m16.8/16.8 MB[0m [31m104.0 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m363.4/363.4 MB[0m [31m4.8 MB/s[0m eta [36m0:00:00[0m:00:01[0m00:01[0m
[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m13.8/13.8 MB[0

ModuleNotFoundError: No module named 'ultralytics'


image 1/146 /kaggle/working/acne_dataset/images/test/levle0_111.jpg: 640x576 1 pimple, 53.9ms
image 2/146 /kaggle/working/acne_dataset/images/test/levle0_124.jpg: 640x576 2 pimples, 15.6ms
image 3/146 /kaggle/working/acne_dataset/images/test/levle0_138.jpg: 640x576 1 pimple, 15.6ms
image 4/146 /kaggle/working/acne_dataset/images/test/levle0_139.jpg: 640x576 1 pimple, 15.6ms
image 5/146 /kaggle/working/acne_dataset/images/test/levle0_143.jpg: 640x576 (no detections), 15.6ms
image 6/146 /kaggle/working/acne_dataset/images/test/levle0_145.jpg: 640x576 2 pimples, 15.6ms
image 7/146 /kaggle/working/acne_dataset/images/test/levle0_152.jpg: 640x576 (no detections), 15.6ms
image 8/146 /kaggle/working/acne_dataset/images/test/levle0_158.jpg: 640x576 (no detections), 15.6ms
image 9/146 /kaggle/working/acne_dataset/images/test/levle0_16.jpg: 640x576 1 pimple, 16.2ms
image 10/146 /kaggle/working/acne_dataset/images/test/levle0_162.jpg: 640x576 (no detections), 15.6ms
image 11/146 /kaggle/working/

In [8]:
import shutil
from IPython.display import FileLink

# Folder holding the run outputs, and the archive it is packed into
folder_path = '/kaggle/working/runs'
zip_path = '/kaggle/working/runs.zip'

# Pack everything under folder_path; make_archive adds the ".zip" suffix to
# the base name itself, so the file ends up at zip_path.
shutil.make_archive(
    base_name='/kaggle/working/runs',
    format='zip',
    root_dir=folder_path,
)

# Render a download link as the cell's output
FileLink(zip_path)