
Step 1: Mount Google Drive


In [None]:
from google.colab import drive
import os

# Attach Google Drive to the Colab filesystem, remounting even if already attached
drive.mount('/content/drive/', force_remount=True)

# Root folder of the dataset on Drive; list it as a quick sanity check
dataset_dir = '/content/drive/My Drive/archive'
print("Drive contents:", os.listdir(dataset_dir))

# Dataset layout under the root: images/{train,val}, labels/{train,val}, data.yaml
train_img_dir = os.path.join(dataset_dir, 'images', 'train')
val_img_dir = os.path.join(dataset_dir, 'images', 'val')
train_label_dir = os.path.join(dataset_dir, 'labels', 'train')
val_label_dir = os.path.join(dataset_dir, 'labels', 'val')
dataset_yaml_path = os.path.join(dataset_dir, 'data.yaml')

# Location of the YOLOv5 checkout on Drive
yolov5_path = '/content/drive/MyDrive/yolov5'


Mounted at /content/drive/
Drive contents: ['data.yaml', 'humans', 'images', 'labels', 'face_detection_training.ipynb']


Step 2: Verify Dataset Integrity

In [None]:
# Function to verify dataset integrity
def verify_dataset(image_dir, label_dir):
    """Print whether every image in ``image_dir`` has a label file in ``label_dir``.

    An image and a label are considered a pair when their filenames are equal
    once the final extension is removed (``frame.001.jpg`` <-> ``frame.001.txt``).

    Args:
        image_dir: Directory containing ``.jpg`` / ``.jpeg`` / ``.png`` images.
        label_dir: Directory containing ``.txt`` label files.

    Returns:
        None. The result is reported via ``print``.

    Raises:
        OSError: If either directory cannot be listed.
    """
    image_exts = ('.jpg', '.jpeg', '.png')
    # os.path.splitext strips only the last extension. Splitting on the first
    # '.' would collapse "frame.001.jpg" and "frame.002.jpg" into the same
    # stem, under-counting files and hiding missing labels.
    image_files = {
        os.path.splitext(f)[0]
        for f in os.listdir(image_dir)
        if f.endswith(image_exts)
    }
    label_files = {
        os.path.splitext(f)[0]
        for f in os.listdir(label_dir)
        if f.endswith('.txt')
    }
    missing_labels = image_files - label_files
    if missing_labels:
        print(f"❌ Missing labels for images: {missing_labels}")
    else:
        print("✅ All images have corresponding labels.")
    print(f"Total images: {len(image_files)}, Total labels: {len(label_files)}")

# Run the integrity check once per split (training first, then validation)
for split_name, split_img_dir, split_label_dir in (
    ("training", train_img_dir, train_label_dir),
    ("validation", val_img_dir, val_label_dir),
):
    print(f"\nVerifying {split_name} dataset...")
    verify_dataset(split_img_dir, split_label_dir)



Verifying training dataset...
✅ All images have corresponding labels.
Total images: 13386, Total labels: 13386

Verifying validation dataset...
✅ All images have corresponding labels.
Total images: 3347, Total labels: 3347


Step 3: Clean Dataset by Removing Unmatched Images

In [None]:
def clean_dataset(image_dir, label_dir):
    """Delete images in ``image_dir`` that have no label file in ``label_dir``.

    An image is kept when ``label_dir`` contains a ``.txt`` file whose name
    matches the image name with its final extension removed. Every other
    ``.jpg`` / ``.jpeg`` / ``.png`` file in ``image_dir`` is removed and the
    removal is reported via ``print``. Files with other extensions are left
    untouched.

    Args:
        image_dir: Directory containing the images to check.
        label_dir: Directory containing ``.txt`` label files.

    Returns:
        None.

    Raises:
        OSError: If a directory cannot be listed or a file cannot be removed.
    """
    image_exts = ('.jpg', '.jpeg', '.png')
    # splitext strips only the last extension, so dotted names such as
    # "frame.001.txt" keep their full stem.
    label_stems = {
        os.path.splitext(f)[0]
        for f in os.listdir(label_dir)
        if f.endswith('.txt')
    }
    # Iterate over the real filenames so the file that was actually found is
    # the one removed, whatever its extension. Rebuilding the path as
    # stem + ".jpg" would silently skip unmatched .jpeg and .png images.
    for filename in sorted(os.listdir(image_dir)):
        if not filename.endswith(image_exts):
            continue
        if os.path.splitext(filename)[0] in label_stems:
            continue
        img_path = os.path.join(image_dir, filename)
        if os.path.isfile(img_path):
            os.remove(img_path)
            print(f"Removed unmatched image: {img_path}")

# Remove unlabeled images from each split (training first, then validation)
for split_name, split_img_dir, split_label_dir in (
    ("training", train_img_dir, train_label_dir),
    ("validation", val_img_dir, val_label_dir),
):
    print(f"\nCleaning {split_name} dataset...")
    clean_dataset(split_img_dir, split_label_dir)



Cleaning training dataset...

Cleaning validation dataset...




Step 4: Clone YOLOv5 Repository and Install Dependencies

In [None]:
# Clone YOLOv5 if not already cloned
if not os.path.exists(yolov5_path):
    print("\nCloning YOLOv5 repository...")
    !git clone https://github.com/ultralytics/yolov5 {yolov5_path}
else:
    print("\n✅ YOLOv5 repository already exists.")

# Install YOLOv5 requirements
%cd {yolov5_path}
!pip install -r requirements.txt


✅ YOLOv5 repository already exists.
/content/drive/MyDrive/yolov5
Collecting thop>=0.1.1 (from -r requirements.txt (line 14))
  Downloading thop-0.1.1.post2209072238-py3-none-any.whl.metadata (2.7 kB)
Collecting ultralytics>=8.2.34 (from -r requirements.txt (line 18))
  Downloading ultralytics-8.3.47-py3-none-any.whl.metadata (35 kB)
Collecting ultralytics-thop>=2.0.0 (from ultralytics>=8.2.34->-r requirements.txt (line 18))
  Downloading ultralytics_thop-2.0.12-py3-none-any.whl.metadata (9.4 kB)
Downloading thop-0.1.1.post2209072238-py3-none-any.whl (15 kB)
Downloading ultralytics-8.3.47-py3-none-any.whl (898 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m898.8/898.8 kB[0m [31m56.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading ultralytics_thop-2.0.12-py3-none-any.whl (26 kB)
Installing collected packages: ultralytics-thop, thop, ultralytics
Successfully installed thop-0.1.1.post2209072238 ultralytics-8.3.47 ultralytics-thop-2.0.12


Step 5: Train YOLOv5 Model

In [None]:
import torch
# Use the first CUDA device when PyTorch reports one, otherwise fall back to CPU.
device = "cuda:0" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")

# Launch train.py from the current working directory.
# The run passes the yolov5s model definition via --cfg together with an empty
# --weights value, which the message below describes as training from scratch.
# Output is requested under --project "/content/drive/My Drive/training" with
# --name "face".
# NOTE(review): the validation cell reads weights from ".../training/face2";
# presumably train.py chose a new run directory because "face" already existed.
# Confirm which run directory holds the intended weights.
print("\n🔍 Starting YOLOv5 training from scratch...")
!python train.py \
    --img 640 \
    --batch-size 32 \
    --epochs 50 \
    --data "/content/drive/My Drive/archive/data.yaml" \
    --cfg "./models/yolov5s.yaml" \
    --weights "" \
    --project "/content/drive/My Drive/training" \
    --name "face" \
    --device {device}
# NOTE(review): this message is printed unconditionally; the exit status of
# train.py is not checked, so it also appears when training fails.
print("\n✅ Training completed. Check the saved weights in the project directory.")

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  with torch.cuda.amp.autocast(amp):
  with torch.cuda.amp.autocast(amp):
  with torch.cuda.amp.autocast(amp):
  with torch.cuda.amp.autocast(amp):
  with torch.cuda.amp.autocast(amp):
  with torch.cuda.amp.autocast(amp):
  with torch.cuda.amp.autocast(amp):
  with torch.cuda.amp.autocast(amp):
  with torch.cuda.amp.autocast(amp):
  with torch.cuda.amp.autocast(amp):
  with torch.cuda.amp.autocast(amp):
  with torch.cuda.amp.autocast(amp):
  with torch.cuda.amp.autocast(amp):
  with torch.cuda.amp.autocast(amp):
  with torch.cuda.amp.autocast(amp):
  with torch.cuda.amp.autocast(amp):
  with torch.cuda.amp.autocast(amp):
  with torch.cuda.amp.autocast(amp):
  with torch.cuda.amp.autocast(amp):
  with torch.cuda.amp.autocast(amp):
  with torch.cuda.amp.autocast(amp):
  with torch.cuda.amp.autocast(amp):
  with torch.cuda.amp.autocast(amp):
  with torch.cuda.amp.autocast(amp):
  with torch.cuda.amp.autocast(amp):
  with tor

Step 6: Validate Model Performance

In [None]:
import os
import shutil
import subprocess
import sys

# Paths for the original best weights and the renamed weights
original_best_path = '/content/drive/My Drive/training/face2/weights/best.pt'
trained_weights_path = '/content/drive/My Drive/training/face2/weights/trained.pt'

# Path to the dataset YAML file
dataset_yaml_path = '/content/drive/My Drive/archive/data.yaml'

# Step 1: Rename `best.pt` to `trained.pt`.
# `renamed_weights_path` ends up as the usable weights path, or None when no
# weights are available.
if os.path.exists(original_best_path):
    shutil.move(original_best_path, trained_weights_path)
    renamed_weights_path = trained_weights_path
    print(f"\n✅ Successfully renamed weights to: {renamed_weights_path}")
elif os.path.exists(trained_weights_path):
    # The cell was run before and best.pt has already been moved; reuse the
    # renamed file so re-running the cell still validates.
    renamed_weights_path = trained_weights_path
    print(f"\n✅ Weights were already renamed to: {renamed_weights_path}")
else:
    print(f"\n❌ Best weights not found at: {original_best_path}")
    renamed_weights_path = None

# Step 2: Validate the model using the renamed weights
if renamed_weights_path and os.path.exists(renamed_weights_path):
    print(f"\n✅ Custom weights found at: {renamed_weights_path}")
    print("\n🔍 Running validation...")

    # Pass the arguments as a list, with no shell involved, so paths containing
    # spaces need no quoting and there are no line continuations to get wrong.
    # val.py is resolved relative to the current working directory, exactly as
    # with the previous shell command.
    validation_command = [
        sys.executable, "val.py",
        "--weights", renamed_weights_path,  # Use custom weights
        "--data", dataset_yaml_path,        # Dataset configuration
        "--img", "640",                     # Image size
    ]

    # Stream the child's combined stdout/stderr through print() so the
    # validation log shows up in the notebook output.
    with subprocess.Popen(
        validation_command,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        text=True,
    ) as process:
        for line in process.stdout:
            print(line, end="")

    # Leaving the `with` block waits for the process, so returncode is set.
    if process.returncode == 0:
        print("\n✅ Validation completed successfully.")
    else:
        print(f"\n❌ Validation failed with exit code {process.returncode}.")
else:
    print(f"\n❌ Custom weights not found at: {trained_weights_path}")


✅ Successfully renamed weights to: /content/drive/My Drive/training/face2/weights/trained.pt

✅ Custom weights found at: /content/drive/My Drive/training/face2/weights/trained.pt

🔍 Running validation...

✅ Validation completed successfully.
