<a href="https://colab.research.google.com/github/dgizdevans/master/blob/main/ai_project/object_detection_yolov8_training.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Remove previous unstable version of ultralytics
!pip uninstall -y ultralytics

[0m

In [None]:
# Install libraries
!pip install ultralytics==8.0.3
!pip install google-cloud-storage

Collecting ultralytics==8.0.3
  Downloading ultralytics-8.0.3-py3-none-any.whl.metadata (23 kB)
Collecting hydra-core>=1.2.0 (from ultralytics==8.0.3)
  Downloading hydra_core-1.3.2-py3-none-any.whl.metadata (5.5 kB)
Collecting thop>=0.1.1 (from ultralytics==8.0.3)
  Downloading thop-0.1.1.post2209072238-py3-none-any.whl.metadata (2.7 kB)
Collecting omegaconf<2.4,>=2.2 (from hydra-core>=1.2.0->ultralytics==8.0.3)
  Downloading omegaconf-2.3.0-py3-none-any.whl.metadata (3.9 kB)
Collecting antlr4-python3-runtime==4.9.* (from hydra-core>=1.2.0->ultralytics==8.0.3)
  Downloading antlr4-python3-runtime-4.9.3.tar.gz (117 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m117.0/117.0 kB[0m [31m14.1 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting jedi>=0.16 (from ipython->ultralytics==8.0.3)
  Downloading jedi-0.19.2-py2.py3-none-any.whl.metadata (22 kB)
Downloading ultralytics-8.0.3-py3-none-any.whl (247 kB)
[2K   [90m━━



In [None]:
# Import necessary modules
import os
from google.colab import auth
from google.cloud import storage
from ultralytics import YOLO

In [None]:
# Authenticate and connect to GCP
# Identifiers of the project and bucket that hold the dataset and receive the results
project_id = "ai-group-project"  # Your Google Cloud Project ID
bucket_name = "ai-group-project-data"  # Your GCP bucket name

# Authenticate the Colab user before the storage client is created
auth.authenticate_user()

# Storage client bound to the project, plus a handle to the working bucket
client = storage.Client(project=project_id)
bucket = client.bucket(bucket_name)

In [None]:
# Paths used by the download step
# GCS side: folder holding the labeled data, and the data.yaml stored inside it
gcs_labeled_data_path = "datasets/model"  # Path to labeled data (images and labels)
gcs_data_yaml_path = gcs_labeled_data_path + "/data.yaml"  # Path to data.yaml in GCP

# Local side: where the dataset is mirrored, and the data.yaml expected inside it
local_dataset_path = "/content/datasets/labeled_data"
local_data_yaml = local_dataset_path + "/data.yaml"

In [None]:
# Create the local dataset directory (including missing parents).
# exist_ok=True makes re-running this cell a no-op instead of an error.
os.makedirs(local_dataset_path, exist_ok=True)

In [None]:
# Function to download data from GCP
def download_from_gcs(bucket, gcs_folder, local_folder):
    """Mirror every object stored under a GCS folder into a local directory.

    Args:
        bucket: Bucket-like object exposing ``list_blobs(prefix=...)``. Each listed
            blob must expose ``name`` and ``download_to_filename(path)``.
        gcs_folder: Folder (object-name prefix) inside the bucket, with or without
            a trailing slash. An empty string selects the whole bucket.
        local_folder: Destination directory; sub-directories are created as needed.

    Returns:
        int: Number of files downloaded, so callers can detect an empty folder.
    """
    # Anchor the prefix on a folder boundary. Without the trailing slash,
    # "datasets/model" would also match "datasets/model_old/..." and the resulting
    # relative path ("../model_old/...") would be written outside local_folder.
    prefix = gcs_folder.rstrip("/")
    if prefix:
        prefix += "/"

    downloaded = 0
    for blob in bucket.list_blobs(prefix=prefix):
        # Skip folders (zero-byte placeholder objects whose name ends with "/")
        if blob.name.endswith("/"):
            continue
        # Object names always use "/" as separator; rebuild the path with the local one.
        relative_name = blob.name[len(prefix):]
        local_file_path = os.path.join(local_folder, *relative_name.split("/"))
        parent_dir = os.path.dirname(local_file_path)
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)
        blob.download_to_filename(local_file_path)
        downloaded += 1
    return downloaded

In [None]:
# Download labeled data from GCP
# Copies every object under gcs_labeled_data_path in the bucket into local_dataset_path.
print("Downloading labeled data from GCP...")
download_from_gcs(bucket, gcs_labeled_data_path, local_dataset_path)
print("Labeled data downloaded successfully!")

Downloading labeled data from GCP...
Labeled data downloaded successfully!


In [None]:
## Check that data.yaml was downloaded and show its contents (the file is only read, never modified)
import yaml

# Abort early when the download step did not produce the dataset config
if not os.path.exists(local_data_yaml):
    raise FileNotFoundError(f"{local_data_yaml} not found. Ensure your data.yaml is uploaded to GCP.")

# Parse the file first, then echo it back in block style for a quick visual check
print("Found data.yaml. Displaying its contents:")
with open(local_data_yaml, 'r') as yaml_file:
    data_yaml_content = yaml.safe_load(yaml_file)
print(yaml.dump(data_yaml_content, default_flow_style=False))

Found data.yaml. Displaying its contents:
names:
  0: V
  1: C
  2: S
train: /content/datasets/labeled_data/train
val: /content/datasets/labeled_data/val



In [None]:
# Training inputs and outputs
# Results are written below this directory, one sub-folder per experiment name
output_dir = "/content/runs/train"  # Directory for saving training results
# Starting checkpoint for training
pretrained_model_path = "yolov8n.pt"  # Pretrained YOLOv8 model

In [None]:
# Train the YOLOv8 model
print("Starting model training...")
model = YOLO(pretrained_model_path)
model.train(
    data=local_data_yaml,  # Path to data.yaml
    epochs=50,            # Number of epochs
    imgsz=640,            # Image size
    batch=16,             # Batch size
    save=True,            # Save model
    project=output_dir,   # Directory for saving results
    name="yolov8_training",  # Experiment name
    # Always write into <project>/<name>. With the default exist_ok=False a re-run
    # would go to an auto-incremented folder (e.g. yolov8_training2), while the
    # upload and testing cells read the hard-coded "yolov8_training" folder and
    # would silently pick up stale weights.
    exist_ok=True,
)
print("Model training completed.")

Starting model training...


Downloading https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n.pt to yolov8n.pt...
100%|██████████| 6.23M/6.23M [00:00<00:00, 132MB/s]

  ckpt = torch.load(attempt_download(weight), map_location='cpu')  # load
[34m[1myolo/engine/trainer: [0mtask=detect, mode=train, model=yolov8n.yaml, data=/content/datasets/labeled_data/data.yaml, epochs=50, patience=50, batch=16, imgsz=640, save=True, cache=False, device=None, workers=8, project=/content/runs/train, name=yolov8_training, exist_ok=False, pretrained=False, optimizer=SGD, verbose=False, seed=0, deterministic=True, single_cls=False, image_weights=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, overlap_mask=True, mask_ratio=4, dropout=False, val=True, save_json=False, save_hybrid=False, conf=0.001, iou=0.7, max_det=300, half=True, dnn=False, plots=True, source=ultralytics/assets/, show=False, save_txt=False, save_conf=False, save_crop=False, hide_labels=False, hide_conf=False, vid_stride=1, line_thick

Model training completed.


In [None]:
# Explicit validation after training
# Evaluates the in-memory model using the dataset described by data.yaml.
# NOTE(review): the saved output of this cell shows an AttributeError raised inside the
# plot_images thread; the later testing cell passes plots=False "to avoid visualization
# errors" — consider doing the same here.
print("Starting model validation...")
val_results = model.val(data=local_data_yaml)  # result is kept but not used by the cells shown in this notebook
print("Validation completed.")

Ultralytics YOLOv8.0.3 🚀 Python-3.10.12 torch-2.5.1+cu121 CUDA:0 (NVIDIA A100-SXM4-40GB, 40514MiB)
Fusing layers... 


Starting model validation...


Model summary: 168 layers, 3006233 parameters, 13065 gradients, 8.1 GFLOPs
[34m[1mval: [0mScanning /content/datasets/labeled_data/val/labels.cache... 163 images, 0 backgrounds, 0 corrupt: 100%|██████████| 163/163 [00:00<?, ?it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95):   9%|▉         | 1/11 [00:00<00:07,  1.29it/s]Exception in thread Thread-182 (plot_images):
Traceback (most recent call last):
  File "/usr/lib/python3.10/threading.py", line 1016, in _bootstrap_inner
    self.run()
  File "/usr/lib/python3.10/threading.py", line 953, in run
    self._target(*self._args, **self._kwargs)
  File "/usr/local/lib/python3.10/dist-packages/ultralytics/yolo/utils/plotting.py", line 250, in plot_images
    annotator.box_label(box, label, color=color)
  File "/usr/local/lib/python3.10/dist-packages/ultralytics/yolo/utils/plotting.py", line 63, in box_label
    w, h = self.font.getsize(label)  # text width, height
AttributeError: 'FreeTypeFont' 

Validation completed.


In [None]:
# Save training results back to GCP
gcs_output_path = "models/yolov8_training"  # Path for saving results in GCP
print("Uploading training results to GCP...")
output_training_path = os.path.join(output_dir, "yolov8_training")
# Flat list of every file below the training run folder
blobs_to_upload = [os.path.join(dp, f) for dp, dn, filenames in os.walk(output_training_path) for f in filenames]

# os.walk yields nothing for a missing or empty directory; fail loudly instead of
# reporting a successful upload of zero files.
if not blobs_to_upload:
    raise FileNotFoundError(
        f"No training results found in {output_training_path}. Run the training cell first."
    )

for local_file in blobs_to_upload:
    relative_path = os.path.relpath(local_file, output_training_path)
    # GCS object names always use "/" regardless of the local path separator
    gcs_file_path = f"{gcs_output_path}/{relative_path.replace(os.sep, '/')}"
    blob = bucket.blob(gcs_file_path)
    blob.upload_from_filename(local_file)
print(f"Training results uploaded to GCP at {gcs_output_path}")

Uploading training results to GCP...
Training results uploaded to GCP at models/yolov8_training


In [None]:
# Notifications for completion of steps
# Messages are listed in pipeline order; the numbering is generated when printing.
completed_steps = (
    "Labeled data downloaded successfully.",
    "YOLOv8 model training completed.",
    "Model validation completed with results displayed.",
    "Training results successfully uploaded to GCP.",
)
print("\nProcess Summary:")
for step_number, step_message in enumerate(completed_steps, start=1):
    print(f"{step_number}. {step_message}")


Process Summary:
1. Labeled data downloaded successfully.
2. YOLOv8 model training completed.
3. Model validation completed with results displayed.
4. Training results successfully uploaded to GCP.


**Model Testing on Labeled Test Data with Performance Metrics Evaluation**

In [None]:
import shutil

In [None]:
# Paths
# Inputs: trained weights, labeled test images, and the YAML that will describe them
trained_model_path = "/content/runs/train/yolov8_training/weights/best.pt"  # Path to trained YOLOv8 model
test_images_path = "/content/datasets/labeled_data/test/images"  # Local path for test images
test_data_yaml = "/content/test_data.yaml"  # Temporary YAML file for test data configuration

# Outputs: local results folder and its destination in the bucket
results_path = "/content/datasets/test_results_v2"  # Local folder for test results
gcs_results_path = "models/test_results/test_data_v2"  # GCS folder for saving results


In [None]:
# Ensure the local directory for results exists (missing parents are created too).
# exist_ok=True keeps the cell safe to re-run.
os.makedirs(results_path, exist_ok=True)

In [None]:
# Create a temporary data.yaml for testing
# Class id -> label: V = Passenger Vehicles, C = Cargo Vehicles, S = Buses
class_labels = {0: "V", 1: "C", 2: "S"}

# `val` points at the test images so that model.val() evaluates on the test set;
# no training split is needed for this step.
test_data_config = dict(train=None, val=test_images_path, names=class_labels)

with open(test_data_yaml, "w") as yaml_file:
    yaml.dump(test_data_config, yaml_file)

In [None]:
# Load the trained YOLOv8 model
# Rebinds `model` to the weights stored at trained_model_path, replacing the
# in-memory model created in the training section.
print("Loading the trained YOLOv8 model for testing...")
model = YOLO(trained_model_path)

Loading the trained YOLOv8 model for testing...


  ckpt = torch.load(attempt_download(weight), map_location='cpu')  # load


In [None]:
# Testing and results
try:
    # Run validation without saving the `results` object
    print("Starting testing on the labeled test data...")
    model.val(
        data=test_data_yaml,  # Use the YAML file for test data
        save_json=True,       # Save results in JSON format
        save_conf=True,       # Save confidence scores
        project=results_path, # Save results in the specified folder
        name="test_predictions",
        plots=False           # Disable plotting to avoid visualization errors
    )

    # Display validation metrics from YOLO output
    print("\nValidation completed. Check the metrics above and JSON file for detailed results.")
    # The validator writes predictions.json inside the run folder <project>/<name>/
    # (see the "Saving ..." line in the cell output), not directly under results_path.
    # NOTE(review): if the run folder already exists the library may pick an
    # incremented name — confirm against the "Saving ..." line when re-running.
    predictions_json_path = os.path.join(results_path, "test_predictions", "predictions.json")
    print(f"JSON results saved at: {predictions_json_path}")

except Exception as e:
    print(f"Error during validation: {e}")
    # Re-raise so "Run All" stops here instead of uploading missing or stale results
    # and reporting success in the following cell.
    raise


Ultralytics YOLOv8.0.3 🚀 Python-3.10.12 torch-2.5.1+cu121 CUDA:0 (NVIDIA A100-SXM4-40GB, 40514MiB)
Fusing layers... 
Model summary: 168 layers, 3006233 parameters, 0 gradients, 8.1 GFLOPs


Starting testing on the labeled test data...


[34m[1mval: [0mScanning /content/datasets/labeled_data/test/labels.cache... 163 images, 0 backgrounds, 0 corrupt: 100%|██████████| 163/163 [00:00<?, ?it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 11/11 [00:01<00:00,  5.94it/s]
                   all        163        878      0.754      0.795      0.843      0.713
                     V        163        694      0.807      0.856      0.904      0.718
                     C        163        100      0.605       0.66      0.688      0.597
                     S        163         84      0.851      0.869      0.937      0.823
Speed: 0.1ms pre-process, 0.7ms inference, 0.0ms loss, 1.3ms post-process per image
Saving /content/datasets/test_results_v2/test_predictions/predictions.json...



Validation completed. Check the metrics above and JSON file for detailed results.
JSON results saved at: /content/datasets/test_results_v2/test_predictions.json


### Decision on Metrics

The model shows good performance overall, with an mAP@0.5 of 0.843 and mAP@0.5:0.95 of 0.713 across all classes.

- Passenger Vehicles (V): The model works well, with high precision (0.807) and recall (0.856). It also has a strong mAP@0.5 of 0.904.

- Cargo Vehicles (C): Performance on cargo vehicles is noticeably weaker, with a precision of 0.605, a recall of 0.66, and an mAP@0.5 of 0.688. This is the category that most needs improvement.

- Buses (S): The model performs best for buses, with excellent precision (0.851) and recall (0.869). It has the highest mAP@0.5 of 0.937.

> Conclusion:
The model is suitable for detecting Passenger Vehicles and Buses, but it needs improvement for Cargo Vehicles. It can be used now, but additional data or further training might improve results for cargo vehicles.


In [None]:
# Save testing results to GCP
print("Uploading test results to GCP...")
# Flat list of every file below the local results folder
blobs_to_upload = [os.path.join(dp, f) for dp, dn, filenames in os.walk(results_path) for f in filenames]

# os.walk yields nothing for a missing or empty directory; fail loudly instead of
# reporting a successful upload of zero files.
if not blobs_to_upload:
    raise FileNotFoundError(
        f"No test results found in {results_path}. Run the testing cell first."
    )

for local_file in blobs_to_upload:
    relative_path = os.path.relpath(local_file, results_path)
    # GCS object names always use "/" regardless of the local path separator
    gcs_file_path = f"{gcs_results_path}/{relative_path.replace(os.sep, '/')}"
    blob = bucket.blob(gcs_file_path)
    blob.upload_from_filename(local_file)

print(f"Test results uploaded to GCP at: {gcs_results_path}")

Uploading test results to GCP...
Test results uploaded to GCP at: models/test_results/test_data_v2
