<a href="https://colab.research.google.com/github/jakubstenc/Array_navrhy/blob/master/pollen_viability.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!git clone https://github.com/ultralytics/yolov5 # Clone the repository
!cd yolov5
!pip install -r requirements.txt # Install dependencies

Cloning into 'yolov5'...
remote: Enumerating objects: 17496, done.[K
remote: Counting objects: 100% (5/5), done.[K
remote: Compressing objects: 100% (5/5), done.[K
remote: Total 17496 (delta 2), reused 0 (delta 0), pack-reused 17491 (from 3)[K
Receiving objects: 100% (17496/17496), 16.54 MiB | 9.42 MiB/s, done.
Resolving deltas: 100% (11990/11990), done.
[31mERROR: Could not open requirements file: [Errno 2] No such file or directory: 'requirements.txt'[0m[31m
[0m

In [None]:

# Import necessary libraries
import os
from google.colab import drive
import zipfile
import shutil # Added for copying files
import cv2 # For image loading and saving (used for image processing if needed)
import numpy as np # Used for numerical operations
# --- 1. Mount Google Drive ---
# Attach Google Drive to the Colab VM. Colab may open an authorisation pop-up.
print("Mounting Google Drive...")
drive.mount('/content/drive')
print("Google Drive mounted successfully!")

# Root of the project folder on Drive.
# IMPORTANT: edit this if 'Pollen_viability' does not sit directly in 'My Drive',
# e.g. '/content/drive/My Drive/YOLO_Projects/Pollen_viability'.
base_drive_path = '/content/drive/My Drive/Pollen_viability'

# Inputs on Drive: a folder of training images and a zip archive of YOLO labels.
training_images_path = os.path.join(base_drive_path, 'training images')
zipped_labels_path = os.path.join(base_drive_path, 'labels.zip')

# Working directory on the Colab VM (not on Drive) that will hold the
# extracted labels and the YOLOv5-style dataset layout.
colab_data_dir = '/content/yolov5_data'
os.makedirs(colab_data_dir, exist_ok=True)
print(f"Created data directory in Colab: {colab_data_dir}")

# --- 2. Unzip Labels ---
# Extract 'labels.zip' into <colab_data_dir>/labels; only report the problem
# (without raising) when the archive is missing.
if not os.path.exists(zipped_labels_path):
    print(f"Error: Zipped labels file not found at {zipped_labels_path}. Please check the path and filename.")
else:
    labels_extract_dir = os.path.join(colab_data_dir, 'labels')
    print(f"Unzipping labels from: {zipped_labels_path} to {colab_data_dir}/labels")
    with zipfile.ZipFile(zipped_labels_path, 'r') as labels_archive:
        labels_archive.extractall(labels_extract_dir)
    print("Labels unzipped successfully!")

# --- 3. Prepare Data for YOLOv5 (Structuring) ---
# Target layout built inside the Colab data directory:
#
#   yolov5_data/
#   ├── images/
#   │   └── train/   img1.jpg, img2.jpg, ...
#   └── labels/
#       └── train/   img1.txt, img2.txt, ...
#
# Files are placed there as symbolic links where possible (no data is
# duplicated, but the originals must stay accessible); copying is the fallback.

# One 'train' sub-directory each for images and labels.
yolov5_images_train_dir, yolov5_labels_train_dir = (
    os.path.join(colab_data_dir, kind, 'train') for kind in ('images', 'labels')
)

for train_dir in (yolov5_images_train_dir, yolov5_labels_train_dir):
    os.makedirs(train_dir, exist_ok=True)

print(f"Created YOLOv5 image training directory: {yolov5_images_train_dir}")
print(f"Created YOLOv5 label training directory: {yolov5_labels_train_dir}")

def link_or_copy_files(src_dir, dst_dir, extensions):
    """Expose matching files from ``src_dir`` inside ``dst_dir``.

    Each regular file in ``src_dir`` whose lower-cased name ends with one of
    ``extensions`` is symlinked into ``dst_dir``. If the link already exists it
    is left alone; if symlinking fails for another reason the file is copied.

    Args:
        src_dir: Directory to scan (not recursive).
        dst_dir: Existing directory that receives the links/copies.
        extensions: Tuple of lower-case suffixes, e.g. ``('.png', '.jpg')``.

    Returns:
        Number of matching files that are available in ``dst_dir`` afterwards.
    """
    available = 0
    for filename in sorted(os.listdir(src_dir)):
        if not filename.lower().endswith(extensions):
            continue
        src = os.path.join(src_dir, filename)
        if not os.path.isfile(src):
            continue  # a directory that merely looks like a data file
        dst = os.path.join(dst_dir, filename)
        try:
            os.symlink(src, dst)  # Use symlink to avoid copying
        except FileExistsError:
            pass  # Already linked by a previous run
        except OSError as e:
            # Symlinks can be refused on some filesystems; fall back to a real copy.
            print(f"Warning: Symlink failed for {filename}, attempting to copy. Error: {e}")
            shutil.copy(src, dst)
        available += 1
    return available


# Symlink or copy images from Drive into the YOLOv5 layout.
print(f"Linking/Copying images from {training_images_path} to {yolov5_images_train_dir}...")
if os.path.exists(training_images_path):
    n_images = link_or_copy_files(
        training_images_path,
        yolov5_images_train_dir,
        ('.png', '.jpg', '.jpeg', '.gif', '.bmp', '.tiff'),
    )
    if n_images:
        print(f"Images linked/copied successfully! ({n_images} files)")
    else:
        print(f"Warning: No image files found in {training_images_path}.")
else:
    print(f"Error: Training images folder not found at {training_images_path}. Please check the path.")

# Symlink or copy labels.
# Only .txt files lying directly in '<colab_data_dir>/labels' are picked up;
# if the archive wrapped them in a sub-folder, nothing will match here.
unzipped_labels_source_path = os.path.join(colab_data_dir, 'labels')
print(f"Linking/Copying labels from {unzipped_labels_source_path} to {yolov5_labels_train_dir}...")
if os.path.exists(unzipped_labels_source_path):
    n_labels = link_or_copy_files(
        unzipped_labels_source_path,
        yolov5_labels_train_dir,
        ('.txt',),  # YOLO labels are plain-text files
    )
    if n_labels:
        print(f"Labels linked/copied successfully! ({n_labels} files)")
    else:
        print(f"Warning: No .txt label files found directly in {unzipped_labels_source_path}. Check the folder structure inside labels.zip.")
else:
    print(f"Error: Unzipped labels folder not found at {unzipped_labels_source_path}. Please ensure labels.zip was unzipped correctly.")


print("\nData preparation complete!")
print(f"Your original images are now available at: {yolov5_images_train_dir}")
print(f"Your original labels are now available at: {yolov5_labels_train_dir}")
print(f"The base directory for your YOLOv5 data is: {colab_data_dir}")


Mounting Google Drive...
Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Google Drive mounted successfully!
Created data directory in Colab: /content/yolov5_data
Unzipping labels from: /content/drive/My Drive/Pollen_viability/labels.zip to /content/yolov5_data/labels
Labels unzipped successfully!
Created YOLOv5 image training directory: /content/yolov5_data/images/train
Created YOLOv5 label training directory: /content/yolov5_data/labels/train
Linking/Copying images from /content/drive/My Drive/Pollen_viability/training images to /content/yolov5_data/images/train...
Images linked/copied successfully!
Linking/Copying labels from /content/yolov5_data/labels to /content/yolov5_data/labels/train...
Labels linked/copied successfully!

Data preparation complete!
Your original images are now available at: /content/yolov5_data/images/train
Your original labels are now available at: /content/yolov5_data/labels/trai

In [None]:

# --- 2. YOLOv5 Setup and Dataset YAML ---
yolov5_repo_path = '/content/yolov5'
if not os.path.exists(yolov5_repo_path):
    !git clone https://github.com/ultralytics/yolov5.git {yolov5_repo_path}

%cd {yolov5_repo_path}
!pip install -r requirements.txt

# Create dataset YAML
dataset_yaml_content = f"""
train: {yolov5_images_train_dir}
val: {yolov5_images_train_dir}
nc: 2 # CHANGE THIS TO YOUR ACTUAL NUMBER OF CLASSES
names: ['viable_pollen', 'non_viable_pollen'] # CHANGE THIS TO YOUR ACTUAL CLASS NAMES, e.g., ['viable_pollen', 'non_viable_pollen']
"""
dataset_yaml_path = os.path.join(yolov5_repo_path, 'data', 'pollen_dataset.yaml')
with open(dataset_yaml_path, 'w') as f:
    f.write(dataset_yaml_content)

# --- 3. Training Command ---
print("\n--- Training Command ---")
print("Run the following command in a NEW CELL to start training:")
print(f'WANDB_MODE="disabled" !python train.py --img 640 --batch 16 --epochs 100 --data {dataset_yaml_path} --weights yolov5s.pt --cache --project runs/train --name pollen_detection')



/content/yolov5
Collecting thop>=0.1.1 (from -r requirements.txt (line 14))
  Downloading thop-0.1.1.post2209072238-py3-none-any.whl.metadata (2.7 kB)
Collecting ultralytics>=8.2.34 (from -r requirements.txt (line 18))
  Downloading ultralytics-8.3.159-py3-none-any.whl.metadata (37 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=1.8.0->-r requirements.txt (line 15))
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=1.8.0->-r requirements.txt (line 15))
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=1.8.0->-r requirements.txt (line 15))
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=1.8.0->-r requirements.txt (line 15))
  Downloading nvidia_cudnn_cu12-9.1.0.70

In [None]:
import os

# Location of the cloned YOLOv5 repository (adjust if cloned elsewhere).
yolov5_repo_path = '/content/yolov5' # Make sure this path is correct

# Text of the custom hyperparameter YAML file. Tune the values below to change
# the augmentations and the other training settings.
custom_hyp_content = """
# Hyperparameters for YOLOv5 training (customizable)
# Augmentation settings:
degrees: 15.0  # image rotation (+/- degrees)
translate: 0.1  # image translation (+/- fraction)
scale: 0.0  # image scaling (factor)
shear: 0.0  # image shear (+/- degrees)
perspective: 0.0005  # image perspective (+/- fraction), range 0-0.001
flipud: 0.3  # flip image upside down (probability)
fliplr: 0.75  # flip image left-right (probability)
mosaic: 1.0  # 0.0 to 1.0
mixup: 0.0  # 0.0 to 1.0
copy_paste: 0.0 # segment copy-paste (0.0 to 1.0)

# Other training hyperparameters (you can adjust these too)
lr0: 0.01  # initial learning rate (SGD=1E-2, Adam=1E-3)
lrf: 0.01  # final learning rate (with cos scheduler)
momentum: 0.937  # SGD momentum/Adam beta1
weight_decay: 0.0005  # optimizer weight decay 5E-4
warmup_epochs: 3.0  # warmup epochs
warmup_momentum: 0.8  # warmup initial momentum
warmup_bias_lr: 0.1  # warmup initial bias lr
box: 0.05  # box loss gain
cls: 0.5  # cls loss gain
cls_pw: 1.0  # cls BCELoss positive_weight
obj: 1.0  # obj loss gain (composite)
obj_pw: 1.0  # obj BCELoss positive_weight
iou_t: 0.20  # IoU training threshold
anchor_t: 4.0  # anchor-multiple threshold
fl_gamma: 0.0  # focal loss gamma (efficientDet default gamma=1.5)
hsv_h: 0.015  # image HSV-Hue augmentation (fraction)
hsv_s: 0.7  # image HSV-Saturation augmentation (fraction)
hsv_v: 0.4  # image HSV-Value augmentation (fraction)
"""

# Make sure <repo>/data/hyps exists, then place the file inside it.
hyps_dir = os.path.join(yolov5_repo_path, 'data', 'hyps')
os.makedirs(hyps_dir, exist_ok=True)
custom_hyp_path = os.path.join(hyps_dir, 'custom_hyp.yaml')

# Overwrite any previous version of the file with the text above.
with open(custom_hyp_path, 'w') as hyp_file:
    hyp_file.write(custom_hyp_content)

# Tell the user where the file is and how to pass it to train.py.
print(
    f"Custom hyperparameter file created at: {custom_hyp_path}",
    "You can now edit this file to customize augmentation settings.",
    f"To use this file for training, add --hyp {custom_hyp_path} to your train command.",
    sep="\n",
)

Custom hyperparameter file created at: /content/yolov5/data/hyps/custom_hyp.yaml
You can now edit this file to customize augmentation settings.
To use this file for training, add --hyp /content/yolov5/data/hyps/custom_hyp.yaml to your train command.


In [None]:
# Example: Train YOLOv5s on the COCO128 dataset for 3 epochs
!python /content/yolov5/train.py --img 640 --batch 16 --epochs 1550 --data /content/yolov5/data/pollen_dataset.yaml --weights yolov5s.pt --hyp /content/yolov5/data/hyps/custom_hyp.yaml

2025-06-25 17:27:17.495012: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1750872437.520828   16860 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1750872437.528172   16860 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
[34m[1mwandb[0m: (1) Create a W&B account
[34m[1mwandb[0m: (2) Use an existing W&B account
[34m[1mwandb[0m: (3) Don't visualize my results
[34m[1mwandb[0m: Enter your choice: (30 second timeout) 
[34m[1mwandb[0m: W&B disabled due to login timeout.
[34m[1mtrain: [0mweights=yolov5s.pt, cfg=, data=/content/yolov5/data/pollen_dataset.yaml, hyp=/content/yolov5/data/hyps/custom_hyp.yaml, epochs=1550, batch_size=16, imgsz=

In [None]:
import os
import pandas as pd
import re # For parsing YAML content

# --- Label statistics: objects per image and overall class distribution ---
# The paths are declared again here so that this cell does not depend on
# variables left over from earlier cells; the files themselves must still have
# been created by the data-preparation and dataset-YAML cells.
yolov5_labels_train_dir = '/content/yolov5_data/labels/train'
yolov5_repo_path = '/content/yolov5'
dataset_yaml_path = os.path.join(yolov5_repo_path, 'data', 'pollen_dataset.yaml')


def parse_class_names(yaml_text):
    """Extract the class names from the text of a YOLOv5 dataset YAML file.

    The names are read with a regular expression, so they must be written on a
    single line as ``names: ['class1', 'class2']``. When no such line exists,
    generic ``class_<i>`` names are generated from the ``nc:`` entry; when that
    is missing too, an empty list is returned.

    Args:
        yaml_text: Full text of the dataset YAML file.

    Returns:
        List of class-name strings, ordered by class ID.
    """
    names_match = re.search(r"names:\s*\[(.*?)\]", yaml_text)
    if names_match:
        # Split by comma, remove quotes and strip whitespace
        return [name.strip().strip("'\"") for name in names_match.group(1).split(',')]

    print("Warning: Could not parse class names from dataset YAML. Using generic 'class_X'.")
    nc_match = re.search(r"nc:\s*(\d+)", yaml_text)
    if nc_match:
        return [f'class_{i}' for i in range(int(nc_match.group(1)))]

    print("Warning: Could not parse number of classes either. Class names might be incorrect.")
    return []


def count_label_classes(label_lines, class_names, label_filename='<labels>'):
    """Count the objects of each class listed in one YOLO label file.

    Args:
        label_lines: Iterable of text lines; the first whitespace-separated
            token of every non-blank line is read as the integer class ID.
        class_names: Class names ordered by class ID.
        label_filename: Name used in warning messages only.

    Returns:
        Dict mapping every name in ``class_names`` to its count, plus an
        ``unknown_class_<id>`` entry for each ID outside ``0..len(class_names)-1``.

    Raises:
        ValueError: If a line does not start with an integer.
    """
    counts = {name: 0 for name in class_names}
    for line in label_lines:
        parts = line.split()
        if not parts:
            continue  # blank line
        class_id = int(parts[0])
        # The lower bound matters: a negative ID would otherwise index
        # class_names from the end and be credited to the wrong class.
        if 0 <= class_id < len(class_names):
            counts[class_names[class_id]] += 1
        else:
            unknown_key = f'unknown_class_{class_id}'
            print(f"Warning: Class ID {class_id} in {label_filename} is out of bounds for defined class names. Counting it as '{unknown_key}'.")
            counts[unknown_key] = counts.get(unknown_key, 0) + 1
    return counts


# Read the class names from the dataset YAML, falling back to two generic
# names if the file cannot be read.
try:
    with open(dataset_yaml_path, 'r') as f:
        dataset_yaml_content = f.read()
    class_names = parse_class_names(dataset_yaml_content)
except OSError as e:
    print(f"Error accessing paths or parsing YAML. Ensure previous cells ran successfully. Error: {e}")
    class_names = ['class_0', 'class_1'] # Fallback if class names can't be loaded


print(f"Analyzing labels from: {yolov5_labels_train_dir}")
print(f"Detected class names: {class_names}")

# Collect the label files, reporting (instead of crashing on) a missing directory.
if not os.path.isdir(yolov5_labels_train_dir):
    print(f"Error: Labels directory not found at {yolov5_labels_train_dir}. Please run the data preparation cell first.")
    label_files = []
else:
    label_files = [f for f in os.listdir(yolov5_labels_train_dir) if f.endswith('.txt')]
    if not label_files:
        print(f"No label files found in {yolov5_labels_train_dir}. Please ensure your labels are unzipped and linked correctly.")

# Per-image counts, keyed by image name (label filename without extension).
image_class_counts = {}
for label_filename in label_files:
    image_name = os.path.splitext(label_filename)[0]
    label_path = os.path.join(yolov5_labels_train_dir, label_filename)
    try:
        with open(label_path, 'r') as f:
            image_class_counts[image_name] = count_label_classes(f, class_names, label_filename)
    except (OSError, ValueError) as e:
        # Unreadable or malformed file: report it and leave the image out.
        print(f"Error reading or parsing label file {label_filename}: {e}")

if image_class_counts:
    # One row per image, one column per class; classes absent from an image become 0.
    df = pd.DataFrame.from_dict(image_class_counts, orient='index').fillna(0).astype(int)

    # Total number of labelled objects in each image
    df['Total Objects'] = df.sum(axis=1)

    df = df.sort_index()

    print("\n--- Object Counts Per Image (from Training Labels) ---")
    print(df)

    print("\n--- Overall Class Distribution ---")
    print(df[class_names].sum().sort_values(ascending=False)) # Sum counts for each class
elif label_files:
    print("None of the label files could be parsed; no statistics to show.")



Error accessing paths or parsing YAML. Ensure previous cells ran successfully. Error: [Errno 2] No such file or directory: '/content/yolov5/data/pollen_dataset.yaml'
Analyzing labels from: /content/yolov5_data/labels/train
Detected class names: ['class_0', 'class_1']


FileNotFoundError: [Errno 2] No such file or directory: '/content/yolov5_data/labels/train'

In [None]:
# Customize and run the detect.py script.
# Edit the settings below; the command line is assembled from them.
import shlex  # stdlib; used to quote arguments that contain spaces

# Path to your trained model weights
weights_path = '/content/yolov5/runs/train/exp/weights/best.pt' # <<-- REPLACE with your actual weights path

# Source of images/videos for detection
source_path = '/content/drive/My Drive/Pollen_viability/detect images' # <<-- REPLACE with your image/video source

# Confidence threshold (lower to see more detections, potentially including false positives).
# This value is now what is passed to --conf-thres; previously the command
# hard-coded 0.80 and silently ignored this variable.
confidence_threshold = 0.25

# IoU threshold for Non-Maximum Suppression (adjust based on how much overlap you want to allow for detections of the same object)
iou_threshold = 0.1

# Project and name for saving results (a relative project path is resolved
# against the current working directory)
results_project = 'runs/detect'
results_name = 'custom_detection_run' # Give your run a descriptive name

# Whether to save confidence scores and bounding box coordinates to text files.
# NOTE(review): detect.py documents --save-conf as "save confidences in
# --save-txt labels", so it presumably has no visible effect while
# save_txt_labels is False - confirm against the YOLOv5 version in use.
save_confidence = True
save_txt_labels = False # Set to True to save labels in YOLO format

# Build the argument list first, then quote each argument for the shell.
detect_args = [
    'python', '/content/yolov5/detect.py',
    '--weights', weights_path,
    '--source', source_path,
    '--conf-thres', str(confidence_threshold),
    '--iou-thres', str(iou_threshold),
    '--project', results_project,
    '--name', results_name,
]
if save_confidence:
    detect_args.append('--save-conf')
if save_txt_labels:
    detect_args.append('--save-txt')

# No leading '!': that prefix is notebook syntax, and get_ipython().system()
# hands the string straight to the shell, which would look for a program
# literally called '!python'.
command = ' '.join(shlex.quote(arg) for arg in detect_args)

# Print the command before executing (optional)
print("Executing detection command:")
print(command)

# Execute the command
get_ipython().system(command)

Executing detection command:

!python /content/yolov5/detect.py     --weights /content/yolov5/runs/train/exp/weights/best.pt     --source "/content/drive/My Drive/Pollen_viability/detect images"     --conf-thres 0.80     --iou-thres 0.1     --project runs/detect     --name custom_detection_run     --save-conf     

/bin/bash: line 2: !python: command not found


In [None]:
# Run detection using the trained model
# Make sure to replace the placeholder paths with your actual weights and image source path
!python /content/yolov5/detect.py --weights /content/yolov5/runs/train/exp2/weights/best.pt --source "/content/drive/MyDrive/Pollen_viability/detect images" --save-conf

[34m[1mdetect: [0mweights=['/content/yolov5/runs/train/exp2/weights/best.pt'], source=/content/drive/MyDrive/Pollen_viability/detect images, data=data/coco128.yaml, imgsz=[640, 640], conf_thres=0.25, iou_thres=0.45, max_det=1000, device=, view_img=False, save_txt=False, save_format=0, save_csv=False, save_conf=True, save_crop=False, nosave=False, classes=None, agnostic_nms=False, augment=False, visualize=False, update=False, project=runs/detect, name=exp, exist_ok=False, line_thickness=3, hide_labels=False, hide_conf=False, half=False, dnn=False, vid_stride=1
YOLOv5 🚀 v7.0-421-g79c4c31d Python-3.11.13 torch-2.6.0+cu124 CUDA:0 (Tesla T4, 15095MiB)

Fusing layers... 
Model summary: 157 layers, 7015519 parameters, 0 gradients, 15.8 GFLOPs
image 1/8 /content/drive/MyDrive/Pollen_viability/detect images/1-3-F_S4x_BCrop_10.png: 640x640 7 viable_pollens, 11.5ms
image 2/8 /content/drive/MyDrive/Pollen_viability/detect images/1-3-F_S4x_BCrop_7.png: 640x640 12 viable_pollens, 12.8ms
image 3/8