**Loading Dataset and creating dataframe**

In [None]:
import os
import cv2
import torch
import numpy as np
import pandas as pd
from shutil import copy2
from tqdm import tqdm
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.utils import to_categorical
import supervision as sv
import matplotlib.pyplot as plt
from ultralytics import YOLO
from torchvision.ops import box_iou
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score

In [None]:
#CHANGE THE FILE PATHS WITH YOUR RESPECTIVE FILE PATH

labeled_images_path = "/kaggle/input/weed-detection-dataset/labeled/images"
labeled_annotations_path = "/kaggle/input/weed-detection-dataset/labeled/annotations"
test_images_path = "/kaggle/input/weed-detection-dataset/test/images"
test_annotations_path = "/kaggle/input/weed-detection-dataset/test/annotations"
unlabeled_images_path = "/kaggle/input/weed-detection-dataset/unlabeled"

In [26]:



def create_dataframe(images_path, annotations_path):
    data = []
    for image_file in os.listdir(images_path):
        image_path = os.path.join(images_path, image_file)
        annotation_file = image_file.replace(".jpg", ".txt")
        annotation_path = os.path.join(annotations_path, annotation_file)
        
        if os.path.exists(annotation_path):
            with open(annotation_path, "r") as f:
                line = f.readline().strip().split()  # Read first line
                if len(line) == 5:  # Ensure valid YOLO format
                    label, x_centre, y_centre, width, height = map(float, line)
                    data.append({"image": image_path, "label": int(label), 
                                 "x_centre": x_centre, "y_centre": y_centre, 
                                 "width": width, "height": height})
    return pd.DataFrame(data)

df = create_dataframe(labeled_images_path, labeled_annotations_path)
df


Unnamed: 0,image,label,x_centre,y_centre,width,height
0,/kaggle/input/weed-detection-dataset/labeled/i...,1,0.362305,0.416992,0.669922,0.759766
1,/kaggle/input/weed-detection-dataset/labeled/i...,0,0.511719,0.487305,0.953125,0.904297
2,/kaggle/input/weed-detection-dataset/labeled/i...,1,0.437500,0.411133,0.375000,0.341797
3,/kaggle/input/weed-detection-dataset/labeled/i...,1,0.554688,0.445312,0.554688,0.351562
4,/kaggle/input/weed-detection-dataset/labeled/i...,0,0.235352,0.213867,0.087891,0.220703
...,...,...,...,...,...,...
195,/kaggle/input/weed-detection-dataset/labeled/i...,0,0.335938,0.337891,0.632812,0.574219
196,/kaggle/input/weed-detection-dataset/labeled/i...,1,0.499023,0.348633,0.748047,0.603516
197,/kaggle/input/weed-detection-dataset/labeled/i...,1,0.673828,0.279297,0.371094,0.453125
198,/kaggle/input/weed-detection-dataset/labeled/i...,0,0.524414,0.531250,0.767578,0.855469


In [27]:
test_df = create_dataframe(test_images_path, test_annotations_path)

In [28]:
!pip install ultralytics supervision numpy opencv-python torch torchvision




In [30]:
dataset_path = "dataset"
os.makedirs(f"{dataset_path}/images/train", exist_ok=True)
os.makedirs(f"{dataset_path}/images/val", exist_ok=True)
os.makedirs(f"{dataset_path}/labels/train", exist_ok=True)
os.makedirs(f"{dataset_path}/labels/val", exist_ok=True)

print("✅ Dataset folders created.")


✅ Dataset folders created.


In [31]:
def move_images(df, img_folder):
    for img_path in df["image"]:
        img_name = os.path.basename(img_path)
        new_path = os.path.join(img_folder, img_name)
        shutil.copy(img_path, new_path)

# ✅ Move training images
move_images(df, "dataset/images/train")

# ✅ Move validation images (from test_df)
move_images(test_df, "dataset/images/val")

print("✅ Images copied to dataset folders.")


✅ Images copied to dataset folders.


In [32]:
def convert_to_yolo(df, label_folder):
    for _, row in df.iterrows():
        img_name = os.path.basename(row["image"]).replace(".jpg", ".txt")  
        label_file = os.path.join(label_folder, img_name)

        with open(label_file, "w") as f:
            f.write(f"{row['label']} {row['x_centre']} {row['y_centre']} {row['width']} {row['height']}\n")

# ✅ Create labels for training and validation
convert_to_yolo(df, "dataset/labels/train")
convert_to_yolo(test_df, "dataset/labels/val")

print("✅ YOLO labels saved.")


✅ YOLO labels saved.


In [33]:
dataset_abs_path = os.path.abspath("dataset")

yaml_content = f"""
path: {dataset_abs_path}
train: images/train
val: images/val
nc: 2
names: ["weed", "non-weed"]
"""

with open("dataset/dataset.yaml", "w") as f:
    f.write(yaml_content)

print("✅ dataset.yaml created.")


✅ dataset.yaml created.


**Training only on labeled dataset**

In [34]:
model = YOLO("yolov8n.pt")  
model.train(data="dataset/dataset.yaml", epochs=20, imgsz=640)

# ✅ Save the trained model
model_path = "yolo_weed_detector.pt"
model.save(model_path)
print(f"✅ YOLO Model trained and saved at {model_path}")


Ultralytics 8.3.71 🚀 Python-3.10.12 torch-2.5.1+cu121 CUDA:0 (Tesla T4, 15095MiB)
[34m[1mengine/trainer: [0mtask=detect, mode=train, model=yolov8n.pt, data=dataset/dataset.yaml, epochs=20, time=None, patience=100, batch=16, imgsz=640, save=True, save_period=-1, cache=False, device=None, workers=8, project=None, name=train2, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, vid_stride=1, stream_buffer=False, visualize=False, augment=False, agnostic_nms=False, classes=None, retina_masks=False, embed=None, show=False, save_frames=False, save_txt=False, save_conf=False, save_crop=False, show_labels=True, show_conf=True, sh

[34m[1mtrain: [0mScanning /kaggle/working/dataset/labels/train.cache... 200 images, 0 backgrounds, 0 corrupt: 100%|██████████| 200/200 [00:00<?, ?it/s]

[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01, num_output_channels=3, method='weighted_average'), CLAHE(p=0.01, clip_limit=(1.0, 4.0), tile_grid_size=(8, 8))



[34m[1mval: [0mScanning /kaggle/working/dataset/labels/val.cache... 50 images, 0 backgrounds, 0 corrupt: 100%|██████████| 50/50 [00:00<?, ?it/s]


Plotting labels to runs/detect/train2/labels.jpg... 
[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m AdamW(lr=0.001667, momentum=0.9) with parameter groups 57 weight(decay=0.0), 64 weight(decay=0.0005), 63 bias(decay=0.0)
[34m[1mTensorBoard: [0mmodel graph visualization added ✅
Image sizes 640 train, 640 val
Using 2 dataloader workers
Logging results to [1mruns/detect/train2[0m
Starting training for 20 epochs...

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       1/20       2.3G      1.606      3.316      1.989         23        640: 100%|██████████| 13/13 [00:02<00:00,  5.25it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 2/2 [00:00<00:00,  5.02it/s]

                   all         50         50     0.0032       0.96      0.317      0.174






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       2/20      2.19G      1.303      2.489      1.722         25        640: 100%|██████████| 13/13 [00:02<00:00,  6.24it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 2/2 [00:00<00:00,  5.31it/s]

                   all         50         50    0.00357      0.981      0.449      0.275






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       3/20      2.18G       1.23      2.136      1.623         20        640: 100%|██████████| 13/13 [00:02<00:00,  6.29it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 2/2 [00:00<00:00,  5.01it/s]

                   all         50         50      0.643      0.274      0.537      0.272






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       4/20      2.18G      1.219      2.003      1.625         23        640: 100%|██████████| 13/13 [00:02<00:00,  6.43it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 2/2 [00:00<00:00,  4.71it/s]

                   all         50         50      0.711      0.527      0.635      0.295






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       5/20      2.18G      1.248      1.853      1.664         27        640: 100%|██████████| 13/13 [00:02<00:00,  6.44it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 2/2 [00:00<00:00,  5.07it/s]

                   all         50         50      0.621      0.771      0.717      0.321






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       6/20      2.18G      1.254      1.921      1.662         22        640: 100%|██████████| 13/13 [00:02<00:00,  6.24it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 2/2 [00:00<00:00,  4.97it/s]

                   all         50         50      0.593      0.546      0.532      0.259






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       7/20      2.18G      1.312      1.933      1.712         20        640: 100%|██████████| 13/13 [00:02<00:00,  6.37it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 2/2 [00:00<00:00,  5.56it/s]

                   all         50         50      0.491      0.627      0.553      0.267






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       8/20      2.18G      1.259      1.833      1.679         16        640: 100%|██████████| 13/13 [00:02<00:00,  6.35it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 2/2 [00:00<00:00,  5.43it/s]

                   all         50         50      0.576      0.579      0.562      0.334






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       9/20      2.18G      1.219      1.701      1.625         18        640: 100%|██████████| 13/13 [00:02<00:00,  6.22it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 2/2 [00:00<00:00,  5.43it/s]

                   all         50         50      0.813      0.734      0.797      0.399






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      10/20      2.18G      1.251      1.609      1.656         19        640: 100%|██████████| 13/13 [00:02<00:00,  6.24it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 2/2 [00:00<00:00,  4.35it/s]

                   all         50         50      0.735      0.685      0.757      0.391





Closing dataloader mosaic
[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01, num_output_channels=3, method='weighted_average'), CLAHE(p=0.01, clip_limit=(1.0, 4.0), tile_grid_size=(8, 8))

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      11/20      2.18G      1.175      2.106       1.81          8        640: 100%|██████████| 13/13 [00:02<00:00,  4.68it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 2/2 [00:00<00:00,  5.51it/s]

                   all         50         50      0.766      0.699      0.695      0.353






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      12/20      2.18G       1.18      2.043       1.78          8        640: 100%|██████████| 13/13 [00:02<00:00,  6.19it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 2/2 [00:00<00:00,  5.93it/s]

                   all         50         50      0.778      0.785      0.829      0.507






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      13/20      2.18G      1.135      1.833      1.698          8        640: 100%|██████████| 13/13 [00:02<00:00,  6.39it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 2/2 [00:00<00:00,  6.08it/s]

                   all         50         50       0.67       0.78      0.781       0.42






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      14/20      2.18G      1.057      1.748      1.695          8        640: 100%|██████████| 13/13 [00:02<00:00,  6.44it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 2/2 [00:00<00:00,  5.81it/s]

                   all         50         50      0.679      0.824      0.794        0.5






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      15/20      2.18G          1      1.577      1.619          8        640: 100%|██████████| 13/13 [00:01<00:00,  6.56it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 2/2 [00:00<00:00,  5.72it/s]

                   all         50         50      0.874      0.862      0.859      0.558






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      16/20      2.18G     0.9595      1.518      1.586          8        640: 100%|██████████| 13/13 [00:02<00:00,  6.44it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 2/2 [00:00<00:00,  6.34it/s]

                   all         50         50      0.862      0.881        0.9      0.594






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      17/20      2.18G     0.9334      1.475      1.569          8        640: 100%|██████████| 13/13 [00:02<00:00,  6.40it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 2/2 [00:00<00:00,  5.81it/s]

                   all         50         50      0.841      0.841      0.899      0.559






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      18/20      2.18G     0.9147      1.435      1.543          8        640: 100%|██████████| 13/13 [00:01<00:00,  6.63it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 2/2 [00:00<00:00,  6.18it/s]

                   all         50         50      0.882      0.843      0.911      0.593






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      19/20      2.18G     0.8628      1.359      1.459          8        640: 100%|██████████| 13/13 [00:01<00:00,  6.59it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 2/2 [00:00<00:00,  6.32it/s]

                   all         50         50      0.938      0.838      0.936      0.632






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      20/20      2.18G     0.8525      1.308      1.425          8        640: 100%|██████████| 13/13 [00:02<00:00,  6.40it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 2/2 [00:00<00:00,  5.78it/s]

                   all         50         50      0.892      0.897      0.945      0.663






20 epochs completed in 0.018 hours.
Optimizer stripped from runs/detect/train2/weights/last.pt, 6.2MB
Optimizer stripped from runs/detect/train2/weights/best.pt, 6.2MB

Validating runs/detect/train2/weights/best.pt...
Ultralytics 8.3.71 🚀 Python-3.10.12 torch-2.5.1+cu121 CUDA:0 (Tesla T4, 15095MiB)
Model summary (fused): 168 layers, 3,006,038 parameters, 0 gradients, 8.1 GFLOPs


                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 2/2 [00:00<00:00,  4.58it/s]


                   all         50         50      0.892      0.897      0.945      0.663
                  weed         24         24      0.905      0.958      0.983      0.769
              non-weed         26         26      0.879      0.836      0.906      0.558




Speed: 0.2ms preprocess, 2.2ms inference, 0.0ms loss, 2.2ms postprocess per image
Results saved to [1mruns/detect/train2[0m
✅ YOLO Model trained and saved at yolo_weed_detector.pt


In [35]:
dataset = sv.DetectionDataset.from_yolo(
    images_directory_path="dataset/images/val",
    annotations_directory_path="dataset/labels/val",
    data_yaml_path="dataset/dataset.yaml"
)

In [36]:
def callback(image: np.ndarray) -> sv.Detections:
    result = model(image)[0]  
    return sv.Detections.from_ultralytics(result)


In [60]:
def run_yolo_on_test(test_df, model):
    y_true, y_pred = [], []

    for _, row in test_df.iterrows():
        image_path = row["image"]
        true_label = row["label"]

        results = model(image_path)

        if len(results[0].boxes) == 0:
            y_pred.append(1)  # If no detection, assume it's non-weed
        else:
            best_idx = results[0].boxes.conf.argmax().item()
            best_label = int(results[0].boxes.cls[best_idx].cpu().numpy())
            y_pred.append(best_label)

        y_true.append(true_label)

    return y_true, y_pred

# ✅ Run YOLO and collect predictions
y_true, y_pred = run_yolo_on_test(test_df, model)

# ✅ Compute F1 Score
f1 = f1_score(y_true, y_pred, average="macro")


mean_average_precision = sv.MeanAveragePrecision.benchmark(
    dataset=dataset,
    callback=callback
)

mAP = mean_average_precision.map50_95

final_score = 0.5 * f1 + 0.5 * mAP
print(f"Final Score: {final_score:.4f}")




image 1/1 /kaggle/input/weed-detection-dataset/test/images/agri_0_9430_jpeg.rf.55590fd568cfd05cb5a5f38a69781ce9.jpg: 640x640 (no detections), 9.5ms
Speed: 3.0ms preprocess, 9.5ms inference, 0.6ms postprocess per image at shape (1, 3, 640, 640)

image 1/1 /kaggle/input/weed-detection-dataset/test/images/agri_0_9266_jpeg.rf.190539e7297457c48131e14d0eaf9823.jpg: 640x640 (no detections), 7.3ms
Speed: 2.2ms preprocess, 7.3ms inference, 0.5ms postprocess per image at shape (1, 3, 640, 640)

image 1/1 /kaggle/input/weed-detection-dataset/test/images/agri_0_9881_jpeg.rf.ee55ad20cd1edd1f366c6d2641677d66.jpg: 640x640 (no detections), 7.3ms
Speed: 2.2ms preprocess, 7.3ms inference, 0.6ms postprocess per image at shape (1, 3, 640, 640)

image 1/1 /kaggle/input/weed-detection-dataset/test/images/agri_0_9532_jpeg.rf.cec78142a3ad2cbd2466c222131f846a.jpg: 640x640 (no detections), 7.3ms
Speed: 2.1ms preprocess, 7.3ms inference, 0.5ms postprocess per image at shape (1, 3, 640, 640)

image 1/1 /kaggle/i

**Utilising unlabeled images**

In [41]:
# Load the trained YOLO model
model = YOLO("yolo_weed_detector.pt")
print("✅ YOLO Model loaded.")


✅ YOLO Model loaded.


In [42]:
def generate_labels(image_folder, model):
    """
    Run YOLO on all images in `image_folder` and return a DataFrame with predictions.
    """
    data = []

    for img_name in os.listdir(image_folder):
        img_path = os.path.join(image_folder, img_name)
        image = cv2.imread(img_path)

        if image is None:
            print(f"⚠️ Skipping {img_name}, unable to read.")
            continue

        results = model(img_path)

        if len(results[0].boxes) == 0:
            print(f"⚠️ No detections in {img_name}. Assigning default label 1 (non-weed).")
            data.append([img_path, 1, 0, 0, 0, 0])  # Default label for no detection
            continue

        for result in results:
            boxes = result.boxes.xywh.cpu().numpy()  # Convert to YOLO format (x_center, y_center, width, height)
            labels = result.boxes.cls.cpu().numpy()  # Class labels

            for box, label in zip(boxes, labels):
                x_centre, y_centre, width, height = box
                label = int(label)  # Convert label to integer
                data.append([img_path, label, x_centre, y_centre, width, height])

    # Create a DataFrame
    new_df = pd.DataFrame(data, columns=["image", "label", "x_centre", "y_centre", "width", "height"])
    
    return new_df


In [None]:
# Generate labels for unlabeled images
new_df = generate_labels(unlabeled_images_path, model)

# Display first few rows of generated labels
import ace_tools as tools
tools.display_dataframe_to_user(name="Generated Labels", dataframe=new_df)

print("✅ Labels generated and stored in new_df.")


In [45]:
new_df

Unnamed: 0,image,label,x_centre,y_centre,width,height
0,/kaggle/input/weed-detection-dataset/unlabeled...,1,256.274933,260.519348,502.361267,363.015961
1,/kaggle/input/weed-detection-dataset/unlabeled...,1,274.809814,252.528229,284.254028,186.668259
2,/kaggle/input/weed-detection-dataset/unlabeled...,0,143.656860,439.980133,37.659081,30.384888
3,/kaggle/input/weed-detection-dataset/unlabeled...,0,93.310394,284.730347,30.548927,29.696014
4,/kaggle/input/weed-detection-dataset/unlabeled...,0,177.002319,98.791946,36.021729,31.420944
...,...,...,...,...,...,...
1139,/kaggle/input/weed-detection-dataset/unlabeled...,0,168.335281,130.319901,302.129913,258.858490
1140,/kaggle/input/weed-detection-dataset/unlabeled...,0,208.441711,241.021637,414.060364,482.043274
1141,/kaggle/input/weed-detection-dataset/unlabeled...,0,294.867249,113.974419,231.541031,227.948837
1142,/kaggle/input/weed-detection-dataset/unlabeled...,0,356.019257,139.217270,164.814453,277.630890


In [46]:
# Normalizing numerical columns to ensure values are between 0 and 1
numerical_columns = ["x_centre", "y_centre", "width", "height"]

# Applying min-max normalization
new_df[numerical_columns] = new_df[numerical_columns].apply(lambda x: (x - x.min()) / (x.max() - x.min()))
new_df

Unnamed: 0,image,label,x_centre,y_centre,width,height
0,/kaggle/input/weed-detection-dataset/unlabeled...,1,0.555321,0.581240,0.981314,0.713919
1,/kaggle/input/weed-detection-dataset/unlabeled...,1,0.595485,0.563411,0.555263,0.367108
2,/kaggle/input/weed-detection-dataset/unlabeled...,0,0.311290,0.981632,0.073563,0.059756
3,/kaggle/input/weed-detection-dataset/unlabeled...,0,0.202194,0.635257,0.059674,0.058401
4,/kaggle/input/weed-detection-dataset/unlabeled...,0,0.383546,0.220413,0.070365,0.061793
...,...,...,...,...,...,...
1139,/kaggle/input/weed-detection-dataset/unlabeled...,0,0.364765,0.290754,0.590182,0.509080
1140,/kaggle/input/weed-detection-dataset/unlabeled...,0,0.451672,0.537739,0.808827,0.948002
1141,/kaggle/input/weed-detection-dataset/unlabeled...,0,0.638947,0.254286,0.452293,0.448292
1142,/kaggle/input/weed-detection-dataset/unlabeled...,0,0.771457,0.310605,0.321949,0.545998


In [48]:
import pandas as pd

comb_df = pd.concat([new_df, df], ignore_index=True)
comb_df

Unnamed: 0,image,label,x_centre,y_centre,width,height
0,/kaggle/input/weed-detection-dataset/unlabeled...,1,0.555321,0.581240,0.981314,0.713919
1,/kaggle/input/weed-detection-dataset/unlabeled...,1,0.595485,0.563411,0.555263,0.367108
2,/kaggle/input/weed-detection-dataset/unlabeled...,0,0.311290,0.981632,0.073563,0.059756
3,/kaggle/input/weed-detection-dataset/unlabeled...,0,0.202194,0.635257,0.059674,0.058401
4,/kaggle/input/weed-detection-dataset/unlabeled...,0,0.383546,0.220413,0.070365,0.061793
...,...,...,...,...,...,...
1339,/kaggle/input/weed-detection-dataset/labeled/i...,0,0.335938,0.337891,0.632812,0.574219
1340,/kaggle/input/weed-detection-dataset/labeled/i...,1,0.499023,0.348633,0.748047,0.603516
1341,/kaggle/input/weed-detection-dataset/labeled/i...,1,0.673828,0.279297,0.371094,0.453125
1342,/kaggle/input/weed-detection-dataset/labeled/i...,0,0.524414,0.531250,0.767578,0.855469


In [49]:
dataset_path = "dataset"
os.makedirs(f"{dataset_path}/images/train", exist_ok=True)
os.makedirs(f"{dataset_path}/images/val", exist_ok=True)
os.makedirs(f"{dataset_path}/labels/train", exist_ok=True)
os.makedirs(f"{dataset_path}/labels/val", exist_ok=True)

print("✅ Dataset folders created.")


✅ Dataset folders created.


In [50]:
def move_images(df, img_folder):
    for img_path in df["image"]:
        img_name = os.path.basename(img_path)
        new_path = os.path.join(img_folder, img_name)
        shutil.copy(img_path, new_path)

# ✅ Move training images
move_images(comb_df, "dataset/images/train")

# ✅ Move validation images (from test_df)
move_images(test_df, "dataset/images/val")

print("✅ Images copied to dataset folders.")


✅ Images copied to dataset folders.


In [51]:
def convert_to_yolo(df, label_folder):
    for _, row in df.iterrows():
        img_name = os.path.basename(row["image"]).replace(".jpg", ".txt")  
        label_file = os.path.join(label_folder, img_name)

        with open(label_file, "w") as f:
            f.write(f"{row['label']} {row['x_centre']} {row['y_centre']} {row['width']} {row['height']}\n")

# ✅ Create labels for training and validation
convert_to_yolo(df, "dataset/labels/train")
convert_to_yolo(test_df, "dataset/labels/val")

print("✅ YOLO labels saved.")


✅ YOLO labels saved.


In [52]:
dataset_abs_path = os.path.abspath("dataset")

yaml_content = f"""
path: {dataset_abs_path}
train: images/train
val: images/val
nc: 2
names: ["weed", "non-weed"]
"""

with open("dataset/dataset.yaml", "w") as f:
    f.write(yaml_content)

print("✅ dataset.yaml created.")


✅ dataset.yaml created.


**Training on labeled+unlabeled**

In [53]:
model = YOLO("yolov8n.pt")  
model.train(data="dataset/dataset.yaml", epochs=20, imgsz=640)

# ✅ Save the trained model
model_path = "yolo_weed_detector.pt"
model.save(model_path)
print(f"✅ YOLO Model trained and saved at {model_path}")


Ultralytics 8.3.71 🚀 Python-3.10.12 torch-2.5.1+cu121 CUDA:0 (Tesla T4, 15095MiB)
[34m[1mengine/trainer: [0mtask=detect, mode=train, model=yolov8n.pt, data=dataset/dataset.yaml, epochs=20, time=None, patience=100, batch=16, imgsz=640, save=True, save_period=-1, cache=False, device=None, workers=8, project=None, name=train3, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, vid_stride=1, stream_buffer=False, visualize=False, augment=False, agnostic_nms=False, classes=None, retina_masks=False, embed=None, show=False, save_frames=False, save_txt=False, save_conf=False, save_crop=False, show_labels=True, show_conf=True, sh

[34m[1mtrain: [0mScanning /kaggle/working/dataset/labels/train... 200 images, 1000 backgrounds, 0 corrupt: 100%|██████████| 1200/1200 [00:00<00:00, 1770.01it/s]


[34m[1mtrain: [0mNew cache created: /kaggle/working/dataset/labels/train.cache
[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01, num_output_channels=3, method='weighted_average'), CLAHE(p=0.01, clip_limit=(1.0, 4.0), tile_grid_size=(8, 8))


[34m[1mval: [0mScanning /kaggle/working/dataset/labels/val.cache... 50 images, 0 backgrounds, 0 corrupt: 100%|██████████| 50/50 [00:00<?, ?it/s]


Plotting labels to runs/detect/train3/labels.jpg... 
[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m AdamW(lr=0.001667, momentum=0.9) with parameter groups 57 weight(decay=0.0), 64 weight(decay=0.0005), 63 bias(decay=0.0)
[34m[1mTensorBoard: [0mmodel graph visualization added ✅
Image sizes 640 train, 640 val
Using 2 dataloader workers
Logging results to [1mruns/detect/train3[0m
Starting training for 20 epochs...

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       1/20      2.24G      1.499      12.51      1.884          8        640: 100%|██████████| 75/75 [00:13<00:00,  5.70it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 2/2 [00:00<00:00,  4.22it/s]

                   all         50         50      0.695      0.208      0.275     0.0886






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       2/20       2.2G       1.65       6.55      1.995          8        640: 100%|██████████| 75/75 [00:12<00:00,  6.00it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 2/2 [00:00<00:00,  6.13it/s]

                   all         50         50    0.00279      0.804     0.0428     0.0112






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       3/20      2.27G      1.782      4.734      2.149          5        640: 100%|██████████| 75/75 [00:12<00:00,  6.04it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 2/2 [00:00<00:00,  5.49it/s]

                   all         50         50     0.0147      0.288     0.0812      0.016






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       4/20      2.27G      1.858      4.356      2.168          3        640: 100%|██████████| 75/75 [00:11<00:00,  6.29it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 2/2 [00:00<00:00,  5.63it/s]

                   all         50         50    0.00242      0.724     0.0341    0.00987






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       5/20      2.27G      1.957      4.383      2.241          7        640: 100%|██████████| 75/75 [00:11<00:00,  6.25it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 2/2 [00:00<00:00,  6.29it/s]

                   all         50         50     0.0123      0.883     0.0859     0.0299






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       6/20      2.18G      1.909      3.751      2.183          4        640: 100%|██████████| 75/75 [00:12<00:00,  6.16it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 2/2 [00:00<00:00,  6.32it/s]

                   all         50         50     0.0143      0.489     0.0669     0.0185






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       7/20      2.18G      1.931      3.451      2.145          4        640: 100%|██████████| 75/75 [00:12<00:00,  6.22it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 2/2 [00:00<00:00,  5.27it/s]

                   all         50         50     0.0254      0.787      0.227     0.0722






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       8/20      2.18G       1.76      3.137      2.054          6        640: 100%|██████████| 75/75 [00:12<00:00,  6.15it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 2/2 [00:00<00:00,  5.94it/s]

                   all         50         50      0.143      0.748      0.375      0.133






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       9/20      2.18G       1.71      3.027      2.003          8        640: 100%|██████████| 75/75 [00:11<00:00,  6.27it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 2/2 [00:00<00:00,  5.96it/s]

                   all         50         50     0.0481      0.941       0.53      0.183






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      10/20      2.18G      1.714      3.209      1.985          6        640: 100%|██████████| 75/75 [00:11<00:00,  6.26it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 2/2 [00:00<00:00,  5.54it/s]

                   all         50         50      0.162      0.881       0.51      0.218





Closing dataloader mosaic
[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01, num_output_channels=3, method='weighted_average'), CLAHE(p=0.01, clip_limit=(1.0, 4.0), tile_grid_size=(8, 8))

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      11/20      2.18G        1.5      3.619      2.081          2        640: 100%|██████████| 75/75 [00:12<00:00,  5.90it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 2/2 [00:00<00:00,  5.72it/s]

                   all         50         50     0.0281      0.804      0.382      0.188






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      12/20      2.18G      1.394      3.125      2.018          1        640: 100%|██████████| 75/75 [00:11<00:00,  6.42it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 2/2 [00:00<00:00,  6.27it/s]

                   all         50         50      0.461      0.713      0.574       0.29






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      13/20      2.18G      1.475      3.108      2.082          3        640: 100%|██████████| 75/75 [00:11<00:00,  6.26it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 2/2 [00:00<00:00,  6.92it/s]

                   all         50         50      0.111      0.921      0.679      0.332






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      14/20      2.18G      1.389      2.708       2.05          2        640: 100%|██████████| 75/75 [00:11<00:00,  6.43it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 2/2 [00:00<00:00,  5.18it/s]

                   all         50         50      0.701      0.679      0.794      0.472






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      15/20      2.18G      1.329      2.861      1.933          4        640: 100%|██████████| 75/75 [00:11<00:00,  6.39it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 2/2 [00:00<00:00,  6.42it/s]

                   all         50         50      0.139       0.96      0.832      0.511






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      16/20      2.18G       1.27      2.775      1.816          2        640: 100%|██████████| 75/75 [00:11<00:00,  6.30it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 2/2 [00:00<00:00,  6.57it/s]

                   all         50         50      0.314      0.883      0.804      0.432






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      17/20      2.18G      1.276      2.587      1.811          2        640: 100%|██████████| 75/75 [00:11<00:00,  6.39it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 2/2 [00:00<00:00,  6.86it/s]

                   all         50         50      0.787      0.739      0.834      0.521






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      18/20      2.18G      1.152      2.519      1.706          3        640: 100%|██████████| 75/75 [00:11<00:00,  6.29it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 2/2 [00:00<00:00,  6.59it/s]

                   all         50         50      0.783      0.839      0.852      0.525






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      19/20      2.18G      1.255      2.572      1.817          7        640: 100%|██████████| 75/75 [00:11<00:00,  6.39it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 2/2 [00:00<00:00,  6.15it/s]

                   all         50         50      0.825      0.847      0.888      0.562






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      20/20      2.18G      1.185      2.539      1.759          1        640: 100%|██████████| 75/75 [00:11<00:00,  6.40it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 2/2 [00:00<00:00,  6.11it/s]

                   all         50         50      0.824      0.842        0.9       0.59






20 epochs completed in 0.074 hours.
Optimizer stripped from runs/detect/train3/weights/last.pt, 6.2MB
Optimizer stripped from runs/detect/train3/weights/best.pt, 6.2MB

Validating runs/detect/train3/weights/best.pt...
Ultralytics 8.3.71 🚀 Python-3.10.12 torch-2.5.1+cu121 CUDA:0 (Tesla T4, 15095MiB)
Model summary (fused): 168 layers, 3,006,038 parameters, 0 gradients, 8.1 GFLOPs


                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 2/2 [00:00<00:00,  5.10it/s]


                   all         50         50      0.818      0.847      0.899       0.59
                  weed         24         24      0.842      0.886      0.955      0.645
              non-weed         26         26      0.795      0.808      0.843      0.536




Speed: 0.2ms preprocess, 2.2ms inference, 0.0ms loss, 1.6ms postprocess per image
Results saved to [1mruns/detect/train3[0m
✅ YOLO Model trained and saved at yolo_weed_detector.pt


In [54]:
dataset = sv.DetectionDataset.from_yolo(
    images_directory_path="dataset/images/val",
    annotations_directory_path="dataset/labels/val",
    data_yaml_path="dataset/dataset.yaml"
)

In [55]:
def callback(image: np.ndarray) -> sv.Detections:
    result = model(image)[0]  
    return sv.Detections.from_ultralytics(result)


In [38]:
def run_yolo_on_test(test_df, model):
    y_true, y_pred = [], []

    for _, row in test_df.iterrows():
        image_path = row["image"]
        true_label = row["label"]

        results = model(image_path)

        if len(results[0].boxes) == 0:
            y_pred.append(1)  # If no detection, assume it's non-weed
        else:
            best_idx = results[0].boxes.conf.argmax().item()
            best_label = int(results[0].boxes.cls[best_idx].cpu().numpy())
            y_pred.append(best_label)

        y_true.append(true_label)

    return y_true, y_pred

# ✅ Run YOLO and collect predictions
y_true, y_pred = run_yolo_on_test(test_df, model)

# ✅ Compute F1 Score
f1 = f1_score(y_true, y_pred, average="macro")
print(f"✅ F1 Score: {f1:.4f}")



image 1/1 /kaggle/input/weed-detection-dataset/test/images/agri_0_9430_jpeg.rf.55590fd568cfd05cb5a5f38a69781ce9.jpg: 640x640 1 weed, 7.6ms
Speed: 2.3ms preprocess, 7.6ms inference, 1.2ms postprocess per image at shape (1, 3, 640, 640)

image 1/1 /kaggle/input/weed-detection-dataset/test/images/agri_0_9266_jpeg.rf.190539e7297457c48131e14d0eaf9823.jpg: 640x640 1 non-weed, 6.0ms
Speed: 2.1ms preprocess, 6.0ms inference, 1.1ms postprocess per image at shape (1, 3, 640, 640)

image 1/1 /kaggle/input/weed-detection-dataset/test/images/agri_0_9881_jpeg.rf.ee55ad20cd1edd1f366c6d2641677d66.jpg: 640x640 1 non-weed, 6.0ms
Speed: 2.1ms preprocess, 6.0ms inference, 1.1ms postprocess per image at shape (1, 3, 640, 640)

image 1/1 /kaggle/input/weed-detection-dataset/test/images/agri_0_9532_jpeg.rf.cec78142a3ad2cbd2466c222131f846a.jpg: 640x640 1 non-weed, 5.9ms
Speed: 2.2ms preprocess, 5.9ms inference, 1.2ms postprocess per image at shape (1, 3, 640, 640)

image 1/1 /kaggle/input/weed-detection-data

In [37]:
mean_average_precision = sv.MeanAveragePrecision.benchmark(
    dataset=dataset,
    callback=callback
)

mAP = mean_average_precision.map50_95
print(f"✅ mAP@[0.5:0.95]: {mAP:.4f}")



0: 640x640 2 weeds, 7.4ms
Speed: 2.3ms preprocess, 7.4ms inference, 2.3ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 weed, 7.4ms
Speed: 2.6ms preprocess, 7.4ms inference, 1.2ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 2 non-weeds, 7.4ms
Speed: 2.6ms preprocess, 7.4ms inference, 1.2ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 weed, 7.3ms
Speed: 2.4ms preprocess, 7.3ms inference, 1.2ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 non-weed, 7.3ms
Speed: 2.4ms preprocess, 7.3ms inference, 1.2ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 weed, 7.3ms
Speed: 2.3ms preprocess, 7.3ms inference, 1.2ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 weed, 7.4ms
Speed: 2.3ms preprocess, 7.4ms inference, 1.2ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 weed, 7.3ms
Speed: 2.3ms preprocess, 7.3ms inference, 1.3ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 2 

In [39]:
final_score = 0.5 * f1 + 0.5 * mAP
print(f"Final Score: {final_score:.4f}")


Final Score: 0.8456


As we can see our performance has improved by a lot