In [1]:
import os
import shutil
import random

# Paths (adjust these to match your Kaggle dataset paths)
image_dir = "/kaggle/input/fawryvideos/train/images"
label_dir = "/kaggle/input/fawryvideos/train/labels"
dataset_dir = "/kaggle/working"

# Create train/val directories for images and labels
for split in ["train", "val"]:
    os.makedirs(os.path.join(dataset_dir, "images", split), exist_ok=True)
    os.makedirs(os.path.join(dataset_dir, "labels", split), exist_ok=True)

# List all image files and sort them
all_images = sorted([f for f in os.listdir(image_dir) if f.lower().endswith(('.jpg', '.jpeg', '.png'))])
print(f"Total images found: {len(all_images)}")
# # Filter: only select every 50th frame (e.g. frame 000050, 000100, etc.)
# selected_images = []
# for img in all_images:
#     try:
#         # Extract the numeric part from the filename (e.g., "000050" from "000050.jpg")
#         # 1 frame from each 25 for video 02
#         # 1 frame from each 50 for videos 03 & 05
#         frame_number = int(os.path.splitext(img)[0])
#         if frame_number % 25 == 0:
#             selected_images.append(img)
#     except ValueError:
#         # If the filename doesn't represent a number, skip it.
#         continue

# Shuffle the selected images
random.shuffle(all_images)
# print(f"Images after filtering every 25th frame: {len(selected_images)}")

# Split: 80% train, 20% val
train_size = int(0.8 * len(all_images))
train_images = all_images[:train_size]
val_images = all_images[train_size:]

def copy_files(image_list, split):
    for img in image_list:
        # Copy the image
        src_img = os.path.join(image_dir, img)
        dst_img = os.path.join(dataset_dir, "images", split, img)
        shutil.copy(src_img, dst_img)
        
        # Derive the corresponding label file name
        label_filename = os.path.splitext(img)[0] + ".txt"
        src_label = os.path.join(label_dir, label_filename)
        dst_label = os.path.join(dataset_dir, "labels", split, label_filename)
        
        # Only copy the label if it exists (ensuring we filter labels as well)
        if os.path.exists(src_label):
            shutil.copy(src_label, dst_label)
        else:
            print(f"Label file {src_label} not found. Skipping label for {img}.")

# Copy files for train and validation splits
copy_files(train_images, "train")
copy_files(val_images, "val")
print(f"Images after filtering every 25th frame: {len(train_images)}")
print(f"Images after filtering every 25th frame: {len(val_images)}")

print("Dataset split complete.")

Total images found: 389
Images after filtering every 25th frame: 311
Images after filtering every 25th frame: 78
Dataset split complete.


In [2]:
!pip install ultralytics

Collecting ultralytics
  Downloading ultralytics-8.3.111-py3-none-any.whl.metadata (37 kB)
Collecting ultralytics-thop>=2.0.0 (from ultralytics)
  Downloading ultralytics_thop-2.0.14-py3-none-any.whl.metadata (9.4 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-curand-cu12==10.3.5.147 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_curand_cu12-10.3.5.147-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cusolver-cu12==11.6.1.9 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cusolver_cu12-11.6

In [None]:
import os
import torch
os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'expandable_segments:True'
torch.cuda.empty_cache()  # clear any unused memory

from ultralytics import YOLO

# Enable mixed precision training if supported (check documentation if additional flag is needed)
model = YOLO("/kaggle/input/yolov11x/pytorch/default/1/yolo11x.pt")
data_yaml = "/kaggle/input/yaml-without-mot/data.yaml"

# Train with a lower batch size, lower image size, and consider using gradient accumulation if available.
model.train(
    data=data_yaml,
    epochs=128,
    imgsz=640,      # lower image resolution
    batch=16,        # further reduce batch size
    name='custom_yolov11_retail',
    patience=5
)

custom_save_path = "/kaggle/working/custom_yolov11_retail_best_imgsz.pt"
model.save(custom_save_path)
print(f"Model weights saved to {custom_save_path}")

# Optionally, if your training loop supports gradient accumulation, check the documentation on how to enable it.


Creating new Ultralytics Settings v0.0.6 file ✅ 
View Ultralytics Settings with 'yolo settings' or at '/root/.config/Ultralytics/settings.json'
Update Settings with 'yolo settings key=value', i.e. 'yolo settings runs_dir=path/to/dir'. For help see https://docs.ultralytics.com/quickstart/#ultralytics-settings.
Ultralytics 8.3.111 🚀 Python-3.11.11 torch-2.5.1+cu124 CUDA:0 (Tesla T4, 15095MiB)
[34m[1mengine/trainer: [0mtask=detect, mode=train, model=/kaggle/input/yolov11x/pytorch/default/1/yolo11x.pt, data=/kaggle/input/yaml-without-mot/data.yaml, epochs=128, time=None, patience=5, batch=16, imgsz=640, save=True, save_period=-1, cache=False, device=None, workers=8, project=None, name=custom_yolov11_retail, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.0, v

100%|██████████| 755k/755k [00:00<00:00, 16.6MB/s]
E0000 00:00:1745024325.435467      31 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1745024325.558425      31 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


Overriding model.yaml nc=80 with nc=1

                   from  n    params  module                                       arguments                     
  0                  -1  1      2784  ultralytics.nn.modules.conv.Conv             [3, 96, 3, 2]                 
  1                  -1  1    166272  ultralytics.nn.modules.conv.Conv             [96, 192, 3, 2]               
  2                  -1  2    389760  ultralytics.nn.modules.block.C3k2            [192, 384, 2, True, 0.25]     
  3                  -1  1   1327872  ultralytics.nn.modules.conv.Conv             [384, 384, 3, 2]              
  4                  -1  2   1553664  ultralytics.nn.modules.block.C3k2            [384, 768, 2, True, 0.25]     
  5                  -1  1   5309952  ultralytics.nn.modules.conv.Conv             [768, 768, 3, 2]              
  6                  -1  2   5022720  ultralytics.nn.modules.block.C3k2            [768, 768, 2, True]           
  7                  -1  1   5309952  ultralytics

100%|██████████| 5.35M/5.35M [00:00<00:00, 68.0MB/s]


[34m[1mAMP: [0mchecks passed ✅
[34m[1mtrain: [0mFast image access ✅ (ping: 0.0±0.0 ms, read: 1607.1±465.0 MB/s, size: 67.3 KB)


[34m[1mtrain: [0mScanning /kaggle/working/labels/train... 311 images, 0 backgrounds, 0 corrupt: 100%|██████████| 311/311 [00:00<00:00, 1371.08it/s]

[34m[1mtrain: [0mNew cache created: /kaggle/working/labels/train.cache





[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01, num_output_channels=3, method='weighted_average'), CLAHE(p=0.01, clip_limit=(1.0, 4.0), tile_grid_size=(8, 8))
[34m[1mval: [0mFast image access ✅ (ping: 0.0±0.0 ms, read: 721.1±473.6 MB/s, size: 65.6 KB)


[34m[1mval: [0mScanning /kaggle/working/labels/val... 78 images, 0 backgrounds, 0 corrupt: 100%|██████████| 78/78 [00:00<00:00, 1298.64it/s]

[34m[1mval: [0mNew cache created: /kaggle/working/labels/val.cache





Plotting labels to runs/detect/custom_yolov11_retail/labels.jpg... 
[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m AdamW(lr=0.002, momentum=0.9) with parameter groups 167 weight(decay=0.0), 174 weight(decay=0.0005), 173 bias(decay=0.0)
Image sizes 640 train, 640 val
Using 2 dataloader workers
Logging results to [1mruns/detect/custom_yolov11_retail[0m
Starting training for 128 epochs...

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      1/128      14.5G      1.463      1.909      1.548        102        640: 100%|██████████| 20/20 [00:24<00:00,  1.20s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 3/3 [00:04<00:00,  1.34s/it]

                   all         78        912    0.00264     0.0439    0.00139   0.000508






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      2/128      14.5G      1.442      1.404      1.558         88        640: 100%|██████████| 20/20 [00:23<00:00,  1.16s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 3/3 [00:02<00:00,  1.39it/s]

                   all         78        912          0          0          0          0






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      3/128      14.5G      1.624      1.455      1.675        246        640:  65%|██████▌   | 13/20 [00:16<00:08,  1.25s/it]



      3/128      14.5G      1.619      1.474      1.676        198        640:  75%|███████▌  | 15/20 [00:19<00:06,  1.36s/it]



      3/128      14.5G      1.596      1.471      1.669         68        640: 100%|██████████| 20/20 [00:25<00:00,  1.26s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 3/3 [00:02<00:00,  1.34it/s]

                   all         78        912          0          0          0          0






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      4/128      14.4G      1.457      1.189      1.495        221        640:   5%|▌         | 1/20 [00:01<00:23,  1.25s/it]



      4/128      14.5G      1.709      1.379      1.706        187        640:  20%|██        | 4/20 [00:05<00:21,  1.34s/it]



      4/128      14.5G      1.733      1.405      1.716        260        640:  25%|██▌       | 5/20 [00:07<00:22,  1.48s/it]



      4/128      14.5G      1.681      1.478      1.689        235        640:  55%|█████▌    | 11/20 [00:15<00:12,  1.37s/it]



      4/128      14.5G      1.667      1.478      1.686        230        640:  65%|██████▌   | 13/20 [00:18<00:10,  1.43s/it]



      4/128      14.5G      1.666      1.479      1.688        251        640:  70%|███████   | 14/20 [00:20<00:09,  1.51s/it]



      4/128      14.5G      1.649      1.487      1.681        112        640: 100%|██████████| 20/20 [00:28<00:00,  1.42s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 3/3 [00:02<00:00,  1.23it/s]

                   all         78        912          0          0          0          0






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      5/128      14.5G       1.56      1.366       1.59        191        640:  15%|█▌        | 3/20 [00:04<00:24,  1.43s/it]



      5/128      14.5G       1.56      1.416      1.634        178        640:  35%|███▌      | 7/20 [00:10<00:19,  1.48s/it]



      5/128      14.5G      1.555      1.424       1.64        185        640:  45%|████▌     | 9/20 [00:13<00:17,  1.56s/it]



      5/128      14.5G      1.534      1.407      1.628        278        640:  65%|██████▌   | 13/20 [00:20<00:10,  1.54s/it]



      5/128      14.5G      1.538      1.407      1.635        221        640:  75%|███████▌  | 15/20 [00:23<00:07,  1.56s/it]



      5/128      14.5G      1.535      1.402      1.633        199        640:  85%|████████▌ | 17/20 [00:26<00:04,  1.57s/it]



      5/128      14.5G      1.549      1.414       1.64         91        640: 100%|██████████| 20/20 [00:30<00:00,  1.51s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 3/3 [00:02<00:00,  1.22it/s]

                   all         78        912          0          0          0          0






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      6/128      14.4G      1.863      1.527      1.738        278        640:   5%|▌         | 1/20 [00:01<00:26,  1.38s/it]



      6/128      14.5G      1.839      1.513       1.74        265        640:  15%|█▌        | 3/20 [00:04<00:25,  1.51s/it]



      6/128      14.5G      1.773      1.506      1.727        209        640:  25%|██▌       | 5/20 [00:07<00:22,  1.50s/it]



      6/128      14.5G      1.742      1.491      1.713        367        640:  30%|███       | 6/20 [00:09<00:21,  1.57s/it]



      6/128      14.5G      1.735      1.493      1.714        290        640:  35%|███▌      | 7/20 [00:11<00:21,  1.63s/it]



      6/128      14.5G      1.709      1.495      1.707        205        640:  45%|████▌     | 9/20 [00:14<00:17,  1.59s/it]



      6/128      14.5G      1.698      1.486      1.697        293        640:  55%|█████▌    | 11/20 [00:17<00:13,  1.53s/it]



      6/128      14.5G      1.691       1.49      1.703        252        640:  65%|██████▌   | 13/20 [00:20<00:10,  1.54s/it]



      6/128      14.5G      1.674      1.476      1.692        282        640:  75%|███████▌  | 15/20 [00:23<00:07,  1.55s/it]



      6/128      14.5G       1.66      1.461      1.683        240        640:  85%|████████▌ | 17/20 [00:26<00:04,  1.51s/it]



      6/128      14.5G      1.652       1.46      1.683         97        640: 100%|██████████| 20/20 [00:30<00:00,  1.51s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 3/3 [00:02<00:00,  1.26it/s]

                   all         78        912          0          0          0          0
[34m[1mEarlyStopping: [0mTraining stopped early as no improvement observed in last 5 epochs. Best results observed at epoch 1, best model saved as best.pt.
To update EarlyStopping(patience=5) pass a new patience value, i.e. `patience=300` or use `patience=0` to disable EarlyStopping.






6 epochs completed in 0.052 hours.
Optimizer stripped from runs/detect/custom_yolov11_retail/weights/last.pt, 114.4MB
Optimizer stripped from runs/detect/custom_yolov11_retail/weights/best.pt, 114.4MB

Validating runs/detect/custom_yolov11_retail/weights/best.pt...
Ultralytics 8.3.111 🚀 Python-3.11.11 torch-2.5.1+cu124 CUDA:0 (Tesla T4, 15095MiB)
YOLO11x summary (fused): 190 layers, 56,828,179 parameters, 0 gradients, 194.4 GFLOPs


                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95):  33%|███▎      | 1/3 [00:01<00:02,  1.33s/it]

### Inferenece

In [7]:
import os
import json
from ultralytics import YOLO

# Step 1: Load the trained YOLO model
model_path = "/kaggle/working/custom_yolov11_retail_best_imgsz.pt"
model = YOLO(model_path)

# Step 2: Define the data YAML file (it should contain paths to your test/val images and labels)
data_yaml = "/kaggle/input/yamlfile2/data.yaml"

# Step 3: Evaluate the model
# The .val() method runs inference on the test/validation split and computes common metrics (e.g., mAP)
results = model.val(data=data_yaml)

# The results object typically contains a metrics dictionary. Print evaluation metrics.
print("Evaluation Metrics:")
print(results.metrics)

# Step 4: Save the evaluation metrics to a JSON file for further analysis
eval_results_path = "/kaggle/working/evaluation_results.json"
with open(eval_results_path, "w") as f:
    json.dump(results.metrics, f, indent=4)

print(f"Evaluation results saved to {eval_results_path}")


Ultralytics 8.3.95 🚀 Python-3.10.12 torch-2.5.1+cu121 CUDA:0 (Tesla P100-PCIE-16GB, 16269MiB)
YOLO11s summary (fused): 100 layers, 9,413,187 parameters, 0 gradients, 21.3 GFLOPs


[34m[1mval: [0mScanning /kaggle/input/selected-images/img_02/dataset/labels/train... 181 images, 0 backgrounds, 0 corrupt: 100%|██████████| 181/181 [00:00<00:00, 531.51it/s]




                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 12/12 [00:10<00:00,  1.20it/s]


                   all        181      20561      0.922      0.858      0.931      0.616


  xa[xa < 0] = -1
  xa[xa < 0] = -1


Speed: 1.3ms preprocess, 3.7ms inference, 0.0ms loss, 12.8ms postprocess per image
Results saved to [1mruns/detect/val5[0m
Evaluation Metrics:


AttributeError: 'DetMetrics' object has no attribute 'metrics'. See valid attributes below.

    Utility class for computing detection metrics such as precision, recall, and mean average precision (mAP).

    Attributes:
        save_dir (Path): A path to the directory where the output plots will be saved.
        plot (bool): A flag that indicates whether to plot precision-recall curves for each class.
        names (dict): A dictionary of class names.
        box (Metric): An instance of the Metric class for storing detection results.
        speed (dict): A dictionary for storing execution times of different parts of the detection process.
        task (str): The task type, set to 'detect'.
    