<a href="https://colab.research.google.com/github/harshitha521/-Real-Time-Conveyor-Belt-Bag-Detection-and-Distance-Estimation-for-the-Visually-Impaired/blob/main/Luggage_Detection_Source_Code.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Importing kagglehub to access Kaggle datasets
import kagglehub

# Downloading suitcase/luggage dataset
path = kagglehub.dataset_download("dataclusterlabs/suitcaseluggage-dataset")

# showing dataset path
print("Path to dataset files:", path)


Path to dataset files: /kaggle/input/suitcaseluggage-dataset


In [None]:
# Defining the content for the data.yaml file required by YOLO for training
yaml_content = """
train: /kaggle/input/suitcaseluggage-dataset/images/train  # Path to training images
val: /kaggle/input/suitcaseluggage-dataset/images/val      # Path to validation images

nc: 1                              # Number of classes (in this case, just one: luggage)
names: ['luggage']                # Name of the class
"""

# Opening or creating a file named data.yaml in write mode and save the above content into it
with open("data.yaml", "w") as f:
    f.write(yaml_content)         # Writing the string to the file


In [None]:
# Defining the content for the data.yaml file needed by YOLO for training
yaml_content = """
train: /kaggle/input/suitcaseluggage-dataset/images/train   # Path to training images
val: /kaggle/input/suitcaseluggage-dataset/images/train     # Using training set as validation too (fallback if val set is missing)

nc: 1                               # Number of object classes
names: ['luggage']                  # Name of the class (only one: 'luggage')
"""


In [None]:
# Listing all files and folders recursively in the dataset
!ls -R /kaggle/input/suitcaseluggage-dataset

/kaggle/input/suitcaseluggage-dataset:
annotation  suitcase

/kaggle/input/suitcaseluggage-dataset/annotation:
annotation

/kaggle/input/suitcaseluggage-dataset/annotation/annotation:
'Datacluster_Labs_Suitcase (100).xml'  'Datacluster_Labs_Suitcase (33).xml'
'Datacluster_Labs_Suitcase (101).xml'  'Datacluster_Labs_Suitcase (34).xml'
'Datacluster_Labs_Suitcase (102).xml'  'Datacluster_Labs_Suitcase (35).xml'
'Datacluster_Labs_Suitcase (103).xml'  'Datacluster_Labs_Suitcase (36).xml'
'Datacluster_Labs_Suitcase (104).xml'  'Datacluster_Labs_Suitcase (37).xml'
'Datacluster_Labs_Suitcase (105).xml'  'Datacluster_Labs_Suitcase (38).xml'
'Datacluster_Labs_Suitcase (106).xml'  'Datacluster_Labs_Suitcase (39).xml'
'Datacluster_Labs_Suitcase (107).xml'  'Datacluster_Labs_Suitcase (3).xml'
'Datacluster_Labs_Suitcase (108).xml'  'Datacluster_Labs_Suitcase (40).xml'
'Datacluster_Labs_Suitcase (109).xml'  'Datacluster_Labs_Suitcase (41).xml'
'Datacluster_Labs_Suitcase (10).xml'   'Datacluster_Labs_

In [None]:
# Installing YOLO and XML parser
!pip install ultralytics xmltodict --quiet


In [None]:
# Importing required libraries
import os
import xmltodict
from sklearn.model_selection import train_test_split
import shutil

# Setting source paths for images and annotations
img_src = "/kaggle/input/suitcaseluggage-dataset/suitcase/suitcase"
ann_src = "/kaggle/input/suitcaseluggage-dataset/annotation/annotation"

# Creating folders for images and labels
os.makedirs("images/train", exist_ok=True)
os.makedirs("images/val", exist_ok=True)
os.makedirs("labels/train", exist_ok=True)
os.makedirs("labels/val", exist_ok=True)

# Defining function for converting bounding boxes from VOC to YOLO format
def convert_to_yolo(size, box):
    dw = 1.0 / size[0]
    dh = 1.0 / size[1]
    x_center = (box['xmin'] + box['xmax']) / 2.0
    y_center = (box['ymin'] + box['ymax']) / 2.0
    w = box['xmax'] - box['xmin']
    h = box['ymax'] - box['ymin']
    return [x_center * dw, y_center * dh, w * dw, h * dh]

# Collecting all XML annotation files
annotations = [f for f in os.listdir(ann_src) if f.endswith(".xml")]

# Splitting annotation files into training and validation sets
train_files, val_files = train_test_split(annotations, test_size=0.2, random_state=42)

# Defining function for processing annotation files
def process_files(xml_files, subset):
    for file in xml_files:
        xml_path = os.path.join(ann_src, file)
        with open(xml_path) as f:
            data = xmltodict.parse(f.read())

        image_name = data['annotation']['filename']
        img_path = os.path.join(img_src, image_name)

        # Copying image to target folder
        shutil.copy(img_path, f"images/{subset}/{image_name}")

        # Extracting image width and height
        size = data['annotation']['size']
        w, h = int(size['width']), int(size['height'])

        # Extracting objects from annotation (handling single object as list)
        objects = data['annotation'].get('object', [])
        if not isinstance(objects, list):
            objects = [objects]

        yolo_lines = []
        for obj in objects:
            label = obj['name']
            bbox = obj['bndbox']
            box = {
                'xmin': float(bbox['xmin']),
                'ymin': float(bbox['ymin']),
                'xmax': float(bbox['xmax']),
                'ymax': float(bbox['ymax'])
            }
            # Converting to YOLO format
            yolo_box = convert_to_yolo((w, h), box)
            yolo_lines.append(f"0 {' '.join([f'{x:.6f}' for x in yolo_box])}")  # Using class ID 0

        # Saving YOLO label file
        txt_name = image_name.replace(".jpg", ".txt")
        with open(f"labels/{subset}/{txt_name}", "w") as f:
            f.write("\n".join(yolo_lines))

# Processing training and validation data
process_files(train_files, "train")
process_files(val_files, "val")


In [None]:
# Defining YOLO dataset configuration in YAML format
yaml_content = """
train: images/train
val: images/val
nc: 1
names: ['luggage']
"""

# Writing the YAML content to a file named 'data.yaml'
with open("data.yaml", "w") as f:
    f.write(yaml_content)


In [None]:
# Loading YOLOv8 nano model (can change to 'best.pt' if using a trained model)
# Starting training using custom dataset and annotations from 'data.yaml'
# in the below cell

In [None]:
from ultralytics import YOLO

model = YOLO("yolov8n.pt")  # or yolov8s.pt if you prefer
model.train(data="data.yaml", epochs=20)


Ultralytics 8.3.157 🚀 Python-3.11.13 torch-2.6.0+cu124 CPU (Intel Xeon 2.20GHz)
[34m[1mengine/trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=16, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=data.yaml, degrees=0.0, deterministic=True, device=cpu, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=20, erasing=0.4, exist_ok=False, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=640, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.01, lrf=0.01, mask_ratio=4, max_det=300, mixup=0.0, mode=train, model=yolov8n.pt, momentum=0.937, mosaic=1.0, multi_scale=False, name=train3, nbs=64, nms=False, opset=None, optimize=False, optimizer=auto, overlap_mask=True, patience=100, perspective=0.0, plots=True, pose=12.0, pretrained=True, pr

[34m[1mtrain: [0mScanning /content/labels/train... 118 images, 2 backgrounds, 0 corrupt: 100%|██████████| 118/118 [00:07<00:00, 16.03it/s]

[34m[1mtrain: [0m/content/images/train/Datacluster_Labs_Suitcase (105).jpg: corrupt JPEG restored and saved
[34m[1mtrain: [0m/content/images/train/Datacluster_Labs_Suitcase (25).jpg: corrupt JPEG restored and saved
[34m[1mtrain: [0m/content/images/train/Datacluster_Labs_Suitcase (38).jpg: corrupt JPEG restored and saved
[34m[1mtrain: [0m/content/images/train/Datacluster_Labs_Suitcase (39).jpg: corrupt JPEG restored and saved
[34m[1mtrain: [0m/content/images/train/Datacluster_Labs_Suitcase (40).jpg: corrupt JPEG restored and saved
[34m[1mtrain: [0m/content/images/train/Datacluster_Labs_Suitcase (43).jpg: corrupt JPEG restored and saved
[34m[1mtrain: [0m/content/images/train/Datacluster_Labs_Suitcase (45).jpg: corrupt JPEG restored and saved
[34m[1mtrain: [0m/content/images/train/Datacluster_Labs_Suitcase (46).jpg: corrupt JPEG restored and saved
[34m[1mtrain: [0m/content/images/train/Datacluster_Labs_Suitcase (47).jpg: corrupt JPEG restored and saved
[34m[1m




[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01, method='weighted_average', num_output_channels=3), CLAHE(p=0.01, clip_limit=(1.0, 4.0), tile_grid_size=(8, 8))
[34m[1mval: [0mFast image access ✅ (ping: 0.0±0.0 ms, read: 2206.0±540.3 MB/s, size: 3744.6 KB)


[34m[1mval: [0mScanning /content/labels/val... 30 images, 2 backgrounds, 0 corrupt: 100%|██████████| 30/30 [00:01<00:00, 22.05it/s]

[34m[1mval: [0m/content/images/val/Datacluster_Labs_Suitcase (104).jpg: corrupt JPEG restored and saved
[34m[1mval: [0m/content/images/val/Datacluster_Labs_Suitcase (42).jpg: corrupt JPEG restored and saved
[34m[1mval: [0m/content/images/val/Datacluster_Labs_Suitcase (44).jpg: corrupt JPEG restored and saved
[34m[1mval: [0mNew cache created: /content/labels/val.cache
Plotting labels to runs/detect/train3/labels.jpg... 





[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m AdamW(lr=0.002, momentum=0.9) with parameter groups 57 weight(decay=0.0), 64 weight(decay=0.0005), 63 bias(decay=0.0)
Image sizes 640 train, 640 val
Using 0 dataloader workers
Logging results to [1mruns/detect/train3[0m
Starting training for 20 epochs...

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       1/20         0G     0.8467      2.416      1.317         21        640: 100%|██████████| 8/8 [02:10<00:00, 16.33s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:11<00:00, 11.16s/it]

                   all         30         31    0.00344          1      0.754      0.556






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       2/20         0G     0.8103      1.616      1.268         17        640: 100%|██████████| 8/8 [01:45<00:00, 13.13s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:10<00:00, 10.55s/it]

                   all         30         31      0.648     0.0323      0.603      0.373






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       3/20         0G     0.7361      1.332      1.207         17        640: 100%|██████████| 8/8 [01:48<00:00, 13.60s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:10<00:00, 10.39s/it]

                   all         30         31      0.933      0.451      0.839      0.569






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       4/20         0G     0.8544      1.375       1.34         18        640: 100%|██████████| 8/8 [01:43<00:00, 12.90s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:10<00:00, 10.28s/it]

                   all         30         31      0.847      0.714      0.716      0.355






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       5/20         0G     0.8035      1.279      1.285         17        640: 100%|██████████| 8/8 [01:42<00:00, 12.77s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:11<00:00, 11.28s/it]

                   all         30         31       0.91       0.71      0.798      0.498






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       6/20         0G     0.8585      1.296      1.272         24        640: 100%|██████████| 8/8 [01:40<00:00, 12.51s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:10<00:00, 10.50s/it]

                   all         30         31      0.842      0.686      0.762      0.547






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       7/20         0G     0.8027       1.23      1.215         20        640: 100%|██████████| 8/8 [01:42<00:00, 12.78s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:09<00:00,  9.88s/it]

                   all         30         31      0.782      0.742      0.746      0.442






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       8/20         0G     0.8402      1.154      1.254         16        640: 100%|██████████| 8/8 [01:41<00:00, 12.72s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:16<00:00, 16.87s/it]

                   all         30         31      0.683      0.839      0.768      0.499






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       9/20         0G     0.8144       1.08      1.173         20        640: 100%|██████████| 8/8 [01:44<00:00, 13.05s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:09<00:00,  9.75s/it]

                   all         30         31      0.725      0.767      0.737      0.479






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      10/20         0G     0.8426      1.137      1.247         17        640: 100%|██████████| 8/8 [01:46<00:00, 13.30s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:09<00:00,  9.83s/it]

                   all         30         31      0.493      0.419      0.392      0.169





Closing dataloader mosaic
[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01, method='weighted_average', num_output_channels=3), CLAHE(p=0.01, clip_limit=(1.0, 4.0), tile_grid_size=(8, 8))

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      11/20         0G     0.7432      1.685      1.282          8        640: 100%|██████████| 8/8 [01:49<00:00, 13.64s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:11<00:00, 11.03s/it]

                   all         30         31      0.703      0.645      0.711      0.372






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      12/20         0G     0.7778      1.539      1.261          7        640: 100%|██████████| 8/8 [01:41<00:00, 12.67s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:10<00:00, 10.57s/it]

                   all         30         31      0.794      0.497      0.669      0.379






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      13/20         0G     0.6812      1.417      1.181          7        640: 100%|██████████| 8/8 [01:43<00:00, 12.88s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:10<00:00, 10.51s/it]

                   all         30         31      0.841      0.683      0.795      0.587






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      14/20         0G     0.6637       1.31      1.207          7        640: 100%|██████████| 8/8 [01:52<00:00, 14.02s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:10<00:00, 10.37s/it]

                   all         30         31      0.858      0.783      0.818      0.633






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      15/20         0G     0.6418      1.237      1.199          8        640: 100%|██████████| 8/8 [01:45<00:00, 13.17s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:10<00:00, 10.35s/it]

                   all         30         31      0.926      0.839      0.861      0.658






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      16/20         0G     0.6439      1.209      1.145          8        640: 100%|██████████| 8/8 [01:44<00:00, 13.07s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:10<00:00, 10.40s/it]

                   all         30         31      0.952      0.774       0.88      0.621






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      17/20         0G      0.593      1.148      1.117          7        640: 100%|██████████| 8/8 [01:43<00:00, 12.94s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:10<00:00, 10.47s/it]

                   all         30         31      0.946      0.774      0.867      0.639






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      18/20         0G     0.5402      1.092      1.058          6        640: 100%|██████████| 8/8 [01:44<00:00, 13.08s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:10<00:00, 10.04s/it]

                   all         30         31      0.955      0.742      0.876      0.641






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      19/20         0G     0.5404      1.063      1.082          7        640: 100%|██████████| 8/8 [01:52<00:00, 14.04s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:10<00:00, 10.64s/it]

                   all         30         31       0.87      0.871      0.908      0.686






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      20/20         0G      0.491      1.045      1.027          5        640: 100%|██████████| 8/8 [01:47<00:00, 13.42s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:10<00:00, 10.72s/it]

                   all         30         31       0.95      0.806      0.918      0.687






20 epochs completed in 0.653 hours.
Optimizer stripped from runs/detect/train3/weights/last.pt, 6.2MB
Optimizer stripped from runs/detect/train3/weights/best.pt, 6.2MB

Validating runs/detect/train3/weights/best.pt...
Ultralytics 8.3.157 🚀 Python-3.11.13 torch-2.6.0+cu124 CPU (Intel Xeon 2.20GHz)
Model summary (fused): 72 layers, 3,005,843 parameters, 0 gradients, 8.1 GFLOPs


                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:09<00:00,  9.85s/it]


                   all         30         31       0.95      0.806      0.918      0.688
Speed: 1.9ms preprocess, 214.3ms inference, 0.0ms loss, 6.3ms postprocess per image
Results saved to [1mruns/detect/train3[0m


ultralytics.utils.metrics.DetMetrics object with attributes:

ap_class_index: array([0])
box: ultralytics.utils.metrics.Metric object
confusion_matrix: <ultralytics.utils.metrics.ConfusionMatrix object at 0x7cf3c17a36d0>
curves: ['Precision-Recall(B)', 'F1-Confidence(B)', 'Precision-Confidence(B)', 'Recall-Confidence(B)']
curves_results: [[array([          0,    0.001001,    0.002002,    0.003003,    0.004004,    0.005005,    0.006006,    0.007007,    0.008008,    0.009009,     0.01001,    0.011011,    0.012012,    0.013013,    0.014014,    0.015015,    0.016016,    0.017017,    0.018018,    0.019019,     0.02002,    0.021021,    0.022022,    0.023023,
          0.024024,    0.025025,    0.026026,    0.027027,    0.028028,    0.029029,     0.03003,    0.031031,    0.032032,    0.033033,    0.034034,    0.035035,    0.036036,    0.037037,    0.038038,    0.039039,     0.04004,    0.041041,    0.042042,    0.043043,    0.044044,    0.045045,    0.046046,    0.047047,
          0.048048, 

In [None]:
# Running prediction on validation images and saving results with confidence threshold 0.25
model.predict(source="images/val", save=True, conf=0.25)


Results saved to [1mruns/detect/predict[0m


[ultralytics.engine.results.Results object with attributes:
 
 boxes: ultralytics.engine.results.Boxes object
 keypoints: None
 masks: None
 names: {0: 'person', 1: 'bicycle', 2: 'car', 3: 'motorcycle', 4: 'airplane', 5: 'bus', 6: 'train', 7: 'truck', 8: 'boat', 9: 'traffic light', 10: 'fire hydrant', 11: 'stop sign', 12: 'parking meter', 13: 'bench', 14: 'bird', 15: 'cat', 16: 'dog', 17: 'horse', 18: 'sheep', 19: 'cow', 20: 'elephant', 21: 'bear', 22: 'zebra', 23: 'giraffe', 24: 'backpack', 25: 'umbrella', 26: 'handbag', 27: 'tie', 28: 'suitcase', 29: 'frisbee', 30: 'skis', 31: 'snowboard', 32: 'sports ball', 33: 'kite', 34: 'baseball bat', 35: 'baseball glove', 36: 'skateboard', 37: 'surfboard', 38: 'tennis racket', 39: 'bottle', 40: 'wine glass', 41: 'cup', 42: 'fork', 43: 'knife', 44: 'spoon', 45: 'bowl', 46: 'banana', 47: 'apple', 48: 'sandwich', 49: 'orange', 50: 'broccoli', 51: 'carrot', 52: 'hot dog', 53: 'pizza', 54: 'donut', 55: 'cake', 56: 'chair', 57: 'couch', 58: 'potted p

In [None]:
# Importing the files module from Colab and uploading files from local system
from google.colab import files
uploaded = files.upload()


Saving luggage.mp4 to luggage (5).mp4


In [None]:
# Running detection on the uploaded luggage video while saving the output with 0.25 confidence
model.predict(source="luggage.mp4", save=True, conf=0.25)


inference results will accumulate in RAM unless `stream=True` is passed, causing potential out-of-memory
errors for large sources or long-running streams and videos. See https://docs.ultralytics.com/modes/predict/ for help.

Example:
    results = model(source=..., stream=True)  # generator of Results objects
    for r in results:
        boxes = r.boxes  # Boxes object for bbox outputs
        masks = r.masks  # Masks object for segment masks outputs
        probs = r.probs  # Class probabilities for classification outputs

Results saved to [1mruns/detect/predict2[0m


[ultralytics.engine.results.Results object with attributes:
 
 boxes: ultralytics.engine.results.Boxes object
 keypoints: None
 masks: None
 names: {0: 'person', 1: 'bicycle', 2: 'car', 3: 'motorcycle', 4: 'airplane', 5: 'bus', 6: 'train', 7: 'truck', 8: 'boat', 9: 'traffic light', 10: 'fire hydrant', 11: 'stop sign', 12: 'parking meter', 13: 'bench', 14: 'bird', 15: 'cat', 16: 'dog', 17: 'horse', 18: 'sheep', 19: 'cow', 20: 'elephant', 21: 'bear', 22: 'zebra', 23: 'giraffe', 24: 'backpack', 25: 'umbrella', 26: 'handbag', 27: 'tie', 28: 'suitcase', 29: 'frisbee', 30: 'skis', 31: 'snowboard', 32: 'sports ball', 33: 'kite', 34: 'baseball bat', 35: 'baseball glove', 36: 'skateboard', 37: 'surfboard', 38: 'tennis racket', 39: 'bottle', 40: 'wine glass', 41: 'cup', 42: 'fork', 43: 'knife', 44: 'spoon', 45: 'bowl', 46: 'banana', 47: 'apple', 48: 'sandwich', 49: 'orange', 50: 'broccoli', 51: 'carrot', 52: 'hot dog', 53: 'pizza', 54: 'donut', 55: 'cake', 56: 'chair', 57: 'couch', 58: 'potted p

In [None]:
# Installing required Python libraries for QR, OCR, and audio feedback
!pip install pyzbar pytesseract pyttsx3

# Installing system dependency for QR code decoding
!apt-get install libzbar0


Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
libzbar0 is already the newest version (0.23.92-4build2).
0 upgraded, 0 newly installed, 0 to remove and 35 not upgraded.


In [None]:
# Installing the qrcode library with PIL support for generating QR codes
!pip install qrcode[pil]




In [None]:
from ultralytics import YOLO
import cv2
import numpy as np
import qrcode
import os
import random
from datetime import datetime
import glob

# Searching for the first .mp4 video file in the current directory
video_files = glob.glob("*.mp4")
if not video_files:
    raise FileNotFoundError("❌ No .mp4 video file found in the current directory.")
input_video = video_files[0]
print(f"📥 Using video: {input_video}")

# Loading the YOLOv8 model (can replace with 'best.pt' if fine-tuned)
model = YOLO("yolov8n.pt")

# Opening the selected video
cap = cv2.VideoCapture(input_video)

# Preparing video output settings
width = int(cap.get(3))
height = int(cap.get(4))
fps = cap.get(cv2.CAP_PROP_FPS)
out = cv2.VideoWriter("output_detected.mp4", cv2.VideoWriter_fourcc(*'mp4v'), fps, (width, height))

# Creating folder to save generated QR codes
os.makedirs("qr_ids", exist_ok=True)
bag_qr_ids = {}  # Tracking QR codes for each detected bag

# Generating a unique ID using timestamp and random number
def generate_unique_id():
    timestamp = datetime.now().strftime("%Y%m%d%H%M%S%f")
    rand = random.randint(1000, 9999)
    return f"bag_{timestamp}_{rand}"

# Creating and saving the QR code image
def generate_qr_code(data, file_path):
    qr = qrcode.make(data)
    qr.save(file_path)

# Estimating distance based on box width
def estimate_distance(box_width):
    ref_box_width = 200  # box width at 100 cm distance
    ref_distance = 100
    return int((ref_box_width * ref_distance) / (box_width + 1e-5))

# Starting frame-by-frame bag detection
while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break

    # Predicting using YOLO on the current frame
    results = model.predict(source=frame, conf=0.3, verbose=False)

    for r in results:
        for box in r.boxes:
            x1, y1, x2, y2 = map(int, box.xyxy[0])
            box_width = x2 - x1
            distance_cm = estimate_distance(box_width)

            # Assigning unique QR ID if box is new
            pos_key = (x1, y1, x2, y2)
            if pos_key not in bag_qr_ids:
                uid = generate_unique_id()
                qr_path = f"qr_ids/{uid}.png"
                generate_qr_code(uid, qr_path)
                bag_qr_ids[pos_key] = uid

            # Drawing bounding box and annotations
            qr_id = bag_qr_ids[pos_key]
            label = f"Luggage | {distance_cm} cm | {qr_id}"
            cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
            cv2.putText(frame, label, (x1, y1 - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.55, (255, 255, 255), 2)

    # Saving the annotated frame
    out.write(frame)

# Releasing video resources
cap.release()
out.release()
print("✅ Detection completed. Video saved as 'output_detected.mp4'")


📥 Using video: luggage (3).mp4
✅ Detection completed. Video saved as 'output_detected.mp4'


In [None]:
from google.colab import files

# Initiating the download
files.download("output_detected.mp4")

# Showing a friendly confirmation message
print("✅ 'output_detected.mp4' downloaded successfully.")


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

✅ 'output_detected.mp4' downloaded successfully.


In [None]:
# ✅ Installing pyttsx3 for text-to-speech functionality
!pip install pyttsx3




In [None]:
from ultralytics import YOLO
import cv2
import numpy as np
import qrcode
import os
import glob

# Loading YOLOv8 model
model = YOLO("yolov8n.pt")

# Detecting the first available .mp4 video in the directory
video_files = glob.glob("*.mp4")
if not video_files:
    raise FileNotFoundError("❌ No .mp4 file found in the directory.")
input_video = video_files[0]
print(f"🎬 Using video: {input_video}")

# Creating folder to store generated QR codes
os.makedirs("generated_qrs", exist_ok=True)

# Opening the input video and setting up output video writer
cap = cv2.VideoCapture(input_video)
width = int(cap.get(3))
height = int(cap.get(4))
fps = cap.get(cv2.CAP_PROP_FPS)
out = cv2.VideoWriter("output_detected(a).mp4", cv2.VideoWriter_fourcc(*'mp4v'), fps, (width, height))

# Initializing memory to track bags and counter for unique IDs
bag_memory = {}
bag_id_counter = 1

# Defining function to estimate distance from box width
def estimate_distance(box_width):
    ref_box_width = 200
    ref_distance = 100
    return int((ref_box_width * ref_distance) / (box_width + 1e-5))

# Defining function to calculate IoU between two boxes
def get_iou(boxA, boxB):
    xA = max(boxA[0], boxB[0])
    yA = max(boxA[1], boxB[1])
    xB = min(boxA[2], boxB[2])
    yB = min(boxA[3], boxB[3])
    interArea = max(0, xB - xA) * max(0, yB - yA)
    boxAArea = (boxA[2] - boxA[0]) * (boxA[3] - boxA[1])
    boxBArea = (boxB[2] - boxB[0]) * (boxB[3] - boxB[1])
    return interArea / float(boxAArea + boxBArea - interArea + 1e-5)

# Processing video frame by frame
while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break

    # Running YOLO object detection on current frame
    results = model.predict(source=frame, conf=0.25, verbose=False)

    for r in results:
        for box in r.boxes:
            cls_id = int(box.cls[0])
            label = model.names[cls_id]
            if label.lower() not in ["suitcase", "luggage"]:
                continue

            # Getting box coordinates and calculating distance
            x1, y1, x2, y2 = map(int, box.xyxy[0])
            box_width = x2 - x1
            distance_cm = estimate_distance(box_width)

            # Matching current box with previously seen bags using IoU
            matched_id = None
            for bag_id, prev_box in bag_memory.items():
                if get_iou((x1, y1, x2, y2), prev_box) > 0.5:
                    matched_id = bag_id
                    break

            if matched_id is None:
                # Assigning new unique ID and saving QR code
                matched_id = f"Bag_{bag_id_counter}"
                bag_id_counter += 1
                bag_memory[matched_id] = (x1, y1, x2, y2)

                qr_img = qrcode.make(matched_id)
                qr_img.save(f"generated_qrs/{matched_id}.png")

                # Printing real-time feedback in Colab
                print(f"🗣️ {matched_id} detected at approx {distance_cm} cm")

            else:
                # Updating position of existing bag
                bag_memory[matched_id] = (x1, y1, x2, y2)

            # Drawing detection results on frame
            cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
            cv2.putText(frame, f"{label} | {distance_cm} cm", (x1, y1 - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)
            cv2.putText(frame, f"QR ID: {matched_id}", (x1, y2 + 20),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 200, 255), 2)

    # Writing processed frame to output video
    out.write(frame)

# Releasing resources
cap.release()
out.release()

# Final message
print("✅ Detection completed. Video saved as 'output_detected.mp4'")
print("📌 QR codes saved in 'generated_qrs/' folder.")


🎬 Using video: luggage (3).mp4
🗣️ Bag_1 detected at approx 76 cm
🗣️ Bag_2 detected at approx 155 cm
🗣️ Bag_3 detected at approx 285 cm
🗣️ Bag_4 detected at approx 322 cm
🗣️ Bag_5 detected at approx 86 cm
🗣️ Bag_6 detected at approx 540 cm
🗣️ Bag_7 detected at approx 112 cm
🗣️ Bag_8 detected at approx 454 cm
🗣️ Bag_9 detected at approx 289 cm
🗣️ Bag_10 detected at approx 194 cm
🗣️ Bag_11 detected at approx 327 cm
🗣️ Bag_12 detected at approx 68 cm
🗣️ Bag_13 detected at approx 118 cm
🗣️ Bag_14 detected at approx 689 cm
🗣️ Bag_15 detected at approx 232 cm
🗣️ Bag_16 detected at approx 204 cm
🗣️ Bag_17 detected at approx 217 cm
✅ Detection completed. Video saved as 'output_detected.mp4'
📌 QR codes saved in 'generated_qrs/' folder.


In [None]:
from google.colab import files

# Initiating the download
files.download("output_detected(a).mp4")

# Showing a friendly confirmation message
print("✅ 'output_detected.mp4' downloaded successfully.")


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

✅ 'output_detected.mp4' downloaded successfully.
