Sign Language Project Computer Vision Dataset

https://universe.roboflow.com/sign-language-colorful/sign-language-project-zxbft

In [11]:
# Install dependencies
!pip install -q ultralytics roboflow supervision

In [12]:
# Install library
import os
from google.colab import userdata
from pathlib import Path
import yaml
import pandas as pd
import numpy as np
from roboflow import Roboflow
from ultralytics import YOLO
import shutil
import cv2

In [13]:
# Load API required

# Import Roboflow API
os.environ["ROBOFLOW_API_KEY"] = userdata.get('ROBOFLOW_API_KEY')
ROBOFLOW_API_KEY = os.environ["ROBOFLOW_API_KEY"]

In [4]:
# Load first dataset (Object Detection)

rf_1 = Roboflow(api_key=ROBOFLOW_API_KEY)
project_1 = rf_1.workspace("test-hmtgo").project("sign-language-project-zxbft-ekfrd")
version_1 = project_1.version(1)
dataset_1 = version_1.download("yolov8")


loading Roboflow workspace...
loading Roboflow project...


Downloading Dataset Version Zip in Sign-Language-Project-1 to yolov8:: 100%|██████████| 49561/49561 [00:01<00:00, 30222.69it/s]





Extracting Dataset Version Zip to Sign-Language-Project-1 in yolov8:: 100%|██████████| 7212/7212 [00:01<00:00, 4454.21it/s]


In [5]:
# Set out directory for object detection

HOME = os.getcwd()
base_path = Path(HOME)
first_dataset_location = base_path / "Sign-Language-Project-1"
yaml_data = first_dataset_location / "data.yaml"

In [6]:
# Function for restructuring data.yaml so it would convert from 36 class into 1 class (hand) only

def convert_yolo_labels_to_single_class(
    dataset_root: str,
    target_class_id: int = 0,
    dry_run: bool = False
):
    """
    Convert all YOLO label files in train/valid/test splits to a single class.
    """
    dataset_root = Path(dataset_root)
    splits = ["train", "valid", "test"]

    for split in splits:
        labels_dir = dataset_root / split / "labels"
        if not labels_dir.exists():
            continue

        print(f"\nProcessing {labels_dir}")

        for label_file in labels_dir.glob("*.txt"):
            with open(label_file, "r") as f:
                lines = f.readlines()

            new_lines = []
            changed = False

            for line in lines:
                parts = line.strip().split()
                if len(parts) != 5:
                    continue

                if parts[0] != str(target_class_id):
                    parts[0] = str(target_class_id)
                    changed = True

                new_lines.append(" ".join(parts))

            if changed:
                if dry_run:
                    print(f"Dry Run Would update {label_file.name}")
                else:
                    with open(label_file, "w") as f:
                        f.write("\n".join(new_lines))
                    print(f"Updated {label_file.name}")


def update_data_yaml_to_single_class(
    yaml_path: str,
    class_name: str = "hand"
):
    with open(yaml_path, "r") as f:
        data = yaml.safe_load(f)

    data["nc"] = 1
    data["names"] = {0: class_name}

    with open(yaml_path, "w") as f:
        yaml.safe_dump(data, f)

    print(f"Updated {yaml_path}")

In [7]:
# Convert all label files to class 0
convert_yolo_labels_to_single_class(
    dataset_root="Sign-Language-Project-1",
    target_class_id=0
)
# Convert data.yaml to 1-class
update_data_yaml_to_single_class(
    yaml_path="Sign-Language-Project-1/data.yaml",
    class_name="hand"
)


Processing Sign-Language-Project-1/train/labels
Updated 3_e1c46a35-dc6f-11ed-a60b-b07d6448fdf4_png.rf.e199b7936544e0472c469e6347a36c75.txt
Updated B_7f031b07-d973-11ed-846e-b07d6448fdf4_jpg.rf.01bac13058cbed29bcd9a6809531f05b.txt
Updated 6_e1cc9c49-dc6f-11ed-982a-b07d6448fdf4_png.rf.09b90d75b603f745e62a6371ca4e1f5c.txt
Updated 6_e1cc03b0-dc6f-11ed-9e7b-b07d6448fdf4_png.rf.f1c527ff23f58bedfd102e03bfae0c73.txt
Updated V_7f3066e8-d973-11ed-8e53-b07d6448fdf4_jpg.rf.ca03b3410cf5a82d147f34a1d3d17b57.txt
Updated F_7f0d3cc7-d973-11ed-8e90-b07d6448fdf4_jpg.rf.223a995ccb90fb9d7473508c104e9a35.txt
Updated W_7f32a319-d973-11ed-b559-b07d6448fdf4_jpg.rf.ba1efc1e0c0629e6cf9dbbd5731ac34c.txt
Updated W_7f3256d4-d973-11ed-b902-b07d6448fdf4_jpg.rf.adb797ea2298def1f34f98eec0f37e13.txt
Updated Q_7f253a12-d973-11ed-9e93-b07d6448fdf4_jpg.rf.7601cf150ddef3cb1941e70850bf0e20.txt
Updated V_7f2fce55-d973-11ed-873d-b07d6448fdf4_jpg.rf.20c295e4712c5beba70dd17784004793.txt
Updated 8_e1d00980-dc6f-11ed-8342-b07d644

In [17]:
# Check GPU
!nvidia-smi

Sat Jan 17 15:44:36 2026       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.54.15              Driver Version: 550.54.15      CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  Tesla T4                       Off |   00000000:00:04.0 Off |                    0 |
| N/A   37C    P8              9W /   70W |       0MiB /  15360MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

Object Detection

In [9]:
# Model Comparison, pick the best model later

# 1. Base model
# (Pretrained YOLOv8s with no learning, no finetuning and augmentation as baseline)
model_1 = YOLO('yolov8s.pt')
results_1 = model_1.train(
    data=yaml_data,
    epochs=10,
    imgsz=640,
    batch=8,
    patience = 2,
    save_period = 2,
    name='model_1',
    lr0=0.0,
    augment=False,
)

# 2. Fine-tuned
# Fine tune only, no augmentation
model_2 = YOLO('yolov8s.pt')
results_2 = model_2.train(
    data=yaml_data,
    epochs=10,
    imgsz=640,
    batch=8,
    patience = 2,
    save_period = 2,
    name='model_2',
    lr0=0.001,
    augment=False,
)

# 3. Fine-tuned + no geometric augmentation
model_3 = YOLO('yolov8s.pt')
results_3 = model_3.train(
    data=yaml_data,
    epochs=10,
    imgsz=640,
    batch=8,
    patience = 2,
    save_period = 2,
    name='model_3',
    lr0=0.001,
    # Zero geometric augmentations
    fliplr=0.0,
    flipud=0.0,
    mosaic=0.0,
    degrees=0.0,
    translate=0.0,
    scale=0.0,
    shear=0.0,
    # Allowed photometric augmentations
    hsv_h=0.01,
    hsv_s=0.5,
    hsv_v=0.4,
    erasing=0.1,
)

# 4. Fine-tuned + geometric augmentation
model_4 = YOLO('yolov8s.pt')
results_4 = model_4.train(
    data=yaml_data,
    epochs=10,
    imgsz=640,
    batch=8,
    patience = 2,
    save_period = 2,
    name='model_4',
    lr0=0.001,
    fliplr=0.5,
    degrees=15,
    scale=0.5,
    hsv_h=0.01,
    hsv_s=0.5,
    hsv_v=0.4,
    erasing=0.1,
)

[KDownloading https://github.com/ultralytics/assets/releases/download/v8.4.0/yolov8s.pt to 'yolov8s.pt': 100% ━━━━━━━━━━━━ 21.5MB 131.5MB/s 0.2s
Ultralytics 8.4.5 🚀 Python-3.12.12 torch-2.9.0+cu126 CUDA:0 (Tesla T4, 15095MiB)
[34m[1mengine/trainer: [0magnostic_nms=False, amp=True, angle=1.0, augment=False, auto_augment=randaugment, batch=8, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, compile=False, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=/content/Sign-Language-Project-1/data.yaml, degrees=0.0, deterministic=True, device=None, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=10, erasing=0.4, exist_ok=False, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=640, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.0, lrf=0.01, mask_ratio=4, max_det=300, mixup=0.0, mode=train, model=yolov8s.pt, momentum=0

In [9]:
from pathlib import Path
import shutil

drive_path = Path("/content/drive/MyDrive/exported_models")
drive_path.mkdir(exist_ok=True)

# Ensure exported_models is defined
exported_models = export_all_trained_models()

for model_file in exported_models:
    shutil.copy(Path("exported_models") / model_file, drive_path / model_file)
    print(f"Copied {model_file} to Google Drive.")

NameError: name 'export_all_trained_models' is not defined

In [10]:
# Evaluation
all_metrics = []

models = [
    ("Model 1 – Base", model_1, "No fine-tune, no augmentation"),
    ("Model 2 – FT No Aug", model_2, "Fine-tune only"),
    ("Model 3 – FT Safe Aug", model_3, "Photometric augmentation"),
    ("Model 4 – FT Geo Aug", model_4, "Geometric augmentation"),
]

for name, model, desc in models:
    metrics = model.val()
    metrics_result = metrics.results_dict

    all_metrics.append({
        "Model": name,
        "Description": desc,
        "Precision": metrics_result["metrics/precision(B)"],
        "Recall": metrics_result["metrics/recall(B)"],
        "mAP50": metrics_result["metrics/mAP50(B)"],
        "mAP50-95": metrics_result["metrics/mAP50-95(B)"],
    })

metrics_df = pd.DataFrame(all_metrics)
display(metrics_df)

Ultralytics 8.4.5 🚀 Python-3.12.12 torch-2.9.0+cu126 CUDA:0 (Tesla T4, 15095MiB)
Model summary (fused): 73 layers, 11,125,971 parameters, 0 gradients, 28.4 GFLOPs
[34m[1mval: [0mFast image access ✅ (ping: 0.0±0.0 ms, read: 750.2±368.6 MB/s, size: 16.0 KB)
[K[34m[1mval: [0mScanning /content/Sign-Language-Project-1/valid/labels.cache... 360 images, 0 backgrounds, 0 corrupt: 100% ━━━━━━━━━━━━ 360/360 137.3Mit/s 0.0s
[K                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100% ━━━━━━━━━━━━ 23/23 3.5it/s 6.6s
                   all        360        360          1          1      0.995      0.961
Speed: 2.5ms preprocess, 9.3ms inference, 0.0ms loss, 1.9ms postprocess per image
Results saved to [1m/content/runs/detect/val[0m
Ultralytics 8.4.5 🚀 Python-3.12.12 torch-2.9.0+cu126 CUDA:0 (Tesla T4, 15095MiB)
Model summary (fused): 73 layers, 11,125,971 parameters, 0 gradients, 28.4 GFLOPs
[34m[1mval: [0mFast image access ✅ (ping: 0.0±0.0 ms, read:

Unnamed: 0,Model,Description,Precision,Recall,mAP50,mAP50-95
0,Model 1 – Base,"No fine-tune, no augmentation",0.999875,1.0,0.995,0.961432
1,Model 2 – FT No Aug,Fine-tune only,0.999875,1.0,0.995,0.961432
2,Model 3 – FT Safe Aug,Photometric augmentation,0.992925,0.972222,0.993986,0.944662
3,Model 4 – FT Geo Aug,Geometric augmentation,0.999681,1.0,0.995,0.902568


In [11]:
# Download the best model (.pt)

def export_all_trained_models(
    runs_dir="runs/detect",
    model_names=("model_1", "model_2", "model_3", "model_4"),
    output_dir="exported_models",
    weight_name="best.pt"
):
    runs_dir = Path(runs_dir)
    output_dir = Path(output_dir)
    output_dir.mkdir(exist_ok=True)

    exported = []

    for model_name in model_names:
        src = runs_dir / model_name / "weights" / weight_name
        if not src.exists():
            print(f"[WARNING] {src} not found, skipping")
            continue

        dst = output_dir / f"{model_name}_{weight_name}"
        shutil.copy(src, dst)
        exported.append(dst.name)
        print(f"Exported: {dst}")

    return exported

In [12]:
exported_models = export_all_trained_models()
print("Exported models:", exported_models)

Exported: exported_models/model_1_best.pt
Exported: exported_models/model_2_best.pt
Exported: exported_models/model_3_best.pt
Exported: exported_models/model_4_best.pt
Exported models: ['model_1_best.pt', 'model_2_best.pt', 'model_3_best.pt', 'model_4_best.pt']


In [None]:
# So we will use the ... model because of blabla (download .pt nya)

Image Classification

In [14]:
# Load second dataset (Image Classification)
rf_2 = Roboflow(api_key=ROBOFLOW_API_KEY)
project_2 = rf_2.workspace("test-hmtgo").project("handsign-5nz1l-ehtlc")
version_2 = project_2.version(1)
dataset_2 = version_2.download("folder")

loading Roboflow workspace...
loading Roboflow project...


In [15]:
# Set out directory for image classification

HOME = os.getcwd()
base_path = Path(HOME)
second_dataset_location = base_path / "HandSIgn-1"


In [16]:
ls {second_dataset_location}

README.dataset.txt   [0m[01;34mtest[0m/   train.cache  valid.cache
README.roboflow.txt  [01;34mtrain[0m/  [01;34mvalid[0m/


In [19]:
# Model Comparison
# 1. Validation only (no training)
model_cls_1 = YOLO("yolov8n-cls.pt")

cls_1_metrics = model_cls_1.val(
    data=second_dataset_location,
    imgsz=224,
    name="cls_model_1"
)

Ultralytics 8.4.5 🚀 Python-3.12.12 torch-2.9.0+cu126 CUDA:0 (Tesla T4, 15095MiB)
YOLOv8n-cls summary (fused): 30 layers, 2,715,880 parameters, 0 gradients, 4.3 GFLOPs
[34m[1mtrain:[0m /content/HandSIgn-1/train... found 7920 images in 29 classes ✅ 
[34m[1mval:[0m /content/HandSIgn-1/valid... found 990 images in 29 classes ✅ 
[34m[1mtest:[0m /content/HandSIgn-1/test... found 990 images in 29 classes ✅ 
[34m[1mval: [0mFast image access ✅ (ping: 0.0±0.0 ms, read: 227.3±78.7 MB/s, size: 5.9 KB)
[K[34m[1mval: [0mScanning /content/HandSIgn-1/valid... 990 images, 0 corrupt: 100% ━━━━━━━━━━━━ 990/990 415.2Mit/s 0.0s
[K               classes   top1_acc   top5_acc: 100% ━━━━━━━━━━━━ 62/62 25.0it/s 2.5s
                   all          0          0
Speed: 0.2ms preprocess, 0.9ms inference, 0.0ms loss, 0.0ms postprocess per image
Results saved to [1m/content/runs/classify/cls_model_13[0m


In [21]:
# 2. Fine-tuned, no augmentation
model_cls_2 = YOLO("yolov8n-cls.pt")

cls_2_results = model_cls_2.train(
    data=second_dataset_location,
    epochs=40,
    imgsz=224,
    batch=32,
    name="cls_model_2",
    lr0=0.001,
    optimizer="Adam",
    hsv_h=0.0,
    hsv_s=0.0,
    hsv_v=0.0,
    erasing=0.0,
    fliplr=0.0,
    flipud=0.0,
    degrees=0.0,
    translate=0.0,
    scale=0.0,
    shear=0.0,
)

Ultralytics 8.4.5 🚀 Python-3.12.12 torch-2.9.0+cu126 CUDA:0 (Tesla T4, 15095MiB)
[34m[1mengine/trainer: [0magnostic_nms=False, amp=True, angle=1.0, augment=False, auto_augment=randaugment, batch=32, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, compile=False, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=/content/HandSIgn-1, degrees=0.0, deterministic=True, device=None, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=40, erasing=0.0, exist_ok=False, fliplr=0.0, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.0, hsv_s=0.0, hsv_v=0.0, imgsz=224, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.001, lrf=0.01, mask_ratio=4, max_det=300, mixup=0.0, mode=train, model=yolov8n-cls.pt, momentum=0.937, mosaic=1.0, multi_scale=0.0, name=cls_model_23, nbs=64, nms=False, opset=None, optimize=False, optimizer=Adam, overlap_mask=True, patience=100, perspective=0.

In [23]:
# 3. Fine-tuned, photometric augmentation
model_cls_3 = YOLO("yolov8n-cls.pt")

cls_3_results = model_cls_3.train(
    data=second_dataset_location,
    epochs=40,
    imgsz=224,
    batch=32,
    patience = 2,
    save_period = 2,
    name="cls_model_3",
    lr0=0.001,
    optimizer="Adam",
    hsv_h=0.01,
    hsv_s=0.4,
    hsv_v=0.4,
    erasing=0.1,
    fliplr=0.0,
    flipud=0.0,
    degrees=0.0,
    translate=0.0,
    scale=0.0,
    shear=0.0,
)

Ultralytics 8.4.5 🚀 Python-3.12.12 torch-2.9.0+cu126 CUDA:0 (Tesla T4, 15095MiB)
[34m[1mengine/trainer: [0magnostic_nms=False, amp=True, angle=1.0, augment=False, auto_augment=randaugment, batch=32, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, compile=False, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=/content/HandSIgn-1, degrees=0.0, deterministic=True, device=None, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=40, erasing=0.1, exist_ok=False, fliplr=0.0, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.01, hsv_s=0.4, hsv_v=0.4, imgsz=224, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.001, lrf=0.01, mask_ratio=4, max_det=300, mixup=0.0, mode=train, model=yolov8n-cls.pt, momentum=0.937, mosaic=1.0, multi_scale=0.0, name=cls_model_32, nbs=64, nms=False, opset=None, optimize=False, optimizer=Adam, overlap_mask=True, patience=2, perspective=0.0

In [24]:
# 4. Fine-tuned, photomettric + geometric augmentation
model_cls_4 = YOLO("yolov8n-cls.pt")

cls_4_results = model_cls_4.train(
    data=second_dataset_location,
    epochs=40,
    imgsz=224,
    batch=32,
    patience = 2,
    save_period = 2,
    name="cls_model_4",
    lr0=0.001,
    optimizer="Adam",
    hsv_h=0.01,
    hsv_s=0.4,
    hsv_v=0.4,
    erasing=0.1,
    fliplr=0.5,
    degrees=15,
    scale=0.3,
    translate=0.1,
)

Ultralytics 8.4.5 🚀 Python-3.12.12 torch-2.9.0+cu126 CUDA:0 (Tesla T4, 15095MiB)
[34m[1mengine/trainer: [0magnostic_nms=False, amp=True, angle=1.0, augment=False, auto_augment=randaugment, batch=32, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, compile=False, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=/content/HandSIgn-1, degrees=15, deterministic=True, device=None, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=40, erasing=0.1, exist_ok=False, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.01, hsv_s=0.4, hsv_v=0.4, imgsz=224, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.001, lrf=0.01, mask_ratio=4, max_det=300, mixup=0.0, mode=train, model=yolov8n-cls.pt, momentum=0.937, mosaic=1.0, multi_scale=0.0, name=cls_model_4, nbs=64, nms=False, opset=None, optimize=False, optimizer=Adam, overlap_mask=True, patience=2, perspective=0.0, 

In [25]:
import pandas as pd
from pathlib import Path

def collect_classification_metrics(
    runs_dir="runs/classify",
    model_names=("cls_model_1", "cls_model_2", "cls_model_3", "cls_model_4"),
):
    rows = []

    for name in model_names:
        results_path = Path(runs_dir) / name / "results.csv"
        if not results_path.exists():
            print(f"[WARNING] {results_path} not found, skipping")
            continue

        df = pd.read_csv(results_path)

        # YOLOv8 classification stores final metrics in last row
        final_row = df.iloc[-1]

        rows.append({
            "model": name,
            "top1_accuracy": final_row.get("metrics/accuracy_top1", None),
            "top5_accuracy": final_row.get("metrics/accuracy_top5", None),
            "epochs": final_row.get("epoch", None),
        })

    return pd.DataFrame(rows)

In [10]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [26]:
cls_metrics_df = collect_classification_metrics()
cls_metrics_df



Unnamed: 0,model,top1_accuracy,top5_accuracy,epochs
0,cls_model_4,0.99394,1.0,9.0


Webcam integration

In [None]:
model = YOLO("exported_models/handsign_cls_best.pt")
cap = cv2.VideoCapture(0)

while True:
    ret, frame = cap.read()
    if not ret:
        break

    res = model(frame, imgsz=224)[0]
    probs = res.probs

    label = model.names[probs.top1]
    conf = probs.top1conf.item()

    cv2.putText(
        frame,
        f"{label} ({conf:.2f})",
        (20, 40),
        cv2.FONT_HERSHEY_SIMPLEX,
        1,
        (0, 255, 0),
        2
    )

    cv2.imshow("Hand Sign Classification", frame)
    if cv2.waitKey(1) & 0xFF == ord("q"):
        break

cap.release()
cv2.destroyAllWindows()

Streamlit / Gradio Interface