In [1]:
import os, sys, math, time, random
from pathlib import Path
import numpy as np
import pandas as pd
import torch
os.environ["CUDA_VISIBLE_DEVICES"] = "7" 
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset, ConcatDataset, Subset, random_split
import torchvision.transforms as T
import torchvision.datasets as datasets
import timm
from tqdm import tqdm

DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
print("Device:", DEVICE)
SEED = 42
if DEVICE == "cuda":
    print("GPU name:", torch.cuda.get_device_name(0))
    print("Total GPU mem (GB):", torch.cuda.get_device_properties(0).total_memory / (1024**3))

random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
if DEVICE == "cuda":
    torch.cuda.manual_seed_all(SEED)
torch.backends.cudnn.benchmark = True

print("timm version:", timm.__version__)
print("PyTorch:", torch.__version__)

import os
from pathlib import Path
import torch
from ultralytics import YOLO
import json
import pprint

print("PyTorch:", torch.__version__)
print("CUDA available:", torch.cuda.is_available())
if torch.cuda.is_available():
    print("CUDA device:", torch.cuda.get_device_name(0))

  from .autonotebook import tqdm as notebook_tqdm


Device: cuda
GPU name: Tesla V100-SXM2-32GB
Total GPU mem (GB): 31.7325439453125
timm version: 1.0.21
PyTorch: 2.6.0+cu124
PyTorch: 2.6.0+cu124
CUDA available: True
CUDA device: Tesla V100-SXM2-32GB


In [2]:
# Edit this if your dataset is somewhere else
DATA_ROOT = Path("/home/23ucc611/SWE/dataset")  # <-- change if needed
TRAIN_IMG = DATA_ROOT / "train" / "images"
TRAIN_LABEL = DATA_ROOT / "train" / "labels"
VAL_IMG = DATA_ROOT / "val" / "images"
VAL_LABEL = DATA_ROOT / "val" / "labels"
TEST_IMG = DATA_ROOT / "test" / "images"
TEST_LABEL = DATA_ROOT / "test" / "labels"

for p in [TRAIN_IMG, TRAIN_LABEL, VAL_IMG, VAL_LABEL, TEST_IMG, TEST_LABEL]:
    print(p, "exists?", p.exists(), "count:", len(list(p.glob("*"))))


/home/23ucc611/SWE/dataset/train/images exists? True count: 7188
/home/23ucc611/SWE/dataset/train/labels exists? True count: 7188
/home/23ucc611/SWE/dataset/val/images exists? True count: 2044
/home/23ucc611/SWE/dataset/val/labels exists? True count: 2044
/home/23ucc611/SWE/dataset/test/images exists? True count: 1063
/home/23ucc611/SWE/dataset/test/labels exists? True count: 1063


In [3]:
# Show classes.txt
classes_file = Path("/home/23ucc611/SWE/dataset/classes.txt")  # path where file got uploaded
if classes_file.exists():
    with open(classes_file, "r", encoding="utf8") as f:
        lines = [l.strip() for l in f.readlines() if l.strip()]
    print(f"Classes file lines: {len(lines)} (indices appear to be):")
    for l in lines:
        print(" ", l)
else:
    print("classes.txt not found at", classes_file)

# Show data.yaml if present
data_yaml_path = Path("/home/23ucc611/SWE/dataset/data.yaml")  # adjust if your file is elsewhere
if data_yaml_path.exists():
    with open(data_yaml_path, "r", encoding="utf8") as f:
        print("\n----- data.yaml preview -----")
        print(f.read())
else:
    print("\nNo data.yaml at", data_yaml_path, "\nIf missing, we'll create one below.")
DATA_YAML = Path("data_wildlife.yaml")
if not DATA_YAML.exists():
    # Try to auto-populate names from classes.txt
    names = [l.split(maxsplit=1)[-1] if " " in l else l for l in lines] if 'lines' in globals() else None
    nc = len(names) if names else None
    yaml_text = f"""train: {TRAIN_IMG}
val:   {VAL_IMG}
test:  {TEST_IMG}
nc: {nc if nc is not None else 'REPLACE_WITH_NUM_CLASSES'}
names: {names if names is not None else 'REPLACE_WITH_CLASS_LIST'}
"""
    DATA_YAML.write_text(yaml_text)
    print("Wrote data_wildlife.yaml ‚Äî open it and confirm nc / names look correct.")
else:
    print("data_wildlife.yaml already exists locally. Use that or update it manually.")
print(DATA_YAML.resolve())


Classes file lines: 36 (indices appear to be):
  0 Bear
  1 Brown bear
  2 Buffalo
  3 Bull
  4 Cattle
  5 Cheetah
  6 Chicken
  7 Deer
  8 Elephant
  9 Fox
  10 Giraffe
  11 Goat
  12 Hippopotamus
  13 Horse
  14 Jaguar
  15 Kangaroo
  16 Koala
  17 Leopard
  18 Lion
  19 Lynx
  20 Monkey
  21 Mule
  22 Ostrich
  23 Otter
  24 Panda
  25 Penguin
  26 Pig
  27 Polar bear
  28 Rabbit
  29 Raccoon
  30 Red panda
  31 Rhinoceros
  32 Sheep
  33 Tiger
  34 Turkey
  35 Zebra

----- data.yaml preview -----
names:
- Bear
- Brown bear
- Buffalo
- Bull
- Cattle
- Cheetah
- Chicken
- Deer
- Elephant
- Fox
- Giraffe
- Goat
- Hippopotamus
- Horse
- Jaguar
- Kangaroo
- Koala
- Leopard
- Lion
- Lynx
- Monkey
- Mule
- Ostrich
- Otter
- Panda
- Penguin
- Pig
- Polar bear
- Rabbit
- Raccoon
- Red panda
- Rhinoceros
- Sheep
- Tiger
- Turkey
- Zebra
nc: 36
path: C:\Users\vedan\Machine Learning\SWE PROJECT\dataset
test: test/images
train: train/images
val: val/images

data_wildlife.yaml already exists loc

In [4]:
import glob

def check_labels(img_dir, lbl_dir, max_print=5):
    imgs = sorted([p for p in Path(img_dir).glob("*") if p.suffix.lower() in [".jpg",".jpeg",".png"]])
    lbls = sorted(list(Path(lbl_dir).glob("*.txt")))
    print(f"{len(imgs)} images, {len(lbls)} label files in {img_dir} / {lbl_dir}")
    # Show some mismatches
    mismatch = []
    for im in imgs[:1000]:  # limit check to first 1000
        expected_lbl = Path(lbl_dir) / (im.stem + ".txt")
        if not expected_lbl.exists():
            mismatch.append(im.name)
            if len(mismatch) >= max_print:
                break
    if mismatch:
        print("Missing label files for (sample):", mismatch)
    else:
        print("All sample images have matching label files.")
    # Read one label file to confirm format (class x_center y_center w h)
    sample_lbls = lbls[:3]
    for s in sample_lbls:
        with open(s) as f:
            print("----", s.name)
            for l in f.readlines()[:5]:
                print("   ", l.strip())

check_labels(TRAIN_IMG, TRAIN_LABEL)
check_labels(VAL_IMG, VAL_LABEL)


7188 images, 7188 label files in /home/23ucc611/SWE/dataset/train/images / /home/23ucc611/SWE/dataset/train/labels
All sample images have matching label files.
---- Bear_01650042709e7a82.txt
    0 0.380000 0.216698 0.781875 0.951219
---- Bear_02d4376234375c2f.txt
    0 0.012500 0.000000 0.998611 0.997059
---- Bear_04af0dc0511cb98d.txt
    0 0.165655 0.260728 0.795317 0.565670
2044 images, 2044 label files in /home/23ucc611/SWE/dataset/val/images / /home/23ucc611/SWE/dataset/val/labels
All sample images have matching label files.
---- Bear_0155b267ade95d1e.txt
    0 0.188281 0.119578 0.828125 0.887456
---- Bear_0312356c607c20dc.txt
    0 0.313889 0.017361 0.990972 0.843750
---- Bear_073f21a5b0fa62dc.txt
    0 0.110401 0.206875 0.840328 0.929375


In [5]:
# Recommended defaults (change if you want):
MODEL_NAME = "yolov8l.pt"     # yolov8x (largest); try yolov8l / yolov8m for faster iteration
IMG_SIZE = 1024               # larger helps wildlife detection; 640 is faster
BATCH = 16                   # starting point; with V100 32GB you can likely go >=16 for img 1024
EPOCHS = 100
WORKERS = 8
DEVICE = 0                    # GPU id (0)
EXPERIMENT_NAME = "wildlife_yolov8x_1024"

In [None]:
model = YOLO(MODEL_NAME)  # loads yolov8x pre-trained weights automatically (internet required the first time)

# train ‚Äî many options are supported; the ultralytics API will create runs/train/<name>
# Key args: data (yaml), epochs, imgsz, batch, device, workers, patience (early stop)
# If you want mixed-precision automatic, ultralytics uses AMP internally if available.
model.train(
    data=str(DATA_YAML),   # path to data yaml created/verified above
    epochs=EPOCHS,
    imgsz=IMG_SIZE,
    batch=BATCH,
    device=DEVICE,
    workers=WORKERS,
    name=EXPERIMENT_NAME,
    optimizer="Adam",    # or 'SGD'
    lr0=1e-3,
    patience=30,         # early stopping patience (stop if no improvement)
    save=True
)


[KDownloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov8l.pt to 'yolov8l.pt': 100% ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ 83.7MB 20.9MB/s 4.0s3.9s<0.1s
Ultralytics 8.3.229 üöÄ Python-3.10.18 torch-2.6.0+cu124 CUDA:0 (Tesla V100-SXM2-32GB, 32494MiB)
[34m[1mengine/trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=16, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, compile=False, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=data_wildlife.yaml, degrees=0.0, deterministic=True, device=0, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=100, erasing=0.4, exist_ok=False, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=1024, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.001, lrf=0.01, mask_ratio=4, max_det=300, mixup=0.0, mode=train, model=yolov8

In [None]:
# Ultralytics stores runs in ./runs/train/<EXPERIMENT_NAME> by default
runs_root = Path("runs") / "train" / EXPERIMENT_NAME
print("Runs folder:", runs_root.resolve())
print("Files in run folder:")
for p in sorted(runs_root.glob("*")):
    print(" ", p.name)
# Best weights often saved as weights/best.pt
best_weights = runs_root / "weights" / "best.pt"
last_weights = runs_root / "weights" / "last.pt"
print("Best:", best_weights.exists(), best_weights)
print("Last:", last_weights.exists(), last_weights)


In [None]:
# Use model.val to compute metrics
# If you want to use a specific weights file, load it:
trained = YOLO(str(best_weights)) if best_weights.exists() else model

# 'val' will return a metrics dict and print summary
metrics = trained.val(data=str(DATA_YAML), batch=BATCH, imgsz=IMG_SIZE, device=DEVICE)
print("Validation metrics:", metrics)


In [None]:
import shutil
out_dir = Path("runs") / "test" / EXPERIMENT_NAME
out_dir.mkdir(parents=True, exist_ok=True)
# Do predictions for the test set images and save annotated images
test_images = sorted(list(Path(TEST_IMG).glob("*.*")))
# run inference in batches (demonstration: process first 200 images)
for i, img_path in enumerate(test_images[:200]):
    res = trained.predict(source=str(img_path), imgsz=IMG_SIZE, device=DEVICE, conf=0.25, save=True, save_dir=str(out_dir))
    if (i+1) % 50 == 0:
        print(f"Processed {i+1}/{min(len(test_images),200)}")
print("Annotated images saved to:", out_dir)


In [None]:
# Export best model to ONNX and TorchScript
if best_weights.exists():
    export_model = YOLO(str(best_weights))
else:
    export_model = trained

# Export to ONNX (use opset 12 or change as needed)
export_model.export(format="onnx")        # creates runs/export/..
export_model.export(format="torchscript")
print("Export complete; check runs/export folder.")
