In [1]:
import os, sys, math, time, random
from pathlib import Path
import numpy as np
import pandas as pd
import torch
os.environ["CUDA_VISIBLE_DEVICES"] = "5" 
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset, ConcatDataset, Subset, random_split
import torchvision.transforms as T
import torchvision.datasets as datasets
import timm
from tqdm import tqdm

DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
print("Device:", DEVICE)
SEED = 42
if DEVICE == "cuda":
    print("GPU name:", torch.cuda.get_device_name(0))
    print("Total GPU mem (GB):", torch.cuda.get_device_properties(0).total_memory / (1024**3))

random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
if DEVICE == "cuda":
    torch.cuda.manual_seed_all(SEED)
torch.backends.cudnn.benchmark = True

print("timm version:", timm.__version__)
print("PyTorch:", torch.__version__)

import os
from pathlib import Path
import torch
from ultralytics import YOLO
import json
import pprint

print("PyTorch:", torch.__version__)
print("CUDA available:", torch.cuda.is_available())
if torch.cuda.is_available():
    print("CUDA device:", torch.cuda.get_device_name(0))

  from .autonotebook import tqdm as notebook_tqdm


Device: cuda
GPU name: Tesla V100-SXM2-32GB
Total GPU mem (GB): 31.7325439453125
timm version: 1.0.21
PyTorch: 2.6.0+cu124
PyTorch: 2.6.0+cu124
CUDA available: True
CUDA device: Tesla V100-SXM2-32GB


In [2]:
# Edit this if your dataset is somewhere else
DATA_ROOT = Path("/home/23ucc611/SWE/dataset_2")  # <-- change if needed
TRAIN_IMG = DATA_ROOT / "train" / "images"
TRAIN_LABEL = DATA_ROOT / "train" / "labels"
VAL_IMG = DATA_ROOT / "val" / "images"
VAL_LABEL = DATA_ROOT / "val" / "labels"
TEST_IMG = DATA_ROOT / "test" / "images"
TEST_LABEL = DATA_ROOT / "test" / "labels"

for p in [TRAIN_IMG, TRAIN_LABEL, VAL_IMG, VAL_LABEL, TEST_IMG, TEST_LABEL]:
    print(p, "exists?", p.exists(), "count:", len(list(p.glob("*"))))


/home/23ucc611/SWE/dataset_2/train/images exists? True count: 1056
/home/23ucc611/SWE/dataset_2/train/labels exists? True count: 1056
/home/23ucc611/SWE/dataset_2/val/images exists? True count: 224
/home/23ucc611/SWE/dataset_2/val/labels exists? True count: 224
/home/23ucc611/SWE/dataset_2/test/images exists? True count: 224
/home/23ucc611/SWE/dataset_2/test/labels exists? True count: 224


In [3]:
# Cell C ‚Äî classes file and auto-create data_wildlife.yaml if missing
classes_file = DATA_ROOT / "classes.txt"
if classes_file.exists():
    with open(classes_file, "r", encoding="utf8") as f:
        lines = [l.strip() for l in f.readlines() if l.strip()]
    print(f"Classes file lines: {len(lines)}")
    for i, l in enumerate(lines):
        print(f"  {i}: {l}")
else:
    raise SystemExit(f"classes.txt not found at {classes_file} ‚Äî run the prepare script first.")

# Ensure data yaml in dataset_2 exists (Ultralytics accepts absolute paths)
DATA_YAML = DATA_ROOT / "data_wildlife.yaml"
if not DATA_YAML.exists():
    yaml_text = f"""train: {str(TRAIN_IMG.resolve())}
val:   {str(VAL_IMG.resolve())}
test:  {str(TEST_IMG.resolve())}
nc: {len(lines)}
names: {lines}
"""
    DATA_YAML.write_text(yaml_text)
    print("Wrote data_wildlife.yaml:", DATA_YAML)
else:
    print("Using existing data yaml:", DATA_YAML)
print("DATA_YAML path:", DATA_YAML)

Classes file lines: 4
  0: buffalo
  1: elephant
  2: rhino
  3: zebra
Using existing data yaml: /home/23ucc611/SWE/dataset_2/data_wildlife.yaml
DATA_YAML path: /home/23ucc611/SWE/dataset_2/data_wildlife.yaml


In [4]:
import glob

def check_labels(img_dir, lbl_dir, max_print=5):
    imgs = sorted([p for p in Path(img_dir).glob("*") if p.suffix.lower() in [".jpg",".jpeg",".png"]])
    lbls = sorted(list(Path(lbl_dir).glob("*.txt")))
    print(f"{len(imgs)} images, {len(lbls)} label files in {img_dir} / {lbl_dir}")
    # Show some mismatches
    mismatch = []
    for im in imgs[:1000]:  # limit check to first 1000
        expected_lbl = Path(lbl_dir) / (im.stem + ".txt")
        if not expected_lbl.exists():
            mismatch.append(im.name)
            if len(mismatch) >= max_print:
                break
    if mismatch:
        print("Missing label files for (sample):", mismatch)
    else:
        print("All sample images have matching label files.")
    # Read one label file to confirm format (class x_center y_center w h)
    sample_lbls = lbls[:3]
    for s in sample_lbls:
        with open(s) as f:
            print("----", s.name)
            for l in f.readlines()[:5]:
                print("   ", l.strip())

check_labels(TRAIN_IMG, TRAIN_LABEL)
check_labels(VAL_IMG, VAL_LABEL)


1056 images, 1056 label files in /home/23ucc611/SWE/dataset_2/train/images / /home/23ucc611/SWE/dataset_2/train/labels
All sample images have matching label files.
---- 0_buffalo_0001.txt
    0 0.628906 0.497973 0.731250 0.982432
---- 0_buffalo_0002.txt
    0 0.162891 0.586486 0.177344 0.345946
    0 0.322656 0.582432 0.251563 0.335135
    0 0.646094 0.552703 0.343750 0.389189
    0 0.886719 0.529054 0.156250 0.379730
---- 0_buffalo_0003.txt
    0 0.257813 0.575000 0.212500 0.455405
    0 0.516797 0.513514 0.305469 0.705405
224 images, 224 label files in /home/23ucc611/SWE/dataset_2/val/images / /home/23ucc611/SWE/dataset_2/val/labels
All sample images have matching label files.
---- 0_buffalo_0001.txt
    0 0.569249 0.554167 0.384977 0.600000
---- 0_buffalo_0002.txt
    0 0.480859 0.477027 0.588281 0.632432
---- 0_buffalo_0003.txt
    0 0.435937 0.586486 0.651563 0.662162


In [5]:
# Recommended defaults (change if you want):
MODEL_NAME = "yolov8l.pt"     # yolov8x (largest); try yolov8l / yolov8m for faster iteration
IMG_SIZE = 1024               # larger helps wildlife detection; 640 is faster
BATCH = 8                   # starting point; with V100 32GB you can likely go >=16 for img 1024
EPOCHS = 100
WORKERS = 8
EXPERIMENT_NAME = "wildlife_yolov8x_african"

OPTIMIZER = "Adam"
LR0 = 1e-3
WEIGHT_DECAY = 5e-4
PATIENCE = 20    

print(f"Training on: {len(list(TRAIN_IMG.glob('*')))} train images, batch {BATCH}, img_size {IMG_SIZE}")
steps_per_epoch = math.ceil(len(list(TRAIN_IMG.glob('*'))) / BATCH)
print("Approx steps per epoch:", steps_per_epoch)

Training on: 1056 train images, batch 8, img_size 1024
Approx steps per epoch: 132


In [6]:
model = YOLO(MODEL_NAME)  # loads yolov8x pre-trained weights automatically (internet required the first time)

# train ‚Äî many options are supported; the ultralytics API will create runs/train/<name>
# Key args: data (yaml), epochs, imgsz, batch, device, workers, patience (early stop)
# If you want mixed-precision automatic, ultralytics uses AMP internally if available.
model.train(
    data=str(DATA_YAML),   # path to data yaml created/verified above
    epochs=EPOCHS,
    imgsz=IMG_SIZE,
    batch=BATCH,
    device=DEVICE,
    workers=WORKERS,
    name=EXPERIMENT_NAME,
    optimizer="Adam",    # or 'SGD'
    lr0=1e-3,
    patience=30,         # early stopping patience (stop if no improvement)
    save=True
)


[KDownloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov8l.pt to 'yolov8l.pt': 100% ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ 83.7MB 23.1MB/s 3.6s.6s<0.6ss
Ultralytics 8.3.229 üöÄ Python-3.10.18 torch-2.6.0+cu124 CUDA:0 (Tesla V100-SXM2-32GB, 32494MiB)
[34m[1mengine/trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=8, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, compile=False, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=/home/23ucc611/SWE/dataset_2/data_wildlife.yaml, degrees=0.0, deterministic=True, device=0, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=100, erasing=0.4, exist_ok=False, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=1024, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.001, lrf=0.01, mask_ratio=4, max_det=300, mixup=0

ultralytics.utils.metrics.DetMetrics object with attributes:

ap_class_index: array([0, 1, 2, 3])
box: ultralytics.utils.metrics.Metric object
confusion_matrix: <ultralytics.utils.metrics.ConfusionMatrix object at 0x7f4c0c479390>
curves: ['Precision-Recall(B)', 'F1-Confidence(B)', 'Precision-Confidence(B)', 'Recall-Confidence(B)']
curves_results: [[array([          0,    0.001001,    0.002002,    0.003003,    0.004004,    0.005005,    0.006006,    0.007007,    0.008008,    0.009009,     0.01001,    0.011011,    0.012012,    0.013013,    0.014014,    0.015015,    0.016016,    0.017017,    0.018018,    0.019019,     0.02002,    0.021021,    0.022022,    0.023023,
          0.024024,    0.025025,    0.026026,    0.027027,    0.028028,    0.029029,     0.03003,    0.031031,    0.032032,    0.033033,    0.034034,    0.035035,    0.036036,    0.037037,    0.038038,    0.039039,     0.04004,    0.041041,    0.042042,    0.043043,    0.044044,    0.045045,    0.046046,    0.047047,
          0

In [7]:
# Ultralytics stores runs in ./runs/train/<EXPERIMENT_NAME> by default
runs_root = Path("runs") / "train" / EXPERIMENT_NAME
print("Runs folder:", runs_root.resolve())
print("Files in run folder:")
for p in sorted(runs_root.glob("*")):
    print(" ", p.name)
# Best weights often saved as weights/best.pt
best_weights = runs_root / "weights" / "best.pt"
last_weights = runs_root / "weights" / "last.pt"
print("Best:", best_weights.exists(), best_weights)
print("Last:", last_weights.exists(), last_weights)


Runs folder: /home/23ucc611/SWE/runs/train/wildlife_yolov8x_african
Files in run folder:
Best: False runs/train/wildlife_yolov8x_african/weights/best.pt
Last: False runs/train/wildlife_yolov8x_african/weights/last.pt


In [8]:
# Use model.val to compute metrics
# If you want to use a specific weights file, load it:
trained = YOLO(str(best_weights)) if best_weights.exists() else model

# 'val' will return a metrics dict and print summary
metrics = trained.val(data=str(DATA_YAML), batch=BATCH, imgsz=IMG_SIZE, device=DEVICE)
print("Validation metrics:", metrics)


Ultralytics 8.3.229 üöÄ Python-3.10.18 torch-2.6.0+cu124 CUDA:0 (Tesla V100-SXM2-32GB, 32494MiB)
Model summary (fused): 112 layers, 43,609,692 parameters, 0 gradients, 164.8 GFLOPs
[34m[1mval: [0mFast image access ‚úÖ (ping: 0.0¬±0.0 ms, read: 1216.0¬±404.6 MB/s, size: 132.3 KB)
[K[34m[1mval: [0mScanning /home/23ucc611/SWE/dataset_2/val/labels.cache... 224 images, 0 backgrounds, 0 corrupt: 100% ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ 224/224 195.7Kit/s 0.0s
[K                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100% ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ 28/28 1.2s/it 32.4s0.5s
                   all        224        376      0.968      0.925      0.973      0.844
               buffalo         56         75      0.963      0.933      0.975      0.875
              elephant         57        102      0.939        0.9      0.959      0.792
                 rhino         56         78      0.987      0.949      0.984      0.911
                 z

In [9]:
import shutil
out_dir = Path("runs") / "test" / EXPERIMENT_NAME
out_dir.mkdir(parents=True, exist_ok=True)
# Do predictions for the test set images and save annotated images
test_images = sorted(list(Path(TEST_IMG).glob("*.*")))
# run inference in batches (demonstration: process first 200 images)
for i, img_path in enumerate(test_images[:200]):
    res = trained.predict(source=str(img_path), imgsz=IMG_SIZE, device=DEVICE, conf=0.25, save=True, save_dir=str(out_dir))
    if (i+1) % 50 == 0:
        print(f"Processed {i+1}/{min(len(test_images),200)}")
print("Annotated images saved to:", out_dir)



image 1/1 /home/23ucc611/SWE/dataset_2/test/images/0_buffalo_0001.jpg: 704x1024 1 buffalo, 2788.8ms
Speed: 17.1ms preprocess, 2788.8ms inference, 2.8ms postprocess per image at shape (1, 3, 704, 1024)
Results saved to [1m/home/23ucc611/SWE/runs/detect/predict[0m

image 1/1 /home/23ucc611/SWE/dataset_2/test/images/0_buffalo_0002.jpg: 672x1024 1 buffalo, 2807.9ms
Speed: 8.2ms preprocess, 2807.9ms inference, 2.1ms postprocess per image at shape (1, 3, 672, 1024)
Results saved to [1m/home/23ucc611/SWE/runs/detect/predict[0m

image 1/1 /home/23ucc611/SWE/dataset_2/test/images/0_buffalo_0003.jpg: 864x1024 1 buffalo, 2434.0ms
Speed: 10.3ms preprocess, 2434.0ms inference, 2.2ms postprocess per image at shape (1, 3, 864, 1024)
Results saved to [1m/home/23ucc611/SWE/runs/detect/predict[0m

image 1/1 /home/23ucc611/SWE/dataset_2/test/images/0_buffalo_0004.jpg: 704x1024 1 buffalo, 24.3ms
Speed: 8.8ms preprocess, 24.3ms inference, 2.5ms postprocess per image at shape (1, 3, 704, 1024)
Result

In [None]:
# Export best model to ONNX and TorchScript
if best_weights.exists():
    export_model = YOLO(str(best_weights))
else:
    export_model = trained

# Export to ONNX (use opset 12 or change as needed)
export_model.export(format="onnx")        # creates runs/export/..
export_model.export(format="torchscript")
print("Export complete; check runs/export folder.")


Ultralytics 8.3.229 üöÄ Python-3.10.18 torch-2.6.0+cu124 CPU (Intel Xeon CPU E5-2698 v4 @ 2.20GHz)
üí° ProTip: Export to OpenVINO format for best performance on Intel hardware. Learn more at https://docs.ultralytics.com/integrations/openvino/

[34m[1mPyTorch:[0m starting from '/home/23ucc611/SWE/runs/detect/wildlife_yolov8x_african/weights/best.pt' with input shape (1, 3, 1024, 1024) BCHW and output shape(s) (1, 8, 21504) (83.7 MB)
[31m[1mrequirements:[0m Ultralytics requirements ['onnx>=1.12.0,<=1.19.1', 'onnxslim>=0.1.71', 'onnxruntime-gpu'] not found, attempting AutoUpdate...
Collecting onnx<=1.19.1,>=1.12.0
  Downloading onnx-1.19.1-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (7.0 kB)
Collecting onnxslim>=0.1.71
  Downloading onnxslim-0.1.74-py3-none-any.whl.metadata (7.6 kB)
Collecting onnxruntime-gpu
  Downloading onnxruntime_gpu-1.16.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.3 kB)
Collecting ml_dtypes>=0.5.0 (from on


A module that was compiled using NumPy 1.x cannot be run in
NumPy 2.0.1 as it may crash. To support both 1.x and 2.x
versions of NumPy, modules must be compiled with NumPy 2.0.
Some module may need to rebuild instead e.g. with 'pybind11>=2.12'.

If you are a user of the module, the easiest solution will be to
downgrade to 'numpy<2' or try to upgrade the affected module.
We expect that some modules will need time to support NumPy 2.

Traceback (most recent call last):  File "/home/23ucc611/miniconda3/envs/ml/lib/python3.10/runpy.py", line 196, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/home/23ucc611/miniconda3/envs/ml/lib/python3.10/runpy.py", line 86, in _run_code
    exec(code, run_globals)
  File "/home/23ucc611/miniconda3/envs/ml/lib/python3.10/site-packages/ipykernel_launcher.py", line 18, in <module>
    app.launch_new_instance()
  File "/home/23ucc611/miniconda3/envs/ml/lib/python3.10/site-packages/traitlets/config/application.py", line 1075, 

AttributeError: _ARRAY_API not found

SystemError: <built-in function __import__> returned a result with an exception set

: 

: 

: 

: 