In [1]:
from pathlib import Path
import pandas as pd
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "4" 
import torch
print("GPU name:", torch.cuda.get_device_name(0))
print("Total GPU mem (GB):", torch.cuda.get_device_properties(0).total_memory / (1024**3))
import time

runs_dir = Path("/home/23ucc611/SWE/dataset/runs") / "detect"
print("Runs folder:", runs_dir.resolve())
runs = sorted([d for d in runs_dir.iterdir() if d.is_dir()], key=lambda p: p.stat().st_mtime, reverse=True)

for r in runs:
    wdir = r / "weights"
    last = wdir / "last.pt"
    best = wdir / "best.pt"
    csv = r / "results.csv"
    mtime = r.stat().st_mtime
    print(f"\nRun: {r.name} (modified: {time.ctime(mtime)})")
    print("  weights/last.pt:", last.exists(), " size:", last.stat().st_size if last.exists() else None)
    print("  weights/best.pt:", best.exists(), " size:", best.stat().st_size if best.exists() else None)
    print("  results.csv:", csv.exists())
    if csv.exists():
        try:
            df = pd.read_csv(csv)
            print("   results.csv tail:")
            display(df.tail(3))
        except Exception as e:
            print("   Could not read results.csv:", e)


GPU name: Tesla V100-SXM2-32GB
Total GPU mem (GB): 31.7325439453125
Runs folder: /home/23ucc611/SWE/dataset/runs/detect

Run: wildlife_yolov8x_10245 (modified: Wed Nov 19 20:20:10 2025)
  weights/last.pt: True  size: 262666212
  weights/best.pt: True  size: 262666084
  results.csv: True
   results.csv tail:


Unnamed: 0,epoch,time,train/box_loss,train/cls_loss,train/dfl_loss,metrics/precision(B),metrics/recall(B),metrics/mAP50(B),metrics/mAP50-95(B),val/box_loss,val/cls_loss,val/dfl_loss,lr/pg0,lr/pg1,lr/pg2
32,33,11084.1,1.27458,2.51074,1.90923,0.39582,0.14376,0.11043,0.06714,2.75424,2.33787,4.90227,0.000683,0.000683,0.000683
33,34,11414.8,1.26485,2.48422,1.90599,0.43282,0.13923,0.11826,0.0694,2.73166,2.22792,5.0804,0.000673,0.000673,0.000673
34,35,11746.4,1.2533,2.46006,1.89859,0.1784,0.17345,0.11778,0.06849,2.71257,2.24611,5.11264,0.000663,0.000663,0.000663



Run: wildlife_yolov8x_10244 (modified: Wed Nov 19 20:11:03 2025)
  weights/last.pt: False  size: None
  weights/best.pt: False  size: None
  results.csv: False

Run: wildlife_yolov8x_10243 (modified: Wed Nov 19 20:09:31 2025)
  weights/last.pt: False  size: None
  weights/best.pt: False  size: None
  results.csv: False

Run: wildlife_yolov8x_10242 (modified: Wed Nov 19 20:07:19 2025)
  weights/last.pt: False  size: None
  weights/best.pt: False  size: None
  results.csv: False

Run: wildlife_yolov8x_1024 (modified: Wed Nov 19 20:03:40 2025)
  weights/last.pt: False  size: None
  weights/best.pt: False  size: None
  results.csv: False


In [2]:
# Option: auto-detect most recent run with weights/last.pt
runs_dir = Path("/home/23ucc611/SWE/dataset/runs") / "detect"
candidate = None
for r in sorted(runs_dir.iterdir(), key=lambda p: p.stat().st_mtime, reverse=True):
    if (r / "weights" / "last.pt").exists():
        candidate = r
        break

if candidate is None:
    raise SystemExit("No run with weights/last.pt found under runs/train. Check your folders.")

print("Auto-selected run to resume:", candidate.name)
RESUME_RUN = candidate.name
# if you prefer manual override, uncomment and set below:
# RESUME_RUN = "wildlife_yolov8x_10244"


Auto-selected run to resume: wildlife_yolov8x_10245


In [3]:
from pathlib import Path
import yaml
import torch
import pandas as pd

runs_dir = Path("/home/23ucc611/SWE/dataset/runs") / "detect"
run_dir = runs_dir / RESUME_RUN
print("Resuming run:", run_dir)

results_csv = run_dir / "results.csv"
last_pt = run_dir / "weights" / "last.pt"
best_pt = run_dir / "weights" / "best.pt"

if results_csv.exists():
    df = pd.read_csv(results_csv)
    # ultralytics results.csv often has 'epoch' column; else we use index
    if 'epoch' in df.columns:
        current_epoch = int(df['epoch'].dropna().max())
        print("Detected current epoch from results.csv:", current_epoch)
    else:
        current_epoch = int(df.index.max())
        print("No 'epoch' column in results.csv, using index:", current_epoch)
elif last_pt.exists():
    print("results.csv not found. Using last.pt modification time instead.")
    current_epoch = None
else:
    raise SystemExit("No results.csv or last.pt found in the chosen run.")

print("weights/last.pt exists?:", last_pt.exists(), "best.pt exists?:", best_pt.exists())
print("last.pt:", last_pt)
print("best.pt:", best_pt)


Resuming run: /home/23ucc611/SWE/dataset/runs/detect/wildlife_yolov8x_10245
Detected current epoch from results.csv: 35
weights/last.pt exists?: True best.pt exists?: True
last.pt: /home/23ucc611/SWE/dataset/runs/detect/wildlife_yolov8x_10245/weights/last.pt
best.pt: /home/23ucc611/SWE/dataset/runs/detect/wildlife_yolov8x_10245/weights/best.pt


In [4]:
# Set total target epochs for full training run
TOTAL_EPOCHS = 75   # change to your intended total epochs

# compute remaining epochs
try:
    remaining = TOTAL_EPOCHS - current_epoch if current_epoch is not None else None
    print("Total epochs target:", TOTAL_EPOCHS, "Current epoch:", current_epoch, "Remaining:", remaining)
except Exception as e:
    remaining = None
    print("Could not compute remaining epochs:", e)


Total epochs target: 75 Current epoch: 35 Remaining: 40


In [None]:
from ultralytics import YOLO
from pathlib import Path

# Paths / parameters
run_dir = Path("/home/23ucc611/SWE/dataset/runs") / "detect" / RESUME_RUN
last_ckpt = run_dir / "weights" / "last.pt"
if not last_ckpt.exists():
    raise SystemExit(f"No last.pt found at {last_ckpt}. Use a different run or use weights=best.pt or pretrained model.")

# load the checkpoint as the model to resume; ultralytics understands resume=True
model = YOLO(str(last_ckpt))  # load checkpoint

# training args (keep consistent with original run: imgsz, batch, device, etc.)
DATA_YAML = "/home/23ucc611/SWE/dataset/data_wildlife.yaml"  # or your data yaml path
IMG_SIZE = 1024
BATCH = 16
DEVICE = 0
WORKERS = 8
# IMPORTANT: set name to the same run name so ultralytics writes into same folder (it will create run-2 if not careful)
NAME = RESUME_RUN

# Resume training. Set epochs to TOTAL_EPOCHS (target total). ultralytics will continue from checkpoint epoch to reach TOTAL_EPOCHS.
print(f"Resuming training from {last_ckpt} and training until epoch {TOTAL_EPOCHS} (resume=True).")
model.train(
    data=DATA_YAML,
    epochs=TOTAL_EPOCHS,
    imgsz=IMG_SIZE,
    batch=BATCH,
    device=DEVICE,
    workers=WORKERS,
    name=NAME,
    resume=True,
    save=True
)


Resuming training from /home/23ucc611/SWE/dataset/runs/detect/wildlife_yolov8x_10245/weights/last.pt and training until epoch 75 (resume=True).
Ultralytics 8.3.229 üöÄ Python-3.10.18 torch-2.6.0+cu124 CUDA:0 (Tesla V100-SXM2-32GB, 32494MiB)
[34m[1mengine/trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=16, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, compile=False, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=/home/23ucc611/SWE/dataset/data_wildlife.yaml, degrees=0.0, deterministic=True, device=0, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=100, erasing=0.4, exist_ok=False, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=1024, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.001, lrf=0.01, mask_ratio=4, max_det=300, mixup=0.0, mode=train, model=/home/23ucc61