In [None]:
# Extracting Dataset from Roboflow Universe

In [None]:
from roboflow import Roboflow
from pathlib import Path

In [None]:
from roboflow import Roboflow
from pathlib import Path

import os

# Roboflow credentials and dataset coordinates.
# The API key is read from the environment so that it is never stored in the notebook.
# NOTE(review): a key was previously committed in this cell; it should be revoked in Roboflow.
API_KEY   = os.environ.get("ROBOFLOW_API_KEY")
if not API_KEY:
    raise RuntimeError(
        "Set the ROBOFLOW_API_KEY environment variable before running this cell."
    )
WORKSPACE = "aisnake"
PROJECT   = "snakes-flxdk"
VERSION   = 7
# 1  Authenticate and pick the dataset
rf       = Roboflow(api_key=API_KEY)
version  = rf.workspace(WORKSPACE).project(PROJECT).version(VERSION)

# 2  Choose a target directory on F:
target_dir = r"F:\datasets\snakes-flxdk-7"      # raw‑string avoids back‑slash escapes
dataset    = version.download("yolov8", location=target_dir)

print("Data is now at:", dataset.location)      # should print F:\datasets\snakes-flxdk-7

In [None]:
from pathlib import Path
# Root folder of the downloaded dataset; the split folders (train/valid/test) are resolved relative to it.
base_path = Path(dataset.location)  


In [None]:
print("Dataset saved to:", dataset.location)
# → the download location chosen in the download cell, e.g. F:\datasets\snakes-flxdk-7

In [None]:
# Count and print the number of images in each split (train, valid, test) and the total number of images in the dataset

In [None]:
from pathlib import Path
# Count the files under <split>/images for every split and print the
# per-split numbers followed by the overall total.
splits = ["train", "valid", "test"]

total = 0
for s in splits:
    # base_path is the dataset root built from dataset.location
    # (the previous name `base` was never defined and raised NameError).
    n = len(list((base_path / s / "images").glob("*.*")))
    total += n
    print(f"{s.capitalize()} images: {n}")

print(f"Total images: {total}")


In [None]:
# Check that each image has a label file in the train, valid, and test folders.
# This helps make sure the dataset is correct before training.
# Images and labels are paired by file stem ("abc.jpg" <-> "abc.txt"), so a missing
# label cannot be cancelled out by an unrelated extra label file, and the reported
# number can never be negative.
IMAGE_SUFFIXES = {".jpg", ".jpeg", ".png", ".bmp", ".webp"}

for split in ["train", "valid", "test"]:
    images = [
        p for p in (base_path / split / "images").glob("*")
        if p.suffix.lower() in IMAGE_SUFFIXES
    ]
    labels = list((base_path / split / "labels").glob("*.txt"))

    # Images whose stem has no corresponding .txt file in the labels folder
    label_stems = {p.stem for p in labels}
    missing = [img for img in images if img.stem not in label_stems]

    print(f"{split.capitalize()}:")
    print(f"  Images: {len(images)}")
    print(f"  Labels: {len(labels)}")
    print(f"  Missing labels: {len(missing)}\n")

In [None]:
# Import necessary libraries

In [None]:
import os
from pathlib import Path
import matplotlib.pyplot as plt
import seaborn as sns
from PIL import Image
import random

In [None]:
# Path to the dataset (same folder the Roboflow download cell writes to)
DATASET_PATH = Path(r"F:/datasets/snakes-flxdk-7")

splits = ["train", "valid", "test"]

class_counts = {}    # class id (first token of a label line, kept as a string) -> number of lines
image_dims = []      # one img.size tuple per image that could be opened
object_counts = []   # number of non-blank label lines per label file

for split in splits:
    label_dir = DATASET_PATH / split / "labels"
    image_dir = DATASET_PATH / split / "images"

    for label_file in label_dir.glob("*.txt"):
        with open(label_file, "r") as f:
            # Drop blank lines: they are not annotations, and split()[0] would
            # raise IndexError on them.
            lines = [ln for ln in f if ln.strip()]

        object_counts.append(len(lines))  # # of objects in the image

        for line in lines:
            class_id = line.split()[0]
            class_counts[class_id] = class_counts.get(class_id, 0) + 1

        # Get image dimensions: the image shares the label's stem and lives in
        # the images folder; try .jpg first, then .png.
        image_file = image_dir / label_file.with_suffix(".jpg").name
        if not image_file.exists():
            image_file = image_dir / label_file.with_suffix(".png").name
        try:
            with Image.open(image_file) as img:
                image_dims.append(img.size)
        except Exception as e:
            # Best effort: report the unreadable/missing image and keep going.
            print(f"Failed to read {image_file}: {e}")

In [None]:
# Sanity check of the per-class counts collected from the label files.
print(class_counts)        # should show something like {'0': 2000}


In [None]:
from pathlib import Path

# Dataset root on disk and the list of label files in the training split.
base_path = Path(r"F:/datasets/snakes-flxdk-7")
train_label_dir = base_path.joinpath("train", "labels")
label_files = list(train_label_dir.glob("*.txt"))
print(f"Label files found: {len(label_files)}")


In [None]:
# Tally how many label lines belong to each class id in the training split.
# The class id is the first whitespace-separated token of every non-blank line.
class_counts = {}
train_label_dir = base_path / "train" / "labels"

for txt_path in train_label_dir.glob("*.txt"):
    with open(txt_path, "r") as handle:
        for raw in handle:
            row = raw.strip()
            if not row:
                continue  # skip empty lines
            key = row.split()[0]
            class_counts.setdefault(key, 0)
            class_counts[key] += 1

print(class_counts)


In [None]:
# Class distribution: one bar per class id, bar height = number of label lines.
class_ids = list(class_counts.keys())
class_totals = list(class_counts.values())

fig, ax = plt.subplots(figsize=(6, 4))
sns.barplot(x=class_ids, y=class_totals, ax=ax)
ax.set_title("Class Distribution")
ax.set_xlabel("Class ID")
ax.set_ylabel("Count")
ax.grid(True)
fig.tight_layout()
plt.show()


In [None]:
# Class distribution: every label is class 0, which means the dataset only has one type of object — snakes.

In [None]:
# Summary numbers for the training split: total objects, number of label files,
# and the average number of objects per file.
total_labels   = sum(class_counts.values())   # objects
# One .txt label file per image, so the label-file count stands in for the image count.
total_img_files = len(list((base_path / "train" / "labels").glob("*.txt")))
print(f"Objects  : {total_labels}")
print(f"Image files : {total_img_files}")
if total_img_files:
    print(f"Avg objects per image : {total_labels/total_img_files:.2f}")
else:
    # Avoid ZeroDivisionError when the labels folder is empty or the path is wrong.
    print("Avg objects per image : n/a (no label files found)")


In [None]:
# Summary: 1403 snake objects in 1262 images (~1.1 per image). Dataset looks balanced and ready to train.

In [None]:
# Number of entries in object_counts (the list holds one entry per label file read).
print("Length of object_counts:", len(object_counts))


In [None]:
# Rebuild object_counts for the training split: one entry per label file,
# equal to the number of non-blank lines (boxes) in that file.
object_counts = []
train_label_dir = base_path / "train" / "labels"

for txt_path in train_label_dir.glob("*.txt"):
    with open(txt_path) as handle:
        box_total = sum(1 for row in handle if row.strip())
    object_counts.append(box_total)   # how many boxes in this image


In [None]:
#  Object count per image

In [None]:

%matplotlib inline  

import matplotlib.pyplot as plt
import seaborn as sns

# Histogram (with KDE overlay) of how many objects each image contains.
fig, ax = plt.subplots(figsize=(6, 4))
sns.histplot(object_counts, bins=15, kde=True, ax=ax)
ax.set_title("Number of Objects per Image")
ax.set_xlabel("# Objects")
ax.set_ylabel("Image Count")
ax.grid(True)
fig.tight_layout()
plt.show()


In [None]:
# Most images have only one snake, which makes the model easier to train. 
# Images with two or more snakes are rare, but the model can still detect multiple snakes if they appear.


In [None]:
from pathlib import Path
from PIL import Image

# Collect the size of every image in all three splits.
base_path = Path(r"F:/datasets/snakes-flxdk-7")    # adjust if needed
splits = ["train", "valid", "test"]

image_dims = []

for split in splits:
    for img_file in (base_path / split / "images").glob("*.*"):
        # "*.*" also matches stray non-image files; report and skip anything
        # that cannot be opened instead of aborting the whole scan.
        try:
            with Image.open(img_file) as img:
                image_dims.append(img.size)    # (width, height)
        except Exception as e:
            print(f"Failed to read {img_file}: {e}")


In [None]:
# Image dimension distribution
import matplotlib.pyplot as plt
import seaborn as sns

# Nothing can be plotted when no image size was collected, so handle that case first.
if not image_dims:
    print("image_dims is empty—check dataset path or image reading loop.")
else:
    # Split the (width, height) pairs into two parallel tuples.
    widths = tuple(dim[0] for dim in image_dims)
    heights = tuple(dim[1] for dim in image_dims)

    # Overlay the width and height density histograms on the same axes.
    fig, ax = plt.subplots(figsize=(6, 4))
    sns.histplot(widths,  color="blue",  label="Width",  kde=True, stat="density", ax=ax)
    sns.histplot(heights, color="green", label="Height", kde=True, stat="density", ax=ax)
    ax.set_title("Image Width and Height Distribution")
    ax.set_xlabel("Pixels")
    ax.legend()
    ax.grid(True)
    fig.tight_layout()
    plt.show()


In [None]:
# Image size distribution: all images have the same size (640x640). The single spike in the density plot shows that 100% of images are this size,
# which is perfect for YOLO training.

In [None]:
# Dataset Summary:
# EDA confirms the dataset is clean and consistent — it has a single class (snake),
# most images contain only 1 object, and all images are of uniform size.
# This makes the dataset well-suited for training a YOLO object detection model.

In [None]:
# Use the %pip magic so the packages are installed into the environment of the
# running kernel (a plain !pip can resolve to a different Python installation).
%pip install ultralytics opencv-python matplotlib


In [None]:
from ultralytics import YOLO

# Dataset description file written by the Roboflow export
data_yaml = r"F:\datasets\snakes-flxdk-7\data.yaml"

model = YOLO("yolov8s.pt")  # or use a different pretrained checkpoint if preferred

# Training settings collected in one place so they are easy to tune.
train_settings = {
    "data": data_yaml,
    "epochs": 50,          # Increase for better learning
    "imgsz": 640,          # Standard size for YOLOv8
    "batch": 32,           # Works fine on most systems; increase if you have more GPU memory
    "name": "snakes_v7",   # This will go to runs/detect/snakes_v7
}

model.train(**train_settings)


In [None]:
from pathlib import Path

# Absolute path where best.pt of the snakes_v7 run is expected
# (the path is only built here, not checked for existence).
weights_dir = Path("runs") / "detect" / "snakes_v7" / "weights"
best_path = (weights_dir / "best.pt").resolve()

print("✅ Your best.pt is located at:\n", best_path)


In [None]:
from pathlib import Path
# Show every entry under runs/detect whose name starts with "snakes_v7".
detect_root = Path("runs/detect")
matching_runs = detect_root.glob("snakes_v7*")
for run_dir in matching_runs:
    print(run_dir)


In [None]:
from ultralytics import YOLO
import cv2, os

# Run the trained model on one training image and save the annotated result.
src   = r"F:\datasets\snakes-flxdk-7\train\images\00f4b755cf5e796f940b4e63e58d8260_jpg.rf.22ec998708a06806b80b1435ca27e25d.jpg"
model = YOLO(r"runs\detect\snakes_v7\weights\best.pt")

result    = model(src, imgsz=640, conf=0.25)[0]   # first (only) result for the single input image
annotated = result.plot()                          # presumably the image with detections drawn on it — confirm against Ultralytics docs

results_dir = r"F:\results"                          # <── define this
os.makedirs(results_dir, exist_ok=True)

save_path = os.path.join(results_dir, "annotated_snake.jpg")
# cv2.imwrite signals failure through its boolean return value rather than an
# exception, so check it before announcing success.
if not cv2.imwrite(save_path, annotated):
    raise OSError(f"Could not write annotated image to {save_path}")

print(f"✅ Saved to {save_path}")



In [None]:
from pathlib import Path
import datetime

# Folder that holds the YOLOv8 run directories
detect_dir = Path("runs/detect")

# Keep only the directories whose name starts with "val"
val_folders = [entry for entry in detect_dir.glob("val*") if entry.is_dir()]

# Order them newest-first by modification time
val_folders_sorted = list(val_folders)
val_folders_sorted.sort(key=lambda entry: entry.stat().st_mtime, reverse=True)

# One line per folder: its name and when it was last modified
for folder in val_folders_sorted:
    mod_time = datetime.datetime.fromtimestamp(folder.stat().st_mtime)
    print(f"{folder.name} - Last modified: {mod_time}")
