In [4]:
import torch

from torchvision import datasets, transforms
from torch.utils.data import DataLoader

# Device check
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"⚙️ Using device: {device}")

# Transform: resize → tensor → normalize
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],  # ImageNet stats
        std=[0.229, 0.224, 0.225]
    )
])

# Dataset
train_ds = datasets.ImageFolder(root='cifar2/train', transform=transform)
valid_ds = datasets.ImageFolder(root='cifar2/valid', transform=transform)
class_names = train_ds.classes 
# DataLoaders
train_loader = DataLoader(train_ds, batch_size=8, shuffle=True)
valid_loader = DataLoader(valid_ds, batch_size=8, shuffle=False)

# Print class names
print(f"✅ Classes: {train_ds.classes}")


⚙️ Using device: cpu
✅ Classes: ['cat', 'dog']


mobilenetv2


In [16]:
import torch
import torch.nn as nn
from torchvision import models, transforms, datasets
from torch.utils.data import DataLoader
from sklearn.metrics import accuracy_score
import time

# Device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Data transforms (smaller image size)
transform = transforms.Compose([
    transforms.Resize((96, 96)),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225]
    )
])

# Dataset
train_ds = datasets.ImageFolder("cifar2/train", transform=transform)
valid_ds = datasets.ImageFolder("cifar2/valid", transform=transform)
train_loader = DataLoader(train_ds, batch_size=16, shuffle=True)
valid_loader = DataLoader(valid_ds, batch_size=1, shuffle=False)

# Model
mobilenet = models.mobilenet_v2(pretrained=True)
for param in mobilenet.parameters():
    param.requires_grad = False  # freeze backbone
mobilenet.classifier[1] = nn.Linear(mobilenet.last_channel, 2)
mobilenet = mobilenet.to(device)

# Training
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(mobilenet.classifier.parameters(), lr=1e-3)

mobilenet.train()
for epoch in range(3):  # quick training
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = mobilenet(images)
        loss = criterion(outputs, labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    print(f"✅ Epoch {epoch+1} complete.")

# Save model
torch.save(mobilenet.state_dict(), "mobilenetv2.pth")




Downloading: "https://download.pytorch.org/models/mobilenet_v2-b0353104.pth" to C:\Users\viju1/.cache\torch\hub\checkpoints\mobilenet_v2-b0353104.pth


100.0%


✅ Epoch 1 complete.
✅ Epoch 2 complete.
✅ Epoch 3 complete.


In [20]:
mobilenet.eval()
y_true, y_pred = [], []
inference_times = []

with torch.inference_mode():
    for images, labels in valid_loader:
        images = images.to(device)
        start = time.time()
        outputs = mobilenet(images)
        end = time.time()

        preds = outputs.argmax(dim=1).cpu().item()
        y_pred.append(preds)
        y_true.append(labels.item())
        inference_times.append((end - start) * 1000)

accuracy = accuracy_score(y_true, y_pred)
avg_time = sum(inference_times) / len(inference_times)

print(f"🎯 MobileNetV2 Accuracy: {accuracy*100:.2f}%")
print(f"⚡ Avg Inference Time: {avg_time:.2f} ms/image")


🎯 MobileNetV2 Accuracy: 86.00%
⚡ Avg Inference Time: 19.21 ms/image


MobileNetV3Small

In [21]:
from torchvision.models import mobilenet_v3_small

# Model
mobilenetv3 = mobilenet_v3_small(pretrained=True)
for param in mobilenetv3.parameters():
    param.requires_grad = False  # freeze backbone
mobilenetv3.classifier[3] = nn.Linear(mobilenetv3.classifier[3].in_features, 2)
mobilenetv3 = mobilenetv3.to(device)

# Optimizer
optimizer = torch.optim.Adam(mobilenetv3.classifier.parameters(), lr=1e-3)

# Training
mobilenetv3.train()
for epoch in range(3):  # minimal epochs
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = mobilenetv3(images)
        loss = criterion(outputs, labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    print(f"✅ Epoch {epoch+1} done.")

# Save model
torch.save(mobilenetv3.state_dict(), "mobilenetv3small.pth")




Downloading: "https://download.pytorch.org/models/mobilenet_v3_small-047dcff4.pth" to C:\Users\viju1/.cache\torch\hub\checkpoints\mobilenet_v3_small-047dcff4.pth


100.0%


✅ Epoch 1 done.
✅ Epoch 2 done.
✅ Epoch 3 done.


In [23]:
mobilenetv3.eval()
y_true, y_pred = [], []
inference_times = []

with torch.inference_mode():
    for images, labels in valid_loader:
        images = images.to(device)
        start = time.time()
        outputs = mobilenetv3(images)
        end = time.time()

        preds = outputs.argmax(dim=1).cpu().item()
        y_pred.append(preds)
        y_true.append(labels.item())
        inference_times.append((end - start) * 1000)

accuracy = accuracy_score(y_true, y_pred)
avg_time = sum(inference_times) / len(inference_times)

print(f"🎯 MobileNetV3Small Accuracy: {accuracy*100:.2f}%")
print(f"⚡ Avg Inference Time: {avg_time:.2f} ms/image")


🎯 MobileNetV3Small Accuracy: 76.00%
⚡ Avg Inference Time: 11.13 ms/image


In [36]:
from ultralytics import YOLO

# Load YOLOv8n classifier
yolo_model = YOLO('yolov8n-cls.pt')

# Train on your dataset with smaller image size
results = yolo_model.train(
    data='cifar2',      # path to dataset
    epochs=5,           
    imgsz=96,           # small image size for speed
    batch=16,
    name='yolov8n_cls_96'
)


Ultralytics 8.3.174  Python-3.13.5 torch-2.7.1+cpu CPU (11th Gen Intel Core(TM) i5-1155G7 2.50GHz)
[34m[1mengine\trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=16, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=cifar2, degrees=0.0, deterministic=True, device=cpu, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=5, erasing=0.4, exist_ok=False, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=96, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.01, lrf=0.01, mask_ratio=4, max_det=300, mixup=0.0, mode=train, model=yolov8n-cls.pt, momentum=0.937, mosaic=1.0, multi_scale=False, name=yolov8n_cls_962, nbs=64, nms=False, opset=None, optimize=False, optimizer=auto, overlap_mask=True, patience=100, perspective=0.0, plots=True, pos

[34m[1mtrain: [0mScanning C:\Users\viju1\Desktop\img classification\cifar2\train... 100 images, 0 corrupt: 100%|██████████| 100/100 [00:00<?, ?it/s]

[34m[1mval: [0mFast image access  (ping: 0.00.0 ms, read: 186.7104.3 MB/s, size: 18.8 KB)



[34m[1mval: [0mScanning C:\Users\viju1\Desktop\img classification\cifar2\valid... 100 images, 0 corrupt: 100%|██████████| 100/100 [00:00<?, ?it/s]

[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m AdamW(lr=0.001667, momentum=0.9) with parameter groups 26 weight(decay=0.0), 27 weight(decay=0.0005), 27 bias(decay=0.0)
Image sizes 96 train, 96 val
Using 0 dataloader workers
Logging results to [1mruns\classify\yolov8n_cls_962[0m
Starting training for 5 epochs...

      Epoch    GPU_mem       loss  Instances       Size



        1/5         0G     0.7843          4         96: 100%|██████████| 7/7 [00:01<00:00,  5.77it/s]
               classes   top1_acc   top5_acc: 100%|██████████| 4/4 [00:00<00:00,  9.37it/s]

                   all       0.56          1






      Epoch    GPU_mem       loss  Instances       Size


        2/5         0G     0.6516          4         96: 100%|██████████| 7/7 [00:01<00:00,  6.88it/s]
               classes   top1_acc   top5_acc: 100%|██████████| 4/4 [00:00<00:00, 11.00it/s]

                   all       0.66          1






      Epoch    GPU_mem       loss  Instances       Size


        3/5         0G     0.5845          4         96: 100%|██████████| 7/7 [00:01<00:00,  6.51it/s]
               classes   top1_acc   top5_acc: 100%|██████████| 4/4 [00:00<00:00, 12.47it/s]

                   all       0.71          1

      Epoch    GPU_mem       loss  Instances       Size



        4/5         0G     0.5216          4         96: 100%|██████████| 7/7 [00:00<00:00,  9.01it/s]
               classes   top1_acc   top5_acc: 100%|██████████| 4/4 [00:00<00:00, 13.46it/s]

                   all       0.73          1






      Epoch    GPU_mem       loss  Instances       Size


        5/5         0G     0.5176          4         96: 100%|██████████| 7/7 [00:00<00:00,  8.76it/s]
               classes   top1_acc   top5_acc: 100%|██████████| 4/4 [00:00<00:00, 10.56it/s]

                   all       0.73          1






5 epochs completed in 0.002 hours.
Optimizer stripped from runs\classify\yolov8n_cls_962\weights\last.pt, 3.0MB
Optimizer stripped from runs\classify\yolov8n_cls_962\weights\best.pt, 3.0MB

Validating runs\classify\yolov8n_cls_962\weights\best.pt...
Ultralytics 8.3.174  Python-3.13.5 torch-2.7.1+cpu CPU (11th Gen Intel Core(TM) i5-1155G7 2.50GHz)
YOLOv8n-cls summary (fused): 30 layers, 1,437,442 parameters, 0 gradients, 3.3 GFLOPs
[34m[1mtrain:[0m C:\Users\viju1\Desktop\img classification\cifar2\train... found 100 images in 2 classes  
[34m[1mval:[0m C:\Users\viju1\Desktop\img classification\cifar2\valid... found 100 images in 2 classes  
[34m[1mtest:[0m None...


               classes   top1_acc   top5_acc: 100%|██████████| 4/4 [00:00<00:00,  7.80it/s]


                   all       0.73          1
Speed: 0.0ms preprocess, 1.7ms inference, 0.0ms loss, 0.0ms postprocess per image
Results saved to [1mruns\classify\yolov8n_cls_962[0m


In [37]:
import os
from PIL import Image
import time

# Load best weights
yolo_loaded = YOLO('runs/classify/yolov8n_cls_96/weights/best.pt')
yolo_loaded.fuse()

# Collect test images
valid_path = 'cifar2/valid'
image_paths = []
labels = []

for class_index, class_name in enumerate(sorted(os.listdir(valid_path))):
    class_dir = os.path.join(valid_path, class_name)
    for fname in os.listdir(class_dir):
        if fname.endswith((".jpg", ".jpeg", ".png")):
            image_paths.append(os.path.join(class_dir, fname))
            labels.append(class_index)

# Inference
y_true, y_pred, inference_times = [], [], []

for path, label in zip(image_paths, labels):
    img = Image.open(path).convert('RGB')
    
    start = time.time()
    results = yolo_loaded(img, verbose=False)
    end = time.time()
    
    pred = int(results[0].probs.top1)
    y_true.append(label)
    y_pred.append(pred)
    inference_times.append((end - start) * 1000)

# Accuracy & time
from sklearn.metrics import accuracy_score
acc = accuracy_score(y_true, y_pred)
avg_time = sum(inference_times) / len(inference_times)

print(f"🎯 YOLOv8n Accuracy: {acc * 100:.2f}%")
print(f"⚡ Avg Inference Time: {avg_time:.2f} ms/image")


YOLOv8n-cls summary (fused): 30 layers, 1,437,442 parameters, 0 gradients, 3.3 GFLOPs
🎯 YOLOv8n Accuracy: 73.00%
⚡ Avg Inference Time: 8.60 ms/image
