In [17]:
import torch
import torch.nn as nn
import torchvision.models as models
from pathlib import Path
import sys

In [18]:
# Make the parent directory importable so `training.dataset` (imported just
# below) resolves. NOTE(review): '..' is relative to the kernel's current
# working directory, not to this notebook file — presumably the notebook is
# always launched from its own folder; confirm.
sys.path.append('..')
from training.dataset import (
    get_image_paths, 
    get_class_mapping, 
    get_idx_to_class,
    AnimalDataset,
    get_train_transform,
    get_val_transform
)
from torch.utils.data import DataLoader

Load MobileNetV3-Small with ImageNet-pretrained weights

In [19]:
# MobileNetV3-Small initialised from the ImageNet-1k (V1) pretrained weights.
model = models.mobilenet_v3_small(weights="IMAGENET1K_V1")

# One print call, newline-separated: status line, the full architecture, then
# the stock classifier head on its own so the layer being replaced later is
# easy to spot.
print(
    "Model loaded successfully",
    model,
    "\nClassifier:",
    model.classifier,
    sep="\n",
)

Model loaded successfully
MobileNetV3(
  (features): Sequential(
    (0): Conv2dNormActivation(
      (0): Conv2d(3, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(16, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
      (2): Hardswish()
    )
    (1): InvertedResidual(
      (block): Sequential(
        (0): Conv2dNormActivation(
          (0): Conv2d(16, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=16, bias=False)
          (1): BatchNorm2d(16, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
          (2): ReLU(inplace=True)
        )
        (1): SqueezeExcitation(
          (avgpool): AdaptiveAvgPool2d(output_size=1)
          (fc1): Conv2d(16, 8, kernel_size=(1, 1), stride=(1, 1))
          (fc2): Conv2d(8, 16, kernel_size=(1, 1), stride=(1, 1))
          (activation): ReLU()
          (scale_activation): Hardsigmoid()
        )
        (2): Conv2dNormActivation(
          (0): Conv2d(16

Freeze the backbone so that only the classifier head is trained

In [20]:
# Turn off gradient tracking for every tensor in the feature extractor so an
# optimizer step can only change the classifier head.
# NOTE(review): this stops gradient updates only. The backbone's BatchNorm
# layers track running statistics (see the printed architecture), and those
# still update whenever the model is in train mode — call
# `model.features.eval()` during training if they should stay fixed.
for backbone_weight in model.features.parameters():
    backbone_weight.requires_grad_(False)

    

In [21]:
# Sanity check for the freeze step. These are counts of parameter *tensors*
# (weights/biases), not of individual scalar values.
frozen_flags = [not p.requires_grad for p in model.features.parameters()]
frozen_params = sum(frozen_flags)
total_params = len(frozen_flags)
print(f"Frozen parameters: {frozen_params}/{total_params}")

Frozen parameters: 138/138


In [22]:
# Width of the feature vector that the current head's first Linear layer consumes.
in_features = model.classifier[0].in_features

# Head hyper-parameters, named so they are easy to find and tune.
HIDDEN_UNITS = 128   # size of the intermediate representation
DROPOUT_P = 0.2      # probability of zeroing an activation during training
NUM_CLASSES = 6      # number of output logits

# Replace the stock head with a small trainable MLP. Layers are created in
# the same order as they appear in the Sequential.
head_layers = [
    nn.Linear(in_features, HIDDEN_UNITS),   # project backbone features down
    nn.ReLU(),                              # non-linearity
    nn.Dropout(DROPOUT_P),                  # regularisation
    nn.Linear(HIDDEN_UNITS, NUM_CLASSES),   # one logit per class
]
model.classifier = nn.Sequential(*head_layers)

print(f"New classifier: {model.classifier}")

New classifier: Sequential(
  (0): Linear(in_features=576, out_features=128, bias=True)
  (1): ReLU()
  (2): Dropout(p=0.2, inplace=False)
  (3): Linear(in_features=128, out_features=6, bias=True)
)


In [23]:
# Smoke test: push one small batch of real images through the model and print
# the predicted class next to the true one. The new head is untrained at this
# point, so the predictions only show that the pipeline runs end to end.
print("\n=== Testing with Real Data ===")
data_dir = "../../photo_data"
paths = get_image_paths(data_dir)
mapping = get_class_mapping()
idx_to_class = get_idx_to_class()

print(f"Found {len(paths)} images")

if len(paths) > 0:
    # This is an eval-mode forward pass, so use the validation transform
    # rather than the training one.
    # NOTE(review): assumes get_val_transform() is the deterministic,
    # augmentation-free pipeline — confirm in training/dataset.py.
    transform = get_val_transform()
    dataset = AnimalDataset(paths[:5], mapping, transform=transform)  # Just test with 5 images
    dataloader = DataLoader(dataset, batch_size=2, shuffle=False)

    # Only the first batch is needed for the check.
    images, labels = next(iter(dataloader))
    print(f"Batch shape: {images.shape}")
    print(f"Labels: {labels}")

    # eval() disables dropout and makes BatchNorm use its running statistics;
    # no_grad() skips building the autograd graph.
    model.eval()
    with torch.no_grad():
        outputs = model(images)
        probs = torch.softmax(outputs, dim=1)
        predicted = torch.argmax(probs, dim=1)

    print("\nPredictions:")
    # Convert to plain ints once so they can be used as idx_to_class keys.
    for i, (label_idx, pred_idx) in enumerate(zip(labels.tolist(), predicted.tolist())):
        true_class = idx_to_class[label_idx]
        pred_class = idx_to_class[pred_idx]
        confidence = probs[i, pred_idx].item()
        print(f"  Image {i}: True={true_class}, Predicted={pred_class} (conf={confidence:.2f})")
else:
    # Say so explicitly, so an empty or wrong data directory is not mistaken
    # for a passing test.
    print(f"No images found under {data_dir!r}; skipping the smoke test.")


=== Testing with Real Data ===
Found 41 images
Batch shape: torch.Size([2, 3, 224, 224])
Labels: tensor([2, 2])

Predictions:
  Image 0: True=coyote, Predicted=coyote (conf=0.24)
  Image 1: True=coyote, Predicted=coyote (conf=0.24)


In [24]:
# Shape check on random input: a single 3x224x224 image should produce one
# row of logits, one per class of the new head.
model.eval()
dummy_input = torch.randn((1, 3, 224, 224))

with torch.no_grad():  # inference only, no autograd graph needed
    output = model(dummy_input)

print(f"Output shape: {output.shape}")

Output shape: torch.Size([1, 6])
