### Import required libraries
Load all necessary libraries for data handling, model training, and transformation.

In [3]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms, models
from skorch import NeuralNetClassifier
from skorch.helper import predefined_split
from skorch.callbacks import Checkpoint, Freezer
import shutil
from sklearn.model_selection import train_test_split

### Prepare dataset: Split into train and validation sets
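Group the `.jpg` files in `images/` by breed (the part of the filename before the last underscore) and split each breed's images into train (80%) and validation (20%) subsets, copied to `data/train/<breed>/` and `data/val/<breed>/`.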

In [6]:
# Flat source folder that is scanned for the ".jpg" images to split.
image_dir = "images"
# Root of the generated layout: <output_dir>/<split>/<breed>/<file>.
output_dir = "data"

def print_progress(current, total, bar_length=40):
    """Redraw a single-line text progress bar on stdout.

    Args:
        current: Number of work items completed so far.
        total: Total number of work items. A value of zero or less means
            there is nothing to do and is rendered as 100%.
        bar_length: Width of the bar in characters.

    The line starts with a carriage return and has no trailing newline, so
    successive calls overwrite each other; print a newline once the work is
    finished.
    """
    if total <= 0:
        # Avoid ZeroDivisionError: an empty workload is trivially complete.
        percent = 1.0
    else:
        # Clamp so an out-of-range `current` cannot overflow or underflow the bar.
        percent = min(max(current / total, 0.0), 1.0)

    arrow = '=' * int(bar_length * percent)
    spaces = ' ' * (bar_length - len(arrow))
    # flush=True: without a newline the line may otherwise stay buffered.
    print(f"\rProgress: [{arrow}{spaces}] {int(percent * 100)}%", end='', flush=True)

# Collect the ".jpg" files and derive each breed from the filename: everything
# before the last underscore (e.g. "great_pyrenees_12.jpg" -> "great_pyrenees").
# os.listdir returns names in arbitrary order, so the list is sorted; otherwise
# the seeded split below could still differ between machines or re-runs.
all_images = sorted(f for f in os.listdir(image_dir) if f.endswith(".jpg"))
breeds = sorted({f.rsplit("_", 1)[0] for f in all_images})

breed_to_images = {breed: [] for breed in breeds}
for img in all_images:
    breed_to_images[img.rsplit("_", 1)[0]].append(img)

total_breeds = len(breed_to_images)
for i, (breed, files) in enumerate(breed_to_images.items(), 1):
    # Split per breed so every class keeps the same 80/20 ratio.
    train_files, val_files = train_test_split(files, test_size=0.2, random_state=42)

    for split, split_files in [("train", train_files), ("val", val_files)]:
        dest_dir = os.path.join(output_dir, split, breed)
        os.makedirs(dest_dir, exist_ok=True)
        # Files are only ever copied, never removed: copies left behind by an
        # earlier run stay in place, which is why the split must be stable.
        for file in split_files:
            shutil.copy(os.path.join(image_dir, file), os.path.join(dest_dir, file))

    print_progress(i, total_breeds)

# print_progress never emits a newline; terminate the bar's line here.
print()



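### Load and transform dataset
Resize every image to 224×224, convert it to a tensor, and normalize it per channel. Training images additionally get a random horizontal flip as augmentation; validation images are left un-augmented.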

In [5]:
# --- Configuration -----------------------------------------------------------
DATA_DIR = "data"
BATCH_SIZE = 32
NUM_EPOCHS = 10
NUM_WORKERS = 2
SEED = 42
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Seed PyTorch's global RNG so the random parts of a fresh top-to-bottom run
# (weight init of new layers, shuffling, random flips) start from a known
# state. Some GPU kernels may still be nondeterministic.
torch.manual_seed(SEED)

# Input size and per-channel statistics shared by both pipelines.
# NOTE(review): these are the values torchvision documents for its
# ImageNet-pretrained models - confirm if the backbone is ever swapped.
IMAGE_SIZE = (224, 224)
IMAGENET_MEAN = [0.485, 0.456, 0.406]
IMAGENET_STD = [0.229, 0.224, 0.225]

# --- Transforms --------------------------------------------------------------
# Training adds a random horizontal flip; validation stays deterministic.
train_transform = transforms.Compose([
    transforms.Resize(IMAGE_SIZE),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(IMAGENET_MEAN, IMAGENET_STD)
])

val_transform = transforms.Compose([
    transforms.Resize(IMAGE_SIZE),
    transforms.ToTensor(),
    transforms.Normalize(IMAGENET_MEAN, IMAGENET_STD)
])

# --- Datasets ----------------------------------------------------------------
print("Loading datasets")
train_dataset = datasets.ImageFolder(os.path.join(DATA_DIR, "train"), transform=train_transform)
val_dataset = datasets.ImageFolder(os.path.join(DATA_DIR, "val"), transform=val_transform)

# Each ImageFolder derives its class indices from its own sub-folders, so a
# breed missing from one split would silently shift the labels of the other.
assert val_dataset.classes == train_dataset.classes, (
    "train/ and val/ must contain exactly the same class folders"
)

num_classes = len(train_dataset.classes)
print(f"Found {num_classes} classes:")
for idx, label in enumerate(train_dataset.classes):
    print(f"  {idx}: {label}")

Loading datasets
Found 37 classes:
  0: Abyssinian
  1: Bengal
  2: Birman
  3: Bombay
  4: British_Shorthair
  5: Egyptian_Mau
  6: Maine_Coon
  7: Persian
  8: Ragdoll
  9: Russian_Blue
  10: Siamese
  11: Sphynx
  12: american_bulldog
  13: american_pit_bull_terrier
  14: basset_hound
  15: beagle
  16: boxer
  17: chihuahua
  18: english_cocker_spaniel
  19: english_setter
  20: german_shorthaired
  21: great_pyrenees
  22: havanese
  23: japanese_chin
  24: keeshond
  25: leonberger
  26: miniature_pinscher
  27: newfoundland
  28: pomeranian
  29: pug
  30: saint_bernard
  31: samoyed
  32: scottish_terrier
  33: shiba_inu
  34: staffordshire_bull_terrier
  35: wheaten_terrier
  36: yorkshire_terrier


### Define the custom model using pretrained ResNet-18
Load a pretrained ResNet-18 model and replace its final classification layer to match the number of classes.

In [7]:
class PretrainedModel(nn.Module):
    """ResNet-18 loaded with the ``IMAGENET1K_V1`` weights and a new output layer.

    The network is stored as ``self.model``; its ``fc`` layer is replaced by a
    fresh ``nn.Linear`` that maps the original input width to
    ``output_features`` outputs.

    Args:
        output_features: Number of outputs of the replacement ``fc`` layer.
    """

    def __init__(self, output_features):
        super().__init__()
        self.model = models.resnet18(weights="IMAGENET1K_V1")
        # Keep the input width of the original head, change only the output size.
        head_inputs = self.model.fc.in_features
        self.model.fc = nn.Linear(head_inputs, output_features)

    def forward(self, x):
        """Run ``x`` through the wrapped network and return its output."""
        output = self.model(x)
        return output

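### Configure the neural network and training setup
Define the skorch wrapper with the model, the training parameters, the predefined validation set, a checkpoint that saves the best model to `best_model.pt`, and a freezer that keeps every layer except the new `fc` classification head fixed.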

In [13]:
def _outside_fc_head(param_name):
    """Return True for every parameter whose name is not under ``model.fc``."""
    return not param_name.startswith("model.fc")


# Saves the parameters to best_model.pt, keyed on the "valid_acc_best" signal.
checkpoint = Checkpoint(monitor="valid_acc_best", f_params="best_model.pt")
# Freezes whatever the predicate selects, i.e. everything but the fc head.
freezer = Freezer(_outside_fc_head)

net = NeuralNetClassifier(
    # Model and loss
    module=PretrainedModel,
    module__output_features=num_classes,
    criterion=nn.CrossEntropyLoss,
    classes=list(range(num_classes)),
    # Optimisation
    optimizer=optim.Adam,
    lr=0.001,
    max_epochs=NUM_EPOCHS,
    batch_size=BATCH_SIZE,
    device=DEVICE,
    # Data loading
    iterator_train__shuffle=True,
    iterator_train__num_workers=NUM_WORKERS,
    iterator_valid__num_workers=NUM_WORKERS,
    # Validate on the prepared validation dataset instead of an internal split
    train_split=predefined_split(val_dataset),
    callbacks=[checkpoint, freezer],
)

### Train the model
Fit the neural network on the training dataset while validating on the predefined validation set.

In [14]:
# Fit on the training dataset. No separate target array is passed (y=None);
# presumably the labels come from the dataset items themselves - confirm
# against the skorch dataset documentation.
# Validation uses the predefined split given to the net, and the callbacks
# configured there (checkpoint, freezer) run during this call.
net.fit(train_dataset, y=None)

  epoch    train_loss    valid_acc    valid_loss    cp      dur
-------  ------------  -----------  ------------  ----  -------
      1        [36m1.5053[0m       [32m0.8506[0m        [35m0.6653[0m     +  62.5150
      2        [36m0.5717[0m       [32m0.8884[0m        [35m0.4491[0m     +  62.0142
      3        [36m0.4347[0m       [32m0.8938[0m        [35m0.3913[0m     +  62.1468
      4        [36m0.3648[0m       [32m0.8945[0m        [35m0.3470[0m     +  61.9349
      5        [36m0.3225[0m       [32m0.8999[0m        [35m0.3371[0m     +  61.6443
      6        [36m0.2817[0m       [32m0.9053[0m        [35m0.3177[0m     +  61.6371
      7        [36m0.2631[0m       0.9026        [35m0.3129[0m        61.5259
      8        [36m0.2455[0m       [32m0.9080[0m        [35m0.3060[0m     +  61.6376
      9        [36m0.2220[0m       [32m0.9087[0m        [35m0.2926[0m     +  61.6459
     10        [36m0.2086[0m       0.9040        0.2951     