# Oxford Pet Classification

In [None]:
# Download the pet image and annotation archives into /content (-P sets the
# target directory).
# NOTE(review): --no-check-certificate turns off TLS certificate verification;
# confirm it is actually needed for these http:// URLs before keeping it.
!wget http://www.robots.ox.ac.uk/~vgg/data/pets/data/images.tar.gz  -P /content --no-check-certificate
!wget http://www.robots.ox.ac.uk/~vgg/data/pets/data/annotations.tar.gz  -P /content --no-check-certificate

In [None]:
# Make /content the notebook's working directory.
%cd /content

# Unpack both archives into /content (-C: target directory, -x: extract,
# -v: list each file as it is written, -f: archive to read).
!tar -C /content -xvf /content/images.tar.gz
!tar -C /content -xvf /content/annotations.tar.gz

In [None]:
# Print the current working directory.
!pwd

In [None]:
from collections import Counter
from pathlib import Path
from string import ascii_lowercase, ascii_uppercase

import matplotlib.pyplot as plt
import torch
from PIL import Image
from torch import nn, optim
from torch.utils.data import DataLoader, Dataset, random_split
from torchvision import transforms
from tqdm import tqdm
from sklearn.metrics import classification_report

In [None]:
# Report whether PyTorch can see a CUDA device in this runtime.
torch.cuda.is_available()

In [None]:
# Locate the extracted images and derive a cat/dog label for every file.
root = Path("/content")
images_dir = root / "images"
jpgs = sorted(images_dir.glob("*.jpg"))
print(len(jpgs))

# Filename convention relied on here: a capital first letter marks a cat
# image, a lower-case first letter marks a dog image.
labels = ["dog" if j.name[0] in ascii_lowercase else "cat" for j in jpgs]
label_to_id = {"cat": 0, "dog": 1}
id_to_label = {class_id: name for name, class_id in label_to_id.items()}

# Bar chart showing how many images carry each label.
labels_count = Counter(labels)
plt.title("Labels Frequency")
plt.bar(labels_count.keys(), labels_count.values(), color="seagreen")
plt.show()

In [None]:
# Preprocessing applied to every image: resize to a fixed 256x256, then
# convert to a tensor.
image_size = (256, 256)
image_transform = transforms.Compose(
    [transforms.Resize(image_size), transforms.ToTensor()]
)

In [None]:
class OxfordDataset(Dataset):
    """Map-style dataset yielding ``(image_tensor, label_id_tensor)`` pairs.

    Args:
        image_paths: Sequence of image file paths.
        labels: Sequence of label names, parallel to ``image_paths``.
        label_to_id: Mapping from label name to integer class id.
        image_transform: Callable applied to each RGB ``PIL.Image``; whatever
            it returns is used as the image half of the sample.

    Raises:
        ValueError: If ``image_paths`` and ``labels`` differ in length.
    """

    def __init__(self, image_paths, labels, label_to_id, image_transform):
        # Fail fast: the two sequences are indexed in lockstep, so a length
        # mismatch would otherwise surface as a late IndexError or, worse,
        # go unnoticed when ``labels`` is the longer one.
        if len(image_paths) != len(labels):
            raise ValueError(
                f"image_paths and labels must have the same length, "
                f"got {len(image_paths)} and {len(labels)}"
            )
        self.image_paths = image_paths
        self.labels = labels
        self.label_to_id = label_to_id
        self.image_transform = image_transform

    def __len__(self):
        """Return the number of samples (one per image path)."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(image, label_id)``.

        The image is converted to RGB and passed through
        ``image_transform``; the label id is a 0-dim ``torch.long`` tensor.
        """
        # Close the file explicitly instead of relying on Pillow closing it
        # as a side effect of loading; convert() returns a separate image
        # that remains usable after the source file is closed.
        with Image.open(self.image_paths[idx]) as raw:
            img = raw.convert("RGB")
        lbl_id = self.label_to_id[self.labels[idx]]
        img_tsr = self.image_transform(img)
        lbl_tsr = torch.tensor(lbl_id, dtype=torch.long)
        return img_tsr, lbl_tsr


# Wrap the image/label lists in a dataset and split it 80/20 into train and
# test subsets; the generator is seeded so the split is repeatable.
dataset = OxfordDataset(jpgs, labels, label_to_id, image_transform)
split_generator = torch.Generator().manual_seed(42)
train_dataset, test_dataset = random_split(dataset, [0.8, 0.2], split_generator)

# Both loaders share batch size and worker count; only the training loader
# shuffles its samples.
batch_size = 512
loader_kwargs = {"batch_size": batch_size, "num_workers": 2}
train_loader = DataLoader(train_dataset, shuffle=True, **loader_kwargs)
test_loader = DataLoader(test_dataset, shuffle=False, **loader_kwargs)

Typical layer ordering: ... -> CONV/FC -> BatchNorm -> ReLU (or other activation) -> Dropout -> CONV/FC -> ...

In [None]:
class ConvBlock(nn.Module):
    """Conv2d -> BatchNorm2d -> ReLU -> Dropout2d -> MaxPool2d stage.

    The convolution is 3x3 with stride 1 and padding 1; the block ends with
    a max-pool of size 2.

    Args:
        in_channels: Number of channels in the input feature map.
        out_channels: Number of channels produced by the convolution.
    """

    def __init__(self, in_channels, out_channels):
        super().__init__()

        # Layers are created in the order they are applied in forward().
        self.conv = nn.Conv2d(
            in_channels,
            out_channels,
            kernel_size=3,
            stride=1,
            padding=1,
        )
        self.norm = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout2d(p=0.1)
        self.pool = nn.MaxPool2d(kernel_size=2)

    def forward(self, x):
        """Run ``x`` through every layer of the block in sequence."""
        stages = (self.conv, self.norm, self.relu, self.dropout, self.pool)
        for stage in stages:
            x = stage(x)
        return x


# Seven ConvBlocks widen the channels 3 -> 8 -> 16 -> ... -> 512; global
# average pooling then collapses each feature map to a single value and a
# linear layer maps the 512 features to the two class logits.
channel_plan = [3, 8, 16, 32, 64, 128, 256, 512]
conv_stages = [
    ConvBlock(c_in, c_out)
    for c_in, c_out in zip(channel_plan, channel_plan[1:])
]
model = nn.Sequential(
    *conv_stages,
    nn.AdaptiveAvgPool2d((1, 1)),
    nn.Flatten(),
    nn.Linear(channel_plan[-1], 2),
)

# Run on the GPU when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"{device=}")
model = model.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=3e-4, weight_decay=1e-5)

In [None]:
# Train for 100 epochs. After every epoch the model is evaluated on
# test_loader, a classification report is printed, and the weights are
# checkpointed whenever the mean evaluation loss reaches a new minimum.
best_eval_loss = float("inf")
for epoch in range(100):
    # ---- Training pass ----
    loss_sum = 0.0
    samples_seen = 0
    model.train()
    pbar = tqdm(train_loader, desc=f"Training Epoch {epoch}")
    for x, y in pbar:
        x = x.to(device)
        y = y.to(device)
        logits = model(x)
        loss = criterion(logits, y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # The criterion returns a per-batch mean, so weight it by the batch
        # size; otherwise a short final batch would count as much as a full
        # one in the running average.
        batch_len = y.size(0)
        loss_sum += loss.item() * batch_len
        samples_seen += batch_len
        mean_train_loss = loss_sum / samples_seen

        pbar.set_postfix({"mean_loss": mean_train_loss})

    # ---- Evaluation pass ----
    loss_sum = 0.0
    samples_seen = 0
    model.eval()
    pbar = tqdm(test_loader, desc=f"Evaluating Epoch {epoch}")
    y_true = []
    y_pred = []
    # No gradients are needed anywhere in evaluation, including the loss.
    with torch.no_grad():
        for x, y in pbar:
            x = x.to(device)
            y = y.to(device)
            logits = model(x)
            loss = criterion(logits, y)
            preds = logits.argmax(dim=1)
            y_true.extend(y.tolist())
            y_pred.extend(preds.tolist())

            # Same sample-weighted average as in training, so the value used
            # for checkpoint selection is the true per-sample mean loss.
            batch_len = y.size(0)
            loss_sum += loss.item() * batch_len
            samples_seen += batch_len
            mean_eval_loss = loss_sum / samples_seen

            pbar.set_postfix({"mean_loss": mean_eval_loss})

    # Keep a copy of the weights with the lowest evaluation loss so far.
    if mean_eval_loss < best_eval_loss:
        best_eval_loss = mean_eval_loss
        torch.save(model.state_dict(), "oxford_cat_dog_best.pt")

    print("Evaluation Classification Report")
    print(classification_report(y_true, y_pred))

    print("#" * 88)

# Also save the weights from the final epoch, regardless of their loss.
model.eval()
torch.save(model.state_dict(), "oxford_cat_dog_last.pt")