# Convolutional Neural Networks for Image Classification

In [7]:
from datasets import load_dataset


# Identifier of the dataset repository to load; the result is bound to `ds`.
DATASET_ID = "pantelism/cats-vs-dogs"
ds = load_dataset(DATASET_ID)

dogs-vs-cats-subset.zip:   0%|          | 0.00/90.8M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/4000 [00:00<?, ? examples/s]

In [8]:
import torch
from datasets import load_dataset
from torch import nn, optim
from torch.utils.data import DataLoader, Dataset
from torchsummary import summary
from torchvision import transforms


# Use the first CUDA GPU when PyTorch reports one as available, else the CPU.
has_cuda = torch.cuda.is_available()
device = torch.device("cuda:0" if has_cuda else "cpu")
if has_cuda:
    # One value per line: current device index, device count, name of GPU 0.
    print(
        torch.cuda.current_device(),
        torch.cuda.device_count(),
        torch.cuda.get_device_name(0),
        sep="\n",
    )

device  # last expression of the cell, echoed as its output

device(type='cpu')

In [9]:
# Echo the loaded dataset object so its splits, features and row counts are shown.
ds

DatasetDict({
    train: Dataset({
        features: ['image', 'label'],
        num_rows: 4000
    })
})

In [None]:
# Carve the single `train` split into three parts, using seed 42 both times:
# hold out 20% first, then halve that hold-out into validation and test.
split = ds["train"].train_test_split(test_size=0.2, seed=42)
val_test = split["test"].train_test_split(test_size=0.5, seed=42)

train_dataset, val_dataset, test_dataset = (
    split["train"],
    val_test["train"],
    val_test["test"],
)

In [11]:
# Report how many examples ended up in each subset.
for heading, subset in (
    ("Train:", train_dataset),
    ("Val:", val_dataset),
    ("Test:", test_dataset),
):
    print(heading, len(subset))


Train: 3200
Val: 400
Test: 400


In [12]:
# Preprocessing pipeline: resize to 150x150, convert to a tensor, then
# normalize each of the three channels with mean 0.5 and std 0.5.
_preprocess_steps = [
    transforms.Resize((150, 150)),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
]
transform = transforms.Compose(_preprocess_steps)

In [13]:
class HFDataset(Dataset):
    """Map-style wrapper exposing an indexable collection as (image, label) pairs.

    Every record must provide an ``"image"`` entry with a ``convert`` method
    and a ``"label"`` entry that ``int`` can convert.
    """

    def __init__(self, hf_ds, transform=None):
        """Keep the underlying collection and an optional per-image callable."""
        self.ds, self.transform = hf_ds, transform

    def __len__(self):
        """Number of records in the wrapped collection."""
        return len(self.ds)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for record ``idx``.

        The image is converted to RGB and, when a truthy ``transform`` was
        supplied, passed through it; the label is cast to ``int``.
        """
        record = self.ds[idx]
        rgb = record["image"].convert("RGB")
        sample = self.transform(rgb) if self.transform else rgb
        return sample, int(record["label"])

# Wrap each split with the shared preprocessing pipeline.
train_ds, val_ds, test_ds = (
    HFDataset(hf_split, transform)
    for hf_split in (train_dataset, val_dataset, test_dataset)
)

# Batches of 32 everywhere; only the training loader shuffles.
train_loader = DataLoader(train_ds, batch_size=32, shuffle=True)
val_loader, test_loader = (
    DataLoader(eval_ds, batch_size=32, shuffle=False)
    for eval_ds in (val_ds, test_ds)
)



In [14]:
class CNN(nn.Module):
    """Four conv/ReLU/max-pool stages followed by a single-logit linear head.

    The head expects a flattened 128 x 9 x 9 feature map, which is what the
    four 2x poolings produce from a 3 x 150 x 150 input.
    """

    def __init__(self):
        super().__init__()
        stages = []
        # (in_channels, out_channels) of each 3x3, padding-1 convolution.
        for in_ch, out_ch in ((3, 32), (32, 64), (64, 128), (128, 128)):
            stages += [
                nn.Conv2d(in_ch, out_ch, 3, padding=1),
                nn.ReLU(),
                nn.MaxPool2d(2),
            ]
        self.features = nn.Sequential(*stages)
        self.classifier = nn.Sequential(nn.Linear(128 * 9 * 9, 1))

    def forward(self, x):
        """Return raw logits with one column per sample; no sigmoid is applied."""
        feature_maps = self.features(x)
        flat = feature_maps.view(feature_maps.size(0), -1)
        return self.classifier(flat)

In [15]:
# Build the network on the selected device and print a per-layer summary
# for a 3 x 150 x 150 input.
model = CNN()
model = model.to(device)
summary(model, (3, 150, 150))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 32, 150, 150]             896
              ReLU-2         [-1, 32, 150, 150]               0
         MaxPool2d-3           [-1, 32, 75, 75]               0
            Conv2d-4           [-1, 64, 75, 75]          18,496
              ReLU-5           [-1, 64, 75, 75]               0
         MaxPool2d-6           [-1, 64, 37, 37]               0
            Conv2d-7          [-1, 128, 37, 37]          73,856
              ReLU-8          [-1, 128, 37, 37]               0
         MaxPool2d-9          [-1, 128, 18, 18]               0
           Conv2d-10          [-1, 128, 18, 18]         147,584
             ReLU-11          [-1, 128, 18, 18]               0
        MaxPool2d-12            [-1, 128, 9, 9]               0
           Linear-13                    [-1, 1]          10,369
Total params: 251,201
Trainable params:

In [61]:
# Re-select the device and start again from a freshly initialised CNN.
use_cuda = torch.cuda.is_available()
device = torch.device('cuda' if use_cuda else 'cpu')
model = CNN()  # the CNN class defined in this notebook
model = model.to(device)

# The model returns raw logits (no sigmoid), hence BCEWithLogitsLoss.
criterion = nn.BCEWithLogitsLoss()
# RMSprop over all model parameters with a learning rate of 1e-4.
optimizer = optim.RMSprop(model.parameters(), lr=1e-4)


In [62]:
# Per-epoch history: accuracy and mean per-sample loss for both splits.
train_acc, val_acc, train_loss, val_loss = [], [], [], []

for epoch in range(20):
    # ---- training pass: weights are updated after every batch ----
    model.train()
    running_loss, correct, total = 0.0, 0, 0
    for images, labels in train_loader:
        # Labels become float column vectors so they line up with the logits.
        images, labels = images.to(device), labels.float().unsqueeze(1).to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        # With a mean-reducing criterion (the BCEWithLogitsLoss default),
        # `loss` is a batch average. Weight it by the batch size so that the
        # epoch figure is a true per-sample mean even when the final batch is
        # smaller than the rest.
        n_batch = labels.size(0)
        running_loss += loss.item() * n_batch
        probs = torch.sigmoid(outputs)
        preds = (probs > 0.5).float()
        correct += (preds == labels).sum().item()
        total += n_batch
    train_acc.append(correct/total)
    train_loss.append(running_loss/total)

    # ---- validation pass: no gradients, no weight updates ----
    model.eval()
    v_loss, v_correct, v_total = 0.0, 0, 0
    with torch.no_grad():
        for images, labels in val_loader:
            images, labels = images.to(device), labels.float().unsqueeze(1).to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)
            n_batch = labels.size(0)
            v_loss += loss.item() * n_batch  # same sample weighting as above
            probs = torch.sigmoid(outputs)
            preds = (probs > 0.5).float()
            v_correct += (preds == labels).sum().item()
            v_total += n_batch
    val_acc.append(v_correct/v_total)
    val_loss.append(v_loss/v_total)
    print(f"Epoch {epoch+1} | Train Acc: {train_acc[-1]:.3f} | Val Acc: {val_acc[-1]:.3f}")


Epoch 1 | Train Acc: 0.553 | Val Acc: 0.568
Epoch 2 | Train Acc: 0.635 | Val Acc: 0.600
Epoch 3 | Train Acc: 0.678 | Val Acc: 0.642
Epoch 4 | Train Acc: 0.701 | Val Acc: 0.660
Epoch 5 | Train Acc: 0.715 | Val Acc: 0.670
Epoch 6 | Train Acc: 0.724 | Val Acc: 0.682
Epoch 7 | Train Acc: 0.742 | Val Acc: 0.688
Epoch 8 | Train Acc: 0.751 | Val Acc: 0.680
Epoch 9 | Train Acc: 0.757 | Val Acc: 0.703
Epoch 10 | Train Acc: 0.769 | Val Acc: 0.735
Epoch 11 | Train Acc: 0.775 | Val Acc: 0.723
Epoch 12 | Train Acc: 0.782 | Val Acc: 0.728
Epoch 13 | Train Acc: 0.782 | Val Acc: 0.715
Epoch 14 | Train Acc: 0.787 | Val Acc: 0.685
Epoch 15 | Train Acc: 0.793 | Val Acc: 0.738
Epoch 16 | Train Acc: 0.802 | Val Acc: 0.733
Epoch 17 | Train Acc: 0.807 | Val Acc: 0.740
Epoch 18 | Train Acc: 0.812 | Val Acc: 0.752
Epoch 19 | Train Acc: 0.816 | Val Acc: 0.735
Epoch 20 | Train Acc: 0.825 | Val Acc: 0.752


In [63]:
# Write the model's state dict (not the module object itself) to disk.
WEIGHTS_PATH = 'cat_dog_cnn_model.pth'
torch.save(model.state_dict(), WEIGHTS_PATH)

# Don't grade this because I didn't have the time

In [65]:
# Attempt 2, because the first attempt performed poorly
import torch


# Report whether PyTorch's MPS backend is available in this environment.
mps_ready = torch.backends.mps.is_available()
print(mps_ready)

False


In [16]:
import torch


# Smoke test: allocate a one-element tensor on the MPS device when the backend
# is available, otherwise just say that it is missing.
if not torch.backends.mps.is_available():
    print ("MPS device not found.")
else:
    mps_device = torch.device("mps")
    x = torch.ones(1, device=mps_device)
    print (x)

tensor([1.], device='mps:0')


In [17]:
import platform

# Print the machine type string reported by the platform module.
print(platform.machine())

arm64


In [None]:
import torch
from torch import nn, optim


class CNN(nn.Module):
    """Four conv/ReLU/max-pool stages, global average pooling, one-logit head.

    The adaptive average pool collapses each of the 128 feature maps to a
    single value, so the linear layer always receives 128 features per sample.
    """

    def __init__(self):
        super().__init__()
        widths = (3, 32, 64, 128, 128)
        stages = []
        # Consecutive width pairs give each convolution's (in, out) channels.
        for in_ch, out_ch in zip(widths, widths[1:]):
            stages.extend(
                (nn.Conv2d(in_ch, out_ch, 3, padding=1), nn.ReLU(), nn.MaxPool2d(2))
            )
        self.features = nn.Sequential(*stages)
        self.gap = nn.AdaptiveAvgPool2d(1)   # -> (B,128,1,1)
        self.classifier = nn.Linear(128, 1)  # -> expects (B,128)

    def forward(self, x):
        """Return raw logits of shape (B,1); no sigmoid is applied."""
        pooled = self.gap(self.features(x))   # (B,128,H,W) -> (B,128,1,1)
        flat = pooled.flatten(1)              # (B,128)
        return self.classifier(flat)          # (B,1)

# Prefer the MPS backend when available, otherwise fall back to the CPU.
mps_ok = torch.backends.mps.is_available()
device = torch.device('mps' if mps_ok else 'cpu')
print(device)
model = CNN()
model = model.to(device)

# Loss on raw logits, Adam with weight decay, and a scheduler that halves the
# learning rate once the monitored value has stopped decreasing for more than
# `patience` epochs.
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters(), lr=3e-4, weight_decay=1e-4)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(
    optimizer, mode='min', factor=0.5, patience=2
)


mps


In [None]:
# Early-stopping bookkeeping: best validation loss seen so far, how many
# consecutive non-improving epochs are tolerated, and the current count.
best_val = float('inf'); patience = 5; bad = 0
# Reset explicitly so re-running this cell can never restore a stale snapshot
# left in the kernel by an earlier run.
best_state = None
train_acc, val_acc, train_loss, val_loss = [], [], [], []

for epoch in range(40):
    # ---- training pass ----
    model.train()
    tot_loss, tot_correct, tot = 0.0, 0, 0
    for images, labels in train_loader:
        images = images.to(device)
        # Float column vector so the targets line up with the logits.
        labels = labels.float().unsqueeze(1).to(device)

        optimizer.zero_grad()
        logits = model(images)
        loss = criterion(logits, labels)
        loss.backward()
        optimizer.step()

        # With a mean-reducing criterion (the BCEWithLogitsLoss default),
        # `loss` is a batch average. Weight it by the batch size so the epoch
        # figure is a true per-sample mean even if the last batch is short.
        n_batch = labels.size(0)
        tot_loss += loss.item() * n_batch
        preds = (torch.sigmoid(logits) > 0.5).float()
        tot_correct += (preds == labels).sum().item()
        tot += n_batch

    tr_loss = tot_loss/tot
    tr_acc = tot_correct/tot

    # ---- validation pass (no gradients) ----
    model.eval()
    v_loss, v_correct, v_tot = 0.0, 0, 0
    with torch.no_grad():
        for images, labels in val_loader:
            images = images.to(device)
            labels = labels.float().unsqueeze(1).to(device)
            logits = model(images)
            loss = criterion(logits, labels)
            n_batch = labels.size(0)
            v_loss += loss.item() * n_batch  # same sample weighting as above
            v_preds = (torch.sigmoid(logits) > 0.5).float()
            v_correct += (v_preds == labels).sum().item()
            v_tot += n_batch

    va_loss = v_loss/v_tot
    va_acc = v_correct/v_tot

    # The scheduler monitors validation loss.
    scheduler.step(va_loss)

    train_acc.append(tr_acc); val_acc.append(va_acc)
    train_loss.append(tr_loss); val_loss.append(va_loss)
    print(f"Epoch {epoch+1:02d} | train acc {tr_acc:.3f} loss {tr_loss:.3f} | val acc {va_acc:.3f} loss {va_loss:.3f}")

    # simple early stop: an epoch only counts as an improvement when it beats
    # the best loss by more than 1e-4
    if va_loss < best_val - 1e-4:
        best_val = va_loss
        bad = 0
        # Snapshot an independent copy of the weights. A plain `.cpu()` hands
        # back the very same tensor when the model already lives on the CPU,
        # so the "best" weights would keep changing with every later update;
        # `copy=True` forces a fresh tensor on every device.
        best_state = {
            k: v.detach().to('cpu', copy=True)
            for k, v in model.state_dict().items()
        }
    else:
        bad += 1
        if bad >= patience:
            print("Early stopping.")
            break

# `best_state` stays None only if no epoch ever improved on the initial
# infinite loss, e.g. when the validation loss is NaN from the start.
if best_state is None:
    raise RuntimeError(
        "Validation loss never improved (it may be NaN); "
        "there is no best checkpoint to restore."
    )
model.load_state_dict({k: v.to(device) for k, v in best_state.items()})


Epoch 01 | train acc 0.517 loss 0.690 | val acc 0.545 loss 0.689
