In [17]:
from torch.utils.data import DataLoader, random_split
from torchvision import datasets, transforms, models
import torch
import numpy as np
from sklearn.metrics import confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics import classification_report
from torch.utils.tensorboard import SummaryWriter
from sklearn.manifold import TSNE
from torch.optim.lr_scheduler import StepLR
import time

In [18]:
# Transformations
# We're transforming the images to 224x224, as that's the input size for GoogleNet. We're also normalizing the images.

# Data Augmentation
# We're using data augmentation to increase the size of the dataset. We're using the following transformations:
# RandomHorizontalFlip
# RandomRotation
# RandomResizedCrop
# ColorJitter
# RandomAffine

transform = transforms.Compose([
    transforms.Resize(256),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(15),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    transforms.ColorJitter()
])

full_dataset = datasets.ImageFolder('/kaggle/input/dataset-30/mammals', transform=transform)
class_counts = {class_name: 0 for class_name in full_dataset.classes}
for _, index in full_dataset.samples:
    class_name = full_dataset.classes[index]
    class_counts[class_name] += 1
print("Total number of classes:", len(full_dataset.classes))
print("Class names:", full_dataset.classes)
print("Number of images per class:")
for class_name, count in class_counts.items():
    print(f" - {class_name}: {count}")

Total number of classes: 30
Class names: ['african_elephant', 'alpaca', 'american_bison', 'anteater', 'arctic_fox', 'armadillo', 'baboon', 'badger', 'brown_bear', 'camel', 'giraffe', 'groundhog', 'highland_cattle', 'horse', 'jackal', 'kangaroo', 'koala', 'mongoose', 'mountain_goat', 'opossum', 'orangutan', 'polar_bear', 'porcupine', 'red_panda', 'rhinoceros', 'weasel', 'wildebeest', 'wombat', 'yak', 'zebra']
Number of images per class:
 - african_elephant: 347
 - alpaca: 333
 - american_bison: 343
 - anteater: 299
 - arctic_fox: 315
 - armadillo: 331
 - baboon: 330
 - badger: 310
 - brown_bear: 300
 - camel: 254
 - giraffe: 305
 - groundhog: 309
 - highland_cattle: 311
 - horse: 303
 - jackal: 278
 - kangaroo: 317
 - koala: 319
 - mongoose: 287
 - mountain_goat: 328
 - opossum: 330
 - orangutan: 340
 - polar_bear: 356
 - porcupine: 321
 - red_panda: 329
 - rhinoceros: 274
 - weasel: 282
 - wildebeest: 307
 - wombat: 315
 - yak: 254
 - zebra: 272


In [32]:
train_size = int(0.7 * len(full_dataset))
test_validation_size = len(full_dataset) - train_size
validation_size = test_validation_size // 2
test_size = test_validation_size - validation_size

train_dataset, test_validation_dataset = random_split(full_dataset, [train_size, test_validation_size])
validation_dataset, test_dataset = random_split(test_validation_dataset, [validation_size, test_size])

print("Size of the entire Dataset: ", len(full_dataset))
print("Size of the training Dataset: ", len(train_dataset))
print("Size of the validation Dataset: ", len(validation_dataset))
print("Size of the test Dataset: ", len(test_dataset))

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
validation_loader = DataLoader(validation_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=True)

Size of the entire Dataset:  9299
Size of the training Dataset:  6509
Size of the validation Dataset:  1395
Size of the test Dataset:  1395


In [33]:
googlenet = models.googlenet(pretrained=False, aux_logits=False)
googlenet.fc = torch.nn.Linear(googlenet.fc.in_features, 30)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
googlenet.to(device)
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(googlenet.parameters(), lr=0.001, momentum=0.9)
scheduler = StepLR(optimizer, step_size=5, gamma=0.1)
writer = SummaryWriter()

Here, the training section begins. Let me first define the first 4 variables:

* Patience: The number of epochs to wait before stopping the training if the validation loss doesn't decrease.
* Best Validation Loss: The best validation loss we've seen so far.
* Patience Counter: The number of epochs we've waited so far.
* Epochs: The number of epochs we're training for. After this the training loop begins, in which the model is training. We're printing ALL the metrics (Training Loss, Validation Loss, Training Accuracy, Validation Accuracy) after every epoch, so that we know where we're heading with each epoch.

In [34]:
patience = 5
best_val_loss = np.inf
patience_counter = 0
epochs = 25
for epoch in range(epochs):
    epoch_start_time = time.time()
    train_loss = 0.0
    train_correct = 0
    total_train = 0
    googlenet.train()
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = googlenet(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()
        _, predicted = torch.max(outputs.data, 1)
        total_train += labels.size(0)
        train_correct += (predicted == labels).sum().item()

    train_accuracy = 100 * train_correct / total_train
    writer.add_scalar('Loss/Train', train_loss / len(train_loader), epoch)
    writer.add_scalar('Accuracy/Train', train_accuracy, epoch)

    googlenet.eval()
    val_loss = 0.0
    val_correct = 0
    total_val = 0
    with torch.no_grad():
        for images, labels in validation_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = googlenet(images)
            loss = criterion(outputs, labels)
            val_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            total_val += labels.size(0)
            val_correct += (predicted == labels).sum().item()

    val_accuracy = 100 * val_correct / total_val
    writer.add_scalar('Loss/Validation', val_loss / len(validation_loader), epoch)
    writer.add_scalar('Accuracy/Validation', val_accuracy, epoch)

    if val_loss / len(validation_loader) < best_val_loss:
        best_val_loss = val_loss / len(validation_loader)
        patience_counter = 0
    else:
        patience_counter += 1
        if patience_counter >= patience:
            print(f"Early stopping triggered at epoch {epoch+1}")
            break

    scheduler.step()
    print(f"Epoch {epoch+1}: Train Loss: {train_loss / len(train_loader):.4f}, "
      f"Train Accuracy: {train_accuracy:.2f}%, "
      f"Val Loss: {val_loss / len(validation_loader):.4f}, "
      f"Val Accuracy: {val_accuracy:.2f}%")
writer.close()

Epoch 1: Train Loss: 3.0493, Train Accuracy: 14.58%, Val Loss: 2.8152, Val Accuracy: 21.58%
Epoch 2: Train Loss: 2.5208, Train Accuracy: 28.74%, Val Loss: 2.9388, Val Accuracy: 23.73%
Epoch 3: Train Loss: 2.2600, Train Accuracy: 34.48%, Val Loss: 2.4368, Val Accuracy: 32.04%
Epoch 4: Train Loss: 2.0547, Train Accuracy: 40.74%, Val Loss: 2.2539, Val Accuracy: 37.99%
Epoch 5: Train Loss: 1.8810, Train Accuracy: 46.32%, Val Loss: 2.2110, Val Accuracy: 40.14%
Epoch 6: Train Loss: 1.5657, Train Accuracy: 55.17%, Val Loss: 1.6168, Val Accuracy: 53.98%
Epoch 7: Train Loss: 1.4612, Train Accuracy: 58.41%, Val Loss: 1.5954, Val Accuracy: 55.70%
Epoch 8: Train Loss: 1.3937, Train Accuracy: 60.44%, Val Loss: 1.5466, Val Accuracy: 56.13%
Epoch 9: Train Loss: 1.3463, Train Accuracy: 61.75%, Val Loss: 1.5314, Val Accuracy: 56.49%
Epoch 10: Train Loss: 1.3058, Train Accuracy: 62.84%, Val Loss: 1.4862, Val Accuracy: 58.14%
Epoch 11: Train Loss: 1.2431, Train Accuracy: 65.54%, Val Loss: 1.4455, Val Acc