Image classification Using VGG-16
Dataset - CIFAR10

In [42]:
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import confusion_matrix
import itertools
import numpy as np
import matplotlib.pyplot as plt

2. load and transform dataset

In [43]:
# Preprocessing pipeline shared by the training and test datasets.
# NOTE(review): these per-channel statistics look like the commonly used
# ImageNet values rather than ones computed on CIFAR-10 -- confirm this is intended.
channel_mean = [0.485, 0.456, 0.406]
channel_std = [0.229, 0.224, 0.225]

preprocessing_steps = [
    transforms.Resize((224, 224)),                             # resize every image to 224x224
    transforms.ToTensor(),                                     # image -> PyTorch tensor
    transforms.Normalize(mean=channel_mean, std=channel_std),  # per-channel standardisation
]
transform = transforms.Compose(preprocessing_steps)

In [44]:
# Training split of CIFAR-10, kept under ./data and passed through `transform`.
trainset = torchvision.datasets.CIFAR10(
    root='./data',
    train=True,
    download=True,
    transform=transform,
)

# Shuffled mini-batches of 32 samples for the training loop.
trainloader = torch.utils.data.DataLoader(
    dataset=trainset,
    batch_size=32,
    shuffle=True,
)

In [45]:
# Held-out split of CIFAR-10 (train=False), preprocessed exactly like the training data.
testset = torchvision.datasets.CIFAR10(
    root='./data',
    train=False,
    download=True,
    transform=transform,
)

# Fixed-order mini-batches of 32 samples; no shuffling for evaluation.
testloader = torch.utils.data.DataLoader(
    dataset=testset,
    batch_size=32,
    shuffle=False,
)

3. define the vgg16 model

In [46]:
# Build VGG-16 with randomly initialised weights (training from scratch, no
# transfer learning). The factory is called without the `pretrained` keyword:
# that keyword is deprecated in recent torchvision releases and triggers a
# warning, while the no-argument default means "no pretrained weights" on both
# the legacy (`pretrained=`) and the current (`weights=`) API.
vgg16 = torchvision.models.vgg16()

print(vgg16)



VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

In [47]:
# Modify the classifier, step 1: look up how many inputs the stock output layer
# (classifier index 6) receives, so the replacement head can accept the same width.
stock_output_layer = vgg16.classifier[6]
num_features = stock_output_layer.in_features
print(num_features)

4096


In [48]:
# Modify the classifier, step 2: collect every classifier layer except the last
# one into a plain list; the discarded layer is the stock output layer.
*features, _stock_head = vgg16.classifier.children()
print(features)

[Linear(in_features=25088, out_features=4096, bias=True), ReLU(inplace=True), Dropout(p=0.5, inplace=False), Linear(in_features=4096, out_features=4096, bias=True), ReLU(inplace=True), Dropout(p=0.5, inplace=False)]


In [49]:
# Modify the classifier, step 3: attach a new fully connected output layer with
# 10 outputs (one per CIFAR-10 class).
# NOTE: `features` is mutated in place, so running this cell a second time
# appends a second output layer -- re-run the previous cell first.
features.append(nn.Linear(in_features=num_features, out_features=10))
print(features)

[Linear(in_features=25088, out_features=4096, bias=True), ReLU(inplace=True), Dropout(p=0.5, inplace=False), Linear(in_features=4096, out_features=4096, bias=True), ReLU(inplace=True), Dropout(p=0.5, inplace=False), Linear(in_features=4096, out_features=10, bias=True)]


In [50]:
# Modify the classifier, step 4: wrap the edited layer list in a Sequential
# container and install it as the model's classifier, then show the result.
rebuilt_classifier = nn.Sequential(*features)
vgg16.classifier = rebuilt_classifier
print(vgg16)

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

In [51]:
# Pick the compute device: the first CUDA GPU when one is available, else the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')
print(device)

cuda:0


In [52]:
# Move the model onto the selected device; the training and evaluation loops
# move each input batch to the same `device` before the forward pass.
vgg16 = vgg16.to(device)

4. define loss function and optimizer

In [53]:
# Loss: cross-entropy between the model's 10 output scores and the integer labels.
criterion = nn.CrossEntropyLoss()

# Optimiser: SGD with momentum over every parameter of the model.
optimizer = optim.SGD(
    params=vgg16.parameters(),
    momentum=0.9,
    lr=0.001,
)

5. train the model

In [None]:
num_epochs = 10
# Per-epoch history of average loss and accuracy, used later for plotting
losses = []
accuracies = []

for epoch in range(num_epochs):
    # Explicitly select training mode at the start of every epoch. The
    # classifier contains Dropout layers, so the cell must not depend on
    # whatever mode an earlier cell (e.g. an evaluation pass) left the model in.
    vgg16.train()

    running_loss = 0.0  # sum of the per-batch mean losses
    correct = 0  # number of correct predictions so far in this epoch
    total = 0  # number of samples seen so far in this epoch

    for inputs, labels in trainloader:
        inputs, labels = inputs.to(device), labels.to(device)

        # Standard optimisation step: clear old gradients, forward, backward, update
        optimizer.zero_grad()

        outputs = vgg16(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

        # Predicted class = index of the largest output score. `detach()` keeps
        # this bookkeeping out of the autograd graph (replaces the legacy `.data`).
        predicted = outputs.detach().argmax(dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    # Average of the per-batch losses, and accuracy over all samples of the epoch.
    # Both are measured on training batches while dropout is active.
    epoch_loss = running_loss / len(trainloader)
    epoch_accuracy = 100 * correct / total

    losses.append(epoch_loss)
    accuracies.append(epoch_accuracy)

    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {epoch_loss}, Accuracy: {epoch_accuracy}%")

Epoch 1/10, Loss: 1.7962326902803212, Accuracy: 34.084%
Epoch 2/10, Loss: 1.3775401706887755, Accuracy: 49.98%
Epoch 3/10, Loss: 1.1460358583995798, Accuracy: 58.778%
Epoch 4/10, Loss: 0.9565046871997421, Accuracy: 66.158%


6. save the model

In [None]:
# Write only the model's state dict (its parameters and buffers) to disk,
# in the notebook's working directory.
checkpoint_name = 'vgg16_cifar10_without_transfer_learning.pth'
torch.save(vgg16.state_dict(), checkpoint_name)

7. plot training loss and accuracy

In [None]:
import matplotlib.pyplot as plt

# `losses` and `accuracies` are the per-epoch lists filled in by the training loop.

# One figure holding two side-by-side panels (1 row, 2 columns)
fig, (loss_ax, accuracy_ax) = plt.subplots(1, 2, figsize=(15, 5))

# Left panel: training loss per epoch
loss_ax.plot(losses, label='Training Loss')
loss_ax.set_title("Training Loss")
loss_ax.set_xlabel("Epoch")
loss_ax.set_ylabel("Loss")
loss_ax.legend()

# Right panel: training accuracy per epoch
accuracy_ax.plot(accuracies, label='Training Accuracy')
accuracy_ax.set_title("Training Accuracy")
accuracy_ax.set_xlabel("Epoch")
accuracy_ax.set_ylabel("Accuracy (%)")
accuracy_ax.legend()

# Render the figure
plt.show()

8. confusion matrix

In [None]:
# Collect predictions and true labels over the whole test set
all_preds = []
all_labels = []

# Evaluate in inference mode: the classifier contains Dropout layers, which
# would otherwise randomly zero activations and make test predictions noisy.
# The previous mode is restored afterwards so that re-running the training
# cell is not silently affected by this cell.
was_training = vgg16.training
vgg16.eval()
try:
    with torch.no_grad():
        for images, labels in testloader:
            images, labels = images.to(device), labels.to(device)
            outputs = vgg16(images)
            predicted = outputs.argmax(dim=1)  # index of the largest output score
            all_preds.extend(predicted.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())
finally:
    vgg16.train(was_training)

# Compute confusion matrix (rows: true label, columns: predicted label)
cm = confusion_matrix(all_labels, all_preds)
plt.figure(figsize=(10,10))
plt.imshow(cm, interpolation='nearest', cmap=plt.cm.Blues)
plt.title('Confusion Matrix')
plt.colorbar()
tick_marks = np.arange(len(trainset.classes))
plt.xticks(tick_marks, trainset.classes, rotation=45)
plt.yticks(tick_marks, trainset.classes)

# Write each count into its cell; use white text on dark cells for contrast
thresh = cm.max() / 2.
for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
    plt.text(j, i, cm[i, j],
             horizontalalignment="center",
             color="white" if cm[i, j] > thresh else "black")
plt.ylabel('True label')
plt.xlabel('Predicted label')
# Lay out the figure only after the axis labels exist, so they are accounted for
plt.tight_layout()
plt.show()

9. load the model

In [None]:
from torchvision import models

# Recreate the architecture that was trained: a VGG-16 without pretrained
# weights. (No `pretrained` keyword: it is deprecated in recent torchvision,
# and the no-argument default is "no pretrained weights" on old and new APIs.)
model = models.vgg16()

# Modify the classifier to match the saved model: the stock output layer is
# replaced by a 10-output layer (CIFAR10 has 10 classes). A plain VGG-16 would
# not accept the saved state dict because the final layer shapes differ.
num_features = model.classifier[6].in_features
features = list(model.classifier.children())[:-1]  # Remove last layer
features.extend([nn.Linear(num_features, 10)])
model.classifier = nn.Sequential(*features)

# Load the state dict (model weights). The file name must be the one written by
# the save cell ('..._transfer_learning.pth'); `map_location` lets a checkpoint
# saved on a GPU be loaded on whichever device this session is using.
state_dict = torch.load('./vgg16_cifar10_without_transfer_learning.pth', map_location=device)
model.load_state_dict(state_dict)

# Move the model to the appropriate device (GPU or CPU)
model = model.to(device)

10. test the model

In [None]:
from PIL import Image
import torchvision.transforms as transforms

# 1. Load the image
image_path = 'test2.jpg'  # Change this to the path of your image
# Force three RGB channels: the normalisation below and the model's first
# convolution both expect exactly 3 channels, so grayscale or RGBA files would
# otherwise fail. The context manager closes the file handle once decoded.
with Image.open(image_path) as image_file:
    image = image_file.convert('RGB')

# 2. Preprocess the image (same steps as used for the training data)
transform = transforms.Compose([
    transforms.Resize((224, 224)),  # Resize the image to 224x224 pixels
    transforms.ToTensor(),  # Convert the image to a PyTorch tensor
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),  # Normalize it
])
input_image = transform(image).unsqueeze(0)  # Add a batch dimension

# 3. Put the model in evaluation mode (disables dropout)
model.eval()

# 4. Make a prediction on the notebook's selected device (GPU if available)
input_image = input_image.to(device)
model.to(device)
with torch.no_grad():
    output = model(input_image)

# 5. Interpret the output: the predicted class is the index of the largest
#    output score (the model emits raw scores, not log-probabilities)
predicted = output.argmax(dim=1)

# Class names in label-index order (modify this part according to your dataset classes)
class_labels = ['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']
predicted_class = class_labels[predicted.item()]

print("Predicted Class: ", predicted_class)

In [None]:
from PIL import Image
import torchvision.transforms as transforms

# 1. Load the image
image_path = '/content/bird.jpeg'  # Change this to the path of your image
# Force three RGB channels: the normalisation below and the model's first
# convolution both expect exactly 3 channels, so grayscale or RGBA files would
# otherwise fail. The context manager closes the file handle once decoded.
with Image.open(image_path) as image_file:
    image = image_file.convert('RGB')

# 2. Preprocess the image (same steps as used for the training data)
transform = transforms.Compose([
    transforms.Resize((224, 224)),  # Resize the image to 224x224 pixels
    transforms.ToTensor(),  # Convert the image to a PyTorch tensor
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),  # Normalize it
])
input_image = transform(image).unsqueeze(0)  # Add a batch dimension

# 3. Put the model in evaluation mode (disables dropout)
model.eval()

# 4. Make a prediction on the notebook's selected device (GPU if available)
input_image = input_image.to(device)
model.to(device)
with torch.no_grad():
    output = model(input_image)

# 5. Interpret the output: the predicted class is the index of the largest
#    output score (the model emits raw scores, not log-probabilities)
predicted = output.argmax(dim=1)

# Class names in label-index order (modify this part according to your dataset classes)
class_labels = ['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']
predicted_class = class_labels[predicted.item()]

print("Predicted Class: ", predicted_class)