# VGGNet
## Key Elements
#### Objective:
Explore the effect of convolutional network depth on image recognition accuracy.

#### Innovations:
- Use of small (3x3) filters consistently throughout the network instead of larger ones like 11x11 or 5x5.
- Doubling the number of filters after every pooling operation, enabling a deeper network to capture more complex features.
- Simplicity in design; all convolution layers have the same kernel size and stride.

## Architecture
- Input: 224x224 RGB images.
- Layers: A series of 3x3 convolutional layers, followed by a pooling layer.
- Blocks: Each block contains multiple convolution layers, followed by a pooling layer.
- Fully Connected: Three dense layers at the end with ReLU and Dropout.
- Output: Softmax activation for classification.

I will be implementing VGG-16.

In [None]:
#importing all the necessary libraries
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader


In [None]:
class VGG16(nn.Module):
    """VGG-16 image classifier: 13 conv layers (3x3) plus 3 dense layers.

    The convolutional stack is five blocks of 3x3 convolutions (padding 1)
    with ReLU, each block closed by a 2x2 max-pool that halves the spatial
    size. The pooled feature map is resized to 7x7, flattened, and passed
    through three fully connected layers. The network returns raw logits;
    pair it with ``nn.CrossEntropyLoss`` (which applies log-softmax itself).

    Args:
        num_classes: Size of the output layer.
        init_weights: When True (default), apply He/Kaiming initialisation to
            the conv layers and a small-variance normal to the dense layers.
            A stack this deep without batch norm is prone to starting with
            near-constant outputs under PyTorch's default layer init, which
            leaves the cross-entropy loss stuck at ``ln(num_classes)``.
            Pass False to keep the PyTorch defaults (e.g. right before
            loading a checkpoint).
        dropout: Drop probability for the two classifier dropout layers.

    Input: float tensor of shape ``(N, 3, H, W)``. 224x224 is the reference
    size; because of the adaptive pooling any ``H, W >= 32`` works.
    """

    # Output channels of each conv layer; "M" marks the max-pool ending a block.
    _CFG = (
        64, 64, "M",
        128, 128, "M",
        256, 256, 256, "M",
        512, 512, 512, "M",
        512, 512, 512, "M",
    )

    def __init__(self, num_classes=10, init_weights=True, dropout=0.5):
        super().__init__()
        self.features = self._make_features(self._CFG)
        # Identity for the 7x7 map produced by 224x224 inputs; for other input
        # sizes it resizes the map so the first Linear layer always sees
        # 512 * 7 * 7 features. It has no parameters, so state_dict keys are
        # the same as without it.
        self.avgpool = nn.AdaptiveAvgPool2d((7, 7))
        self.classifier = nn.Sequential(
            nn.Linear(512 * 7 * 7, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(p=dropout),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(p=dropout),
            nn.Linear(4096, num_classes),
        )
        if init_weights:
            self._initialize_weights()

    @staticmethod
    def _make_features(cfg, in_channels=3):
        """Build the conv/ReLU/max-pool stack described by ``cfg``."""
        layers = []
        for item in cfg:
            if item == "M":
                layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
            else:
                layers.append(nn.Conv2d(in_channels, item, kernel_size=3, padding=1))
                layers.append(nn.ReLU(inplace=True))
                in_channels = item
        return nn.Sequential(*layers)

    def _initialize_weights(self):
        """Initialise conv layers with Kaiming-normal and dense layers with N(0, 0.01)."""
        for module in self.modules():
            if isinstance(module, nn.Conv2d):
                nn.init.kaiming_normal_(module.weight, mode="fan_out", nonlinearity="relu")
                nn.init.zeros_(module.bias)
            elif isinstance(module, nn.Linear):
                nn.init.normal_(module.weight, 0, 0.01)
                nn.init.zeros_(module.bias)

    def forward(self, x):
        """Return class logits of shape ``(N, num_classes)`` for a batch of images."""
        x = self.features(x)
        x = self.avgpool(x)
        # flatten (unlike view) also accepts non-contiguous feature maps.
        x = x.flatten(1)
        return self.classifier(x)


In [None]:
# Training function
def train_model(model, dataloader, criterion, optimizer, epochs=3, device="cuda"):
    """Train ``model`` for ``epochs`` passes over ``dataloader``.

    Args:
        model: The ``nn.Module`` to train; it is moved to ``device`` in place.
        dataloader: Iterable yielding ``(inputs, labels)`` tensor pairs.
        criterion: Callable ``criterion(outputs, labels)`` returning a scalar loss.
        optimizer: Optimizer already bound to ``model``'s parameters.
        epochs: Number of full passes over ``dataloader``.
        device: Target device (string or ``torch.device``). If a CUDA device is
            requested but CUDA is unavailable, training falls back to the CPU
            with a warning instead of raising.

    Returns:
        A list with the mean batch loss of every epoch, in order. An epoch
        that saw no batches contributes ``nan``.
    """
    import warnings

    target = torch.device(device)
    if target.type == "cuda" and not torch.cuda.is_available():
        warnings.warn("CUDA is not available; training on the CPU instead.")
        target = torch.device("cpu")

    model.to(target)
    history = []
    for epoch in range(epochs):
        model.train()
        running_loss = 0.0
        # Counted by hand so loaders without __len__ work and an empty loader
        # cannot trigger a division by zero.
        num_batches = 0
        for inputs, labels in dataloader:
            inputs, labels = inputs.to(target), labels.to(target)

            # Zero the gradients left over from the previous step
            optimizer.zero_grad()

            # Forward pass
            outputs = model(inputs)
            loss = criterion(outputs, labels)

            # Backward pass and optimization
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            num_batches += 1

        epoch_loss = running_loss / num_batches if num_batches else float("nan")
        history.append(epoch_loss)
        print(f"Epoch {epoch+1}/{epochs}, Loss: {epoch_loss:.4f}")

    return history


In [None]:
# Data preparation
# CIFAR-10 images are 32x32; they are upscaled to the 224x224 resolution the
# VGG-16 layout was designed for, then normalised per channel.
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    # NOTE: these are the usual ImageNet channel statistics, not CIFAR-10's own.
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# CIFAR-10 dataset for simplicity
train_dataset = datasets.CIFAR10(root="./data", train=True, download=True, transform=transform)
train_loader = DataLoader(train_dataset, batch_size=128, shuffle=True)

# Pick the device explicitly so the cell also runs on CPU-only machines.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Instantiate model, loss function, and optimizer
model = VGG16(num_classes=10)
criterion = nn.CrossEntropyLoss()
# With lr=1e-3 the loss stayed at ~2.3027 (= ln 10, i.e. chance level for ten
# classes) for every epoch. Adam's default step is commonly too large for a
# VGG-16 without batch normalisation, so a ten-times smaller rate is used.
optimizer = optim.Adam(model.parameters(), lr=1e-4)

# Train the model (assigned so the notebook does not echo a return value)
_ = train_model(model, train_loader, criterion, optimizer, epochs=5, device=device)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


100%|██████████| 170M/170M [00:03<00:00, 48.5MB/s]


Extracting ./data/cifar-10-python.tar.gz to ./data
Epoch 1/5, Loss: 2.3042
Epoch 2/5, Loss: 2.3028
Epoch 3/5, Loss: 2.3028
Epoch 4/5, Loss: 2.3027
Epoch 5/5, Loss: 2.3027
