# Coding DL set1

Question 1: Image Preprocessing for Inference (PyTorch)
Problem: Write a function to load an image and preprocess it for inference.

In [4]:
from PIL import Image
import torch
from torchvision import transforms

def preprocess_image(image_path):
    """
    Load an image from disk and preprocess it for inference with PyTorch.

    The image is converted to RGB, resized to 224x224 (a plain resize: no
    cropping is performed, so the aspect ratio is not preserved), converted
    to a tensor and normalized per channel.

    Args:
        image_path: Path of the image file to load.

    Returns:
        A tensor of shape (1, 3, 224, 224).
    """

    # Define transforms (resize → convert to tensor → normalize)
    preprocess = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),  # Converts image to [C, H, W] in range [0,1]
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],  # Standard ImageNet means
            std=[0.229, 0.224, 0.225]    # Matching per-channel stds
        )
    ])

    # Load image. The context manager releases the file handle even if
    # decoding fails; convert() returns a separate, fully loaded image, so
    # `image` remains usable after the file has been closed.
    with Image.open(image_path) as raw:
        image = raw.convert("RGB")

    # Apply transforms
    img_tensor = preprocess(image)

    # Add batch dimension → (1, 3, 224, 224)
    img_tensor = img_tensor.unsqueeze(0)

    return img_tensor


# Example usage: preprocess one sample file and report the resulting batch shape
tensor = preprocess_image(image_path="tiger.jpg")
print("Preprocessed tensor shape:", tensor.shape)


Preprocessed tensor shape: torch.Size([1, 3, 224, 224])


Question 2: Predict on New Image with a Trained Model
Problem: Perform prediction and get the class label.

In [35]:
# Step 1: Import required libraries
from PIL import Image
import torch
from torchvision import transforms
import torch.nn as nn
import os

# Step 2: Preprocess image function
def preprocess_image(image_path):
    """Read one image file and return it as a normalized batch of size 1."""
    channel_mean = [0.485, 0.456, 0.406]
    channel_std = [0.229, 0.224, 0.225]

    # Resize -> tensor -> per-channel normalization, applied in that order.
    steps = [
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=channel_mean, std=channel_std),
    ]
    pipeline = transforms.Compose(steps)

    rgb_image = Image.open(image_path).convert("RGB")
    single = pipeline(rgb_image)

    # Prepend the batch axis expected by the model.
    return single.unsqueeze(0)


# Step 3: Define a simple demo model
class SimpleModel(nn.Module):
    """Simple model for demo: one linear layer over the flattened input.

    Args:
        in_features: Number of values per flattened sample
            (default 3 * 224 * 224).
        num_classes: Number of output logits (default 2).
    """
    def __init__(self, in_features=3 * 224 * 224, num_classes=2):
        super().__init__()
        self.fc = nn.Linear(in_features, num_classes)

    def forward(self, x):
        """Flatten everything after the batch axis and return the logits."""
        # torch.flatten copies when needed, so non-contiguous inputs (e.g. a
        # permuted HWC -> CHW tensor) work; Tensor.view would raise on them.
        x = torch.flatten(x, 1)
        return self.fc(x)


# Step 4: Prediction function
def predict_image(model, image_tensor, class_names):
    """Run the model on a single preprocessed image and return its class name."""
    # Inference only, so skip autograd bookkeeping for the forward pass.
    with torch.no_grad():
        logits = model(image_tensor)

    # Index of the highest-scoring class along the class axis.
    best = torch.max(logits, dim=1).indices
    return class_names[best.item()]


# Step 5: Main script
if __name__ == "__main__":
    # NOTE: the model is freshly initialised here and is never trained or
    # loaded from a checkpoint, so the predicted labels are arbitrary. This
    # script only demonstrates the inference pipeline end to end.
    model = SimpleModel()        # Create model
    model.eval()                 # Set model to eval mode

    class_names = ["Cat", "Dog"] # Class labels
    folder = "sample_image/input_image"   # Input folder path

    print("Running predictions...\n")

    # Step 6: Loop through images and predict.
    # os.listdir() returns entries in arbitrary order, so sort them to make
    # the printed output reproducible across runs and platforms.
    for f in sorted(os.listdir(folder)):
        img_path = os.path.join(folder, f)
        # Accept image formats; skip directories that merely look like images.
        if f.lower().endswith((".jpg", ".jpeg", ".png")) and os.path.isfile(img_path):
            img_tensor = preprocess_image(img_path)         # Preprocess
            label = predict_image(model, img_tensor, class_names)  # Predict
            print(f"{f}  ->  Predicted: {label}")



Running predictions...

cat1.jpg  ->  Predicted: Cat
dog1.jpg  ->  Predicted: Dog
dog4.jpg  ->  Predicted: Dog


Question 3: Build a CNN to classify CIFAR-10 images (>60% accuracy)

This CNN typically reaches roughly 65–72% test accuracy after 5 epochs (the run recorded below scored 72.31%).

In [37]:
# Step 1: Import libraries
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms

# Step 2: Convert images to tensors, then normalize each channel
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(
        mean=(0.4914, 0.4822, 0.4465),
        std=(0.2470, 0.2435, 0.2616),
    ),
])

# Step 3: CIFAR-10 train and test splits, both stored under ./data
train_dataset = torchvision.datasets.CIFAR10(
    './data', train=True, transform=transform, download=True
)
test_dataset = torchvision.datasets.CIFAR10(
    './data', train=False, transform=transform, download=True
)

# Step 4: Mini-batch loaders; only the training split is shuffled
train_loader = torch.utils.data.DataLoader(
    dataset=train_dataset, batch_size=64, shuffle=True
)
test_loader = torch.utils.data.DataLoader(
    dataset=test_dataset, batch_size=64, shuffle=False
)

# Step 5: Build CNN model
class SimpleCNN(nn.Module):
    """Two conv/ReLU/pool stages followed by a two-layer classifier with 10 outputs."""

    def __init__(self):
        super().__init__()

        # Each stage: 3x3 conv with padding 1, ReLU, then a 2x2 max-pool.
        stage_one = [nn.Conv2d(3, 32, kernel_size=3, padding=1), nn.ReLU(), nn.MaxPool2d(2)]
        stage_two = [nn.Conv2d(32, 64, kernel_size=3, padding=1), nn.ReLU(), nn.MaxPool2d(2)]
        self.conv_layers = nn.Sequential(*stage_one, *stage_two)

        # The first linear layer expects 64 channels of 8x8 features,
        # i.e. a 32x32 input after the two 2x2 pools above.
        head = [
            nn.Flatten(),
            nn.Linear(64 * 8 * 8, 128),
            nn.ReLU(),
            nn.Linear(128, 10),  # 10 classes
        ]
        self.fc_layers = nn.Sequential(*head)

    def forward(self, x):
        features = self.conv_layers(x)
        return self.fc_layers(features)

# Step 6: Initialize model, loss, optimizer
model = SimpleCNN()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Step 7: Training loop
print("\nTraining started...\n")

# Put the model in training mode explicitly. SimpleCNN has no Dropout or
# BatchNorm, so this is a no-op today, but it keeps the loop correct if such
# layers are added and matches the other training loops in this notebook.
model.train()

for epoch in range(5):  # 5 epochs is enough to clear the 60% target
    # Sum of the per-batch losses over the epoch (a total, not an average).
    running_loss = 0.0

    for images, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    print(f"Epoch {epoch+1}, Loss: {running_loss:.2f}")

# Step 8: Evaluate accuracy on test set
model.eval()  # inference mode for any mode-dependent layers
correct = 0
total = 0

with torch.no_grad():
    for images, labels in test_loader:
        outputs = model(images)
        _, predicted = torch.max(outputs, 1)

        total += labels.size(0)
        correct += (predicted == labels).sum().item()

accuracy = 100 * correct / total
print(f"\nTest Accuracy: {accuracy:.2f}%")




Training started...

Epoch 1, Loss: 1010.58
Epoch 2, Loss: 720.50
Epoch 3, Loss: 588.19
Epoch 4, Loss: 490.27
Epoch 5, Loss: 404.26

Test Accuracy: 72.31%


Question 4: Identify Overfitting + Fix it using Dropout & Early Stopping

(using MNIST dataset)

This version is easy, exam-ready, and fully working.

In [38]:
# Step 1: Import libraries
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms

# Step 2: MNIST splits converted to tensors (no further normalization)
transform = transforms.ToTensor()

train_dataset = torchvision.datasets.MNIST(
    './mnist', train=True, transform=transform, download=True
)
# The train=False split is used as the validation set in this cell.
val_dataset = torchvision.datasets.MNIST(
    './mnist', train=False, transform=transform, download=True
)

# Only the training loader shuffles; validation order stays fixed.
train_loader = torch.utils.data.DataLoader(
    dataset=train_dataset, batch_size=64, shuffle=True
)
val_loader = torch.utils.data.DataLoader(
    dataset=val_dataset, batch_size=64, shuffle=False
)

# Step 3: Build a model with DROPOUT to reduce overfitting
class MNISTModel(nn.Module):
    """Fully connected 784 -> 256 -> 128 -> 10 classifier with dropout after each hidden layer."""

    def __init__(self):
        super().__init__()
        self.flatten = nn.Flatten()

        drop_prob = 0.3  # applied after both hidden layers to curb overfitting
        layers = [
            nn.Linear(28 * 28, 256), nn.ReLU(), nn.Dropout(drop_prob),
            nn.Linear(256, 128), nn.ReLU(), nn.Dropout(drop_prob),
            nn.Linear(128, 10),
        ]
        self.fc = nn.Sequential(*layers)

    def forward(self, x):
        flat = self.flatten(x)
        logits = self.fc(flat)
        return logits

# Step 4: Initialize model, loss, optimizer
model = MNISTModel()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Step 5: Early Stopping Setup
best_val_loss = float('inf')
best_state = None     # Copy of the weights from the best validation epoch
patience = 3          # Stop if no improvement for 3 epochs
patience_counter = 0

# Step 6: Training loop with validation + early stopping
print("\nTraining started...\n")

for epoch in range(20):  # Max 20 epochs
    model.train()        # Enables dropout for the training pass
    total_loss = 0       # Sum of per-batch training losses (not an average)

    for images, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()

    # Step 7: Compute validation loss
    model.eval()         # Disables dropout so validation is deterministic
    val_loss = 0         # Sum of per-batch validation losses (not an average)
    correct = 0
    total = 0

    with torch.no_grad():
        for images, labels in val_loader:
            outputs = model(images)
            loss = criterion(outputs, labels)
            val_loss += loss.item()

            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    val_acc = 100 * correct / total

    # The two losses are sums over different numbers of batches, so compare
    # their trends across epochs rather than their absolute values.
    print(f"Epoch {epoch+1}: Train Loss = {total_loss:.2f},  "
          f"Val Loss = {val_loss:.2f},  Val Acc = {val_acc:.2f}%")

    # Step 8: Early stopping logic
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        patience_counter = 0   # Reset
        # state_dict() returns references to the live tensors, so clone them;
        # otherwise later optimizer steps would overwrite the snapshot.
        best_state = {name: t.detach().clone()
                      for name, t in model.state_dict().items()}
    else:
        patience_counter += 1
        print(f" → No improvement. Early stopping in {patience - patience_counter} epochs.")

        if patience_counter >= patience:
            print("\nEarly stopping activated!")
            break

# Restore the weights from the best validation epoch. Without this the model
# keeps the weights of the last epoch, which is by definition not the best
# one whenever early stopping triggered.
if best_state is not None:
    model.load_state_dict(best_state)

print("\nTraining complete!")


100.0%
100.0%
100.0%
100.0%



Training started...

Epoch 1: Train Loss = 328.85,  Val Loss = 21.42,  Val Acc = 95.64%
Epoch 2: Train Loss = 146.18,  Val Loss = 16.29,  Val Acc = 96.85%
Epoch 3: Train Loss = 110.26,  Val Loss = 13.62,  Val Acc = 97.25%
Epoch 4: Train Loss = 91.06,  Val Loss = 12.70,  Val Acc = 97.51%
Epoch 5: Train Loss = 78.04,  Val Loss = 11.55,  Val Acc = 97.86%
Epoch 6: Train Loss = 69.19,  Val Loss = 11.14,  Val Acc = 97.86%
Epoch 7: Train Loss = 64.39,  Val Loss = 11.06,  Val Acc = 97.95%
Epoch 8: Train Loss = 55.88,  Val Loss = 10.79,  Val Acc = 98.03%
Epoch 9: Train Loss = 54.10,  Val Loss = 10.05,  Val Acc = 98.23%
Epoch 10: Train Loss = 50.07,  Val Loss = 10.41,  Val Acc = 98.23%
 → No improvement. Early stopping in 2 epochs.
Epoch 11: Train Loss = 44.64,  Val Loss = 10.67,  Val Acc = 98.26%
 → No improvement. Early stopping in 1 epochs.
Epoch 12: Train Loss = 42.98,  Val Loss = 9.99,  Val Acc = 98.25%
Epoch 13: Train Loss = 43.77,  Val Loss = 10.53,  Val Acc = 98.14%
 → No improvement. E

Question 5: Transfer Learning with Pretrained VGG16 (Cats vs Dogs)
Problem: Use VGG16 for binary classification with fine-tuning.
Collect a dataset from one of the sites below (or any other source):
Kaggle Datasets: https://www.kaggle.com/datasets
Google Dataset Search: https://datasetsearch.research.google.com
Papers with Code – Datasets: https://paperswithcode.com/datasets
Roboflow Universe: https://universe.roboflow.com
ImageNet: https://image-net.org/


In [39]:
#Download Link:
# https://www.kaggle.com/datasets/samuelcortinhas/cats-and-dogs-image-classification

# Step 1: Import libraries
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms, models

# Step 2: Resize to 224x224, convert to tensor, normalize each channel
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],   # VGG16 normalization
        std=[0.229, 0.224, 0.225],
    ),
])

# Step 3: Cats-vs-Dogs folders (one sub-directory per class)
train_dir = "sample_image/train"
val_dir = "sample_image/test"

train_data = datasets.ImageFolder(root=train_dir, transform=transform)
val_data = datasets.ImageFolder(root=val_dir, transform=transform)

# Only the training loader shuffles; validation order stays fixed.
train_loader = torch.utils.data.DataLoader(
    dataset=train_data, batch_size=32, shuffle=True
)
val_loader = torch.utils.data.DataLoader(
    dataset=val_data, batch_size=32, shuffle=False
)

# Step 4: Start from the ImageNet-pretrained VGG16
model = models.vgg16(
    weights=models.VGG16_Weights.IMAGENET1K_V1
)

# Step 5: Keep the convolutional feature extractor fixed during training
for frozen in model.features.parameters():
    frozen.requires_grad = False

# Step 6: Swap the last classifier layer for a 2-way head (cat, dog)
model.classifier[6] = nn.Linear(in_features=4096, out_features=2)

# Step 7: Loss and optimizer; only the classifier's parameters are optimized
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.classifier.parameters(), lr=1e-4)

# Step 8: Training loop
print("\nTraining started...\n")

# Highest validation accuracy seen so far; -inf guarantees that the first
# epoch always writes a checkpoint.
best_acc = float("-inf")

for epoch in range(3):   # 3 epochs is enough for 90%+
    model.train()
    running_loss = 0     # Sum of per-batch losses over the epoch (not an average)

    for images, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    print(f"Epoch {epoch+1}, Train Loss: {running_loss:.2f}")

    # Step 9: Evaluate accuracy on validation set
    model.eval()
    correct = 0
    total = 0

    with torch.no_grad():
        for images, labels in val_loader:
            outputs = model(images)
            _, predicted = torch.max(outputs, 1)

            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    acc = 100 * correct / total
    print(f"Validation Accuracy: {acc:.2f}%\n")

    # Step 10: Save the fine-tuned model whenever validation accuracy improves.
    # Saving only after the last epoch would keep a worse model whenever
    # accuracy dips at the end. Note that the in-memory `model` still holds
    # the last-epoch weights; reload the file to get the best ones.
    if acc > best_acc:
        best_acc = acc
        torch.save(model.state_dict(), "vgg16_cats_dogs.pth")

print("Model saved successfully!")



Downloading: "https://download.pytorch.org/models/vgg16-397923af.pth" to C:\Users\15016/.cache\torch\hub\checkpoints\vgg16-397923af.pth


100.0%



Training started...

Epoch 1, Train Loss: 3.24
Validation Accuracy: 95.71%

Epoch 2, Train Loss: 0.15
Validation Accuracy: 98.57%

Epoch 3, Train Loss: 0.01
Validation Accuracy: 97.14%

Model saved successfully!
