In [23]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader, Subset
import numpy as np

# Set seed for reproducibility
random_seed = 42
np.random.seed(random_seed)
torch.manual_seed(random_seed)

# Define data transformation with stronger augmentations
transform = transforms.Compose([
    transforms.Resize((224, 224)),  # Resize to 224x224
    transforms.RandomHorizontalFlip(),
    transforms.RandomVerticalFlip(),  # Vertical flip for diversity
    transforms.ColorJitter(brightness=0.3, contrast=0.3, saturation=0.3, hue=0.2),
    transforms.RandomRotation(15),
    transforms.RandomResizedCrop(224, scale=(0.8, 1.0)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),  # Standard normalization
])

# Load dataset
dataset_path = "D:\\Programs\\Jupyter\\tiny-imagenet-200\\tiny-imagenet-200"
dataset = datasets.ImageFolder(root=dataset_path, transform=transform)

# Increase subset size
subset_size = 10
subset_indices = np.random.choice(len(dataset), subset_size, replace=False)
subset = Subset(dataset, subset_indices)
loader = DataLoader(subset, batch_size=8, shuffle=True)

# Use ResNet50 instead of ResNet18
class FineTunedResNet50(nn.Module):
    def __init__(self, num_classes):
        super(FineTunedResNet50, self).__init__()
        self.base_model = models.resnet50(pretrained=True)
        for param in self.base_model.parameters():
            param.requires_grad = True  # Unfreeze all layers
        self.base_model.fc = nn.Linear(self.base_model.fc.in_features, num_classes)

    def forward(self, x):
        return self.base_model(x)

# Initialize the model
model = FineTunedResNet50(num_classes=len(dataset.classes)).to("cuda" if torch.cuda.is_available() else "cpu")

# Define Loss and Optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=3e-5, weight_decay=1e-5)  # Lower weight decay
scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=10, eta_min=1e-6)  # Cosine annealing scheduler

# Training Loop
num_epochs = 30
best_accuracy = 0
for epoch in range(num_epochs):
    model.train()
    epoch_loss = 0
    for images, labels in loader:
        images, labels = images.cuda(), labels.cuda()
        outputs = model(images)
        loss = criterion(outputs, labels)
        epoch_loss += loss.item()

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    scheduler.step()

    # Evaluate on the training set
    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for images, labels in loader:
            images, labels = images.cuda(), labels.cuda()
            outputs = model(images)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    accuracy = 100 * correct / total
    best_accuracy = max(best_accuracy, accuracy)
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss / len(loader):.4f}, Accuracy: {accuracy:.2f}%")

# Print Best Accuracy Achieved
print(f"Best Accuracy on selected images: {best_accuracy:.2f}%")

# Demonstrate Vanishing Gradient Problem
def calculate_gradients(model, loader):
    model.train()
    gradients = []
    for images, _ in loader:
        images = images.cuda()
        images.requires_grad = True
        outputs = model(images)
        loss = outputs.mean()  # Simplified loss for demonstration
        model.zero_grad()
        loss.backward()

        grad = images.grad.abs().mean().item()
        gradients.append(grad)
    return gradients

gradients = calculate_gradients(model, loader)
print(f"Gradients per sample: {gradients}")




Epoch [1/30], Loss: 0.9533, Accuracy: 80.00%
Epoch [2/30], Loss: 0.8420, Accuracy: 90.00%
Epoch [3/30], Loss: 0.7964, Accuracy: 90.00%
Epoch [4/30], Loss: 0.7495, Accuracy: 90.00%
Epoch [5/30], Loss: 0.6629, Accuracy: 100.00%
Epoch [6/30], Loss: 0.6759, Accuracy: 100.00%
Epoch [7/30], Loss: 0.6569, Accuracy: 90.00%
Epoch [8/30], Loss: 0.6054, Accuracy: 100.00%
Epoch [9/30], Loss: 0.5596, Accuracy: 100.00%
Epoch [10/30], Loss: 0.5838, Accuracy: 100.00%
Epoch [11/30], Loss: 0.5503, Accuracy: 100.00%
Epoch [12/30], Loss: 0.6426, Accuracy: 100.00%
Epoch [13/30], Loss: 0.5711, Accuracy: 100.00%
Epoch [14/30], Loss: 0.5567, Accuracy: 100.00%
Epoch [15/30], Loss: 0.5477, Accuracy: 100.00%
Epoch [16/30], Loss: 0.5896, Accuracy: 100.00%
Epoch [17/30], Loss: 0.5181, Accuracy: 100.00%
Epoch [18/30], Loss: 0.4659, Accuracy: 100.00%
Epoch [19/30], Loss: 1.0032, Accuracy: 100.00%
Epoch [20/30], Loss: 0.4149, Accuracy: 100.00%
Epoch [21/30], Loss: 0.3764, Accuracy: 100.00%
Epoch [22/30], Loss: 0.5043