In [21]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, Subset

# Define Residual Block with Skip Connections
class ResidualBlock(nn.Module):
    def __init__(self, in_channels, out_channels, stride=1, downsample=None):
        super(ResidualBlock, self).__init__()
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm2d(out_channels)
        self.downsample = downsample

    def forward(self, x):
        identity = x
        if self.downsample is not None:
            identity = self.downsample(x)
        
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
        out = self.conv2(out)
        out = self.bn2(out)
        out += identity  # Skip connection
        out = self.relu(out)
        return out

# Define ResNet Mini Model
class ResNetMini(nn.Module):
    def __init__(self, num_classes=2):
        super(ResNetMini, self).__init__()
        self.layer1 = nn.Sequential(
            nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(16),
            nn.ReLU(inplace=True)
        )
        self.res_block = ResidualBlock(16, 16)
        self.fc = nn.Linear(16 * 32 * 32, num_classes)

    def forward(self, x):
        x = self.layer1(x)
        x = self.res_block(x)
        x = x.view(x.size(0), -1)
        x = self.fc(x)
        return x

# Dataset Path and Transform
dataset_path = "D:\\Programs\\Jupyter\\imagenette2"  # Update path as needed
transform_train = transforms.Compose([
    transforms.Resize((32, 32)),
    transforms.RandomHorizontalFlip(),  # Augmentation
    transforms.ColorJitter(brightness=0.1, contrast=0.1, saturation=0.1),  # Introduce noise
    transforms.ToTensor(),
])

transform_test = transforms.Compose([
    transforms.Resize((32, 32)),
    transforms.ToTensor(),
])

# Load Dataset
dataset = datasets.ImageFolder(root=dataset_path, transform=transform_train)
test_dataset = datasets.ImageFolder(root=dataset_path, transform=transform_test)

# Use Fixed 10 Indices for Testing
fixed_indices = [0, 5, 10, 15, 20, 25, 30, 35, 40, 45]  # Replace with desired fixed indices
test_subset = Subset(test_dataset, fixed_indices)
test_loader = DataLoader(test_subset, batch_size=2, shuffle=False)

# Train on a Separate Subset
train_indices = list(set(range(len(dataset))) - set(fixed_indices))[:50]  # Limit training set size
train_subset = Subset(dataset, train_indices)
train_loader = DataLoader(train_subset, batch_size=4, shuffle=True)

# Initialize Model, Loss, and Optimizer
model = ResNetMini(num_classes=len(dataset.classes))
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training Loop
for epoch in range(10):  # Reduced epochs
    model.train()
    epoch_loss = 0
    for images, labels in train_loader:
        outputs = model(images)
        loss = criterion(outputs, labels)
        epoch_loss += loss.item()
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    print(f"Epoch [{epoch+1}/3], Loss: {epoch_loss / len(train_loader):.4f}")

# Evaluation
model.eval()
correct, total = 0, 0
with torch.no_grad():
    for images, labels in test_loader:
        outputs = model(images)
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

accuracy = 100 * correct / total
print(f'Accuracy on 10 fixed images: {accuracy:.2f}%')

# Visualizing Gradients to Demonstrate Vanishing Gradient Problem
gradients = []
for images, labels in test_loader:
    outputs = model(images)
    loss = criterion(outputs, labels)
    optimizer.zero_grad()
    loss.backward()

    # Capture gradients of the first layer's weights
    gradients.append(model.layer1[0].weight.grad.abs().mean().item())

print("Average Gradient Magnitudes Per Batch:", gradients)

Epoch [1/3], Loss: 0.0401
Epoch [2/3], Loss: 0.0000
Epoch [3/3], Loss: 0.0000
Epoch [4/3], Loss: 0.0000
Epoch [5/3], Loss: 0.0000
Epoch [6/3], Loss: 0.0000
Epoch [7/3], Loss: 0.0000
Epoch [8/3], Loss: 0.0000
Epoch [9/3], Loss: 0.0000
Epoch [10/3], Loss: 0.0000
Accuracy on 10 fixed images: 100.00%
Average Gradient Magnitudes Per Batch: [0.0, 0.0, 0.0, 0.0, 0.0]


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, Subset
import numpy as np

# Define Residual Block with Vanishing Gradient Problem
class ResidualBlock(nn.Module):
    def __init__(self, in_channels, out_channels, stride=1, downsample=None):
        super(ResidualBlock, self).__init__()
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1)
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1)
        self.sigmoid = nn.Sigmoid()  # Introduces vanishing gradient issue
        self.downsample = downsample

    def forward(self, x):
        identity = x
        if self.downsample is not None:
            identity = self.downsample(x)
        
        out = self.conv1(x)
        out = self.sigmoid(out)  # Problematic activation
        out = self.conv2(out)
        out += identity  # Skip connection
        out = self.sigmoid(out)  # Problematic activation
        return out

# Define ResNet Model
class ResNetMini(nn.Module):
    def __init__(self, num_classes=2):
        super(ResNetMini, self).__init__()
        self.layer1 = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True)  # Initial activation remains ReLU
        )
        self.res_block = ResidualBlock(32, 32)  # Introduces vanishing gradient
        self.fc = nn.Linear(32 * 32 * 32, num_classes)

    def forward(self, x):
        x = self.layer1(x)
        x = self.res_block(x)
        x = x.view(x.size(0), -1)
        x = self.fc(x)
        return x

# Set seed for reproducibility
random_seed = 42
np.random.seed(random_seed)
torch.manual_seed(random_seed)

# Load and Preprocess Dataset from a specified path
dataset_path = "D:\\Programs\\Jupyter\\imagenette2"  # Update as needed
transform = transforms.Compose([
    transforms.Resize((32, 32)),
    transforms.ToTensor(),
])

# Load dataset using ImageFolder
dataset = datasets.ImageFolder(root=dataset_path, transform=transform)

# Select a subset of 10 images for testing with fixed random seed
subset_size = 10
subset_indices = np.random.choice(len(dataset), subset_size, replace=False)
subset = Subset(dataset, subset_indices)
loader = DataLoader(subset, batch_size=2, shuffle=True)

# Initialize Model, Loss, and Optimizer
model = ResNetMini(num_classes=len(dataset.classes))
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training Loop
for epoch in range(15):  # Reduced epochs to focus on issue
    model.train()
    epoch_loss = 0
    for images, labels in loader:
        outputs = model(images)
        loss = criterion(outputs, labels)
        epoch_loss += loss.item()
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    print(f"Epoch [{epoch+1}/5], Loss: {epoch_loss / len(loader):.4f}")

# Evaluation
model.eval()
correct, total = 0, 0
with torch.no_grad():
    for images, labels in loader:
        outputs = model(images)
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

accuracy = 100 * correct / total
print(f'Accuracy on 10 selected images: {accuracy:.2f}%')

# Visualizing Gradients to Demonstrate Vanishing
gradients = []
for images, labels in loader:
    outputs = model(images)
    loss = criterion(outputs, labels)
    optimizer.zero_grad()
    loss.backward()

    # Capture gradients of the first convolutional layer
    gradients.append(model.layer1[0].weight.grad.abs().mean().item())

print("Average Gradient Magnitudes Per Batch:", gradients)

Epoch [1/5], Loss: 5.4183
Epoch [2/5], Loss: 13.8675
Epoch [3/5], Loss: 12.1574
Epoch [4/5], Loss: 3.6827
Epoch [5/5], Loss: 1.3058
Epoch [6/5], Loss: 1.1892
Epoch [7/5], Loss: 1.1995
Epoch [8/5], Loss: 0.5159
Epoch [9/5], Loss: 0.8020
Epoch [10/5], Loss: 0.7299
Accuracy on 10 selected images: 80.00%
Average Gradient Magnitudes Per Batch: [0.014891930855810642, 0.0043385145254433155, 0.004565069451928139, 0.017519501969218254, 0.0036052113864570856]
