In [1]:
import torch
from torchvision import datasets, transforms
import torch.nn as nn
import torch.optim as optim
import random

# Configuration: values a reader is likely to tune, named once instead of inlined
SEED = 42
MNIST_MEAN, MNIST_STD = 0.1307, 0.3081  # normalisation constants for the single grey channel
TRAIN_BATCH_SIZE = 200
DATA_ROOT = './data'

# Set random seeds for reproducibility (Python's RNG and PyTorch's global RNG)
random.seed(SEED)
torch.manual_seed(SEED)

# Define data transformation: convert each image to a tensor, then normalise it
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((MNIST_MEAN,), (MNIST_STD,))
])

# Download (when not already present under DATA_ROOT) and load the training and test sets
trainset = datasets.MNIST(root=DATA_ROOT, train=True, download=True, transform=transform)
testset = datasets.MNIST(root=DATA_ROOT, train=False, download=True, transform=transform)

# Create DataLoaders for batching the data.
# The whole test split is served as a single batch; sizing it from len(testset)
# keeps that true whatever the size of the split, instead of relying on a
# hard-coded number that does not match the test set.
trainloader = torch.utils.data.DataLoader(trainset, batch_size=TRAIN_BATCH_SIZE, shuffle=True)
testloader = torch.utils.data.DataLoader(testset, batch_size=len(testset), shuffle=False)



In [2]:
# Define the neural network with BatchNorm
class SimpleNNWithBatchNorm(nn.Module):
    """One-hidden-layer classifier with batch normalisation before the tanh.

    Layer order: Flatten -> Linear(784, 256) -> BatchNorm1d(256) -> tanh
    -> Linear(256, 10).

    Attributes:
        activations (dict): Snapshots taken during the most recent forward
            pass, keyed ``'weight'`` (fc1 weight matrix), ``'input'``
            (flattened input), ``'fc1'`` (fc1 output) and ``'bn1'`` (bn1
            output). Empty until ``forward`` has been called once.
    """

    def __init__(self):
        super().__init__()
        self.flatten = nn.Flatten()
        self.fc1 = nn.Linear(28 * 28, 256)
        self.bn1 = nn.BatchNorm1d(256)  # Batch normalization layer
        self.fc2 = nn.Linear(256, 10)
        # Defined up front so the attribute can be read before any forward pass
        self.activations = {}

    def forward(self, x):
        """Compute the 10 output scores for a batch and refresh ``self.activations``.

        Args:
            x: Input batch; it is flattened to ``28 * 28`` features per sample
                before the first linear layer.

        Returns:
            Tensor with 10 values per sample (output of ``fc2``).
        """
        # Snapshots are detached: they are for inspection only and must not keep
        # the autograd graph of the batch alive after the backward pass.
        # detach() shares storage, so 'weight' still tracks in-place optimizer updates.
        self.activations = {}
        self.activations['weight'] = self.fc1.weight.detach()
        x = self.flatten(x)
        self.activations['input'] = x.detach()
        x = self.fc1(x)
        self.activations['fc1'] = x.detach()
        x = self.bn1(x)
        self.activations['bn1'] = x.detach()
        x = torch.tanh(x)
        x = self.fc2(x)
        return x

# Build the network first, then the loss and the optimiser that train it
model = SimpleNNWithBatchNorm()
criterion = nn.CrossEntropyLoss()  # loss comparing the network outputs with the integer labels
optimizer = optim.Adam(params=model.parameters(), lr=1e-2)  # Adam over every model parameter



In [3]:
def analyze_layer_statistics(model):
    """Summarise the tensors a model recorded in its ``activations`` mapping.

    Args:
        model: Object exposing an ``activations`` dict of name -> tensor.
            A missing or empty mapping (for example, no forward pass has run
            yet) produces an empty result instead of an ``AttributeError``.

    Returns:
        dict: ``{name: {'mean', 'var', 'min', 'max'}}`` with every value a
        Python float taken over all elements of the tensor. ``'var'`` is
        ``torch.var`` with its default settings.
    """
    recorded = getattr(model, 'activations', None) or {}

    stats = {}
    # Pure measurement: no gradients are needed, so skip autograd bookkeeping
    with torch.no_grad():
        for layer_name, activations in recorded.items():
            stats[layer_name] = {
                'mean': torch.mean(activations).item(),
                'var': torch.var(activations).item(),
                'min': torch.min(activations).item(),
                'max': torch.max(activations).item()
            }

    return stats

In [4]:
epochs = 20
losses = []  # mean training loss of each epoch
stats = []   # layer statistics captured once per epoch

for epoch in range(epochs):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0

    for images, labels in trainloader:
        # One optimisation step: clear gradients, forward, backward, update
        optimizer.zero_grad()
        output = model(images)
        loss = criterion(output, labels)
        loss.backward()
        optimizer.step()

        # Running totals for the end-of-epoch summary
        running_loss += loss.item()
        total += images.size(0)
        correct += output.argmax(dim=1).eq(labels).sum().item()

    accuracy = 100 * correct / total
    print(f'Epoch {epoch + 1}/{epochs}, Loss: {running_loss / len(trainloader):.4f}, Accuracy: {accuracy:.2f}%')
    losses.append(running_loss / len(trainloader))
    # model.activations is overwritten on every forward pass, so this summarises
    # the final batch of the epoch only
    stats.append(analyze_layer_statistics(model))



Epoch 1/20, Loss: 0.2649, Accuracy: 92.04%
Epoch 2/20, Loss: 0.1266, Accuracy: 96.19%
Epoch 3/20, Loss: 0.0844, Accuracy: 97.37%
Epoch 4/20, Loss: 0.0555, Accuracy: 98.16%
Epoch 5/20, Loss: 0.0423, Accuracy: 98.63%
Epoch 6/20, Loss: 0.0335, Accuracy: 98.84%
Epoch 7/20, Loss: 0.0276, Accuracy: 99.06%
Epoch 8/20, Loss: 0.0246, Accuracy: 99.11%
Epoch 9/20, Loss: 0.0190, Accuracy: 99.34%
Epoch 10/20, Loss: 0.0143, Accuracy: 99.54%
Epoch 11/20, Loss: 0.0161, Accuracy: 99.45%
Epoch 12/20, Loss: 0.0188, Accuracy: 99.35%
Epoch 13/20, Loss: 0.0206, Accuracy: 99.31%
Epoch 14/20, Loss: 0.0208, Accuracy: 99.31%
Epoch 15/20, Loss: 0.0174, Accuracy: 99.41%
Epoch 16/20, Loss: 0.0141, Accuracy: 99.50%
Epoch 17/20, Loss: 0.0109, Accuracy: 99.63%
Epoch 18/20, Loss: 0.0079, Accuracy: 99.74%
Epoch 19/20, Loss: 0.0089, Accuracy: 99.71%
Epoch 20/20, Loss: 0.0116, Accuracy: 99.63%


In [5]:
# Display the statistics collected during training: one entry per epoch,
# each mapping a recorded tensor name to its mean / var / min / max.
stats

[{'weight': {'mean': 0.0014096543891355395,
   'var': 0.015047002583742142,
   'min': -0.6887809634208679,
   'max': 0.7327782511711121},
  'input': {'mean': -0.010494777001440525,
   'var': 0.9739283323287964,
   'min': -0.4242129623889923,
   'max': 2.821486711502075},
  'fc1': {'mean': -0.4114450216293335,
   'var': 88.69884490966797,
   'min': -41.19322204589844,
   'max': 56.5272102355957},
  'bn1': {'mean': -0.00982438400387764,
   'var': 2.121011257171631,
   'min': -8.685445785522461,
   'max': 9.9185791015625}},
 {'weight': {'mean': 0.00175902945920825,
   'var': 0.02747112140059471,
   'min': -1.0407301187515259,
   'max': 0.8757270574569702},
  'input': {'mean': -0.003478014376014471,
   'var': 0.9929953217506409,
   'min': -0.4242129623889923,
   'max': 2.821486711502075},
  'fc1': {'mean': -0.6395968794822693,
   'var': 157.46495056152344,
   'min': -68.30181884765625,
   'max': 63.37541961669922},
  'bn1': {'mean': -0.01903701014816761,
   'var': 3.2467575073242188,
   'm

In [6]:
# Measure accuracy over the training split with the model in evaluation mode
model.eval()
correct, total = 0, 0

# Gradients are not needed for scoring
with torch.no_grad():
    for images, labels in trainloader:
        output = model(images)
        predicted = output.argmax(dim=1)  # index of the highest-scoring class
        correct += predicted.eq(labels).sum().item()
        total += labels.size(0)

accuracy = 100 * correct / total
print(f'Training Accuracy: {accuracy:.2f}%')

Training Accuracy: 99.65%


In [7]:
# Evaluate on the held-out test data
model.eval()
correct = 0
total = 0

# Gradients are not needed for scoring
with torch.no_grad():
    for images, labels in testloader:
        output = model(images)
        predicted = torch.argmax(output, dim=1)  # index of the highest-scoring class
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

accuracy = 100 * correct / total
# This loop iterates testloader, so the figure is labelled as test accuracy
print(f'Test Accuracy: {accuracy:.2f}%')

Training Accuracy: 97.34%
