In [1]:
import torch
from torchvision import datasets, transforms
import matplotlib.pyplot as plt
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import random
import torch.nn.init as init

random.seed(42)
torch.manual_seed(42)

transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))])



In [2]:
# Download and load the training and test sets
trainset = datasets.MNIST(root='./data', train=True, download=True, transform=transform)
testset = datasets.MNIST(root='./data', train=False, download=True, transform=transform)

# Create DataLoader for batching the data
trainloader = torch.utils.data.DataLoader(trainset, batch_size=200, shuffle=True)
testloader = torch.utils.data.DataLoader(testset, batch_size=60000, shuffle=False)

In [3]:
len(trainset), len(testset)

(60000, 10000)

In [4]:
class SimpleNN(nn.Module):
    def __init__(self):
        super(SimpleNN, self).__init__()
        self.flatten = nn.Flatten()
        self.fc1 = nn.Linear(28 * 28, 256)
        self.fc2 = nn.Tanh()  
        self.fc3 = nn.Linear(256, 10)
        init.xavier_normal_(self.fc1.weight,gain=nn.init.calculate_gain('tanh'))
        init.xavier_normal_(self.fc3.weight)

    def forward(self, x):
        
        
        x = self.flatten(x)
        x = self.fc1(x)
        x = self.fc2(x)
        x = self.fc3(x)
        return x


In [5]:
def analyze_layer_statistics(model):
    
    
    stats = {}
    for layer_name, activations in model.activations.items():
        stats[layer_name] = {
            'mean': torch.mean(activations).item(),
            'var': torch.var(activations).item(),
            'min': torch.min(activations).item(),
            'max': torch.max(activations).item()
        }
    
    return stats

In [6]:
model = SimpleNN()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(),lr=0.01)

In [7]:
28*28*model.fc1.weight.std()**2,10*model.fc1.weight.std()**2

(tensor(4.1684, grad_fn=<MulBackward0>),
 tensor(0.0532, grad_fn=<MulBackward0>))

In [8]:
epochs = 20
losses = []
stats = []
for epoch in range(epochs):
    model.train()
    total = 0
    correct = 0
    running_loss = 0.0
    for images, labels in trainloader:
        optimizer.zero_grad() 
        output = model(images)
        loss = criterion(output, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss
        total += images.shape[0]
        correct += (torch.argmax(output,dim=1)==labels).sum()
    print(f'Epoch {epoch+1}/{epochs}, Loss: {running_loss/len(trainloader):.4f}')
    print(f'Accuracy = {(correct/total)*100}')
    losses.append(running_loss/len(trainloader)) 
    #stats.append(analyze_layer_statistics(model))


Epoch 1/20, Loss: 0.2962
Accuracy = 91.25999450683594
Epoch 2/20, Loss: 0.1883
Accuracy = 94.2449951171875
Epoch 3/20, Loss: 0.1728
Accuracy = 94.6449966430664
Epoch 4/20, Loss: 0.1688
Accuracy = 94.77166748046875
Epoch 5/20, Loss: 0.1630
Accuracy = 94.9800033569336
Epoch 6/20, Loss: 0.1620
Accuracy = 94.9566650390625
Epoch 7/20, Loss: 0.1652
Accuracy = 94.8933334350586
Epoch 8/20, Loss: 0.1621
Accuracy = 94.96499633789062
Epoch 9/20, Loss: 0.1540
Accuracy = 95.25666809082031
Epoch 10/20, Loss: 0.1480
Accuracy = 95.3566665649414
Epoch 11/20, Loss: 0.1438
Accuracy = 95.56999969482422
Epoch 12/20, Loss: 0.1460
Accuracy = 95.52999877929688
Epoch 13/20, Loss: 0.1431
Accuracy = 95.62166595458984
Epoch 14/20, Loss: 0.1463
Accuracy = 95.63333129882812
Epoch 15/20, Loss: 0.1426
Accuracy = 95.7066650390625
Epoch 16/20, Loss: 0.1394
Accuracy = 95.6300048828125
Epoch 17/20, Loss: 0.1337
Accuracy = 95.89666748046875
Epoch 18/20, Loss: 0.1362
Accuracy = 95.86666870117188
Epoch 19/20, Loss: 0.1233
A

In [9]:
stats

[]

In [10]:
correct = 0
total = 0
for images,labels in trainloader:
    model.eval()
    y_pred = model(images)
    loss = criterion(y_pred, labels)
    predicted = torch.argmax(y_pred,dim=1)
    total += labels.size(0)
    correct += (predicted == labels).sum()


# Calculate accuracy
accuracy = 100 * correct / total
print(f'Accuracy: {accuracy}%')
   

Accuracy: 95.40166473388672%


In [11]:
correct = 0
total = 0
for images,labels in testloader:
    model.eval()
    y_pred = model(images)
    loss = criterion(y_pred, labels)
    predicted = torch.argmax(y_pred,dim=1)
    total += labels.size(0)
    correct += (predicted == labels).sum()


# Calculate accuracy
accuracy = 100 * correct / total
print(f'Accuracy: {accuracy}%')
   

Accuracy: 94.80000305175781%


In [12]:
28*28*model.activations['weight'].std()**2

AttributeError: 'SimpleNN' object has no attribute 'activations'

In [None]:
2/(28*28 + 256)

In [None]:
-torch.tensor(1/10).log()