In [11]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms

In [12]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

cuda:0


In [13]:
# MNIST data loader
transform=transforms.Compose([transforms.Resize((32, 32)), transforms.ToTensor()])

trainset = torchvision.datasets.MNIST(root='./data/mnist', train=True, download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=256, shuffle=True, num_workers=8)

testset = torchvision.datasets.MNIST(root='./data/mnist', train=False, download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=1024, num_workers=8)
    

In [14]:
class LeNet(nn.Module):

    def __init__(self):
        super(LeNet, self).__init__()
        # 1 input image channel, 6 output channels, 3x3 square convolution
        # kernel
        self.conv1 = nn.Conv2d(1, 6, 3)
        self.conv2 = nn.Conv2d(6, 16, 3)
        # an affine operation: y = Wx + b
        self.fc1 = nn.Linear(16 * 6 * 6, 120)  # 6*6 from image dimension
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        # Max pooling over a (2, 2) window
        x = F.avg_pool2d(F.relu(self.conv1(x)), (2, 2))
        # If the size is a square you can only specify a single number
        x = F.avg_pool2d(F.relu(self.conv2(x)), 2)
        x = x.view(-1, self.num_flat_features(x))
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

    def num_flat_features(self, x):
        size = x.size()[1:]  # all dimensions except the batch dimension
        num_features = 1
        for s in size:
            num_features *= s
        return num_features


net = LeNet()
net.to(device)

LeNet(
  (conv1): Conv2d(1, 6, kernel_size=(3, 3), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(3, 3), stride=(1, 1))
  (fc1): Linear(in_features=576, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)

In [15]:
params = list(net.parameters())
#print(net.parameters())
print(len(params))
print(params[0].size())  # conv1's .weight

15
torch.Size([6, 1, 3, 3])


In [16]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr=2e-3)

In [17]:
for epoch in range(16):  # loop over the dataset multiple times

    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        # get the inputs; data is a list of [inputs, labels]
        inputs, labels = data[0].to(device), data[1].to(device)
        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        # if i % 100 == 0:
        #     print('Train - Epoch %d, Batch: %d, Loss: %f' % (epoch, i, loss.detach().cpu().item()))

        loss.backward()
        optimizer.step()

print('Finished Training')

Finished Training


In [18]:
correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data[0].to(device), data[1].to(device)
        outputs = net(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the network on the 10000 test images: %f %%' % (
    100 * correct / total))

Accuracy of the network on the 10000 test images: 98.850000 %


In [19]:
print(net.modules)

<bound method Module.modules of LeNet(
  (conv1): Conv2d(1, 6, kernel_size=(3, 3), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(3, 3), stride=(1, 1))
  (fc1): Linear(in_features=576, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)>


In [20]:
print(torch.max(net.conv1.weight))
print(torch.min(net.conv1.weight))
print(torch.max(net.conv1.bias))
print(torch.min(net.conv1.bias))

print(torch.max(net.conv2.weight))
print(torch.min(net.conv2.weight))
print(torch.max(net.conv2.bias))
print(torch.min(net.conv2.bias))

print(torch.max(net.fc1.weight))
print(torch.min(net.fc1.weight))
print(torch.max(net.fc1.bias))
print(torch.min(net.fc1.bias))

print(torch.max(net.fc2.weight))
print(torch.min(net.fc2.weight))
print(torch.max(net.fc2.bias))
print(torch.min(net.fc2.bias))

print(torch.max(net.fc3.weight))
print(torch.min(net.fc3.weight))
print(torch.max(net.fc3.bias))
print(torch.min(net.fc3.bias))

tensor(2.3670, device='cuda:0', grad_fn=<MaxBackward1>)
tensor(-0.9639, device='cuda:0', grad_fn=<MinBackward1>)
tensor(0.2682, device='cuda:0')
tensor(-0.0083, device='cuda:0')
tensor(0.6806, device='cuda:0')
tensor(-0.7110, device='cuda:0')
tensor(0.2057, device='cuda:0', grad_fn=<MaxBackward1>)
tensor(-0.2743, device='cuda:0', grad_fn=<MinBackward1>)
tensor(0.2780, device='cuda:0')
tensor(-0.2330, device='cuda:0')
tensor(0.2285, device='cuda:0', grad_fn=<MaxBackward1>)
tensor(-0.1879, device='cuda:0', grad_fn=<MinBackward1>)
tensor(0.5486, device='cuda:0')
tensor(-0.4212, device='cuda:0')
tensor(0.2718, device='cuda:0', grad_fn=<MaxBackward1>)
tensor(-0.1544, device='cuda:0', grad_fn=<MinBackward1>)
tensor(0.3643, device='cuda:0')
tensor(-0.2802, device='cuda:0')
tensor(0.0820, device='cuda:0', grad_fn=<MaxBackward1>)
tensor(-0.1826, device='cuda:0', grad_fn=<MinBackward1>)
