In [2]:
import torch
import torchvision
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms as transforms
import time
from tqdm import tqdm_notebook as tqdm

In [3]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

device(type='cuda')

In [4]:
# Prepare dataset
transform = transforms.Compose([transforms.ToTensor(),transforms.Normalize(mean=0.1307,std=0.3081)])

train_set = torchvision.datasets.MNIST(root = "./dataset",train=True,download=False,transform=transform)  #Make download == True to download dataset
test_set = torchvision.datasets.MNIST(root ="./dataset",train=False,download=False,transform=transform)

train_loader = torch.utils.data.DataLoader(train_set,batch_size=32,shuffle=True,num_workers=4)
test_loader = torch.utils.data.DataLoader(test_set,batch_size=32,shuffle=True,num_workers=4)

In [5]:
# Building neural net
class Net(nn.Module):
    def __init__(self):
        super(Net,self).__init__()
        self.fc1 = nn.Linear(28*28*1,500)
        self.fc2 = nn.Linear(500,10)
    
    def forward(self,x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        return x

In [23]:
# Training nn

model = Net()
model.to(device) # transfer to GPU if available
optimizer = torch.optim.Adam(model.parameters(),lr=3e-4)
calculate_loss = nn.CrossEntropyLoss()

num_epochs = 10

since = time.time()
for epoch in range(num_epochs):
    print('Epoch {}/{}'.format(epoch, num_epochs - 1))
    print('-' * 10)
    #scheduler.step()
    model.train()
    running_loss = 0.0
    tk0 = tqdm(train_loader, total=int(len(train_loader)))
    counter = 0

    for batch_idx, data_set in enumerate(tk0):
        
        # Training Code start

        optimizer.zero_grad()  # Clear the old gradients from last step  
        data , labels = data_set
        data = data.to(device)
        labels = labels.to(device)
        data = data.view(-1,28*28)
        output = model(data)   #forward
        output.to(device)
        loss = calculate_loss(output,labels) #calculate loss
        loss.backward()
        optimizer.step()

        # Training code ends

        running_loss += loss.item() * data.size(0)
        counter += 1
        tk0.set_postfix(loss=(running_loss / (counter * train_loader.batch_size)))
    
    #Gradients
    for name, param in model.named_parameters():
        if param.requires_grad:
            print (name, param.grad.data)
    
    #weights
    for name, param in model.named_parameters():
        if param.requires_grad:
            print (name, param.data)
    

        
    #epoch_loss = running_loss / len(train_loader)
    #print('Training Loss: {:.4f}'.format(epoch_loss))

time_elapsed = time.time() - since
print('\nTraining completed in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
        

#print('Total time to train model :{} s'.format(total_time))

Epoch 0/9
----------


HBox(children=(FloatProgress(value=0.0, max=1875.0), HTML(value='')))


fc1.weight tensor([[-0.0007, -0.0007, -0.0007,  ..., -0.0007, -0.0007, -0.0007],
        [-0.0002, -0.0002, -0.0002,  ..., -0.0002, -0.0002, -0.0002],
        [-0.0002, -0.0002, -0.0002,  ..., -0.0002, -0.0002, -0.0002],
        ...,
        [-0.0004, -0.0004, -0.0004,  ..., -0.0004, -0.0004, -0.0004],
        [ 0.0002,  0.0002,  0.0002,  ...,  0.0002,  0.0002,  0.0002],
        [ 0.0002,  0.0002,  0.0002,  ...,  0.0002,  0.0002,  0.0002]],
       device='cuda:0')
fc1.bias tensor([ 1.5481e-03,  4.8335e-04,  4.1783e-04, -1.2997e-03, -7.9630e-04,
        -3.1076e-04, -1.8730e-03, -1.7859e-03,  1.2087e-03,  2.2587e-03,
         4.2272e-04,  1.8994e-03, -2.4326e-03,  9.4911e-04,  8.2938e-05,
         2.1752e-03, -2.0053e-04, -1.6130e-03,  1.3925e-03, -1.6281e-03,
         1.2620e-03,  2.5056e-03,  3.1200e-03,  2.0741e-03,  1.2062e-03,
        -4.4582e-04,  2.6272e-03, -5.0377e-04, -1.2047e-03,  1.0295e-04,
        -1.9587e-03,  1.6435e-04,  3.2264e-03, -3.5553e-04, -4.6027e-04,
         6

HBox(children=(FloatProgress(value=0.0, max=1875.0), HTML(value='')))


fc1.weight tensor([[-7.7670e-04, -7.7670e-04, -7.7670e-04,  ..., -7.7670e-04,
         -7.7670e-04, -7.7670e-04],
        [ 3.2241e-05,  3.2241e-05,  3.2241e-05,  ...,  3.2241e-05,
          3.2241e-05,  3.2241e-05],
        [-5.7098e-04, -5.7098e-04, -5.7098e-04,  ..., -5.7098e-04,
         -5.7098e-04, -5.7098e-04],
        ...,
        [-1.3656e-04, -1.3656e-04, -1.3656e-04,  ..., -1.3656e-04,
         -1.3656e-04, -1.3656e-04],
        [ 8.3456e-04,  8.3456e-04,  8.3456e-04,  ...,  8.3456e-04,
          8.3456e-04,  8.3456e-04],
        [ 4.3760e-04,  4.3760e-04,  4.3760e-04,  ...,  4.3760e-04,
          4.3760e-04,  4.3760e-04]], device='cuda:0')
fc1.bias tensor([ 1.8309e-03, -7.6003e-05,  1.3460e-03, -1.4613e-03,  1.8205e-04,
         7.1769e-04, -3.8605e-04, -9.5602e-04, -5.9436e-05,  1.0882e-03,
         7.0291e-04,  3.4029e-04, -2.8848e-03, -5.6800e-04,  9.1116e-05,
         2.6917e-03, -1.1974e-04, -1.3809e-04,  1.1313e-03,  2.6048e-04,
         1.2998e-03,  9.9858e-04,  2.8

HBox(children=(FloatProgress(value=0.0, max=1875.0), HTML(value='')))


fc1.weight tensor([[-1.5262e-03, -1.5262e-03, -1.5262e-03,  ..., -1.5262e-03,
         -1.5262e-03, -1.5262e-03],
        [ 1.8680e-04,  1.8680e-04,  1.8680e-04,  ...,  1.8680e-04,
          1.8680e-04,  1.8680e-04],
        [-3.3329e-05, -3.3329e-05, -3.3329e-05,  ..., -3.3329e-05,
         -3.3329e-05, -3.3329e-05],
        ...,
        [ 1.8518e-04,  1.8518e-04,  1.8518e-04,  ...,  1.8518e-04,
          1.8518e-04,  1.8518e-04],
        [ 1.2392e-04,  1.2392e-04,  1.2392e-04,  ...,  1.2392e-04,
          1.2392e-04,  1.2392e-04],
        [ 1.9628e-04,  1.9628e-04,  1.9628e-04,  ...,  1.9628e-04,
          1.9628e-04,  1.9628e-04]], device='cuda:0')
fc1.bias tensor([ 3.5978e-03, -4.4034e-04,  7.8567e-05, -1.4916e-04, -2.7900e-04,
        -2.7913e-04, -1.5399e-04,  1.8725e-04,  4.4863e-04,  1.2024e-03,
         4.1613e-04, -1.7991e-04,  1.1313e-03, -1.5760e-04, -1.1375e-03,
         1.7256e-03, -8.5602e-05, -9.8259e-04,  6.5385e-05,  6.5090e-05,
        -3.3836e-04,  2.3763e-03,  3.9

HBox(children=(FloatProgress(value=0.0, max=1875.0), HTML(value='')))


fc1.weight tensor([[ 3.4229e-04,  3.4229e-04,  3.4229e-04,  ...,  3.4229e-04,
          3.4229e-04,  3.4229e-04],
        [ 1.1121e-04,  1.1121e-04,  1.1121e-04,  ...,  1.1121e-04,
          1.1121e-04,  1.1121e-04],
        [ 1.0473e-04,  1.0473e-04,  1.0473e-04,  ...,  1.0473e-04,
          1.0473e-04,  1.0473e-04],
        ...,
        [ 1.9647e-04,  1.9647e-04,  1.9647e-04,  ...,  1.9647e-04,
          1.9647e-04,  1.9647e-04],
        [-2.7592e-04, -2.7592e-04, -2.7592e-04,  ..., -2.7592e-04,
         -2.7592e-04, -2.7592e-04],
        [ 3.0751e-05,  3.0751e-05,  3.0751e-05,  ...,  3.0751e-05,
          3.0751e-05,  3.0751e-05]], device='cuda:0')
fc1.bias tensor([-8.0689e-04, -2.6215e-04, -2.4689e-04, -1.4667e-05, -1.5267e-05,
        -3.8686e-05, -4.6974e-05,  1.0662e-04,  9.7906e-05, -7.8629e-04,
        -1.2364e-05, -4.6223e-04,  7.5103e-04, -4.1102e-04,  5.1436e-04,
         8.4946e-06,  6.9758e-04, -3.0989e-04,  1.5329e-04,  3.9135e-05,
        -6.0649e-04,  1.2796e-04,  3.3

HBox(children=(FloatProgress(value=0.0, max=1875.0), HTML(value='')))


fc1.weight tensor([[-9.8531e-06, -9.8531e-06, -9.8531e-06,  ..., -9.8531e-06,
         -9.8531e-06, -9.8531e-06],
        [ 6.2162e-05,  6.2162e-05,  6.2162e-05,  ...,  6.2162e-05,
          6.2162e-05,  6.2162e-05],
        [-2.2839e-05, -2.2839e-05, -2.2839e-05,  ..., -2.2839e-05,
         -2.2839e-05, -2.2839e-05],
        ...,
        [-4.2673e-06, -4.2673e-06, -4.2673e-06,  ..., -4.2673e-06,
         -4.2673e-06, -4.2673e-06],
        [ 7.6655e-05,  7.6655e-05,  7.6655e-05,  ...,  7.6655e-05,
          7.6655e-05,  7.6655e-05],
        [ 7.9520e-05,  7.9520e-05,  7.9520e-05,  ...,  7.9520e-05,
          7.9520e-05,  7.9520e-05]], device='cuda:0')
fc1.bias tensor([ 2.3227e-05, -1.4654e-04,  5.3837e-05, -2.1927e-04,  1.1619e-04,
        -4.0447e-06,  1.2612e-05,  1.0012e-05,  2.1720e-05,  8.8637e-05,
        -3.8345e-05, -2.1165e-05, -4.0682e-04, -3.8439e-05, -4.2025e-05,
        -1.8256e-05,  4.5319e-05, -2.3473e-04, -1.3048e-05, -7.6514e-05,
        -3.3631e-05,  1.1638e-05, -5.0

HBox(children=(FloatProgress(value=0.0, max=1875.0), HTML(value='')))


fc1.weight tensor([[-1.4092e-05, -1.4092e-05, -1.4092e-05,  ..., -1.4092e-05,
         -1.4092e-05, -1.4092e-05],
        [-1.2629e-04, -1.2629e-04, -1.2629e-04,  ..., -1.2629e-04,
         -1.2629e-04, -1.2629e-04],
        [-2.6169e-09, -2.6169e-09, -2.6169e-09,  ..., -2.6169e-09,
         -2.6169e-09, -2.6169e-09],
        ...,
        [ 1.7314e-04,  1.7314e-04,  1.7314e-04,  ...,  1.7314e-04,
          1.7314e-04,  1.7314e-04],
        [ 4.2289e-07,  4.2289e-07,  4.2289e-07,  ...,  4.2289e-07,
          4.2289e-07,  4.2289e-07],
        [-1.5151e-06, -1.5151e-06, -1.5151e-06,  ..., -1.5151e-06,
         -1.5151e-06, -1.5151e-06]], device='cuda:0')
fc1.bias tensor([ 3.3218e-05,  2.9770e-04,  6.1689e-09, -8.6326e-06, -8.0986e-06,
         4.1178e-04, -7.1226e-05, -2.2634e-04,  3.5875e-04, -6.4698e-04,
        -3.4450e-06,  2.9049e-04,  1.5928e-04,  3.7259e-04, -2.8288e-05,
        -1.9692e-04, -7.7469e-06, -7.3281e-05, -8.5663e-05,  5.4715e-04,
        -3.6437e-06,  2.9107e-06, -6.3

HBox(children=(FloatProgress(value=0.0, max=1875.0), HTML(value='')))


fc1.weight tensor([[ 6.2024e-05,  6.2024e-05,  6.2024e-05,  ...,  6.2024e-05,
          6.2024e-05,  6.2024e-05],
        [-2.1348e-05, -2.1348e-05, -2.1348e-05,  ..., -2.1348e-05,
         -2.1348e-05, -2.1348e-05],
        [ 1.0062e-05,  1.0062e-05,  1.0062e-05,  ...,  1.0062e-05,
          1.0062e-05,  1.0062e-05],
        ...,
        [ 2.6221e-06,  2.6221e-06,  2.6221e-06,  ...,  2.6221e-06,
          2.6221e-06,  2.6221e-06],
        [-2.2938e-08, -2.2938e-08, -2.2938e-08,  ..., -2.2938e-08,
         -2.2938e-08, -2.2938e-08],
        [-1.1500e-04, -1.1500e-04, -1.1500e-04,  ..., -1.1500e-04,
         -1.1500e-04, -1.1500e-04]], device='cuda:0')
fc1.bias tensor([-1.4621e-04,  5.0324e-05, -2.3719e-05, -3.7985e-05, -4.3733e-05,
        -5.1550e-06, -5.4956e-06,  1.1721e-04, -1.2031e-04, -4.5891e-06,
         1.3236e-05,  6.4039e-06, -9.6613e-07, -8.5811e-06, -3.6533e-06,
        -1.4935e-04,  1.1179e-06, -2.4460e-06,  6.0912e-06,  1.0971e-04,
        -6.2383e-06,  1.6462e-05,  3.5

HBox(children=(FloatProgress(value=0.0, max=1875.0), HTML(value='')))


fc1.weight tensor([[-6.3586e-05, -6.3586e-05, -6.3586e-05,  ..., -6.3586e-05,
         -6.3586e-05, -6.3586e-05],
        [ 2.2728e-04,  2.2728e-04,  2.2728e-04,  ...,  2.2728e-04,
          2.2728e-04,  2.2728e-04],
        [ 3.7092e-05,  3.7092e-05,  3.7092e-05,  ...,  3.7092e-05,
          3.7092e-05,  3.7092e-05],
        ...,
        [ 5.4142e-04,  5.4142e-04,  5.4142e-04,  ...,  5.4142e-04,
          5.4142e-04,  5.4142e-04],
        [-2.7083e-07, -2.7083e-07, -2.7083e-07,  ..., -2.7083e-07,
         -2.7083e-07, -2.7083e-07],
        [ 1.1407e-04,  1.1407e-04,  1.1407e-04,  ...,  1.1407e-04,
          1.1407e-04,  1.1407e-04]], device='cuda:0')
fc1.bias tensor([ 1.4989e-04, -5.3576e-04, -8.7437e-05, -1.0317e-05,  4.7432e-07,
        -9.1249e-06, -2.1210e-06, -2.2911e-05, -2.5976e-06,  7.6086e-06,
        -3.0147e-06,  6.7608e-06,  3.1739e-05, -6.0159e-07, -6.2826e-06,
        -5.3647e-06,  2.0072e-05, -4.6527e-06,  4.0471e-05, -3.0043e-04,
         4.7596e-05,  8.3082e-04, -1.2

HBox(children=(FloatProgress(value=0.0, max=1875.0), HTML(value='')))


fc1.weight tensor([[ 2.6500e-04,  2.6500e-04,  2.6500e-04,  ...,  2.6500e-04,
          2.6500e-04,  2.6500e-04],
        [ 8.2661e-04,  8.2661e-04,  8.2661e-04,  ...,  8.2661e-04,
          8.2661e-04,  8.2661e-04],
        [ 2.4090e-04,  2.4090e-04,  2.4090e-04,  ...,  2.4090e-04,
          2.4090e-04,  2.4090e-04],
        ...,
        [-2.9928e-05, -2.9928e-05, -2.9928e-05,  ..., -2.9928e-05,
         -2.9928e-05, -2.9928e-05],
        [-9.5497e-05, -9.5497e-05, -9.5497e-05,  ..., -9.5497e-05,
         -9.5497e-05, -9.5497e-05],
        [ 1.4033e-04,  1.4033e-04,  1.4033e-04,  ...,  1.4033e-04,
          1.4033e-04,  1.4033e-04]], device='cuda:0')
fc1.bias tensor([-6.2469e-04, -1.9486e-03, -5.6789e-04, -2.0348e-03,  2.1195e-03,
        -5.4459e-04,  3.9497e-04,  4.8543e-04, -5.3374e-04,  5.1693e-04,
        -8.8247e-04, -4.2573e-04, -2.1472e-04, -9.3842e-08, -6.5122e-06,
         9.5652e-04, -1.0460e-04, -6.7095e-04, -2.7274e-04, -1.3070e-03,
         1.3317e-03, -9.0594e-05, -5.6

HBox(children=(FloatProgress(value=0.0, max=1875.0), HTML(value='')))


fc1.weight tensor([[-4.8161e-05, -4.8161e-05, -4.8161e-05,  ..., -4.8161e-05,
         -4.8161e-05, -4.8161e-05],
        [ 2.6660e-07,  2.6660e-07,  2.6660e-07,  ...,  2.6660e-07,
          2.6660e-07,  2.6660e-07],
        [ 1.6803e-05,  1.6803e-05,  1.6803e-05,  ...,  1.6803e-05,
          1.6803e-05,  1.6803e-05],
        ...,
        [-1.0191e-06, -1.0191e-06, -1.0191e-06,  ..., -1.0191e-06,
         -1.0191e-06, -1.0191e-06],
        [-7.3272e-08, -7.3272e-08, -7.3272e-08,  ..., -7.3272e-08,
         -7.3272e-08, -7.3272e-08],
        [ 3.3874e-05,  3.3874e-05,  3.3874e-05,  ...,  3.3874e-05,
          3.3874e-05,  3.3874e-05]], device='cuda:0')
fc1.bias tensor([ 1.1353e-04, -6.2846e-07, -3.9609e-05,  1.4415e-05,  1.8541e-06,
         1.4720e-05, -3.7321e-08,  1.3545e-04, -1.6656e-06, -3.3498e-06,
         1.1055e-04, -5.5991e-05, -1.8144e-04,  2.3887e-05,  4.7557e-07,
        -6.4291e-05, -3.6736e-07, -7.7173e-07, -3.4475e-05, -1.5697e-04,
         3.5668e-05, -1.8890e-04,  5.2

In [12]:
    # Predicting

    correct, total = 0 , 0
    predictions = []
    model.eval()

    for i, data_set in enumerate(test_loader):
        data, labels = data_set
        data, labels = data.to(device), labels.to(device)
        data = data.view(-1,28*28)
        output = model(data)
        _, predicted = torch.max(output.data,1)
        predictions.append(output)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    print('The testing set accuracy of the network is: %d %%' % (100 * correct / total))


The testing set accuracy of the network is: 88 %


In [14]:
model.fc1.weight.grad

tensor([[ 2.9565e-05,  2.9565e-05,  2.9565e-05,  ...,  2.9565e-05,
          2.9565e-05,  2.9565e-05],
        [-3.5774e-08, -3.5774e-08, -3.5774e-08,  ..., -3.5774e-08,
         -3.5774e-08, -3.5774e-08],
        [ 3.2547e-04,  3.2547e-04,  3.2547e-04,  ...,  3.2547e-04,
          3.2547e-04,  3.2547e-04],
        ...,
        [ 2.9608e-05,  2.9608e-05,  2.9608e-05,  ...,  2.9608e-05,
          2.9608e-05,  2.9608e-05],
        [ 1.8915e-03,  1.8915e-03,  1.8915e-03,  ...,  1.8915e-03,
          1.8915e-03,  1.8915e-03],
        [ 7.5613e-04,  7.5613e-04,  7.5613e-04,  ...,  7.5613e-04,
          7.5613e-04,  7.5613e-04]], device='cuda:0')

In [19]:
model.fc1.weight

Parameter containing:
tensor([[ 0.0390, -0.0153, -0.0016,  ...,  0.0038,  0.0425, -0.0112],
        [-0.0220, -0.0057,  0.0122,  ..., -0.0360, -0.0294,  0.0252],
        [ 0.0004, -0.0226,  0.0221,  ..., -0.0065,  0.0014,  0.0297],
        ...,
        [-0.0137,  0.0320,  0.0352,  ...,  0.0339,  0.0325, -0.0029],
        [-0.0206,  0.0025,  0.0165,  ...,  0.0115, -0.0252,  0.0230],
        [-0.0248, -0.0038,  0.0438,  ...,  0.0026,  0.0339, -0.0087]],
       device='cuda:0', requires_grad=True)