In [2]:
import torch
import torchvision
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms as transforms
import time
from tqdm import tqdm_notebook as tqdm

In [3]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

device(type='cuda')

In [4]:
# Prepare dataset
transform = transforms.Compose([transforms.ToTensor(),transforms.Normalize(mean=0.1307,std=0.3081)])

train_set = torchvision.datasets.MNIST(root = "./dataset",train=True,download=False,transform=transform)  #Make download == True to download dataset
test_set = torchvision.datasets.MNIST(root ="./dataset",train=False,download=False,transform=transform)

train_loader = torch.utils.data.DataLoader(train_set,batch_size=32,shuffle=True,num_workers=4)
test_loader = torch.utils.data.DataLoader(test_set,batch_size=32,shuffle=True,num_workers=4)

In [5]:
# Building neural net
class Net(nn.Module):
    def __init__(self):
        super(Net,self).__init__()
        self.fc1 = nn.Linear(28*28*1,500)
        self.fc2 = nn.Linear(500,10)
    
    def forward(self,x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        return x

In [11]:
# Training nn

model = Net()
model.to(device) # transfer to GPU if available
optimizer = torch.optim.Adam(model.parameters(),lr=3e-4)
calculate_loss = nn.CrossEntropyLoss()
total_time = 0

num_epochs = 10

since = time.time()
for epoch in range(num_epochs):
    print('Epoch {}/{}'.format(epoch, num_epochs - 1))
    print('-' * 10)
    #scheduler.step()
    model.train()
    running_loss = 0.0
    tk0 = tqdm(train_loader, total=int(len(train_loader)))
    counter = 0

    for batch_idx, data_set in enumerate(tk0):
        
        # Training Code start

        optimizer.zero_grad()  # Clear the old gradients from last step  
        data , labels = data_set
        data = data.to(device)
        labels = labels.to(device)
        data = data.view(-1,28*28)
        output = model(data)   #forward
        output.to(device)
        loss = calculate_loss(output,labels) #calculate loss
        loss.backward()
        optimizer.step()

        # Training code ends

        running_loss += loss.item() * data.size(0)
        counter += 1
        tk0.set_postfix(loss=(running_loss / (counter * train_loader.batch_size)))
    
    # Print weights and bias for each layer
    for name, param in model.named_parameters():
        if param.requires_grad:
            print (name, param.data)

        
    #epoch_loss = running_loss / len(train_loader)
    #print('Training Loss: {:.4f}'.format(epoch_loss))

time_elapsed = time.time() - since
print('\nTraining completed in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
        

#print('Total time to train model :{} s'.format(total_time))

Epoch 0/9
----------


HBox(children=(FloatProgress(value=0.0, max=1875.0), HTML(value='')))


fc1.weight tensor([[-0.0091, -0.0151,  0.0198,  ...,  0.0185,  0.0010, -0.0019],
        [-0.0038, -0.0332, -0.0108,  ...,  0.0184, -0.0325,  0.0219],
        [ 0.0262, -0.0309, -0.0258,  ..., -0.0260, -0.0314, -0.0144],
        ...,
        [-0.0149, -0.0233, -0.0111,  ..., -0.0156, -0.0171, -0.0247],
        [ 0.0189, -0.0248,  0.0365,  ..., -0.0146,  0.0037, -0.0126],
        [ 0.0043,  0.0113,  0.0083,  ...,  0.0349, -0.0243,  0.0106]],
       device='cuda:0')
fc1.bias tensor([ 1.6260e-02,  5.5940e-03,  7.2839e-03,  1.6641e-02,  6.6383e-03,
         2.8665e-02,  2.8198e-02, -1.2833e-02, -8.6389e-03,  1.3364e-02,
         4.0898e-02,  2.7123e-02,  6.5848e-04, -1.1744e-02, -2.0342e-02,
        -3.1098e-02, -5.8353e-03, -2.6237e-02,  3.3176e-02,  1.5601e-02,
        -1.1692e-02,  2.5335e-02,  2.9386e-02,  2.2417e-02,  4.9747e-05,
        -6.2757e-03,  1.1632e-03, -2.5560e-03, -2.6187e-02,  3.3834e-02,
         3.3315e-03,  3.3649e-02,  3.2316e-02,  6.3106e-03, -6.1509e-03,
        -3

HBox(children=(FloatProgress(value=0.0, max=1875.0), HTML(value='')))


fc1.weight tensor([[-0.0099, -0.0159,  0.0190,  ...,  0.0177,  0.0002, -0.0027],
        [-0.0025, -0.0320, -0.0096,  ...,  0.0196, -0.0313,  0.0232],
        [ 0.0252, -0.0319, -0.0268,  ..., -0.0270, -0.0324, -0.0153],
        ...,
        [-0.0126, -0.0209, -0.0087,  ..., -0.0132, -0.0147, -0.0224],
        [ 0.0127, -0.0310,  0.0302,  ..., -0.0209, -0.0026, -0.0189],
        [ 0.0032,  0.0102,  0.0071,  ...,  0.0337, -0.0255,  0.0095]],
       device='cuda:0')
fc1.bias tensor([ 1.7068e-02,  4.3604e-03,  8.2566e-03,  1.5083e-02,  8.9387e-03,
         3.0168e-02,  2.8881e-02, -1.6197e-02, -9.4059e-03,  1.5569e-02,
         3.8433e-02,  2.5443e-02, -1.0968e-03, -1.4424e-02, -2.6378e-02,
        -3.1930e-02, -6.3003e-03, -2.6485e-02,  3.3547e-02,  1.3374e-02,
        -1.6128e-02,  2.0096e-02,  2.7798e-02,  2.5301e-02, -3.8851e-03,
        -7.2678e-03, -4.0013e-03, -2.9917e-03, -2.7521e-02,  2.7835e-02,
         4.8419e-03,  2.8979e-02,  2.6300e-02,  3.0160e-03, -6.4832e-03,
        -3

HBox(children=(FloatProgress(value=0.0, max=1875.0), HTML(value='')))


fc1.weight tensor([[-0.0080, -0.0140,  0.0209,  ...,  0.0196,  0.0022, -0.0008],
        [-0.0051, -0.0345, -0.0122,  ...,  0.0171, -0.0339,  0.0206],
        [ 0.0280, -0.0291, -0.0240,  ..., -0.0242, -0.0296, -0.0125],
        ...,
        [-0.0098, -0.0181, -0.0059,  ..., -0.0104, -0.0119, -0.0196],
        [ 0.0158, -0.0280,  0.0333,  ..., -0.0178,  0.0005, -0.0158],
        [ 0.0026,  0.0096,  0.0065,  ...,  0.0332, -0.0261,  0.0089]],
       device='cuda:0')
fc1.bias tensor([ 1.5140e-02,  6.9334e-03,  5.4631e-03,  1.2056e-02,  1.9829e-03,
         3.1881e-02,  2.4238e-02, -2.0301e-02, -1.0464e-02,  1.6248e-02,
         3.9616e-02,  2.0426e-02, -4.1414e-04, -1.3861e-02, -2.8456e-02,
        -3.1876e-02, -8.3932e-03, -2.8451e-02,  3.1900e-02,  1.1063e-02,
        -1.8563e-02,  2.7362e-02,  3.0088e-02,  2.0184e-02, -3.8725e-03,
        -8.4462e-03, -4.4835e-03, -2.3105e-04, -2.6718e-02,  3.0923e-02,
         4.8837e-03,  2.5210e-02,  2.8415e-02, -2.5133e-03, -4.8922e-03,
        -3

HBox(children=(FloatProgress(value=0.0, max=1875.0), HTML(value='')))


fc1.weight tensor([[-0.0077, -0.0137,  0.0212,  ...,  0.0199,  0.0024, -0.0005],
        [-0.0044, -0.0338, -0.0114,  ...,  0.0178, -0.0331,  0.0213],
        [ 0.0297, -0.0274, -0.0223,  ..., -0.0225, -0.0279, -0.0108],
        ...,
        [-0.0055, -0.0139, -0.0016,  ..., -0.0062, -0.0076, -0.0153],
        [ 0.0142, -0.0295,  0.0318,  ..., -0.0193, -0.0010, -0.0173],
        [ 0.0029,  0.0099,  0.0069,  ...,  0.0335, -0.0257,  0.0092]],
       device='cuda:0')
fc1.bias tensor([ 1.4858e-02,  6.1911e-03,  3.7774e-03,  1.3557e-02,  6.4345e-03,
         3.3110e-02,  2.6472e-02, -2.2958e-02, -1.0997e-02,  1.5046e-02,
         3.8194e-02,  1.9249e-02, -4.1781e-03, -1.6543e-02, -2.6271e-02,
        -3.2259e-02, -1.3710e-02, -3.0367e-02,  2.9548e-02,  1.1829e-02,
        -1.6689e-02,  2.5741e-02,  2.7081e-02,  2.1533e-02, -4.5354e-03,
        -5.9824e-03, -6.4082e-03, -3.1479e-03, -2.7629e-02,  3.0585e-02,
        -1.7322e-03,  2.3517e-02,  2.6324e-02, -3.3105e-03, -3.3966e-03,
        -3

HBox(children=(FloatProgress(value=0.0, max=1875.0), HTML(value='')))


fc1.weight tensor([[-0.0005, -0.0064,  0.0284,  ...,  0.0271,  0.0097,  0.0067],
        [-0.0031, -0.0326, -0.0102,  ...,  0.0190, -0.0319,  0.0226],
        [ 0.0304, -0.0267, -0.0216,  ..., -0.0218, -0.0272, -0.0101],
        ...,
        [-0.0068, -0.0152, -0.0029,  ..., -0.0075, -0.0089, -0.0166],
        [ 0.0170, -0.0267,  0.0346,  ..., -0.0165,  0.0018, -0.0145],
        [ 0.0019,  0.0089,  0.0059,  ...,  0.0325, -0.0267,  0.0082]],
       device='cuda:0')
fc1.bias tensor([ 7.6190e-03,  4.9530e-03,  3.0524e-03,  1.4541e-02, -3.0145e-03,
         3.3945e-02,  2.6480e-02, -2.3781e-02, -1.2935e-02,  1.2592e-02,
         3.7908e-02,  1.6710e-02, -9.2043e-04, -1.5073e-02, -3.4653e-02,
        -3.1344e-02, -7.4681e-03, -2.9103e-02,  3.0892e-02,  8.1699e-03,
        -1.8718e-02,  2.3400e-02,  2.7361e-02,  2.2684e-02, -2.5223e-03,
        -1.1207e-02, -4.1337e-03, -7.4645e-03, -2.8899e-02,  2.9061e-02,
        -2.5310e-03,  2.3688e-02,  2.2917e-02, -1.5014e-03, -3.0207e-03,
        -3

HBox(children=(FloatProgress(value=0.0, max=1875.0), HTML(value='')))


fc1.weight tensor([[-0.0010, -0.0070,  0.0279,  ...,  0.0266,  0.0091,  0.0062],
        [-0.0028, -0.0322, -0.0098,  ...,  0.0194, -0.0316,  0.0229],
        [ 0.0280, -0.0290, -0.0240,  ..., -0.0241, -0.0296, -0.0125],
        ...,
        [-0.0033, -0.0117,  0.0005,  ..., -0.0040, -0.0055, -0.0131],
        [ 0.0158, -0.0279,  0.0333,  ..., -0.0178,  0.0005, -0.0158],
        [ 0.0059,  0.0129,  0.0099,  ...,  0.0365, -0.0227,  0.0122]],
       device='cuda:0')
fc1.bias tensor([ 8.1523e-03,  4.6153e-03,  5.4411e-03,  1.0961e-02, -6.7379e-03,
         2.8521e-02,  1.9128e-02, -2.6985e-02, -1.4837e-02,  1.3900e-02,
         3.7218e-02,  1.8139e-02, -3.7891e-05, -1.7607e-02, -3.0519e-02,
        -3.1919e-02, -9.9761e-03, -3.0322e-02,  2.4428e-02,  5.8718e-03,
        -2.5601e-02,  2.4001e-02,  2.4136e-02,  1.9269e-02, -2.4787e-03,
        -6.6059e-03, -7.4664e-03, -1.8942e-03, -2.8608e-02,  2.6161e-02,
        -1.5276e-03,  1.8096e-02,  2.3104e-02, -5.5067e-03, -3.9179e-03,
        -3

HBox(children=(FloatProgress(value=0.0, max=1875.0), HTML(value='')))


fc1.weight tensor([[-0.0044, -0.0104,  0.0245,  ...,  0.0232,  0.0057,  0.0028],
        [-0.0042, -0.0336, -0.0112,  ...,  0.0180, -0.0329,  0.0216],
        [ 0.0275, -0.0295, -0.0244,  ..., -0.0246, -0.0300, -0.0130],
        ...,
        [-0.0015, -0.0098,  0.0024,  ..., -0.0022, -0.0036, -0.0113],
        [ 0.0151, -0.0286,  0.0326,  ..., -0.0185, -0.0002, -0.0165],
        [ 0.0054,  0.0124,  0.0093,  ...,  0.0359, -0.0233,  0.0117]],
       device='cuda:0')
fc1.bias tensor([ 1.1551e-02,  5.9715e-03,  5.8922e-03,  9.2880e-03, -2.7313e-03,
         3.3258e-02,  2.1192e-02, -2.7008e-02, -1.1848e-02,  1.0630e-02,
         3.6840e-02,  1.9237e-02,  7.2489e-04, -1.6778e-02, -3.3899e-02,
        -2.9827e-02, -7.9792e-03, -2.6273e-02,  2.6830e-02,  1.0701e-02,
        -2.4450e-02,  2.2187e-02,  2.4121e-02,  2.4370e-02, -2.5158e-03,
        -8.9128e-03, -8.5593e-03, -7.2613e-03, -2.6566e-02,  2.5761e-02,
        -6.6036e-03,  2.5413e-02,  2.4686e-02, -1.1054e-03,  2.1095e-03,
        -3

HBox(children=(FloatProgress(value=0.0, max=1875.0), HTML(value='')))


fc1.weight tensor([[-0.0009, -0.0068,  0.0281,  ...,  0.0268,  0.0093,  0.0063],
        [-0.0020, -0.0314, -0.0090,  ...,  0.0202, -0.0307,  0.0237],
        [ 0.0318, -0.0253, -0.0202,  ..., -0.0204, -0.0258, -0.0087],
        ...,
        [-0.0043, -0.0126, -0.0004,  ..., -0.0049, -0.0064, -0.0141],
        [ 0.0115, -0.0322,  0.0291,  ..., -0.0220, -0.0037, -0.0200],
        [ 0.0057,  0.0127,  0.0097,  ...,  0.0363, -0.0230,  0.0120]],
       device='cuda:0')
fc1.bias tensor([ 7.9922e-03,  3.8010e-03,  1.6530e-03,  1.9137e-02, -3.0897e-03,
         3.4693e-02,  1.8491e-02, -3.1466e-02, -1.6942e-02,  1.4663e-02,
         3.9303e-02,  1.9721e-02, -2.3515e-03, -2.1190e-02, -3.0524e-02,
        -3.1871e-02, -8.0182e-03, -2.9512e-02,  2.9918e-02,  7.7367e-03,
        -2.2778e-02,  1.9485e-02,  2.5752e-02,  1.8227e-02, -4.0400e-03,
        -8.0565e-03, -9.6231e-03, -5.7952e-03, -2.7286e-02,  2.7622e-02,
        -5.3907e-03,  1.5152e-02,  2.9370e-02, -7.8791e-03, -1.6341e-03,
        -3

HBox(children=(FloatProgress(value=0.0, max=1875.0), HTML(value='')))


fc1.weight tensor([[-3.7134e-03, -9.6657e-03,  2.5203e-02,  ...,  2.3907e-02,
          6.4576e-03,  3.4797e-03],
        [-6.0456e-03, -3.5478e-02, -1.3088e-02,  ...,  1.6135e-02,
         -3.4797e-02,  1.9675e-02],
        [ 3.0613e-02, -2.6434e-02, -2.1341e-02,  ..., -2.1535e-02,
         -2.6981e-02, -9.8985e-03],
        ...,
        [ 2.4764e-03, -5.8726e-03,  6.3560e-03,  ...,  1.8046e-03,
          3.5082e-04, -7.3297e-03],
        [ 1.5295e-02, -2.8418e-02,  3.2832e-02,  ..., -1.8275e-02,
          3.4734e-05, -1.6282e-02],
        [-2.0888e-04,  6.7912e-03,  3.7782e-03,  ...,  3.0384e-02,
         -2.8844e-02,  6.1045e-03]], device='cuda:0')
fc1.bias tensor([ 1.0842e-02,  7.8618e-03,  2.8278e-03,  8.2408e-03, -2.9578e-03,
         3.4325e-02,  1.7727e-02, -2.6921e-02, -9.0687e-03,  1.2472e-02,
         3.6582e-02,  1.6956e-02,  2.4248e-04, -1.8819e-02, -2.8970e-02,
        -2.9416e-02, -1.1908e-02, -3.2305e-02,  2.5240e-02,  8.0194e-03,
        -2.4887e-02,  2.1450e-02,  2.3

HBox(children=(FloatProgress(value=0.0, max=1875.0), HTML(value='')))


fc1.weight tensor([[ 0.0044, -0.0016,  0.0333,  ...,  0.0320,  0.0146,  0.0116],
        [ 0.0007, -0.0288, -0.0064,  ...,  0.0228, -0.0281,  0.0264],
        [ 0.0352, -0.0219, -0.0168,  ..., -0.0170, -0.0224, -0.0053],
        ...,
        [ 0.0005, -0.0078,  0.0044,  ..., -0.0001, -0.0016, -0.0093],
        [ 0.0174, -0.0263,  0.0349,  ..., -0.0162,  0.0021, -0.0142],
        [ 0.0041,  0.0111,  0.0081,  ...,  0.0347, -0.0245,  0.0104]],
       device='cuda:0')
fc1.bias tensor([ 2.7443e-03,  1.1560e-03, -1.7326e-03,  1.2120e-02, -5.9690e-03,
         3.2270e-02,  1.5736e-02, -3.0099e-02, -1.4837e-02,  1.6108e-02,
         3.3527e-02,  1.4260e-02, -2.4693e-03, -1.8803e-02, -3.3134e-02,
        -3.4206e-02, -9.9235e-03, -3.2191e-02,  3.0348e-02,  1.7142e-03,
        -2.5913e-02,  1.7432e-02,  2.6444e-02,  2.5531e-02, -3.8984e-03,
        -5.9371e-03, -1.0367e-02, -8.3641e-03, -2.5365e-02,  2.4733e-02,
        -8.6343e-03,  1.8855e-02,  2.3998e-02, -6.7321e-03, -2.4873e-03,
        -3

In [6]:
    # Predicting

    correct, total = 0 , 0
    predictions = []
    model.eval()

    for i, data_set in enumerate(test_loader):
        data, labels = data_set
        data, labels = data.to(device), labels.to(device)
        data = data.view(-1,28*28)
        output = model(data)
        _, predicted = torch.max(output.data,1)
        predictions.append(output)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    print('The testing set accuracy of the network is: %d %%' % (100 * correct / total))


The testing set accuracy of the network is: 79 %


Parameter containing:
tensor([[ 0.0139,  0.0130,  0.0267,  ...,  0.0119, -0.0235,  0.0005],
        [ 0.0185, -0.0026, -0.0171,  ...,  0.0332,  0.0187, -0.0077],
        [ 0.0230,  0.0112, -0.0094,  ...,  0.0278, -0.0123, -0.0041],
        ...,
        [ 0.0246,  0.0044,  0.0022,  ...,  0.0314,  0.0008,  0.0101],
        [-0.0035,  0.0203, -0.0236,  ...,  0.0303, -0.0384, -0.0062],
        [ 0.0084, -0.0024,  0.0443,  ...,  0.0299,  0.0022,  0.0170]],
       device='cuda:0', requires_grad=True)

Parameter containing:
tensor([[-0.0074,  0.0015, -0.0349,  ...,  0.0091, -0.0098, -0.0414],
        [ 0.0562, -0.0926, -0.1495,  ..., -0.1102,  0.0838,  0.0873],
        [ 0.0033,  0.0614,  0.0191,  ...,  0.0430, -0.0617,  0.0922],
        ...,
        [ 0.0571, -0.0796,  0.0178,  ...,  0.0715,  0.1202, -0.2205],
        [ 0.1078,  0.0442, -0.1808,  ..., -0.1113, -0.1940, -0.1944],
        [ 0.0645,  0.0373, -0.0854,  ...,  0.0446, -0.0196, -0.0367]],
       device='cuda:0', requires_grad=True)

[Parameter containing:
 tensor([[ 0.0139,  0.0130,  0.0267,  ...,  0.0119, -0.0235,  0.0005],
         [ 0.0185, -0.0026, -0.0171,  ...,  0.0332,  0.0187, -0.0077],
         [ 0.0230,  0.0112, -0.0094,  ...,  0.0278, -0.0123, -0.0041],
         ...,
         [ 0.0246,  0.0044,  0.0022,  ...,  0.0314,  0.0008,  0.0101],
         [-0.0035,  0.0203, -0.0236,  ...,  0.0303, -0.0384, -0.0062],
         [ 0.0084, -0.0024,  0.0443,  ...,  0.0299,  0.0022,  0.0170]],
        device='cuda:0', requires_grad=True),
 Parameter containing:
 tensor([-0.0347,  0.0123,  0.0032,  0.0238,  0.0130, -0.0082, -0.0160, -0.0066,
          0.0072,  0.0017, -0.0017, -0.0059,  0.0274, -0.0229, -0.0189, -0.0360,
         -0.0423,  0.0253, -0.0012,  0.0126,  0.0067, -0.0012, -0.0524, -0.0026,
         -0.0407, -0.0101, -0.0421,  0.0002,  0.0152, -0.0254, -0.0372,  0.0208,
          0.0177, -0.0162, -0.0018, -0.0044, -0.0418,  0.0076,  0.0153, -0.0488,
          0.0022,  0.0245,  0.0243,  0.0051,  0.0160,  0.0101,

fc1.weight tensor([[-0.0253, -0.0319, -0.0019,  ..., -0.0356, -0.0143,  0.0235],
        [ 0.0323, -0.0257, -0.0005,  ...,  0.0330,  0.0224, -0.0136],
        [-0.0300, -0.0284, -0.0300,  ...,  0.0241,  0.0175,  0.0123],
        ...,
        [ 0.0272, -0.0117,  0.0042,  ...,  0.0397,  0.0152,  0.0197],
        [ 0.0292,  0.0016, -0.0048,  ..., -0.0336, -0.0067,  0.0132],
        [ 0.0096, -0.0193,  0.0057,  ...,  0.0237, -0.0202, -0.0035]],
       device='cuda:0')
fc1.bias tensor([ 4.2228e-03,  1.0382e-02, -9.1322e-04, -2.4456e-02, -3.3704e-02,
        -5.1710e-03, -3.6399e-02, -3.6232e-02,  2.7458e-02,  1.5190e-02,
        -1.8172e-02, -1.3266e-03,  2.3059e-02,  3.8757e-02,  1.2302e-02,
        -7.1338e-03,  1.1063e-02,  2.2396e-03, -2.7657e-02, -3.0924e-02,
        -1.5945e-02, -4.5612e-02,  1.6882e-02, -3.5450e-02,  2.4779e-03,
        -4.8129e-03, -8.8973e-03,  8.5581e-03, -2.6795e-02, -1.6801e-02,
        -2.5413e-02, -1.3415e-02, -2.9976e-02, -2.0458e-02, -2.4543e-02,
        -2.