In [1]:
import torch
import torchvision 
import torch.nn as nn
import torchvision.transforms as t

In [2]:
## Defining the model blueprint
net = nn.Sequential(nn.Linear(3*32*32,1000), nn.ReLU(), nn.Linear(1000,10))
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(net.parameters(), lr = 0.01, momentum = 0.9)


In [3]:
## Loading the Data
to_tensor = t.ToTensor()
normalize = t.Normalize((0.5,0.5,0.5),(0.5,0.5,0.5))
flatten = t.Lambda(lambda x:x.view(-1))

## Defining and aligning the transformation

transform_list = t.Compose([to_tensor, normalize, flatten])

## Downloading the data

train_set = torchvision.datasets.CIFAR10(root='.', train=True, transform = transform_list, download = True)
test_set = torchvision.datasets.CIFAR10(root='.', train = False, transform = transform_list, download = True)
# train_dir = ''
# test_dir  = ''


## Creating DataLoader
train_loader = torch.utils.data.DataLoader(train_set, batch_size = 64)
test_loader = torch.utils.data.DataLoader(test_set, batch_size = 64)





Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:04<00:00, 35048141.70it/s]


Extracting ./cifar-10-python.tar.gz to .
Files already downloaded and verified


In [4]:
## Training time
net.train()

## train loop 
for epoch in range(3):
    train_correct , train_loss= 0,0
    print(f'Epoch {epoch}')
    
    ## Loop per epoch
    for i, (batch, targets) in enumerate(train_loader):
        
        output = net(batch)
        loss =criterion(output, targets)
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        pred = output.max(1, keepdim=True)[1]
        train_correct += pred.eq(targets.view_as(pred)).sum().item()
        train_loss += loss
        
        
        if i % 100==10:
            print('Train Loss {:.4f}, Train Accuracy {:.2f}%'.format(
            train_loss / (i*64), 100 * train_correct/(i*64)))



print("End of the training..")

## Testing Time
test_correct = 0
net.eval()

## loop over whole test dataset
for i , (batch, targets) in enumerate(test_loader):
    
    output = net(batch)
    pred = output.max(1, keepdim=True)[1]
    test_correct += pred.eq(targets.view_as(pred)).sum().item()
    
print('End of the Testing Test Accuracy: {:.2f}%'.format(
100*test_correct/(len(test_loader) * 64)))
        
        
    

Epoch 0
Train Loss 0.0380, Train Accuracy 18.91%
Train Loss 0.0298, Train Accuracy 32.67%
Train Loss 0.0283, Train Accuracy 36.36%
Train Loss 0.0275, Train Accuracy 38.18%
Train Loss 0.0268, Train Accuracy 39.77%
Train Loss 0.0264, Train Accuracy 40.59%
Train Loss 0.0261, Train Accuracy 41.27%
Train Loss 0.0259, Train Accuracy 41.86%
Epoch 1
Train Loss 0.0252, Train Accuracy 54.22%
Train Loss 0.0231, Train Accuracy 49.96%
Train Loss 0.0230, Train Accuracy 49.52%
Train Loss 0.0229, Train Accuracy 49.66%
Train Loss 0.0226, Train Accuracy 49.91%
Train Loss 0.0225, Train Accuracy 49.95%
Train Loss 0.0225, Train Accuracy 49.96%
Train Loss 0.0224, Train Accuracy 50.13%
Epoch 2
Train Loss 0.0230, Train Accuracy 60.00%
Train Loss 0.0214, Train Accuracy 54.36%
Train Loss 0.0213, Train Accuracy 53.50%
Train Loss 0.0212, Train Accuracy 53.66%
Train Loss 0.0210, Train Accuracy 53.89%
Train Loss 0.0209, Train Accuracy 53.91%
Train Loss 0.0208, Train Accuracy 53.91%
Train Loss 0.0208, Train Accuracy

In [5]:
## Training time on GPU...
## Device Agnostic
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
device

device(type='cuda', index=0)

In [6]:
## creating Training Routine
def train(model, device, train_loader, criterion, optimizer, epoch):
    model.train()
    print('Epoch {}'.format(epoch))
    train_correct , train_loss, train_l = 0, 0, 0
    
    ## loop per epoch
    
    for i, (batch, targets) in enumerate(train_loader):
        bs = batch.shape[0]
        batch = batch.to(device)
        targets = targets.to(device)
        
        output = model(batch)
        loss = criterion(output, targets)
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        pred = output.max(1, keepdim=True)[1]
        train_correct += pred.eq(targets.view_as(pred)).sum().item()
        train_loss += loss.item()
        train_l += bs
        
        if i%100==10:
            print('Train Loss {:.4f}, Train Accuracy {:.2f}%'.format(
            train_loss/train_l, 100*train_correct/train_l))
    

In [7]:
device = 'cpu'
net = nn.Sequential(nn.Linear(3*32*32, 1000), nn.ReLU(), nn.Linear(1000, 10))
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(net.parameters(), lr = 0.01, momentum=0.9)
net = net.to(device)

In [8]:
%%time
train(net, device, train_loader, criterion, optimizer,1)

Epoch 1
Train Loss 0.0347, Train Accuracy 17.76%
Train Loss 0.0295, Train Accuracy 32.98%
Train Loss 0.0282, Train Accuracy 36.58%
Train Loss 0.0274, Train Accuracy 38.26%
Train Loss 0.0267, Train Accuracy 39.91%
Train Loss 0.0263, Train Accuracy 40.66%
Train Loss 0.0261, Train Accuracy 41.34%
Train Loss 0.0258, Train Accuracy 41.91%
CPU times: user 36.7 s, sys: 78.8 ms, total: 36.8 s
Wall time: 19 s


In [9]:
## Lets also write the test Routine

def test(model, device, test_loader):
    model.eval()
    test_correct, test_l = 0, 0
    
    ## Loop per epoch
    for i, (batch, targets) in enumerate(test_loader):
        bs = batch.shape[0]
        batch = batch.to(device)
        targets = targets.to(device)
        output = model(batch)
        pred = output.max(1, keepdim=True)[1]
        test_correct += pred.eq(targets.view_as(pred)).sum().item()
        test_l += bs
    
    print('End of the Testing. Test Accuracy {:.2f}%'.format(
    100 * test_correct / test_l))
    

In [10]:
## Time to utilise the GPU 
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
net = nn.Sequential(nn.Linear(3*32*32, 1000), nn.ReLU(), nn.Linear(1000,10))
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(net.parameters(), lr = 0.01, momentum=0.9)
net = net.to(device)

for i in range(3):
    train(net, device, train_loader, criterion, optimizer, i)
    test(net, device, test_loader)



Epoch 0
Train Loss 0.0347, Train Accuracy 19.03%
Train Loss 0.0295, Train Accuracy 33.09%
Train Loss 0.0282, Train Accuracy 36.57%
Train Loss 0.0274, Train Accuracy 38.36%
Train Loss 0.0267, Train Accuracy 39.89%
Train Loss 0.0264, Train Accuracy 40.67%
Train Loss 0.0261, Train Accuracy 41.40%
Train Loss 0.0258, Train Accuracy 41.98%
End of the Testing. Test Accuracy 48.05%
Epoch 1
Train Loss 0.0230, Train Accuracy 47.73%
Train Loss 0.0230, Train Accuracy 49.09%
Train Loss 0.0230, Train Accuracy 48.93%
Train Loss 0.0228, Train Accuracy 49.33%
Train Loss 0.0226, Train Accuracy 49.54%
Train Loss 0.0225, Train Accuracy 49.69%
Train Loss 0.0225, Train Accuracy 49.80%
Train Loss 0.0224, Train Accuracy 49.97%
End of the Testing. Test Accuracy 50.33%
Epoch 2
Train Loss 0.0210, Train Accuracy 54.12%
Train Loss 0.0212, Train Accuracy 53.31%
Train Loss 0.0212, Train Accuracy 53.10%
Train Loss 0.0211, Train Accuracy 53.34%
Train Loss 0.0209, Train Accuracy 53.64%
Train Loss 0.0209, Train Accuracy

In [11]:
## Lets do that again but this time with no RELU() !!!!
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
net = nn.Sequential(nn.Linear(3*32*32, 1000), nn.Linear(1000,10))
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(net.parameters(), lr = 0.01, momentum=0.9)
net = net.to(device)

for i in range(3):
    train(net, device, train_loader, criterion, optimizer, i)
    test(net, device, test_loader)

Epoch 0
Train Loss 0.0335, Train Accuracy 22.73%
Train Loss 0.0301, Train Accuracy 32.62%
Train Loss 0.0296, Train Accuracy 34.26%
Train Loss 0.0292, Train Accuracy 34.80%
Train Loss 0.0289, Train Accuracy 35.83%
Train Loss 0.0287, Train Accuracy 36.04%
Train Loss 0.0286, Train Accuracy 36.31%
Train Loss 0.0285, Train Accuracy 36.39%
End of the Testing. Test Accuracy 38.80%
Epoch 1
Train Loss 0.0268, Train Accuracy 39.06%
Train Loss 0.0272, Train Accuracy 39.43%
Train Loss 0.0275, Train Accuracy 38.96%
Train Loss 0.0275, Train Accuracy 38.80%
Train Loss 0.0274, Train Accuracy 39.21%
Train Loss 0.0274, Train Accuracy 39.10%
Train Loss 0.0274, Train Accuracy 39.21%
Train Loss 0.0274, Train Accuracy 39.07%
End of the Testing. Test Accuracy 39.12%
Epoch 2
Train Loss 0.0264, Train Accuracy 41.19%
Train Loss 0.0269, Train Accuracy 40.82%
Train Loss 0.0272, Train Accuracy 39.91%
Train Loss 0.0272, Train Accuracy 39.82%
Train Loss 0.0271, Train Accuracy 40.24%
Train Loss 0.0271, Train Accuracy

In [12]:
## Seems like Accuracy decreased !!!
## Lets do it again with relu but with one epoch
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
net = nn.Sequential(nn.Linear(3*32*32, 1000), nn.ReLU(), nn.Linear(1000,10))
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(net.parameters(), lr = 0.01, momentum=0.9)
net = net.to(device)

for i in range(1):
    train(net, device, train_loader, criterion, optimizer, i)
    test(net, device, test_loader)

Epoch 0
Train Loss 0.0346, Train Accuracy 18.18%
Train Loss 0.0294, Train Accuracy 33.02%
Train Loss 0.0281, Train Accuracy 36.52%
Train Loss 0.0274, Train Accuracy 38.25%
Train Loss 0.0267, Train Accuracy 39.85%
Train Loss 0.0263, Train Accuracy 40.62%
Train Loss 0.0260, Train Accuracy 41.37%
Train Loss 0.0258, Train Accuracy 42.02%
End of the Testing. Test Accuracy 47.76%


In [13]:
## Lets do something different and add more layers to see it effects!!
## And run it through 5 epochs
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
net = nn.Sequential(nn.Linear(3*32*32, 1000), nn.ReLU(),
                    nn.Linear(1000,1000),
                    nn.ReLU(),
                    nn.Linear(1000,1000),
                    nn.ReLU()
                    ,nn.Linear(1000,10))
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(net.parameters(), lr = 0.01, momentum=0.9)
net = net.to(device)

for i in range(5):
    train(net, device, train_loader, criterion, optimizer, i)
    test(net, device, test_loader)

Epoch 0
Train Loss 0.0360, Train Accuracy 10.51%
Train Loss 0.0337, Train Accuracy 22.40%
Train Loss 0.0316, Train Accuracy 27.21%
Train Loss 0.0302, Train Accuracy 30.40%
Train Loss 0.0291, Train Accuracy 33.22%
Train Loss 0.0284, Train Accuracy 34.79%
Train Loss 0.0279, Train Accuracy 36.08%
Train Loss 0.0274, Train Accuracy 37.20%
End of the Testing. Test Accuracy 47.23%
Epoch 1
Train Loss 0.0234, Train Accuracy 45.03%
Train Loss 0.0234, Train Accuracy 46.86%
Train Loss 0.0233, Train Accuracy 47.30%
Train Loss 0.0232, Train Accuracy 47.90%
Train Loss 0.0229, Train Accuracy 48.43%
Train Loss 0.0227, Train Accuracy 48.73%
Train Loss 0.0226, Train Accuracy 48.96%
Train Loss 0.0225, Train Accuracy 49.23%
End of the Testing. Test Accuracy 51.09%
Epoch 2
Train Loss 0.0208, Train Accuracy 54.69%
Train Loss 0.0209, Train Accuracy 53.00%
Train Loss 0.0209, Train Accuracy 52.88%
Train Loss 0.0208, Train Accuracy 53.29%
Train Loss 0.0206, Train Accuracy 53.66%
Train Loss 0.0205, Train Accuracy

In [14]:
for i in range(10):
    train(net, device, train_loader, criterion, optimizer, i)

Epoch 0
Train Loss 0.0155, Train Accuracy 65.91%
Train Loss 0.0157, Train Accuracy 64.44%
Train Loss 0.0158, Train Accuracy 64.51%
Train Loss 0.0156, Train Accuracy 64.94%
Train Loss 0.0154, Train Accuracy 65.24%
Train Loss 0.0153, Train Accuracy 65.63%
Train Loss 0.0152, Train Accuracy 66.01%
Train Loss 0.0151, Train Accuracy 66.15%
Epoch 1
Train Loss 0.0138, Train Accuracy 68.89%
Train Loss 0.0139, Train Accuracy 68.40%
Train Loss 0.0140, Train Accuracy 68.63%
Train Loss 0.0138, Train Accuracy 69.08%
Train Loss 0.0137, Train Accuracy 69.36%
Train Loss 0.0136, Train Accuracy 69.57%
Train Loss 0.0135, Train Accuracy 70.02%
Train Loss 0.0134, Train Accuracy 70.18%
Epoch 2
Train Loss 0.0119, Train Accuracy 72.44%
Train Loss 0.0122, Train Accuracy 72.71%
Train Loss 0.0123, Train Accuracy 72.85%
Train Loss 0.0121, Train Accuracy 73.29%
Train Loss 0.0120, Train Accuracy 73.60%
Train Loss 0.0119, Train Accuracy 73.93%
Train Loss 0.0118, Train Accuracy 74.23%
Train Loss 0.0117, Train Accuracy

In [15]:
for i in range(10):
    train(net, device, train_loader, criterion, optimizer, i)

Epoch 0
Train Loss 0.0041, Train Accuracy 91.05%
Train Loss 0.0047, Train Accuracy 89.26%
Train Loss 0.0046, Train Accuracy 89.43%
Train Loss 0.0047, Train Accuracy 89.28%
Train Loss 0.0048, Train Accuracy 89.18%
Train Loss 0.0047, Train Accuracy 89.22%
Train Loss 0.0047, Train Accuracy 89.23%
Train Loss 0.0048, Train Accuracy 89.16%
Epoch 1
Train Loss 0.0049, Train Accuracy 89.77%
Train Loss 0.0044, Train Accuracy 90.09%
Train Loss 0.0045, Train Accuracy 90.11%
Train Loss 0.0045, Train Accuracy 90.03%
Train Loss 0.0046, Train Accuracy 89.83%
Train Loss 0.0045, Train Accuracy 89.99%
Train Loss 0.0045, Train Accuracy 90.03%
Train Loss 0.0045, Train Accuracy 90.05%
Epoch 2
Train Loss 0.0043, Train Accuracy 91.76%
Train Loss 0.0043, Train Accuracy 90.23%
Train Loss 0.0040, Train Accuracy 90.90%
Train Loss 0.0039, Train Accuracy 91.31%
Train Loss 0.0038, Train Accuracy 91.46%
Train Loss 0.0038, Train Accuracy 91.46%
Train Loss 0.0038, Train Accuracy 91.40%
Train Loss 0.0039, Train Accuracy

In [16]:
optimizer = torch.optim.SGD(net.parameters(), lr = 0.001, momentum = 0.9)
for i in range(5):
    train(net, device, train_loader, criterion, optimizer, i)

Epoch 0
Train Loss 0.0017, Train Accuracy 96.31%
Train Loss 0.0022, Train Accuracy 95.41%
Train Loss 0.0023, Train Accuracy 95.07%
Train Loss 0.0023, Train Accuracy 95.17%
Train Loss 0.0022, Train Accuracy 95.36%
Train Loss 0.0020, Train Accuracy 95.70%
Train Loss 0.0019, Train Accuracy 96.07%
Train Loss 0.0017, Train Accuracy 96.47%
Epoch 1
Train Loss 0.0005, Train Accuracy 99.43%
Train Loss 0.0007, Train Accuracy 98.79%
Train Loss 0.0007, Train Accuracy 98.82%
Train Loss 0.0007, Train Accuracy 98.85%
Train Loss 0.0007, Train Accuracy 98.79%
Train Loss 0.0007, Train Accuracy 98.84%
Train Loss 0.0007, Train Accuracy 98.91%
Train Loss 0.0006, Train Accuracy 98.98%
Epoch 2
Train Loss 0.0003, Train Accuracy 99.72%
Train Loss 0.0005, Train Accuracy 99.32%
Train Loss 0.0005, Train Accuracy 99.36%
Train Loss 0.0005, Train Accuracy 99.35%
Train Loss 0.0005, Train Accuracy 99.33%
Train Loss 0.0005, Train Accuracy 99.36%
Train Loss 0.0004, Train Accuracy 99.39%
Train Loss 0.0004, Train Accuracy

In [17]:
test(net, device, test_loader)

End of the Testing. Test Accuracy 58.19%


In [18]:
# print(net.weight)
net = nn.Linear(2,2)
w = net.weight
print(w)

Parameter containing:
tensor([[-0.2548, -0.1505],
        [ 0.2258, -0.6179]], requires_grad=True)


In [19]:
x = torch.rand(1,2)
y = net(x).sum()

y.backward()
net.weight.data -= 0.01 * net.weight.grad 
print(w)

Parameter containing:
tensor([[-0.2569, -0.1598],
        [ 0.2237, -0.6272]], requires_grad=True)


In [20]:
net = nn.Linear(2,2)
w = net.weight.clone()
print(w)

tensor([[-0.5970,  0.3632],
        [ 0.3057, -0.6260]], grad_fn=<CloneBackward0>)


In [21]:
x = torch.rand(1,2)
y = net(x).sum()
y.backward()
net.weight.data -= 0.01 * net.weight.grad
print(w)

tensor([[-0.5970,  0.3632],
        [ 0.3057, -0.6260]], grad_fn=<CloneBackward0>)


In [22]:
## Sharing weights

net = nn.Sequential(nn.Linear(2,2), nn.Linear(2,2))
net[0].weight = net[1].weight

x = torch.rand(1,2)
y = net(x).sum()
y.backward()
print(net[0].weight.grad)
print(net[1].weight.grad)


tensor([[-0.5326, -0.2379],
        [ 0.1200,  0.8089]])
tensor([[-0.5326, -0.2379],
        [ 0.1200,  0.8089]])
