In [1]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor

In [2]:
if torch.cuda.is_available():
  device=torch.device(type="cuda")
print(device)

cuda


In [3]:
train_dataset=datasets.MNIST(root="data",train=True,download=True,transform=ToTensor())
test_dataset=datasets.MNIST(root="data",train=False,download=True,transform=ToTensor())

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to data/MNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 9912422/9912422 [00:00<00:00, 107161352.06it/s]


Extracting data/MNIST/raw/train-images-idx3-ubyte.gz to data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to data/MNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 28881/28881 [00:00<00:00, 46465551.91it/s]

Extracting data/MNIST/raw/train-labels-idx1-ubyte.gz to data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to data/MNIST/raw/t10k-images-idx3-ubyte.gz



100%|██████████| 1648877/1648877 [00:00<00:00, 24652332.48it/s]


Extracting data/MNIST/raw/t10k-images-idx3-ubyte.gz to data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to data/MNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 4542/4542 [00:00<00:00, 7611078.21it/s]


Extracting data/MNIST/raw/t10k-labels-idx1-ubyte.gz to data/MNIST/raw



In [4]:
batch_size=64
train=DataLoader(
    dataset=train_dataset,
    batch_size=batch_size,
    shuffle=True
)
test=DataLoader(
    dataset=test_dataset,
    batch_size=batch_size,
    shuffle=False
)

In [5]:
class vanillanetwork(nn.Module):
    def __init__(self):
        super().__init__()
        self.layer1=nn.Linear(in_features=28*28,out_features=512)
        self.b1=nn.BatchNorm1d(num_features=512)
        self.layer2=nn.Linear(512,256)
        self.b2=nn.BatchNorm1d(num_features=256)
        self.layer3=nn.Linear(256,128)
        self.b3=nn.BatchNorm1d(num_features=128)
        self.layer4=nn.Linear(128,64)
        self.b4=nn.BatchNorm1d(num_features=64)
        self.layer5=nn.Linear(64,32)
        self.b5=nn.BatchNorm1d(num_features=32)
        self.layer6=nn.Linear(32,10)
        self.b6=nn.BatchNorm1d(num_features=10)
        self.r=nn.ReLU()

    def forward(self,x):
        x=self.layer1(x)
        x=self.b1(x)
        x=self.r(x)
        x=self.layer2(x)
        x=self.b2(x)
        x=self.r(x)
        x=self.layer3(x)
        x=self.b3(x)
        x=self.r(x)
        x=self.layer4(x)
        x=self.b4(x)
        x=self.r(x)
        x=self.layer5(x)
        x=self.b5(x)
        x=self.r(x)
        x=self.layer6(x)
        x=self.b6(x)
        x=self.r(x)
        return x

In [6]:
def trainmodel(model,train_dataloader,lossfunction,optimizer):
    model.train()
    correct=0
    totaloss=0
    for i, (img,label) in enumerate(train_dataloader):
        img=torch.reshape(img,(-1,28*28))
        img=img.to(device)
        label=label.to(device)
        prediction=model(img)
        loss=lossfunction(prediction,label)
        totaloss+=loss.item()
        predictedvalue=torch.argmax(prediction,dim=1)
        correct+=(predictedvalue==label).sum().item()
        loss.backward() #backpropogate loss
        optimizer.step() #update parameters
        optimizer.zero_grad() #reset gradients to 0 to prevent accumulation
        
        if (i%200==0):
            batchloss=totaloss/(i+1)
            batchloss=round(batchloss,2)#avg loss for batch
            batchacc=(correct/((i+1)*batch_size))*100 #correct predictions so far
            batchacc=round(batchacc,2)
            print("For {0} batch the loss={1} and accuracy:{2}".format(i+1,batchloss,batchacc))
            
    epochloss=totaloss/len(train_dataloader) #avg loss for 1 batch
    epochloss=round(epochloss,2)
#     totalimages=len(train_dataloader.dataset)
    epochaccuracy=(correct/len(train_dataloader.dataset))*100 
    epochaccuracy=round(epochaccuracy,2)
    return epochloss,epochaccuracy         

In [7]:
def testmodel(model,test_dataloader,lossfunction):
    model.eval()
    correct=0
    totalloss=0
    with torch.no_grad():
        for i, (img,label) in enumerate(test_dataloader):
            img=torch.reshape(img,(-1,28*28))
            img=img.to(device)
            label=label.to(device)
            prediction=model(img)
            loss=lossfunction(prediction,label)
            totalloss+=loss.item()
            predictedvalue=torch.argmax(prediction,dim=1)
            correct+=(predictedvalue==label).sum().type(torch.float).item()

            if(i%50==0):
                batchloss=round((totalloss/(i+1)),2)
                batchacc=(correct/((i+1)*batch_size))*100
                batchacc=round(batchacc,2)
                print("In testing For {0} batch the loss={1} and accuracy:{2}".format(i+1,batchloss,batchacc))
    
    epochloss=round(totalloss/len(test_dataloader),2)
    totalimages=len(test_dataloader.dataset)
    epochacc=correct/totalimages *100
    epochacc=round(epochacc,2)
    
    return epochloss,epochacc
    

In [8]:
#main code
model=vanillanetwork()
model=model.to(device)
lossfunction=nn.CrossEntropyLoss()
optimizer=torch.optim.Adam(model.parameters(),lr=0.001)
epochs=20
for i in range(epochs):
    print("Epoch number: ",i+1)
    trainloss,trainacc=trainmodel(model,train,lossfunction,optimizer)
    print("Training epoch loss: ",trainloss,"Training accuracy: ",trainacc)
    testloss,testacc=testmodel(model,test,lossfunction)
    print("------------------")
    print("Testing epoch loss: ",testloss,"Testing accuracy: ",testacc)
    print("\n")

Epoch number:  1
For 1 batch the loss=2.49 and accuracy:9.38
For 201 batch the loss=0.69 and accuracy:88.11
For 401 batch the loss=0.56 and accuracy:90.7
For 601 batch the loss=0.48 and accuracy:91.99
For 801 batch the loss=0.43 and accuracy:92.6
Training epoch loss:  0.41 Training accuracy:  92.94
In testing For 1 batch the loss=0.1 and accuracy:100.0
In testing For 51 batch the loss=0.22 and accuracy:95.53
In testing For 101 batch the loss=0.2 and accuracy:96.24
In testing For 151 batch the loss=0.18 and accuracy:97.03
------------------
Testing epoch loss:  0.18 Testing accuracy:  96.96


Epoch number:  2
For 1 batch the loss=0.16 and accuracy:96.88
For 201 batch the loss=0.2 and accuracy:96.42
For 401 batch the loss=0.2 and accuracy:96.31
For 601 batch the loss=0.2 and accuracy:96.2
For 801 batch the loss=0.19 and accuracy:96.27
Training epoch loss:  0.18 Training accuracy:  96.27
In testing For 1 batch the loss=0.05 and accuracy:100.0
In testing For 51 batch the loss=0.14 and accu