In [1]:
import torch
import torchvision
from torchvision import transforms, datasets
import torch.nn as nn 
import torch.nn.functional as F #always pass params
import torch.optim as optim
# training dataset
train = datasets.MNIST("", train=True, 
                       download=True, 
                       transform=transforms.Compose([transforms.ToTensor()])
                      )
# Testing dataset (data that machine never seen before use that )
test = datasets.MNIST("", train=False, 
                      download=True, 
                      transform=transforms.Compose([transforms.ToTensor()])
                     )

trainset = torch.utils.data.DataLoader(train, batch_size=10, shuffle=True)
testset = torch.utils.data.DataLoader(train, batch_size=10, shuffle=True)

In [2]:
class Net(nn.Module):
    def __init__(self):
        super().__init__() #IMPORTANT to initiate the init method
        self.fc1 = nn.Linear(28*28, 64) #fully connected layer
        self.fc2 = nn.Linear(64, 64) #fully connected layer
        self.fc3 = nn.Linear(64, 64) #fully connected layer
        self.fc4 = nn.Linear(64, 10) #10 because we've 10 classes
    
    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))
        x = self.fc4(x)
        
        return F.log_softmax(x, dim=1)
        

net = Net()
print(net)

Net(
  (fc1): Linear(in_features=784, out_features=64, bias=True)
  (fc2): Linear(in_features=64, out_features=64, bias=True)
  (fc3): Linear(in_features=64, out_features=64, bias=True)
  (fc4): Linear(in_features=64, out_features=10, bias=True)
)


In [3]:
X = torch.rand(28, 28)

X  = X.view(-1, 28*28) # -1 specifies, input is of unknown shape


In [4]:
output = net(X)
output

tensor([[-2.2572, -2.1881, -2.2078, -2.4704, -2.4387, -2.3337, -2.3953, -2.2394,
         -2.2012, -2.3414]], grad_fn=<LogSoftmaxBackward>)

In [5]:
optimizer = optim.Adam(net.parameters(), lr=0.001)

EPOCHS = 3

for epoch in range(EPOCHS):
    for data in trainset:
        #data is batch of featuresets & labels
        X, y = data
        net.zero_grad()
        output =  net(X.view(-1, 28*28))
        loss = F.nll_loss(output, y)
        # calculate either on one hot vector or on vector
        loss.backward() #loss backward for gradient descend
        optimizer.step()
    print(loss)

tensor(0.2016, grad_fn=<NllLossBackward>)
tensor(0.0508, grad_fn=<NllLossBackward>)
tensor(0.2113, grad_fn=<NllLossBackward>)


In [6]:
correct = 0
total = 0

with torch.no_grad():
    for data in trainset:
        x, y = data
        output = net(X.view(-1, 784))
        for idx, i in enumerate(output):
            if torch.argmax(i) == y[idx]:
                correct += 1
            total += 1
print("Accuracy: ", round(correct/total, 2))

Accuracy:  0.1
