In [14]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader
import torchvision.datasets as Dataset
import torchvision.transforms as Transforms

In [4]:
class NeuralNet(nn.Module):
    """Fully connected classifier with a single hidden layer.

    Args:
        input_size: number of features in each (flattened) input sample.
        hidden_size: number of units in the hidden layer.
        num_classes: number of scores produced per sample.
    """

    def __init__(self, input_size, hidden_size, num_classes):
        super().__init__()
        # input -> hidden projection
        self.fc1 = nn.Linear(in_features=input_size, out_features=hidden_size)
        # hidden -> per-class scores
        self.fc2 = nn.Linear(in_features=hidden_size, out_features=num_classes)

    def forward(self, x):
        """Return the raw class scores for the batch ``x`` (no softmax applied)."""
        hidden = F.relu(self.fc1(x))
        return self.fc2(hidden)


In [17]:
# Smoke test: push one random batch (64 samples x 784 features) through an
# untrained network with 50 hidden units and 10 outputs.
model = NeuralNet(784, 50, 10)
x = torch.randn(64, 784)
# Call the module itself rather than .forward() so that nn.Module.__call__
# runs any registered hooks around the forward pass.
print(model(x))

tensor([[-0.0426, -0.3446,  0.2274, -0.2598, -0.1002,  0.0891, -0.0981,  0.0544,
         -0.1269,  0.4067],
        [-0.2905,  0.0494,  0.4223, -0.3073, -0.1803,  0.1236, -0.1611, -0.2429,
         -0.0566,  0.4715],
        [-0.2657, -0.1046,  0.0569, -0.4400, -0.2916,  0.0652,  0.3386,  0.2083,
         -0.3150,  0.4140],
        [-0.2457,  0.0339,  0.2699, -0.1339, -0.3600,  0.3843, -0.1498,  0.1070,
          0.1391,  0.2254],
        [-0.3376,  0.1499,  0.2337, -0.3455, -0.2007,  0.0054,  0.1798,  0.0334,
          0.1015,  0.3986],
        [-0.2167, -0.0264,  0.1170, -0.2080, -0.0290,  0.0064, -0.1645,  0.0929,
         -0.1057,  0.4056],
        [-0.2358, -0.1174,  0.3885, -0.1845, -0.1537,  0.4137,  0.0967, -0.1771,
         -0.1859,  0.1680],
        [-0.4838, -0.2491, -0.0972, -0.2732, -0.1090,  0.0965,  0.2221,  0.1233,
         -0.2284,  0.4410],
        [-0.1883, -0.0197, -0.1010, -0.2755, -0.2037, -0.0199,  0.1029,  0.1175,
         -0.3727,  0.4607],
        [-0.1893, -

In [27]:
# set device
# Use the GPU when CUDA is usable, otherwise fall back to the CPU.
# `torch.cuda.is_available` has to be *called*: the bare function object is
# always truthy and would select "cuda" even on a CPU-only machine.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [83]:
# Hyperparameters
INPUT_SIZE = 28 * 28    # features per sample once a 28x28 image is flattened (784)
NUM_CLASSES = 10        # number of output scores of the network
HIDDEN_SIZE = 50        # width of the hidden layer
LEARNING_RATE = 1e-3    # step size handed to the optimizer
BATCH_SIZE = 64         # samples per mini-batch
NUM_EPOCHS = 3          # full passes over the training data

In [16]:
# Load Data
# MNIST is downloaded into the "data/" folder (download=True) and every image
# is converted to a tensor by the ToTensor() transform.
train_dataset = Dataset.MNIST(root="data/", train=True, download=True, transform=Transforms.ToTensor())
# Training batches are reshuffled every epoch.
train_loader = DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE, shuffle=True)

# test set
test_dataset = Dataset.MNIST(root="data/", train=False, download=True, transform=Transforms.ToTensor())
# Accuracy does not depend on sample order, so the evaluation loader is not
# shuffled; this keeps per-batch results reproducible between runs.
testloader = DataLoader(dataset=test_dataset, batch_size=BATCH_SIZE, shuffle=False)


Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to data/MNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 9912422/9912422 [00:04<00:00, 2147064.36it/s]


Extracting data/MNIST/raw/train-images-idx3-ubyte.gz to data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to data/MNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 28881/28881 [00:00<00:00, 2045209.17it/s]


Extracting data/MNIST/raw/train-labels-idx1-ubyte.gz to data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to data/MNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 1648877/1648877 [00:00<00:00, 2119703.65it/s]


Extracting data/MNIST/raw/t10k-images-idx3-ubyte.gz to data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to data/MNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 4542/4542 [00:00<00:00, 671005.91it/s]

Extracting data/MNIST/raw/t10k-labels-idx1-ubyte.gz to data/MNIST/raw






In [65]:
# Peek at the first training batch only: print its index and its labels.
# `batch_idx` is used instead of `id` so the builtin id() is not shadowed, and
# the statement that used to follow `break` (unreachable) has been dropped.
for batch_idx, (data, target) in enumerate(train_loader):
    print(batch_idx)
    print(target)
    break



0
tensor([0, 2, 8, 3, 7, 2, 2, 0, 1, 3, 0, 9, 0, 3, 5, 9, 9, 0, 3, 8, 4, 4, 9, 7,
        3, 6, 4, 1, 5, 7, 2, 3, 7, 3, 9, 8, 2, 7, 5, 3, 9, 5, 6, 9, 8, 7, 2, 9,
        2, 9, 0, 2, 7, 2, 5, 4, 5, 3, 6, 4, 4, 1, 8, 8])


In [84]:
# init network
# Build the network from the hyperparameters, then move its parameters to the
# selected device (Module.to returns the module itself).
model = NeuralNet(
    input_size=INPUT_SIZE,
    hidden_size=HIDDEN_SIZE,
    num_classes=NUM_CLASSES,
)
model = model.to(device)

In [85]:
# loss and optimizer
# Adam updates every parameter of `model` with step size LEARNING_RATE.
optimizer = optim.Adam(params=model.parameters(), lr=LEARNING_RATE)
# Cross-entropy criterion applied to the network's raw scores and the labels.
crossEntropy = nn.CrossEntropyLoss()

In [86]:
# train the model
# Put the model in training mode explicitly, in case an earlier cell left it
# in eval mode.
model.train()
for epoch in range(NUM_EPOCHS):
    print(f'\nepoch ==> {epoch+1}')
    # Sentinel: only printed as-is if the loader yields no batches at all.
    loss = float('-inf')
    for data, targets in train_loader:
        data = data.to(device=device)
        targets = targets.to(device=device)

        # Flatten every sample, e.g. [64, 1, 28, 28] -> [64, 784].
        data = data.reshape(data.shape[0], -1)

        # forward pass (call the module, not .forward(), so hooks are honoured)
        score = model(data)
        loss = crossEntropy(score, targets)

        # backward pass: clear stale gradients, then back-propagate
        optimizer.zero_grad()
        loss.backward()

        # optimization step (gradient descent update of the parameters)
        optimizer.step()

    # Loss of the last mini-batch processed in this epoch.
    print(f"Loss -> {loss}")
    print("=============================")

        
        


epoch ==> 1
Loss -> 0.44054874777793884

epoch ==> 2
Loss -> 0.164377823472023

epoch ==> 3
Loss -> 0.05167372524738312


In [88]:
# accuracy on train and test
def check_accuracy(loader, model):
    """Print and return the classification accuracy of ``model`` on ``loader``.

    Each batch is moved to the notebook-level ``device``, flattened to
    ``[batch, features]`` and scored; the prediction is the index of the
    highest score in each row.

    Args:
        loader: iterable of ``(inputs, labels)`` batches exposing a
            ``dataset`` attribute. If the dataset has a ``train`` attribute
            it selects the "train"/"test" heading; otherwise a neutral
            heading is printed.
        model: the ``nn.Module`` to evaluate.

    Returns:
        Fraction of correctly classified samples as a float, or ``nan`` when
        the loader yields no samples.
    """
    # Not every dataset has a `train` flag (e.g. TensorDataset, Subset).
    split_flag = getattr(loader.dataset, "train", None)
    if split_flag is None:
        print("Accuracy on data")
    elif split_flag:
        print("Accuracy on train data")
    else:
        print("Accuracy on test data")

    num_correct = 0
    num_sample = 0

    # Remember the caller's mode so it can be restored afterwards instead of
    # unconditionally forcing training mode.
    was_training = model.training
    model.eval()

    try:
        with torch.no_grad():
            for x, y in loader:
                x = x.to(device=device)
                y = y.to(device=device)

                x = x.reshape(x.shape[0], -1)

                # Call the module rather than .forward() so hooks run.
                score = model(x)
                _, predictions = score.max(1)
                # .item() keeps the running count a plain Python int.
                num_correct += (predictions == y).sum().item()
                num_sample += predictions.size(0)
    finally:
        model.train(was_training)

    # Guard against an empty loader instead of raising ZeroDivisionError.
    accuracy = num_correct / num_sample if num_sample else float("nan")
    print(f'accuracy : {accuracy}')
    return accuracy


In [89]:
# Report accuracy on the training loader first, then on the test loader.
for loader in (train_loader, testloader):
    check_accuracy(loader, model)

Accuracy on train data
accuracy : 0.9600166666666666
Accuracy on test data
accuracy : 0.9552
