In [14]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader
import torchvision.datasets as Dataset
import torchvision.transforms as Transforms

In [90]:
class NeuralNet(nn.Module):

    def __init__(self, input_size, hidden_size1,hidden_size2, num_classes):
        super(NeuralNet, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size1)
        self.fc2 = nn.Linear(hidden_size1, hidden_size2)
        self.fc3 = nn.Linear(hidden_size2, num_classes)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x


In [92]:
model = NeuralNet(784,225,50,10)
x = torch.randn(64, 784)
print(model.forward(x))

tensor([[-7.0729e-02,  1.9028e-01, -1.2966e-01,  1.4531e-01,  5.1238e-02,
          1.5413e-01, -1.4303e-01,  1.4381e-01, -9.3080e-02, -1.4391e-01],
        [-2.0356e-02,  5.3236e-02, -2.1384e-02,  3.2317e-02,  2.7859e-02,
         -2.6877e-02, -1.6977e-01,  2.0572e-01,  8.9520e-03, -9.0600e-02],
        [ 8.1504e-02,  6.7948e-02, -6.6629e-02,  5.8014e-02,  6.2482e-02,
         -6.4132e-02, -1.5354e-01,  1.0369e-01, -1.3964e-02, -1.4155e-01],
        [ 4.1267e-02,  4.1639e-03, -2.5967e-02,  5.1997e-02,  8.1266e-02,
          2.7078e-02, -2.3101e-01,  9.8578e-02, -1.9938e-01, -5.0028e-02],
        [-5.9154e-02,  1.5944e-01,  1.3113e-01,  5.8829e-02,  1.1611e-01,
          5.5537e-02, -1.1702e-01,  8.8423e-02, -1.5074e-01, -2.0937e-02],
        [-2.1731e-02,  1.4816e-01,  2.9013e-02,  1.4533e-01,  1.1534e-01,
          7.0762e-02, -1.7427e-01,  9.6455e-02,  1.4628e-02, -1.2738e-01],
        [-1.3967e-02,  1.4416e-01, -2.3666e-02,  2.8941e-02,  2.1658e-03,
          2.6234e-02, -2.1674e-0

In [117]:
# set device
device = torch.device('cuda' if torch.cuda.is_available() else "cpu")
# device = torch.device("cpu")
device

device(type='cpu')

In [116]:
# Hyperparameters
INPUT_SIZE = 784
NUM_CLASSES = 10
HIDDEN_SIZE1 = 225
HIDDEN_SIZE2 = 50
LEARNING_RATE = 0.001
BATCH_SIZE = 64
NUM_EPOCHS = 10

In [95]:
# Load Data
'''
Transform the data from numpy arrays to tensor and save it in data folder
'''
train_dataset = Dataset.MNIST(root="data/", train=True,download=True, transform=Transforms.ToTensor()) 
train_loader = DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE, shuffle=True)

# test set
test_dataset = Dataset.MNIST(root="data/", train=False, download=True, transform=Transforms.ToTensor()) 
testloader = DataLoader(dataset=test_dataset, batch_size=BATCH_SIZE, shuffle=True)


In [65]:
for id, (data, target), in enumerate(train_loader):
    print(id)
    print(target)
    break
    print("----=-=-=-")



0
tensor([0, 2, 8, 3, 7, 2, 2, 0, 1, 3, 0, 9, 0, 3, 5, 9, 9, 0, 3, 8, 4, 4, 9, 7,
        3, 6, 4, 1, 5, 7, 2, 3, 7, 3, 9, 8, 2, 7, 5, 3, 9, 5, 6, 9, 8, 7, 2, 9,
        2, 9, 0, 2, 7, 2, 5, 4, 5, 3, 6, 4, 4, 1, 8, 8])


In [114]:
# init network
model = NeuralNet(input_size=INPUT_SIZE, hidden_size1=HIDDEN_SIZE1, hidden_size2=HIDDEN_SIZE2, num_classes=NUM_CLASSES).to(device)

AssertionError: Torch not compiled with CUDA enabled

In [97]:
# loss and optimizer
crossEntropy = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)

In [103]:
optimizer.zero_grad()

In [108]:
# train the model

epoch_loss = []
for epoch in range(NUM_EPOCHS):
    print(f'\nepoch ==> {epoch+1}')
    loss = float('-inf')
    for id, (data, targets) in enumerate(train_loader):
        data = data.to(device=device)
        targets = targets.to(device=device)

        data = data.reshape(data.shape[0], -1) # makes each batch dimention form [64, 1,28, 28] to [64, 784]
        
        # forward pass
        score = model.forward(data)
        loss = crossEntropy(score,targets)
        # print(f'score - {loss}')

        # backpass
        optimizer.zero_grad() # set each gradient to 0 initially
        loss.backward()

        # optimization or gradient decent
        optimizer.step()

    epoch_loss.append(loss)
    print(f"Loss -> {loss}")
    print("=============================")

        
        


epoch ==> 1
Loss -> 0.00012198904732940719

epoch ==> 2
Loss -> 4.546681520878337e-05

epoch ==> 3
Loss -> 3.85180173907429e-05

epoch ==> 4
Loss -> 0.00015153099957387894

epoch ==> 5
Loss -> 0.0001348896330455318

epoch ==> 6
Loss -> 8.355016689165495e-06

epoch ==> 7
Loss -> 3.65123305527959e-05

epoch ==> 8
Loss -> 4.283939233573619e-06

epoch ==> 9
Loss -> 7.487773245884455e-07

epoch ==> 10
Loss -> 0.0005914241191931069


In [106]:
# accuracy on test and test
def check_accuracy(loader, model):
    if loader.dataset.train:
        print("Accuracy on train data")
    else:
        print("Accuracy on test data")

    num_correct = 0
    num_sample = 0
    model.eval()

    with torch.no_grad():
        for x, y in loader:
            x = x.to(device=device)
            y = y.to(device=device)
            
            x = x.reshape(x.shape[0], -1)

            score = model.forward(x)
            _, predictions = score.max(1)
            num_correct += (predictions == y).sum()
            num_sample += predictions.size(0)
        print(f'accuracy : {float(num_correct)/float(num_sample)}')

    model.train()


In [107]:
check_accuracy(train_loader, model)
check_accuracy(testloader, model)

Accuracy on train data
accuracy : 0.9992333333333333
Accuracy on test data
accuracy : 0.9812
