In [36]:
import numpy as np
import torch
import torch.nn as nn #provides classes/modules for making neural networks
import torch.nn.functional as F #a module with common nn functions (operations on tensors/high dim matrices; activations)
import torch.optim as optim #contains SGD (Stochastic gradient descent)
from torchvision import datasets, transforms #includes MNIST
from torch.utils.data import DataLoader #allows shuffling and minbatches
from torch.nn import Linear

In [42]:
#Create a NN class that encapsulates all components of NN, instantiate later
# with __call__ object name can be treated as function that calls certain function in class (predefine)
#inherits from nn.Module class; many classes for different layers
class SimpleNN(nn.Module): 
    def __init__(self):
        super(SimpleNN, self).__init__() #explicitly call parent class constructor to initialize stuff
                                    #parent technically initialized too and tied to child but you only access child
        self.fc1 = Linear(28**2, 16) #first fully connected layer has 784 input neurons (performs matrix mult to calculate z)
                                 #next layer has 16 neurons
        self.fc2 = Linear(16, 16)
        self.fc3 = Linear(16, 10)
    
    def forward(self, pixels):
        pixels = pixels.view(-1, 28**2) #dimension of input vector
        pixels = torch.sigmoid(self.fc1(pixels)) #this includes all z stuff and matrix mult
        pixels = torch.sigmoid(self.fc2(pixels))
        pixels = torch.sigmoid(self.fc3(pixels))
        return pixels
    
#Instantiate model
model = SimpleNN()

#Loss function
mse = nn.MSELoss() #making instance of this class to use the functions in it

#Other optimizers than SGD like Adam, takes momentum into account for adaptive learning rate
optimizer = optim.SGD(model.parameters(), lr = 0.001) #iterable (like list) of parameters passed to optimizer

#Load MNIST
#defines transformation to turn images into tensors then normalize them
#normalize transforms the pixel values more condensed/similar so training is faster
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0), 1)])

#transforms and downloads data
train_dataset = datasets.MNIST(root = './data', train = True, download = True, transform = transform)

#creates iterator that provides batches of data during training
#the batches are in form of (image, label) tuple which are just tensors/vectors
train_loader = DataLoader(train_dataset, batch_size = 64, shuffle = True)

#Adds batch size to image tensor in train loader
test_dataset = datasets.MNIST(root = './data', train = False, download = True, transform = transform)
test_loader = DataLoader(test_dataset, batch_size = 64, shuffle = False)

In [43]:
#Traning
'''Each parameter has its own special tensor (created per layer) 
with a number for its partial derivative in .grad attribute of tensor; step() accesses these'''

#Hyperparameter
num_epochs = 5

for epoch in range(num_epochs): #will do training cycle 5 times

    #each iteration is one minibatch of images/labels
    for images, labels in train_loader: 
        optimizer.zero_grad() #zeroing out gradient ignores .grad and recalculates partials in backprop
        outputs = model(images) #passes this to forward

        '''MNIST is (batch_size = ..., channels = 1, height = 28, width = 28)
        Fully connected layers expect input tensors to be 2 dimensional(batch size, num_features)
        View flattens the dimension
        Size of first dimension of tensor is batch size, -1 infers
        Reshapes mini-batch into 2D tensor whre each row is image and column is flattened version
        Often need to flatten into fewer dimensions'''
        images.view(images.size(0), -1) 

        #numclasses is components; ex. 4 = [0,0,0,1,0,0,...]
        target = F.one_hot(labels, num_classes=10).float()
    
        loss = mse(outputs, target)
        loss.backward() #backward propogation to compute gradiaent
        optimizer.step() #updates model parameters (takes "step")
    
    print(f'Epoch {epoch + 1}, Loss: {loss.item(): .4f}') #4 decimal places as floating point number; loss is tensor

Epoch 1, Loss:  0.2807
Epoch 2, Loss:  0.2651
Epoch 3, Loss:  0.2530


In [None]:
#Evaluating/testing model
model.eval()
correct = 0
total = 0
with torch.no
