In [1]:
from __future__ import print_function # some printing function
import argparse  # module for parsing command-line arguments
import torch #standard stuff
import torch.nn as nn #standard stuff
import torch.nn.functional as F #standard stuff
import torch.optim as optim #standard stuff
from torchvision import datasets, transforms #standard stuff
from torch.optim.lr_scheduler import StepLR #this is to change the learning rate
import sys

In [2]:
class Net(nn.Module):
    """Small convolutional classifier: two 3x3 conv layers followed by two linear layers.

    ``forward`` returns log-probabilities (``log_softmax`` over dim 1) with 10
    entries per sample, the form ``F.nll_loss`` consumes.
    """

    def __init__(self):
        super().__init__()  # set up nn.Module bookkeeping before registering layers
        # Convolutional stage: 1 input channel -> 32 -> 64 channels, 3x3 kernels, stride 1.
        self.conv1 = nn.Conv2d(1, 32, 3, 1)
        self.conv2 = nn.Conv2d(32, 64, 3, 1)
        # Dropout: 25% after pooling, 50% in front of the final linear layer.
        self.dropout1 = nn.Dropout(0.25)
        self.dropout2 = nn.Dropout(0.5)
        # Fully connected stage: 9216 flattened features -> 128 hidden units -> 10 outputs.
        # 9216 = 64 channels * 12 * 12, i.e. what a 1x28x28 input produces after
        # two unpadded 3x3 convolutions and one 2x2 max-pool.
        self.fc1 = nn.Linear(9216, 128)
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):
        """Map a batch of images to per-class log-probabilities.

        Pipeline: conv -> relu -> conv -> relu -> max-pool -> dropout ->
        flatten -> fc -> relu -> dropout -> fc -> log_softmax.
        """
        features = F.relu(self.conv1(x))
        features = F.relu(self.conv2(features))
        features = self.dropout1(F.max_pool2d(features, 2))
        # Linear layers need (batch, features); collapse every dim except the batch dim.
        flat = torch.flatten(features, 1)
        hidden = self.dropout2(F.relu(self.fc1(flat)))
        logits = self.fc2(hidden)
        return F.log_softmax(logits, dim=1)

In [3]:
def train(args, model, device, train_loader, optimizer, epoch):
    """Run one training epoch over ``train_loader``.

    Args:
        args: Settings object; ``log_interval`` and ``dry_run`` are read.
        model: Network to optimise; it is switched to training mode.
        device: Device each batch is moved to before the forward pass.
        train_loader: Iterable of ``(data, target)`` batches that also
            supports ``len()`` and exposes a ``.dataset`` attribute.
        optimizer: Optimizer whose gradients are cleared and stepped per batch.
        epoch: Epoch number, used only in the progress message.
    """
    model.train()
    for step, (inputs, labels) in enumerate(train_loader):
        inputs = inputs.to(device)
        labels = labels.to(device)

        # Standard update: clear stale gradients, forward, backward, step.
        optimizer.zero_grad()
        batch_loss = F.nll_loss(model(inputs), labels)
        batch_loss.backward()
        optimizer.step()

        # Only report every ``log_interval`` batches.
        if step % args.log_interval != 0:
            continue

        seen = step * len(inputs)
        total = len(train_loader.dataset)
        percent = 100. * step / len(train_loader)
        print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
            epoch, seen, total, percent, batch_loss.item()))
        # A dry run stops right after the first logged batch.
        if args.dry_run:
            break

In [4]:
def test(model, device, test_loader): #TESTER class
    model.eval() #sets model to evaluation mode
    test_loss = 0 #initialize the loss
    correct = 0 #number of correct guesses
    with torch.no_grad(): # saying PyTorch that we aint training here
        for data, target in test_loader: # batches from test_loader
            data, target = data.to(device), target.to(device) # data and target sent to GPU
            output = model(data) # predict stuff
            test_loss += F.nll_loss(output, target, reduction='sum').item()  # sum up batch loss.
                                                                        # same loss function is used as in training
            pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item() #counting the corrects

    test_loss /= len(test_loader.dataset) #avg loss for the dataset
    
    #print final results

    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))

In [5]:
def parse_args(argv=None):
    """Build the command-line parser for the MNIST example and parse the arguments.

    Args:
        argv: Optional list of argument strings to parse. ``None`` (the default)
            makes argparse read ``sys.argv[1:]``, matching a plain ``parse_args()`` call.

    Returns:
        argparse.Namespace with the attributes ``batch_size``, ``test_batch_size``,
        ``epochs``, ``lr``, ``gamma``, ``no_cuda``, ``no_mps``, ``dry_run``,
        ``seed``, ``log_interval`` and ``save_model``.
    """
    parser = argparse.ArgumentParser(description='PyTorch MNIST')

    # Data-loading and optimisation hyperparameters.
    parser.add_argument('--batch-size', type=int, default=64, metavar='N',
                        help='input batch size for training (default: 64)')
    parser.add_argument('--test-batch-size', type=int, default=1000, metavar='N',
                        help='input batch size for testing (default: 1000)')
    parser.add_argument('--epochs', type=int, default=14, metavar='N',
                        help='number of epochs to train (default: 14)')
    parser.add_argument('--lr', type=float, default=1.0, metavar='LR',
                        help='learning rate (default: 1.0)')
    # Multiplicative factor for the learning-rate scheduler.
    parser.add_argument('--gamma', type=float, default=0.7, metavar='M',
                        help='Learning rate step gamma (default: 0.7)')

    # Device selection switches.
    parser.add_argument('--no-cuda', action='store_true', default=False,
                        help='disables CUDA training')
    parser.add_argument('--no-mps', action='store_true', default=False,
                        help='disables macOS GPU training')

    # Dry run: the training loop stops after its first logged batch.
    parser.add_argument('--dry-run', action='store_true', default=False,
                        help='quickly check a single pass')
    # Seed for reproducibility.
    parser.add_argument('--seed', type=int, default=1, metavar='S',
                        help='random seed (default: 1)')
    parser.add_argument('--log-interval', type=int, default=10, metavar='N',
                        help='how many batches to wait before logging training status')
    parser.add_argument('--save-model', action='store_true', default=False,
                        help='For Saving the current Model')

    # Parse exactly once; the original parsed twice and discarded the first result.
    return parser.parse_args(argv)

In [None]:
def _notebook_args():
    """Return the settings used when running inside a Jupyter kernel.

    Edit the values here to change a notebook run; the attribute names mirror
    the command-line options produced by ``parse_args``.
    """
    class Args:
        batch_size = 64
        test_batch_size = 1000
        epochs = 3
        lr = 1.0
        gamma = 0.7
        no_cuda = False
        no_mps = False
        dry_run = False
        seed = 1
        log_interval = 10
        save_model = False
    return Args()


def _select_device(args):
    """Choose the compute device: CUDA first, then Apple MPS, otherwise CPU.

    Returns:
        Tuple ``(device, use_cuda)``; ``use_cuda`` tells the caller whether
        CUDA-specific DataLoader options should be applied.
    """
    use_cuda = not args.no_cuda and torch.cuda.is_available()
    # ``torch.backends.mps`` is absent on older PyTorch builds; treat that as "no MPS"
    # instead of failing with AttributeError.
    mps_backend = getattr(torch.backends, "mps", None)
    use_mps = (not args.no_mps) and mps_backend is not None and mps_backend.is_available()

    if use_cuda:
        return torch.device("cuda"), use_cuda
    if use_mps:
        return torch.device("mps"), use_cuda
    return torch.device("cpu"), use_cuda


def main():
    """Train and evaluate ``Net`` on MNIST.

    Settings come from ``_notebook_args`` when running under an IPython kernel
    and from the command line otherwise. After every epoch the model is
    evaluated on the test split and the learning rate is decayed. When
    ``args.save_model`` is set, the final weights are written to
    ``mnist_cnn.pt`` in the current working directory.
    """
    # Inside a notebook sys.argv belongs to the kernel, so argparse cannot be used there.
    if 'ipykernel' in sys.modules:
        args = _notebook_args()
    else:
        args = parse_args()

    torch.manual_seed(args.seed)  # reproducibility
    device, use_cuda = _select_device(args)

    # Keyword arguments forwarded to the two DataLoaders.
    # Training batches are shuffled on every device; the original only enabled
    # shuffling inside the CUDA branch, so CPU/MPS runs trained on a fixed order.
    # The test loader is left unshuffled: its metrics are sums over the whole set.
    train_kwargs = {'batch_size': args.batch_size, 'shuffle': True}
    test_kwargs = {'batch_size': args.test_batch_size}

    if use_cuda:
        cuda_kwargs = {'num_workers': 1,    # load batches in a worker process
                       'pin_memory': True}  # page-locked host memory for faster GPU copies
        train_kwargs.update(cuda_kwargs)
        test_kwargs.update(cuda_kwargs)

    transform = transforms.Compose([
        transforms.ToTensor(),                       # image -> tensor
        transforms.Normalize((0.1307,), (0.3081,))   # per-channel mean / std (single channel)
        ])
    dataset1 = datasets.MNIST('../data', train=True, download=True,
                              transform=transform)   # training split
    dataset2 = datasets.MNIST('../data', train=False,
                              transform=transform)   # test split

    train_loader = torch.utils.data.DataLoader(dataset1, **train_kwargs)
    test_loader = torch.utils.data.DataLoader(dataset2, **test_kwargs)

    model = Net().to(device)
    optimizer = optim.Adadelta(model.parameters(), lr=args.lr)

    # Multiply the learning rate by ``gamma`` after every epoch.
    scheduler = StepLR(optimizer, step_size=1, gamma=args.gamma)

    for epoch in range(1, args.epochs + 1):
        train(args, model, device, train_loader, optimizer, epoch)
        test(model, device, test_loader)
        scheduler.step()

    # Honour the save_model setting, which was previously accepted but ignored.
    if args.save_model:
        torch.save(model.state_dict(), "mnist_cnn.pt")



# Entry point: start training when this code is executed directly
# (as a script, or by running this cell) rather than imported as a module.
if __name__ == "__main__":
    main()


Test set: Average loss: 0.0460, Accuracy: 9846/10000 (98%)




Test set: Average loss: 0.0363, Accuracy: 9869/10000 (99%)

