In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms

def get_data_loader(training = True):
    """

    INPUT: 
        An optional boolean argument (default value is True for training dataset)

    RETURNS:
        Dataloader for the training set (if training = True) or the test set (if training = False)
    """
    custom_transform =transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,))
        ])
    
    # load to GPU
    train_set = datasets.FashionMNIST('./data',train = True, download = True,transform=custom_transform, target_transform=None)
    test_set = datasets.FashionMNIST('./data', train = False, transform = custom_transform, target_transform = None)

    if training == True:
        return torch.utils.data.DataLoader(train_set, batch_size = 64)
    else:
        return torch.utils.data.DataLoader(test_set, batch_size = 64)

def build_model():
    """

    INPUT: 
        None

    RETURNS:
        An untrained neural network model
    """

    model = nn.Sequential(
        nn.Flatten(),
        # A Dense layer with 128 nodes and a ReLU activation.
        nn.Linear(784, 128),
        nn.ReLU(),
        # A Dense layer with 64 nodes and a ReLU activation.
        nn.Linear(128, 64),
        nn.ReLU(),
        # A Dense layer with 10 nodes
        nn.Linear(64, 10),
        )
    
    # move to GPU
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model.to(device)
    
    return model

def train_model(model, train_loader, criterion, T):
    """

    INPUT: 
        model - the model produced by the previous function
        train_loader  - the train DataLoader produced by the first function
        criterion   - cross-entropy 
        T - number of epochs for training

    RETURNS:
        None
    """

    optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

    model.train()

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    for epoch in range(T):
        correct = 0
        for images, labels in train_loader:
            # move to GPU
            images, labels = images.to(device), labels.to(device)

            # zero the parameter gradients
            optimizer.zero_grad()
            
            result = model(images)
            # get the max possibility
            predicted = torch.argmax(result, dim=1)
            # get the number of correct predictions
            correct += (predicted == labels).sum().item()
            
            loss = criterion(result, labels)
            loss.backward()
            optimizer.step()
            
        # print statistics in following format: Train Epoch: ? Accuracy: ?/?(??.??%) Loss: ?.???
        print('Train Epoch: {} Accuracy: {}/{}({:.2f}%) Loss: {:.3f}'.format(
            epoch, correct, len(train_loader.dataset),
            correct / len(train_loader.dataset) * 100, loss.item()))
    
def evaluate_model(model, test_loader, criterion, show_loss = True):
    """

    INPUT: 
        model - the the trained model produced by the previous function
        test_loader    - the test DataLoader
        criterion   - cropy-entropy 

    RETURNS:
        None
    """

    model.eval()
    correct = 0
    average_loss = 0

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    with torch.no_grad():
        for images, labels in test_loader:
            # move to GPU
            images, labels = images.to(device), labels.to(device)
            
            result = model(images)
            predicted = torch.argmax(result, dim=1)
            correct += (predicted == labels).sum().item()

            # get the average loss, not sure
            average_loss += criterion(result, labels).item() * len(images) / len(test_loader.dataset)

    if show_loss == False:
        print('Accuracy: {:.2f}%'.format(correct / len(test_loader.dataset) * 100))
    else:
        print('Average loss: {:.4f}\nAccuracy: {:.2f}%'.format(average_loss, correct / len(test_loader.dataset) * 100))

def predict_label(model, test_images, index):
    """

    INPUT: 
        model - the trained model
        test_images   -  a tensor. test image set of shape Nx1x28x28
        index   -  specific index  i of the image to be tested: 0 <= i <= N - 1


    RETURNS:
        None
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    # move to GPU
    model = model.to(device)
    test_images = test_images.to(device)

    class_names = ['T-shirt/top','Trouser','Pullover','Dress','Coat','Sandal','Shirt'
,'Sneaker','Bag','Ankle Boot']
    # pick the image at the specific index
    result = model(test_images[index])
    # convert from logit to probability
    probability = F.softmax(result, dim=1)

    # return the index of the top 3 classes in tensor
    values,indices = probability.topk(3, largest=True, sorted=True, dim=1)

    print('Top 3 classes: {}, {}, {}'.format(class_names[indices[0][0]], class_names[indices[0][1]], class_names[indices[0][2]]))

    # extract top 3 classes and their probabilities
    top_3_classes = [class_names[indices[0][0]], class_names[indices[0][1]], class_names[indices[0][2]]]
    top_3_probabilities = [values[0][0].item(), values[0][1].item(), values[0][2].item()]

In [2]:
criterion = nn.CrossEntropyLoss()

model = build_model()
print(model)      

Sequential(
  (0): Flatten(start_dim=1, end_dim=-1)
  (1): Linear(in_features=784, out_features=128, bias=True)
  (2): ReLU()
  (3): Linear(in_features=128, out_features=64, bias=True)
  (4): ReLU()
  (5): Linear(in_features=64, out_features=10, bias=True)
)


In [3]:
train_model(model, get_data_loader(), criterion, 10)

Train Epoch: 0 Accuracy: 42838/60000(71.40%) Loss: 0.720
Train Epoch: 1 Accuracy: 49389/60000(82.31%) Loss: 0.595
Train Epoch: 2 Accuracy: 50419/60000(84.03%) Loss: 0.511
Train Epoch: 3 Accuracy: 51069/60000(85.11%) Loss: 0.443
Train Epoch: 4 Accuracy: 51510/60000(85.85%) Loss: 0.398
Train Epoch: 5 Accuracy: 51822/60000(86.37%) Loss: 0.354
Train Epoch: 6 Accuracy: 52118/60000(86.86%) Loss: 0.308
Train Epoch: 7 Accuracy: 52359/60000(87.27%) Loss: 0.276
Train Epoch: 8 Accuracy: 52598/60000(87.66%) Loss: 0.250
Train Epoch: 9 Accuracy: 52765/60000(87.94%) Loss: 0.230


In [4]:
evaluate_model(model, get_data_loader(False), criterion, show_loss = True)

Average loss: 0.3834
Accuracy: 86.10%


In [5]:
images, labels = next(iter(get_data_loader(False)))

In [6]:
def predict_label(model, test_images, index):
    """

    INPUT: 
        model - the trained model
        test_images   -  a tensor. test image set of shape Nx1x28x28
        index   -  specific index  i of the image to be tested: 0 <= i <= N - 1


    RETURNS:
        None
    """
    class_names = ['T-shirt/top','Trouser','Pullover','Dress','Coat','Sandal','Shirt'
,'Sneaker','Bag','Ankle Boot']
    # pick the image at the specific index
    result = model(test_images[index])
    # convert from logit to probability
    probability = F.softmax(result, dim=1)

    # return the index of the top n classes in tensor
    n = 3
    values,indices = probability.topk(n, largest=True, sorted=True, dim=1)

    top_n_classes = []
    top_n_probabilities = []

    for i in range(n):
        top_n_classes.append(class_names[indices[0][i]])
        top_n_probabilities.append(values[0][i].item())

    for i in range(n):
        print('{}: {:.2f}%'.format(top_n_classes[i], top_n_probabilities[i] * 100))

In [7]:
predict_label(model, images, 1)

RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu! (when checking argument for argument mat1 in method wrapper_CUDA_addmm)