In [1]:
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
import torch.optim as optim
import torch.nn.functional as F
import numpy as np


#If cuda device exists, use that. If not, default to CPU.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

In [2]:
#Hyperparameters
num_epochs = 15
num_classes = 10
batch_size = 128
learning_rate = 0.001

In [3]:
#Load MNIST data set
train_dataset = torchvision.datasets.MNIST(root='../../data/',
                                           train=True, 
                                           transform=transforms.ToTensor(),
                                           download=True)

test_dataset = torchvision.datasets.MNIST(root='../../data/',
                                          train=False, 
                                          transform=transforms.ToTensor())

#Data loader
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=batch_size, 
                                           shuffle=True)

test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                          batch_size=batch_size, 
                                          shuffle=False)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ../../data/MNIST/raw/train-images-idx3-ubyte.gz


  0%|          | 0/9912422 [00:00<?, ?it/s]

Extracting ../../data/MNIST/raw/train-images-idx3-ubyte.gz to ../../data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ../../data/MNIST/raw/train-labels-idx1-ubyte.gz


  0%|          | 0/28881 [00:00<?, ?it/s]

Extracting ../../data/MNIST/raw/train-labels-idx1-ubyte.gz to ../../data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ../../data/MNIST/raw/t10k-images-idx3-ubyte.gz


  0%|          | 0/1648877 [00:00<?, ?it/s]

Extracting ../../data/MNIST/raw/t10k-images-idx3-ubyte.gz to ../../data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ../../data/MNIST/raw/t10k-labels-idx1-ubyte.gz


  0%|          | 0/4542 [00:00<?, ?it/s]

Extracting ../../data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ../../data/MNIST/raw



In [4]:
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, 3, 1)
        self.conv2 = nn.Conv2d(32, 64, 3, 1)
        self.dense1 = nn.Linear(12*12*64, 128)
        self.dense2 = nn.Linear(128, num_classes)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = F.max_pool2d(x, 2, 2)
        x = F.dropout(x, 0.25)
        x = x.view(-1, 12*12*64)
        x = F.relu(self.dense1(x))
        x = F.dropout(x, 0.5)
        x = self.dense2(x)
        return F.log_softmax(x, dim=1)
    


In [5]:
model = CNN().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

total_step = len(train_loader)
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        images = images.to(device)
        labels = labels.to(device)
        
        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)
        
        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        if (i+1) % 100 == 0:
            print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}' 
                   .format(epoch+1, num_epochs, i+1, total_step, loss.item()))


Epoch [1/15], Step [100/469], Loss: 0.3744
Epoch [1/15], Step [200/469], Loss: 0.2304
Epoch [1/15], Step [300/469], Loss: 0.1669
Epoch [1/15], Step [400/469], Loss: 0.2249
Epoch [2/15], Step [100/469], Loss: 0.1670
Epoch [2/15], Step [200/469], Loss: 0.0870
Epoch [2/15], Step [300/469], Loss: 0.0538
Epoch [2/15], Step [400/469], Loss: 0.1994
Epoch [3/15], Step [100/469], Loss: 0.0366
Epoch [3/15], Step [200/469], Loss: 0.0229
Epoch [3/15], Step [300/469], Loss: 0.0576
Epoch [3/15], Step [400/469], Loss: 0.0338
Epoch [4/15], Step [100/469], Loss: 0.1012
Epoch [4/15], Step [200/469], Loss: 0.0616
Epoch [4/15], Step [300/469], Loss: 0.0172
Epoch [4/15], Step [400/469], Loss: 0.0462
Epoch [5/15], Step [100/469], Loss: 0.0732
Epoch [5/15], Step [200/469], Loss: 0.1537
Epoch [5/15], Step [300/469], Loss: 0.0365
Epoch [5/15], Step [400/469], Loss: 0.0205
Epoch [6/15], Step [100/469], Loss: 0.1060
Epoch [6/15], Step [200/469], Loss: 0.0367
Epoch [6/15], Step [300/469], Loss: 0.0459
Epoch [6/15

In [6]:
from sklearn.metrics import roc_auc_score

preds = []
y_true = []
# Test the model
model.eval()  # Set model to evaluation mode.
with torch.no_grad():
    correct = 0
    total = 0
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()    
        detached_pred = predicted.detach().cpu().numpy()
        detached_label = labels.detach().cpu().numpy()
        for f in range(0, len(detached_pred)):
            preds.append(detached_pred[f])
            y_true.append(detached_label[f])
        
    print('Test Accuracy of the model on the 10000 test images: {:.2%}'.format(correct / total))
    
    preds = np.eye(num_classes)[preds]
    y_true = np.eye(num_classes)[y_true]    
    auc = roc_auc_score(preds, y_true)
    print("AUC: {:.2%}".format (auc))
# Save the model checkpoint
torch.save(model.state_dict(), 'pytorch_mnist_cnn.ckpt')

Test Accuracy of the model on the 10000 test images: 98.45%
AUC: 99.13%


In [7]:
with torch.no_grad():
    correct = 0
    total = 0
    predss = []
    matches = []
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
        
        for f in range(0, len(predicted.detach().cpu().numpy())):
            predss.append(predicted.detach().cpu().numpy()[f])
            matches.append(labels.detach().cpu().numpy()[f])
            
#     preds = np.eye(num_classes)[predicted.detach().cpu().numpy()]
#     y_true = np.eye(num_classes)[labels.detach().cpu().numpy()]
#     print(predss, matches)
    preds = np.eye(num_classes)[predss]
    y_true = np.eye(num_classes)[matches]
    
    print(preds == y_true)
    auc = roc_auc_score(preds, y_true)
    print("AUC: {:.2%}".format (auc))

[[ True  True  True ...  True  True  True]
 [ True  True  True ...  True  True  True]
 [ True  True  True ...  True  True  True]
 ...
 [ True  True  True ...  True  True  True]
 [ True  True  True ...  True  True  True]
 [ True  True  True ...  True  True  True]]
AUC: 99.18%


In [8]:
# Convolutional neural network (two convolutional layers)
class ConvNet(nn.Module):
    
    
    def __init__(self, num_classes=10):
        super(ConvNet, self).__init__()
        self.layer1 = nn.Sequential(
            nn.Conv2d(1, 32, kernel_size=3, stride=1, padding=0),
#             nn.BatchNorm2d(16),
            nn.ReLU())
#             nn.MaxPool2d(kernel_size=2, stride=2))
        self.layer2 = nn.Sequential(
            nn.Conv2d(32, 64, kernel_size=5, stride=1, padding=2),
            nn.BatchNorm2d(16),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2))
        self.layer3 = nn.Sequential(
            
        
        )
        self.fc = nn.Linear(7*7*32, num_classes)
        
    def forward(self, x):
        out = self.layer1(x)
        out = self.layer2(out)
        out = out.reshape(out.size(0), -1)
        out = self.fc(out)
        return out

model = ConvNet(num_classes).to(device)