## Pytorch Version

In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import torchvision
import torchvision.transforms as tfms

In [3]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'

if device == 'cuda':
    torch.cuda.set_device(0)
    print(torch.cuda.current_device())

0


In [4]:
mnist_train = torchvision.datasets.MNIST(root='./data',
                                         train=True,
                                         transform=tfms.Compose([
                                             tfms.ToTensor(),
                                             tfms.Normalize((0.1307,), (0.3081,))
                                         ]),
                                         download=True)

mnist_test = torchvision.datasets.MNIST(root='./data',
                                        train=False,
                                        transform=tfms.Compose([
                                             tfms.ToTensor(),
                                             tfms.Normalize((0.1307,), (0.3081,))
                                         ]),
                                        download=True
                                        )

In [5]:
bs = 128

train_loader = torch.utils.data.DataLoader(dataset=mnist_train,
                                           batch_size=bs,
                                           shuffle=True)

test_loader = torch.utils.data.DataLoader(dataset=mnist_test,
                                          batch_size=bs,
                                          shuffle=False)

In [6]:
class CustomModel(nn.Module):
    def __init__(self):
        super(CustomModel, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3)
        self.conv2 = nn.Conv2d(32, 32, kernel_size=3)
        self.fc1 = nn.Linear(24*24*32, 128)
        self.fc2 = nn.Linear(128, 10)
        
    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = torch.flatten(x, start_dim=1)
        x = F.relu(self.fc1(x))
        return self.fc2(x)

In [7]:
model = CustomModel().to(device)

In [8]:
optimizer = optim.Adam(model.parameters())
criterion = nn.CrossEntropyLoss()

In [9]:
def train_step(inputs, targets):
    optimizer.zero_grad()
    
    inputs, targets = inputs.to(device), targets.to(device)
    outputs = model(inputs)
    loss = criterion(outputs, targets)
    loss.backward()
    optimizer.step()
    
    batch_loss = loss.item()
    _, predictions = outputs.max(1)  # return values, indices
    correct = predictions.eq(targets).sum().item()
    
    return batch_loss, correct

In [10]:
def test_step(inputs, targets):
    with torch.no_grad():
        inputs, targets = inputs.to(device), targets.to(device)
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        
        batch_loss = loss.item()
        _, predictions = outputs.max(1)
        correct = predictions.eq(targets).sum().item()
        
        return batch_loss, correct

In [11]:
EPOCHS = 5
best_acc = 0.

for epoch in range(EPOCHS):
    
    train_loss = 0.
    train_total = 0
    train_correct = 0
    test_loss = 0.
    test_total = 0
    test_correct = 0
    
    for batch_idx, (inputs, targets) in enumerate(train_loader):
        batch_loss, correct = train_step(inputs, targets)
        train_loss += batch_loss
        train_total += targets.size(0)
        train_correct += correct
        
    for batch_idx, (inputs, targets) in enumerate(test_loader):
        batch_loss, correct = test_step(inputs, targets)
        test_loss += batch_loss
        test_total += targets.size(0)
        test_correct += correct
        
    test_accuracy = (test_correct/test_total)*100
        
    template = 'Epoch {}, Loss: {:.6f}, Accuracy: {:.3f}%, Test Loss: {:.6f}, Test Accuracy: {:.3f}%'
    print(template.format(epoch+1,
                          train_loss,
                          (train_correct/train_total)*100,
                          test_loss,
                          test_accuracy))
    
    if test_accuracy > best_acc:
        print("new best acc!")
        best_acc = test_accuracy

Epoch 1, Loss: 67.485920, Accuracy: 95.708%, Test Loss: 4.910681, Test Accuracy: 97.930%
new best acc!
Epoch 2, Loss: 18.754155, Accuracy: 98.753%, Test Loss: 3.026167, Test Accuracy: 98.720%
new best acc!
Epoch 3, Loss: 10.495968, Accuracy: 99.318%, Test Loss: 3.122016, Test Accuracy: 98.770%
new best acc!
Epoch 4, Loss: 6.273657, Accuracy: 99.572%, Test Loss: 2.965606, Test Accuracy: 98.910%
new best acc!
Epoch 5, Loss: 4.698576, Accuracy: 99.675%, Test Loss: 3.951022, Test Accuracy: 98.600%


## TensorFlow 2 Version

In [12]:
from __future__ import absolute_import, division, print_function, unicode_literals

import tensorflow as tf
import tensorflow.keras as tk
import tensorflow.keras.optimizers as optim
import tensorflow.keras.activations as activ
import tensorflow.keras.layers as layers

tf.debugging.set_log_device_placement(True)
tf.config.set_soft_device_placement(True)

In [13]:
print(tf.__version__)

tf.config.experimental.get_visible_devices()

2.0.0


[PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU'),
 PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU'),
 PhysicalDevice(name='/physical_device:GPU:1', device_type='GPU'),
 PhysicalDevice(name='/physical_device:GPU:2', device_type='GPU'),
 PhysicalDevice(name='/physical_device:GPU:3', device_type='GPU'),
 PhysicalDevice(name='/physical_device:GPU:4', device_type='GPU'),
 PhysicalDevice(name='/physical_device:GPU:5', device_type='GPU'),
 PhysicalDevice(name='/physical_device:GPU:6', device_type='GPU'),
 PhysicalDevice(name='/physical_device:GPU:7', device_type='GPU')]

In [14]:
mnist = tf.keras.datasets.mnist

(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train, x_test = x_train/255, x_test/255.

print(type(x_train))

print(x_train.shape)
print(x_test.shape)

x_train = x_train[..., tf.newaxis]
x_test = x_test[..., tf.newaxis]

print(x_train.shape)
print(x_test.shape)

<class 'numpy.ndarray'>
(60000, 28, 28)
(10000, 28, 28)
(60000, 28, 28, 1)
(10000, 28, 28, 1)


In [15]:
train_ds = tf.data.Dataset.from_tensor_slices((x_train, y_train)).shuffle(10000).batch(32)
test_ds = tf.data.Dataset.from_tensor_slices((x_test, y_test)).batch(32)

Executing op TensorSliceDataset in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op AnonymousRandomSeedGenerator in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op ShuffleDatasetV2 in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op BatchDatasetV2 in device /job:localhost/replica:0/task:0/device:CPU:0


In [16]:
class CustomModel(tk.Model):
    def __init__(self):
        super().__init__()
        # (32, 28, 28, 1)
        self.conv1 = layers.Conv2D(32, kernel_size=3)
        self.conv2 = layers.Conv2D(32, kernel_size=3)
        self.flatten = layers.Flatten()
        self.fc1 = layers.Dense(128)
        self.fc2 = layers.Dense(10)
    
    def call(self, x):
        x = activ.relu(self.conv1(x))
        x = activ.relu(self.conv2(x))
        x = self.flatten(x)  # OR layers.Flatten()(x)
        x = activ.relu(self.fc1(x))
        
        return activ.softmax(self.fc2(x))

#### TIPS. If you want to provide labels using one-hot representation, please use CategoricalCrossentropy loss.

In [17]:
model = CustomModel()

In [18]:
criterion = tk.losses.SparseCategoricalCrossentropy()
optimizer = optim.Adam()

In [19]:
train_loss = tk.metrics.Mean(name='train_loss')
train_accuracy = tk.metrics.SparseCategoricalAccuracy(name='train_accuracy')

test_loss = tk.metrics.Mean(name='test_loss')
test_accuracy = tk.metrics.SparseCategoricalAccuracy(name='test_accuracy')

Executing op VarHandleOp in device /job:localhost/replica:0/task:0/device:GPU:0
Executing op VarIsInitializedOp in device /job:localhost/replica:0/task:0/device:GPU:0
Executing op LogicalNot in device /job:localhost/replica:0/task:0/device:GPU:0
Executing op Assert in device /job:localhost/replica:0/task:0/device:GPU:0
Executing op AssignVariableOp in device /job:localhost/replica:0/task:0/device:GPU:0


In [20]:
@tf.function
def train_step(inputs, targets):
    with tf.GradientTape() as tape:
        predictions = model(inputs)
        loss = criterion(targets, predictions)
    
    gradients = tape.gradient(loss, model.trainable_variables)  # pytorch: loss.backward()
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))  # pytorch: optimizer.step()
    
    train_loss(loss)
    train_accuracy(targets, predictions)

In [21]:
@tf.function
def test_step(inputs, targets):
    predictions = model(inputs)
    loss = criterion(targets, predictions)
    
    test_loss(loss)
    test_accuracy(targets, predictions)

In [22]:
EPOCHS = 5

for epoch in range(EPOCHS):
    for inputs, targets in train_ds:
        train_step(inputs, targets)
        
    for inputs, targets in test_ds:
        test_step(inputs, targets)
    
    template = 'Epoch {}, Loss: {}, Accuracy: {}, Test Loss: {}, Test Accuracy: {}'
    print(template.format(epoch+1,
                          train_loss.result(),
                          train_accuracy.result()*100,
                          test_loss.result(),
                          test_accuracy.result()*100))
    
    train_loss.reset_states()
    train_accuracy.reset_states()
    test_loss.reset_states()
    test_accuracy.reset_states()

Executing op OptimizeDataset in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op ModelDataset in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op AnonymousIteratorV2 in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op MakeIterator in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op IteratorGetNextSync in device /job:localhost/replica:0/task:0/device:CPU:0


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

Executing op RandomUniform in device /job:localhost/replica:0/task:0/device:GPU:0
Executing op Sub in device /job:localhost/replica:0/task:0/device:GPU:0
Executing op Mul in device /job:localhost/replica:0/task:0/device:GPU:0
Executing op Add in device /job:localhost/replica:0