In [1]:
import numpy as np
import tensorflow as tf
from keras import datasets
from keras.models import Sequential
from keras.layers import Dense

In [2]:
np.random.seed(123)
tf.random.set_seed(123)

# prepare data
mnist = datasets.mnist
(x_train, t_train), (x_test, t_test) = mnist.load_data()

x_train = (x_train.reshape(-1, 784) / 255).astype(np.float32)
x_test = (x_test.reshape(-1, 784) / 255).astype(np.float32)
t_train = np.eye(10)[t_train].astype(np.float32)
t_test = np.eye(10)[t_test].astype(np.float32)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz


In [3]:
model = Sequential()
model.add(Dense(200, activation='sigmoid'))
# model.add(Dense(200, activation='sigmoid'))
# model.add(Dense(200, activation='sigmoid'))
model.add(Dense(10, activation='softmax'))

In [4]:
model.compile(optimizer='sgd', loss='categorical_crossentropy',
                metrics=['accuracy'])

model.fit(x_train, t_train,
            epochs=30, batch_size=100,
            verbose=2)

loss, acc = model.evaluate(x_test, t_test, verbose=0)
print('test_loss: {:.3f}, test_acc: {:.3f}'.format(
    loss,
    acc
))

Epoch 1/30
600/600 - 5s - loss: 1.9092 - accuracy: 0.5806 - 5s/epoch - 8ms/step
Epoch 2/30
600/600 - 3s - loss: 1.3071 - accuracy: 0.7712 - 3s/epoch - 4ms/step
Epoch 3/30
600/600 - 2s - loss: 0.9560 - accuracy: 0.8189 - 2s/epoch - 4ms/step
Epoch 4/30
600/600 - 3s - loss: 0.7675 - accuracy: 0.8426 - 3s/epoch - 5ms/step
Epoch 5/30
600/600 - 3s - loss: 0.6581 - accuracy: 0.8550 - 3s/epoch - 5ms/step
Epoch 6/30
600/600 - 3s - loss: 0.5880 - accuracy: 0.8642 - 3s/epoch - 6ms/step
Epoch 7/30
600/600 - 5s - loss: 0.5395 - accuracy: 0.8703 - 5s/epoch - 8ms/step
Epoch 8/30
600/600 - 3s - loss: 0.5038 - accuracy: 0.8754 - 3s/epoch - 5ms/step
Epoch 9/30
600/600 - 3s - loss: 0.4765 - accuracy: 0.8791 - 3s/epoch - 4ms/step
Epoch 10/30
600/600 - 3s - loss: 0.4549 - accuracy: 0.8826 - 3s/epoch - 5ms/step
Epoch 11/30
600/600 - 4s - loss: 0.4373 - accuracy: 0.8857 - 4s/epoch - 6ms/step
Epoch 12/30
600/600 - 4s - loss: 0.4229 - accuracy: 0.8886 - 4s/epoch - 6ms/step
Epoch 13/30
600/600 - 4s - loss: 0.41

# TensorFlow

In [5]:
import numpy as np
from sklearn.utils import shuffle
import tensorflow as tf
from keras import datasets
from keras.models import Model
from keras.layers import Dense
from keras import optimizers
from keras import losses
from keras import metrics

In [6]:
class DNN(Model):
    def __init__(self, hidden_dim, output_dim):
        super().__init__()
        self.l1 = Dense(hidden_dim, activation='sigmoid')
        self.l2 = Dense(hidden_dim, activation='sigmoid')
        self.l3 = Dense(hidden_dim, activation='sigmoid')
        self.l4 = Dense(output_dim, activation='softmax')

        self.ls = [self.l1, self.l2, self.l3, self.l4]

    def call(self, x):
        for layer in self.ls:
            x = layer(x)

        return x

In [7]:
np.random.seed(123)
tf.random.set_seed(123)

# prepare data
mnist = datasets.mnist
(x_train, t_train), (x_test, t_test) = mnist.load_data()

x_train = (x_train.reshape(-1, 784) / 255).astype(np.float32)
x_test = (x_test.reshape(-1, 784) / 255).astype(np.float32)
t_train = np.eye(10)[t_train].astype(np.float32)
t_test = np.eye(10)[t_test].astype(np.float32)


In [8]:
model = DNN(200, 10)

criterion = losses.CategoricalCrossentropy()
optimizer = optimizers.SGD(learning_rate=0.01)
train_loss = metrics.Mean()
train_acc = metrics.CategoricalAccuracy()

def compute_loss(t, y):
    return criterion(t, y)

def train_step(x, t):
    with tf.GradientTape() as tape:
        preds = model(x)
        loss = compute_loss(t, preds)
    grads = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(grads, model.trainable_variables))
    train_loss(loss)
    train_acc(t, preds)

    return loss

epochs = 30
batch_size = 100
n_batches = x_train.shape[0] // batch_size

for epoch in range(epochs):
    x_, t_ = shuffle(x_train, t_train)

    for batch in range(n_batches):
        start = batch * batch_size
        end = start + batch_size
        train_step(x_[start:end], t_[start:end])

    print('epoch: {}, loss: {:.3}, acc: {:.3f}'.format(
        epoch+1,
        train_loss.result(),
        train_acc.result()
    ))

epoch: 1, loss: 2.31, acc: 0.114
epoch: 2, loss: 2.3, acc: 0.118
epoch: 3, loss: 2.3, acc: 0.123
epoch: 4, loss: 2.29, acc: 0.129
epoch: 5, loss: 2.29, acc: 0.136
epoch: 6, loss: 2.29, acc: 0.147
epoch: 7, loss: 2.28, acc: 0.160
epoch: 8, loss: 2.28, acc: 0.177
epoch: 9, loss: 2.27, acc: 0.194
epoch: 10, loss: 2.26, acc: 0.212


KeyboardInterrupt: 

# PyTorch

In [9]:
import os
import numpy as np
from sklearn.metrics import accuracy_score
import torch
import torch.nn as nn
import torch.optim as optimizers
from torch.utils.data import DataLoader
from torchvision import datasets
import torchvision.transforms as transforms

In [10]:
np.random.seed(123)
torch.manual_seed(123)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

root = os.path.join('~', '.torch', 'mnist')
transform = transforms.Compose([transforms.ToTensor(),
                                lambda x: x.view(-1)])
mnist_train = datasets.MNIST(root=root,
                                download=True,
                                train=True,
                                transform=transform)
mnist_test = datasets.MNIST(root=root,
                            download=True,
                            train=False,
                            transform=transform)

train_dataloader = DataLoader(mnist_train,
                                batch_size=100,
                                shuffle=True)
test_dataloader = DataLoader(mnist_test,
                                batch_size=100,
                                shuffle=False)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to C:\Users\hp\.torch\mnist\MNIST\raw\train-images-idx3-ubyte.gz


100.0%


Extracting C:\Users\hp\.torch\mnist\MNIST\raw\train-images-idx3-ubyte.gz to C:\Users\hp\.torch\mnist\MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to C:\Users\hp\.torch\mnist\MNIST\raw\train-labels-idx1-ubyte.gz


100.0%


Extracting C:\Users\hp\.torch\mnist\MNIST\raw\train-labels-idx1-ubyte.gz to C:\Users\hp\.torch\mnist\MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to C:\Users\hp\.torch\mnist\MNIST\raw\t10k-images-idx3-ubyte.gz


100.0%


Extracting C:\Users\hp\.torch\mnist\MNIST\raw\t10k-images-idx3-ubyte.gz to C:\Users\hp\.torch\mnist\MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to C:\Users\hp\.torch\mnist\MNIST\raw\t10k-labels-idx1-ubyte.gz


100.0%


Extracting C:\Users\hp\.torch\mnist\MNIST\raw\t10k-labels-idx1-ubyte.gz to C:\Users\hp\.torch\mnist\MNIST\raw



In [11]:
class DNN(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim):
        super().__init__()
        self.l1 = nn.Linear(input_dim, hidden_dim)
        self.a1 = nn.Sigmoid()
        self.l2 = nn.Linear(hidden_dim, hidden_dim)
        self.a2 = nn.Sigmoid()
        self.l3 = nn.Linear(hidden_dim, hidden_dim)
        self.a3 = nn.Sigmoid()
        self.l4 = nn.Linear(hidden_dim, output_dim)

        self.layers = [self.l1, self.a1,
                       self.l2, self.a2,
                       self.l3, self.a3,
                       self.l4]

    def forward(self, x):
        for layer in self.layers:
            x = layer(x)

        return x

In [14]:
class DNN(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim):
        super().__init__()
        self.l1 = nn.Linear(input_dim, hidden_dim)
        self.a1 = nn.Tanh()
        self.l2 = nn.Linear(hidden_dim, hidden_dim)
        self.a2 = nn.Tanh()
        self.l3 = nn.Linear(hidden_dim, hidden_dim)
        self.a3 = nn.Tanh()
        self.l4 = nn.Linear(hidden_dim, output_dim)

        self.layers = [self.l1, self.a1,
                       self.l2, self.a2,
                       self.l3, self.a3,
                       self.l4]

    def forward(self, x):
        for layer in self.layers:
            x = layer(x)

        return x

In [15]:
model = DNN(784, 200, 10).to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optimizers.SGD(model.parameters(), lr=0.01)

def compute_loss(t, y):
    return criterion(y, t)

def train_step(x, t):
    model.train()
    preds = model(x)
    loss = compute_loss(t, preds)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    return loss, preds

epochs = 30

for epoch in range(epochs):
    train_loss = 0.
    train_acc = 0.

    for (x, t) in train_dataloader:
        x, t = x.to(device), t.to(device)
        loss, preds = train_step(x, t)
        train_loss += loss.item()
        train_acc += \
            accuracy_score(t.tolist(),
                            preds.argmax(dim=-1).tolist())

    train_loss /= len(train_dataloader)
    train_acc /= len(train_dataloader)

    print('epoch: {}, loss: {:.3}, acc: {:.3f}'.format(
        epoch+1,
        train_loss,
        train_acc
    ))


epoch: 1, loss: 1.82, acc: 0.533
epoch: 2, loss: 0.757, acc: 0.799
epoch: 3, loss: 0.492, acc: 0.865
epoch: 4, loss: 0.408, acc: 0.886
epoch: 5, loss: 0.368, acc: 0.897
epoch: 6, loss: 0.343, acc: 0.903
epoch: 7, loss: 0.325, acc: 0.907
epoch: 8, loss: 0.31, acc: 0.911
epoch: 9, loss: 0.298, acc: 0.915
epoch: 10, loss: 0.287, acc: 0.918
epoch: 11, loss: 0.277, acc: 0.920
epoch: 12, loss: 0.268, acc: 0.923
epoch: 13, loss: 0.259, acc: 0.925
epoch: 14, loss: 0.252, acc: 0.928
epoch: 15, loss: 0.244, acc: 0.930
epoch: 16, loss: 0.237, acc: 0.932
epoch: 17, loss: 0.23, acc: 0.934
epoch: 18, loss: 0.223, acc: 0.936
epoch: 19, loss: 0.217, acc: 0.937
epoch: 20, loss: 0.21, acc: 0.940
epoch: 21, loss: 0.204, acc: 0.942
epoch: 22, loss: 0.198, acc: 0.943
epoch: 23, loss: 0.192, acc: 0.945
epoch: 24, loss: 0.187, acc: 0.946
epoch: 25, loss: 0.181, acc: 0.948
epoch: 26, loss: 0.176, acc: 0.950
epoch: 27, loss: 0.171, acc: 0.951
epoch: 28, loss: 0.166, acc: 0.953
epoch: 29, loss: 0.161, acc: 0.95

In [16]:
def test_step(x, t):
    model.eval()
    preds = model(x)
    loss = criterion(preds, t)

    return loss, preds

test_loss = 0.
test_acc = 0.

for (x, t) in test_dataloader:
    x, t = x.to(device), t.to(device)
    loss, preds = test_step(x, t)
    test_loss += loss.item()
    test_acc += \
        accuracy_score(t.tolist(),
                        preds.argmax(dim=-1).tolist())

test_loss /= len(test_dataloader)
test_acc /= len(test_dataloader)
print('test_loss: {:.3f}, test_acc: {:.3f}'.format(
    test_loss,
    test_acc
))


test_loss: 0.161, test_acc: 0.953
