In [1]:
from types import SimpleNamespace
from pathlib import Path

import torch
from torch import nn
from dataset import get_mnist, separate
from skorch import NeuralNetClassifier
from sklearn.metrics import accuracy_score

import os
os.environ['KERAS_BACKEND'] = 'torch'
import keras
from keras import layers

### Configuration

In [2]:
# Run configuration: every tunable used by later cells is an attribute of `args`.
args = SimpleNamespace(dataset="mnist")
args.device = "cuda" if torch.cuda.is_available() else "cpu"  # prefer the GPU when one is visible
args.lr = 1e-3     # learning rate handed to skorch
args.batch = 64    # mini-batch size for both the data loaders and skorch
args.epoch = 100   # number of training epochs per model

# Seed the random number generators before any data loader or model is built,
# so that weight initialisation is repeatable across Restart & Run All.
# Per the Keras docs, set_random_seed seeds Python's `random`, NumPy and the
# backend framework (torch here, selected through KERAS_BACKEND above).
# NOTE(review): this does not by itself guarantee bit-identical GPU results.
SEED = 0
keras.utils.set_random_seed(SEED)

print(args)

namespace(dataset='mnist', device='cuda', lr=0.001, batch=64, epoch=100)


In [3]:
# Build the MNIST loaders; index 0 is used for training and index 1 for testing below.
loader = get_mnist(batch_size=args.batch)
# Show how many batches each of the two loaders yields.
tuple(map(len, (loader[0], loader[1])))

(937, 157)

In [4]:
# Collapse the training loader into one (inputs, labels) pair of tensors.
# The recorded shape (59968 = 937 * 64 samples) suggests the loader drops the
# final partial batch -- TODO confirm in dataset.get_mnist.
x_train, y_train = separate(loader[0])
tuple(t.shape for t in (x_train, y_train))

(torch.Size([59968, 1, 28, 28]), torch.Size([59968]))

In [5]:
# Same as above for the test loader: one inputs tensor and one labels tensor.
x_test, y_test = separate(loader[1])
tuple(t.shape for t in (x_test, y_test))

(torch.Size([10000, 1, 28, 28]), torch.Size([10000]))

In [6]:
class Model(nn.Module):
    """MLP classifier assembled from Keras layers and wrapped in a torch module.

    The stack takes inputs of shape (1, 28, 28), flattens them, applies two
    ReLU hidden layers (200 and 150 units) and finishes with a softmax layer.

    Args:
        num_classes: Number of units in the final softmax layer. Defaults to 10.
    """

    def __init__(self, num_classes=10):
        super().__init__()
        # Assemble the layers in order: input spec, flatten, hidden layers, output.
        stack = [layers.Input((1, 28, 28)), layers.Flatten()]
        for units in (200, 150):
            stack.append(layers.Dense(units, activation="relu"))
        stack.append(layers.Dense(num_classes, activation="softmax"))
        self.model = keras.Sequential(stack)

    def forward(self, x):
        """Pass ``x`` through the Keras stack and return its output."""
        return self.model(x)

In [7]:
# Smoke test: push the first 32 training images through a fresh, untrained
# model and check that the output has one row per image.
model = Model()
sample_batch = x_train[:32]
pred_y = model(sample_batch)
pred_y.shape

torch.Size([32, 10])

In [8]:
# No criterion is passed, so skorch falls back to its default loss
# (negative log-likelihood, NLLLoss). The Model defined above ends in a
# softmax, i.e. it outputs class probabilities for that loss.
fit_settings = dict(
    max_epochs=args.epoch,
    lr=args.lr,
    batch_size=args.batch,
    device=args.device,
)
net = NeuralNetClassifier(Model, **fit_settings)
net.fit(x_train, y_train)

  epoch    train_loss    valid_acc    valid_loss     dur
-------  ------------  -----------  ------------  ------
      1        2.1405       0.5295        1.9490  3.7053
      2        1.7325       0.7064        1.5005  3.8063
      3        1.2949       0.7762        1.1001  3.5674
      4        0.9707       0.8192        0.8485  3.6928
      5        0.7779       0.8412        0.7021  3.7257
      6        0.6627       0.8550        0.6115  3.8992
      7        0.5885       0.8639        0.5510  3.8088
      8        0.5372       0.8709        0.5078  3.7070
      9        0.4994       0.8768        0.4755  3.6823
     10        0.4705       0.8819        0.4504  3.5816
     11        0.4475       0.88

<class 'skorch.classifier.NeuralNetClassifier'>[initialized](
  module_=Model(
    (model): <Sequential name=sequential_1, built=True>
  ),
)

In [9]:
# Hard class predictions for the first five test images.
first_five = x_test[:5]
y_pred = net.predict(first_five)
y_pred

array([7, 2, 1, 0, 4], dtype=int64)

In [10]:
# Per-class scores for the same five test images (one row per image).
first_five = x_test[:5]
y_proba = net.predict_proba(first_five)
y_proba

array([[4.6451274e-05, 5.3063718e-06, 4.1977540e-04, 2.8449306e-03,
        1.7729832e-06, 3.5731573e-05, 9.5632489e-09, 9.9603051e-01,
        2.0018611e-05, 5.9551321e-04],
       [3.1593044e-03, 1.4765879e-04, 9.8155826e-01, 6.5317373e-03,
        1.1567742e-08, 1.4843104e-03, 5.3820792e-03, 1.3069323e-08,
        1.7366771e-03, 5.2421928e-08],
       [1.9699519e-05, 9.8108172e-01, 7.4166330e-03, 2.3047475e-03,
        2.4427121e-04, 1.1911511e-03, 8.1940944e-04, 3.7905967e-03,
        2.6040056e-03, 5.2784377e-04],
       [9.9917585e-01, 1.8151191e-07, 1.4632031e-04, 1.3783371e-05,
        9.6082687e-08, 2.4336361e-04, 2.7840881e-04, 3.0799725e-05,
        4.2784905e-06, 1.0681527e-04],
       [6.3727214e-04, 1.2009788e-05, 2.0672868e-03, 7.4143056e-05,
        9.5954847e-01, 3.2029176e-04, 1.7458161e-03, 2.1546860e-03,
        2.3238973e-03, 3.1116124e-02]], dtype=float32)

In [11]:
# Accuracy of the softmax + default-loss model over the whole test set.
y_pred = net.predict(x_test)
accuracy_score(y_true=y_test, y_pred=y_pred)

0.9464

In [12]:
class Model(nn.Module):
    """MLP classifier from Keras layers whose last layer has no activation.

    Same layout as the earlier softmax version (flatten, then ReLU layers of
    200 and 150 units), but the final Dense layer is created without an
    activation argument. Defining this class rebinds the name ``Model``, so
    cells that run after this one get this variant.

    Args:
        num_classes: Number of units in the final Dense layer. Defaults to 10.
    """

    def __init__(self, num_classes=10):
        super().__init__()
        # Assemble the layers in order: input spec, flatten, hidden layers, output.
        stack = [layers.Input((1, 28, 28)), layers.Flatten()]
        for units in (200, 150):
            stack.append(layers.Dense(units, activation="relu"))
        stack.append(layers.Dense(num_classes))  # no activation on the output layer
        self.model = keras.Sequential(stack)

    def forward(self, x):
        """Pass ``x`` through the Keras stack and return its output."""
        return self.model(x)

In [13]:
# Train the no-final-activation Model with an explicit cross-entropy criterion;
# nn.CrossEntropyLoss takes unnormalised scores, which is what this Model emits.
fit_settings = dict(
    max_epochs=args.epoch,
    lr=args.lr,
    criterion=nn.CrossEntropyLoss,
    batch_size=args.batch,
    device=args.device,
)
net = NeuralNetClassifier(Model, **fit_settings)
net.fit(x_train, y_train)

  epoch    train_loss    valid_acc    valid_loss     dur
-------  ------------  -----------  ------------  ------
      1        2.1292       0.5456        1.9259  3.5303
      2        1.6923       0.7203        1.4522  3.5847
      3        1.2473       0.7883        1.0597  3.5464
      4        0.9367       0.8266        0.8217  3.6157
      5        0.7552       0.8457        0.6837  3.7120
      6        0.6468       0.8581        0.5980  3.8556
      7        0.5766       0.8676        0.5403  3.3678
      8        0.5276       0.8749        0.4989  3.3392
      9        0.4915       0.8803        0.4677  3.4421
     10        0.4636       0.8839        0.4433  3.6726
     11        0.4415       0.88

<class 'skorch.classifier.NeuralNetClassifier'>[initialized](
  module_=Model(
    (model): <Sequential name=sequential_2, built=True>
  ),
)

In [14]:
# Hard class predictions from the cross-entropy model for the first five test images.
first_five = x_test[:5]
y_pred = net.predict(first_five)
y_pred

array([7, 2, 1, 0, 4], dtype=int64)

In [15]:
# Per-class scores from the cross-entropy model for the same five images.
# NOTE(review): the recorded rows look like normalised probabilities even
# though this Model has no softmax layer -- presumably skorch applies one at
# prediction time for CrossEntropyLoss (predict_nonlinearity); confirm.
first_five = x_test[:5]
y_proba = net.predict_proba(first_five)
y_proba

array([[5.4555472e-05, 1.0122669e-07, 1.0481225e-03, 1.4534072e-03,
        1.2624902e-07, 1.8522098e-05, 5.1840638e-09, 9.9732971e-01,
        7.4801997e-06, 8.7973662e-05],
       [1.0834259e-03, 1.9611622e-05, 9.9053568e-01, 6.9521121e-03,
        3.0625497e-08, 6.8418367e-04, 6.0150540e-04, 1.6620586e-08,
        1.2337540e-04, 3.8639008e-08],
       [2.1273694e-05, 9.8369986e-01, 6.0267276e-03, 1.6142586e-03,
        3.3433113e-04, 7.5262145e-04, 1.5223800e-03, 3.6379872e-03,
        2.1086324e-03, 2.8187811e-04],
       [9.9955350e-01, 5.2985825e-09, 2.3225954e-04, 1.0755182e-05,
        4.2780133e-08, 6.6379762e-05, 9.8772172e-05, 9.7750226e-06,
        7.3832412e-06, 2.1232012e-05],
       [3.0487505e-04, 2.3657700e-05, 1.1745940e-03, 5.7727117e-05,
        9.6659064e-01, 4.4128322e-04, 2.5744117e-03, 1.9118144e-03,
        1.0550403e-03, 2.5865979e-02]], dtype=float32)

In [16]:
# Accuracy of the cross-entropy model over the whole test set.
y_pred = net.predict(x_test)
accuracy_score(y_true=y_test, y_pred=y_pred)

0.9482