In [1]:
%load_ext autoreload
%autoreload 2

# MNIST

In [6]:
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split

# Fetch OpenML dataset 'mnist_784' (version 1) as plain NumPy arrays rather
# than a DataFrame, already unpacked into a (features, labels) pair.
# Labels are cast with astype(int) further down, presumably because OpenML
# delivers them as strings — verify if the dtype matters to you.
X, y = fetch_openml(
    name='mnist_784',
    version=1,
    as_frame=False,
    return_X_y=True,
)

In [7]:
# Two-stage stratified hold-out.
# Stage 1: set aside 10,000 samples as the test set, keeping class
# proportions equal to those of the full label vector.
X_trainval, X_test, y_trainval, y_test = train_test_split(
    X,
    y,
    test_size=10000,
    stratify=y,
    random_state=42,
)

# Stage 2: carve a 10,000-sample validation set out of the remainder,
# again stratified; everything left over is the training set.
X_train, X_val, y_train, y_val = train_test_split(
    X_trainval,
    y_trainval,
    test_size=10000,
    stratify=y_trainval,
    random_state=42,
)

In [10]:
from dl_modules import train_mnist_model
import numpy as np

# Train on the training split; the validation split is handed to the trainer
# as well (the training log reports validation loss/accuracy per epoch).
model = train_mnist_model(X_train, y_train, X_val, y_val)

# One-hot encode the test labels: row k of the 10x10 identity matrix is the
# encoding of digit k, so indexing with the integer labels yields (N, 10).
y_test_ohe = np.eye(10)[y_test.astype(int)]

# NOTE(review): the test pixels are scaled to [0, 1] by hand here, while
# X_train / X_val are passed to train_mnist_model unscaled — confirm the
# trainer applies the same /255 scaling internally.
# Kept as the cell's last bare expression so the returned tuple is displayed
# (apparently (loss, accuracy), judging by the recorded output).
model.evaluate(
    X_test.astype(np.float32) / 255.0,
    y_test_ohe,
    200,  # presumably the evaluation batch size — confirm against dl_modules
)

Training:  10%|█         | 1615/15620 [00:02<00:29, 467.81it/s]

Epoch:   1, train loss: 0.563306, train accuracy: 85.37%, validation loss: 0.309056, validation accuracy: 91.11%


Training:  20%|██        | 3170/15620 [00:05<00:23, 530.34it/s]

Epoch:   2, train loss: 0.276264, train accuracy: 92.22%, validation loss: 0.245097, validation accuracy: 93.06%


Training:  31%|███       | 4766/15620 [00:08<00:21, 500.87it/s]

Epoch:   3, train loss: 0.226034, train accuracy: 93.53%, validation loss: 0.210444, validation accuracy: 93.88%


Training:  40%|████      | 6316/15620 [00:10<00:16, 552.71it/s]

Epoch:   4, train loss: 0.194962, train accuracy: 94.43%, validation loss: 0.187245, validation accuracy: 94.53%


Training:  51%|█████     | 7895/15620 [00:12<00:13, 570.27it/s]

Epoch:   5, train loss: 0.171285, train accuracy: 95.16%, validation loss: 0.168982, validation accuracy: 94.92%


Training:  60%|██████    | 9434/15620 [00:15<00:11, 549.93it/s]

Epoch:   6, train loss: 0.152623, train accuracy: 95.59%, validation loss: 0.153901, validation accuracy: 95.42%


Training:  70%|███████   | 10994/15620 [00:17<00:08, 566.53it/s]

Epoch:   7, train loss: 0.138257, train accuracy: 95.98%, validation loss: 0.139032, validation accuracy: 95.98%


Training:  80%|████████  | 12551/15620 [00:20<00:07, 433.89it/s]

Epoch:   8, train loss: 0.125806, train accuracy: 96.33%, validation loss: 0.132256, validation accuracy: 95.99%


Training:  90%|█████████ | 14114/15620 [00:23<00:03, 422.60it/s]

Epoch:   9, train loss: 0.114847, train accuracy: 96.71%, validation loss: 0.126151, validation accuracy: 96.33%


Training: 100%|██████████| 15620/15620 [00:25<00:00, 608.76it/s]


Epoch:  10, train loss: 0.105822, train accuracy: 97.01%, validation loss: 0.120719, validation accuracy: 96.41%



(0.12451177127669814, np.float64(0.9648))

In [11]:
# Print the model's string form: a per-layer table of input/output shapes and
# parameter counts, followed by the total number of parameters.
print(model)

               Dense | input: (784,)               output: (256,)               | params:          200,960
                ReLU | input: (256,)               output: (256,)               | params:                0
               Dense | input: (256,)               output: (256,)               | params:           65,792
                ReLU | input: (256,)               output: (256,)               | params:                0
               Dense | input: (256,)               output: (10,)                | params:            2,570
             Softmax | input: (10,)                output: (10,)                | params:                0
----------------------------------------------------------------------------------------------------------
               Total | total parameters:              269,322


# CIFAR10

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:

import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.datasets import cifar10


# Load CIFAR-10 through Keras: images arrive channels-last, (N, 32, 32, 3),
# and labels as an (N, 1) column of class indices (see the shapes printed below).
(x_train, y_train), (x_test, y_test) = cifar10.load_data()

# Per-channel (RGB) mean / std on the [0, 1] pixel scale, used to standardise
# the images. They are created as float32 on purpose: with the default float64
# dtype, NumPy's type promotion would silently upcast the float32 images back
# to float64 during the subtraction/division, doubling the memory footprint of
# both arrays and defeating the explicit astype(np.float32) below.
cifar_mean = np.array([0.4914, 0.4822, 0.4465], dtype=np.float32)
cifar_std = np.array([0.2470, 0.2435, 0.2616], dtype=np.float32)

# Scale to [0, 1], then standardise each channel; the length-3 statistics
# broadcast along the trailing channel axis.
x_train = (x_train.astype(np.float32) / 255.0 - cifar_mean) / cifar_std
x_test = (x_test.astype(np.float32) / 255.0 - cifar_mean) / cifar_std


print("Train:", x_train.shape, y_train.shape)
print("Test:", x_test.shape, y_test.shape)
print("\nNumber of classes:", len(np.unique(y_train)))



Train: (50000, 32, 32, 3) (50000, 1)
Test: (10000, 32, 32, 3) (10000, 1)

Number of classes: 10


In [3]:
number_of_classes = 10

# Reorder the image axes from channels-last (N, H, W, C) to channels-first
# (N, C, H, W), which is the layout the printed shapes below confirm.
x_train_ = np.transpose(x_train, (0, 3, 1, 2))
x_test_ = np.transpose(x_test, (0, 3, 1, 2))

# One-hot encode the labels: flatten the (N, 1) label column to length N and
# use it to pick rows of a float32 identity matrix, giving (N, 10).
y_train_ohe = np.eye(number_of_classes, dtype=np.float32)[y_train.reshape(-1)]
y_test_ohe = np.eye(number_of_classes, dtype=np.float32)[y_test.reshape(-1)]

print('Training set:', x_train_.shape, y_train_ohe.shape)
print('Test set:', x_test_.shape, y_test_ohe.shape)

Training set: (50000, 3, 32, 32) (50000, 10)
Test set: (10000, 3, 32, 32) (10000, 10)


In [4]:
from dl_modules import train_cifar10_model

# Train the CIFAR-10 model on the channels-first images and one-hot labels.
# NOTE(review): the *test* arrays are passed in the argument positions where
# the MNIST call passes its validation split, and the training log reports
# them as "validation" loss/accuracy. If those numbers are used to tune or
# select the model, the test set is no longer a clean hold-out — consider
# splitting a validation set off x_train_ instead (train_test_split is
# imported in this notebook but not used in the cells visible here).
model = train_cifar10_model(x_train_, y_train_ohe, x_test_, y_test_ohe)

Learning rate:  0.01 , momentum:  0.9 , batch size:  128 , epochs:  10 , weight_decay: 0.001


Training:  10%|█         | 390/3900 [01:58<23:14,  2.52it/s]

Epoch:   1, train loss: 1.540034, train accuracy: 44.78%, validation loss: 1.259351, validation accuracy: 55.70%


Training:  20%|██        | 780/3900 [04:05<17:35,  2.96it/s]  

Epoch:   2, train loss: 1.220602, train accuracy: 56.52%, validation loss: 1.092741, validation accuracy: 60.43%


Training:  30%|███       | 1170/3900 [06:10<12:11,  3.73it/s] 

Epoch:   3, train loss: 1.079972, train accuracy: 61.61%, validation loss: 0.961158, validation accuracy: 66.85%


Training:  40%|████      | 1560/3900 [08:11<11:05,  3.52it/s]  

Epoch:   4, train loss: 0.984053, train accuracy: 65.20%, validation loss: 0.912132, validation accuracy: 68.03%


Training:  50%|█████     | 1950/3900 [10:13<09:35,  3.39it/s]  

Epoch:   5, train loss: 0.900539, train accuracy: 68.18%, validation loss: 0.905999, validation accuracy: 68.85%


Training:  60%|██████    | 2340/3900 [12:23<07:17,  3.57it/s]  

Epoch:   6, train loss: 0.835204, train accuracy: 70.45%, validation loss: 0.824331, validation accuracy: 71.56%


Training:  70%|███████   | 2730/3900 [14:26<05:16,  3.70it/s]  

Epoch:   7, train loss: 0.780159, train accuracy: 72.60%, validation loss: 0.888020, validation accuracy: 69.69%


Training:  80%|████████  | 3120/3900 [16:33<03:45,  3.46it/s]  

Epoch:   8, train loss: 0.724700, train accuracy: 74.58%, validation loss: 0.785224, validation accuracy: 72.78%


Training:  90%|█████████ | 3510/3900 [18:47<01:50,  3.54it/s]

Epoch:   9, train loss: 0.686290, train accuracy: 75.82%, validation loss: 0.775161, validation accuracy: 73.37%


Training: 100%|██████████| 3900/3900 [20:48<00:00,  3.12it/s]


Epoch:  10, train loss: 0.644922, train accuracy: 77.27%, validation loss: 0.787044, validation accuracy: 72.94%



In [5]:
# Print the model's string form: a per-layer table of input/output shapes and
# parameter counts.
print(model)

              Conv2D | input: (3, 32, 32)          output: (16, 32, 32)         | params:              448
                ReLU | input: (16, 32, 32)         output: (16, 32, 32)         | params:                0
           BatchNorm | input: (16, 32, 32)         output: (16, 32, 32)         | params:               32
           Pooling2D | input: (16, 32, 32)         output: (16, 16, 16)         | params:                0
      SpatialDropout | input: (16, 16, 16)         output: (16, 16, 16)         | params:                0
              Conv2D | input: (16, 16, 16)         output: (64, 16, 16)         | params:            9,280
                ReLU | input: (64, 16, 16)         output: (64, 16, 16)         | params:                0
           BatchNorm | input: (64, 16, 16)         output: (64, 16, 16)         | params:              128
           Pooling2D | input: (64, 16, 16)         output: (64, 8, 8)           | params:                0
      SpatialDropout | input: (64, 8,