In [1]:
from collections import OrderedDict
from typing import Iterator, Tuple

# Project-local modules. The relative order of the star imports is preserved
# (including the repeated `layers` import) because later star imports can
# shadow names exported by earlier ones.
from dataset import load_svhn, random_split_train_val
from layers import *
from trainer import *
from optim import *
from loss import *
from layers import *
from operators import *
from model import NumberSortModule

# Kept last, as in the original, so no star import above can rebind `np`.
import numpy as np

In [2]:
def one_hot(x: (np.ndarray, int), vocab_size: int) -> np.ndarray:
    """One-hot encode an integer index or an array of integer indices.

    Args:
        x: A single index (Python ``int`` or NumPy integer scalar) or an
            array with any integer dtype. Every value must lie in
            ``[0, vocab_size)``.
        vocab_size: Number of classes, i.e. the length of the one-hot axis.

    Returns:
        A float array of shape ``(vocab_size,)`` for a scalar ``x``, or
        ``x.shape + (vocab_size,)`` for an array ``x``.

    Raises:
        AssertionError: If ``vocab_size`` is not an ``int``, ``x`` has an
            unsupported type or dtype, or any index is out of vocabulary.
    """
    assert isinstance(vocab_size, int), "vocab_size is an integer value"
    assert isinstance(x, (int, np.integer, np.ndarray)), "unsupported type for one-hot encoding"

    if not isinstance(x, np.ndarray):
        # Reject negatives explicitly: NumPy indexing would otherwise wrap
        # them around and silently encode the wrong class.
        assert 0 <= x < vocab_size, "out of vocabulary"
        y = np.zeros(vocab_size)
        y[x] = 1
        return y

    # Any integer dtype is a valid index type (np.random.randint defaults to
    # the platform integer, not int32).
    assert np.issubdtype(x.dtype, np.integer), "unsupported x.dtype for one-hot encoding"
    # min()/max() are undefined for empty arrays, which are trivially valid.
    assert x.size == 0 or (x.min() >= 0 and x.max() < vocab_size), "out of vocabulary"

    # Row i of the identity matrix is the one-hot vector for class i.
    y = np.eye(vocab_size)[x.ravel()]
    return y.reshape((*x.shape, vocab_size))


def split_test_train(X, y, test_size=0.25):
    """Split samples and labels into leading train and trailing test parts.

    The split is positional (no shuffling): the first
    ``int(len(X) * (1 - test_size))`` items form the training part and the
    remainder the test part. Slicing is done along the first axis only, so
    inputs of any dimensionality (including 1-D labels) are supported.

    Args:
        X: Sliceable sequence of samples (e.g. a NumPy array).
        y: Sliceable sequence of labels with the same length as ``X``.
        test_size: Fraction of the data assigned to the test part, in
            ``[0, 1]``.

    Returns:
        A tuple ``(train_X, train_y, test_X, test_y)``.

    Raises:
        ValueError: If ``X`` and ``y`` differ in length or ``test_size`` is
            outside ``[0, 1]``.
    """
    if len(X) != len(y):
        raise ValueError(
            "X and y must have the same length, got %d and %d" % (len(X), len(y))
        )
    if not 0 <= test_size <= 1:
        raise ValueError("test_size must be within [0, 1], got %r" % (test_size,))

    train_size = int(len(X) * (1 - test_size))

    # Slice only the leading axis so 1-D inputs work as well as N-D ones.
    train_X, test_X = X[:train_size], X[train_size:]
    train_y, test_y = y[:train_size], y[train_size:]

    return train_X, train_y, test_X, test_y


def generate_batch(batch_size: int = 32, seq_len: int = 10, max_num: int = 100) -> Iterator[Tuple[np.ndarray, np.ndarray]]:
    """Endlessly yield one-hot encoded batches for the number-sorting task.

    Args:
        batch_size: Number of sequences per batch.
        seq_len: Length of every sequence.
        max_num: Exclusive upper bound for the generated numbers; also the
            width of the one-hot axis.

    Yields:
        ``(inputs, targets)`` where both arrays have shape
        ``(batch_size, seq_len, max_num)``; ``inputs`` encodes the random
        sequences and ``targets`` the same sequences sorted ascending.
    """
    # A single sample generator serves every row of every batch instead of
    # constructing a fresh generator object per sample.
    samples = generate_sample_pointer(seq_len, max_num)

    while True:
        # int32 buffers: this is the index dtype one_hot is called with.
        X = np.empty((batch_size, seq_len), dtype=np.int32)
        y = np.empty((batch_size, seq_len), dtype=np.int32)

        for row in range(batch_size):
            X[row], y[row] = next(samples)

        yield one_hot(X, vocab_size=max_num), one_hot(y, vocab_size=max_num)


def generate_sample_pointer(seq_len: int = 10, max_num: int = 100) -> Iterator[Tuple[np.ndarray, np.ndarray]]:
    """Endlessly yield ``(sequence, sorted_sequence)`` training pairs.

    Despite the "pointer" in the name, the label is the sorted *values* of
    the sequence, not the argsort positions.

    Args:
        seq_len: Number of integers in each sequence.
        max_num: Exclusive upper bound of the integers; values are drawn
            from ``[0, max_num)`` using NumPy's global random state.

    Yields:
        ``(X, y)`` where ``X`` is a 1-D integer array of length ``seq_len``
        and ``y`` is ``X`` sorted in ascending order.
    """
    while True:
        X = np.random.randint(max_num, size=seq_len)
        yield X, np.sort(X, axis=-1)


def create_dataset(num_samples: int, seq_len, max_num) -> Dataset:
    """Build a Dataset from one generated batch of ``num_samples`` sequences.

    A single batch is drawn from ``generate_batch`` and split positionally
    by ``split_test_train`` (default test fraction); the four resulting
    arrays are passed to ``Dataset`` in the order
    ``train_X, train_y, test_X, test_y``.
    """
    batches = generate_batch(num_samples, seq_len, max_num)
    inputs, targets = next(batches)
    train_X, train_y, test_X, test_y = split_test_train(inputs, targets)
    return Dataset(train_X, train_y, test_X, test_y)


In [3]:
# Draw one demo batch: a single sequence of 5 numbers taken from [0, 10).
x, y = next(generate_batch(batch_size=1, seq_len=5, max_num=10))

In [4]:
# Show the one-hot encoded input batch.
x

array([[[1., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 1.],
        [1., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 1., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 1., 0., 0., 0.]]])

In [5]:
# Decode the one-hot rows back into the original integers.
x.argmax(axis=-1)

array([[0, 9, 0, 3, 6]])

In [6]:
# Show the one-hot encoded target batch (the input sequence sorted ascending).
y

array([[[1., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [1., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 1., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 1., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 1.]]])

In [7]:
# Decode the targets; this should equal the decoded inputs in sorted order.
y.argmax(axis=-1)

array([[0, 0, 3, 6, 9]])

In [8]:
# Number of distinct values a sequence element can take; it is passed to
# create_dataset as max_num and therefore is also the one-hot width.
vocab_size = 10

In [70]:
# Model under training.
# NOTE(review): the meaning of the positional arguments 5, 10 and 1 is not
# visible in this notebook (5 presumably matches the sequence length used for
# the dataset) -- confirm against model.NumberSortModule.
nsort = NumberSortModule(vocab_size, 5, 10, 1)

In [76]:
# First training attempt: Adam with a small learning rate.
optimizer = Adam(nsort, learning_rate=1e-4)

In [77]:
# Sanity check: display the learning rate the optimizer actually holds.
optimizer.learning_rate

0.0001

In [78]:
# optimizer.decay(0.1)

In [79]:
# 10000 generated sequences of length 5; create_dataset splits them
# positionally with split_test_train's default test fraction (0.25).
dataset = create_dataset(10000, 5, vocab_size)

# The commented-out arguments below are alternatives that are currently
# disabled (a MomentumSGD optimizer and a learning-rate decay setting).
trainer = Trainer(
    nsort,
    dataset,
    optimizer,
#     MomentumSGD(nsort, momentum=0.85, learning_rate=0.01),
    loss='cross-entropy',
    num_epochs=100,
    batch_size=1,
#     learning_rate_decay=0.85
)

In [80]:
# Train; the recorded output below ends in KeyboardInterrupt, i.e. the run
# was stopped manually before all epochs completed.
loss_history, train_history, val_history = trainer.fit()

Loss: 2.032206, Train accuracy: 0.321040, val accuracy: 0.315280
Loss: 2.029379, Train accuracy: 0.320720, val accuracy: 0.316400
Loss: 2.025215, Train accuracy: 0.323413, val accuracy: 0.319040
Loss: 2.020979, Train accuracy: 0.324747, val accuracy: 0.315760
Loss: 2.018250, Train accuracy: 0.326987, val accuracy: 0.319680
Loss: 2.016169, Train accuracy: 0.326773, val accuracy: 0.320160
Loss: 2.015247, Train accuracy: 0.328347, val accuracy: 0.320640
Loss: 2.015099, Train accuracy: 0.327787, val accuracy: 0.321760
Loss: 2.011712, Train accuracy: 0.330507, val accuracy: 0.326560
Loss: 2.008553, Train accuracy: 0.336853, val accuracy: 0.331440
Loss: 2.005055, Train accuracy: 0.343200, val accuracy: 0.337520
Loss: 2.000763, Train accuracy: 0.348187, val accuracy: 0.344320
Loss: 1.996293, Train accuracy: 0.352027, val accuracy: 0.344880
Loss: 1.992351, Train accuracy: 0.352400, val accuracy: 0.346240
Loss: 1.989316, Train accuracy: 0.350373, val accuracy: 0.346160
Loss: 1.987608, Train acc

KeyboardInterrupt: 

In [83]:
# Second attempt with a 10x larger learning rate. `nsort` is NOT re-created
# in this cell, so training continues from the weights left by the previous
# run, with a new Adam instance and a newly generated dataset.
optimizer = Adam(nsort, learning_rate=1e-3)
dataset = create_dataset(10000, 5, vocab_size)
trainer = Trainer(
    nsort,
    dataset,
    optimizer,
    loss='cross-entropy',
    num_epochs=100,
    batch_size=1,
)

In [92]:
# Train again; this overwrites the history variables from the first run.
# The recorded output below also ends in KeyboardInterrupt.
loss_history, train_history, val_history = trainer.fit()

Loss: 1.868970, Train accuracy: 0.422187, val accuracy: 0.420160
Loss: 1.868968, Train accuracy: 0.421893, val accuracy: 0.420560
Loss: 1.868732, Train accuracy: 0.422293, val accuracy: 0.420880
Loss: 1.868810, Train accuracy: 0.422267, val accuracy: 0.421120
Loss: 1.869100, Train accuracy: 0.421920, val accuracy: 0.422080
Loss: 1.869649, Train accuracy: 0.421413, val accuracy: 0.422720
Loss: 1.869956, Train accuracy: 0.421627, val accuracy: 0.424400
Loss: 1.869928, Train accuracy: 0.421253, val accuracy: 0.423760
Loss: 1.870269, Train accuracy: 0.420720, val accuracy: 0.423760
Loss: 1.870766, Train accuracy: 0.421067, val accuracy: 0.423680
Loss: 1.870982, Train accuracy: 0.421280, val accuracy: 0.423200
Loss: 1.871045, Train accuracy: 0.422373, val accuracy: 0.423360
Loss: 1.871054, Train accuracy: 0.422613, val accuracy: 0.421840
Loss: 1.871179, Train accuracy: 0.422773, val accuracy: 0.422400
Loss: 1.871144, Train accuracy: 0.422293, val accuracy: 0.422480
Loss: 1.871189, Train acc

KeyboardInterrupt: 

In [91]:
# NOTE(review): the semantics of decay(2) are not visible here (divide or
# multiply the learning rate?) -- confirm against optim. The execution
# counter shows this cell ran before the fit() cell shown above it, so a
# top-to-bottom re-run will not reproduce the recorded order.
optimizer.decay(2)

In [109]:
# Spot-check the model on one randomly chosen validation sequence.
# np.random is used because `random` is never imported by this notebook; it
# was only reachable if one of the star imports happened to leak it.
rand_index = np.random.randint(dataset.val_y.shape[0])
testX = dataset.val_X[rand_index]
testY = dataset.val_y[rand_index]

# Decode the one-hot input and compute the reference answer once.
test_num = testX.argmax(axis=-1).ravel()
sorted_num = np.sort(test_num)
srtd = np.eye(testX.shape[-1])[sorted_num]

predicted = nsort.predict(testX).ravel()

# Raw forward output collapsed to its last two axes, then turned into
# one-hot rows of the arg-max class per position.
predicted_forward = nsort.forward(testX)
predicted_forward = predicted_forward.reshape(predicted_forward.shape[-2], predicted_forward.shape[-1])
predicted_onehot = np.eye(testX.shape[-1])[predicted_forward.argmax(axis=-1)]

print("unsorted: ", test_num)
print("sorted:   ", sorted_num)
print("predicted:", predicted)

unsorted:  [8 2 4 2 9]
sorted:    [2 2 4 8 9]
predicted: [2 2 4 8 8]
