In [1]:
import moon.data
import moon.problem

In [2]:
# Load the cleaned problem set through the project's own reader.
problems_csv = 'data/cleaned_probs.csv'
probs = moon.data.read_problems(problems_csv)

In [14]:
import numpy as np

# Model inputs: one `.array` per problem, stacked along a new leading axis.
grids = [prob.array for prob in probs]
# Targets: the `.ordinal` encoding of each problem's grade.
ordinals = [prob.grade.ordinal for prob in probs]

x = np.array(grids)
y = np.array(ordinals)

print(x.shape, y.shape)

(30991, 18, 11) (30991, 17)


In [22]:
import tensorflow as tf
from tensorflow.keras import layers
import tensorflow.keras.backend as K

class OrdinalAccuracy(tf.keras.metrics.Metric):
    """Fraction of samples whose decoded ordinal index is within ``k`` of the truth.

    Both ``y_true`` and ``y_pred`` are treated as rows of per-threshold scores.
    A row is decoded by scanning it left to right and stopping at the first
    entry below ``threshold``; the decoded index is the position of the last
    entry that passed (``-1`` when even the first entry fails).
    See section 2.3: http://calla.rnet.missouri.edu/cheng_courses/rank.pdf

    Args:
        name: Metric name reported by Keras.
        k: Maximum absolute index distance still counted as correct.
        threshold: Score at or above which a threshold entry counts as passed.
        **kwargs: Forwarded to ``tf.keras.metrics.Metric``.
    """

    def __init__(self, name='ordinal_acc_within_k', k=0, threshold=0.5, **kwargs):
        super().__init__(name=name, **kwargs)
        self.k = k
        self.threshold = threshold
        # Naming caveat: `total` accumulates the number of *correct* samples,
        # `count` accumulates the number of samples seen.
        self.total = self.add_weight(name='total', initializer='zeros')
        self.count = self.add_weight(name='count', initializer='zeros')

    def _decode(self, scores):
        """Return, per row, the index of the last leading entry >= threshold."""
        # Cast first so integer-typed labels can be compared with a float threshold.
        passed = tf.cast(tf.cast(scores, 'float32') >= self.threshold, 'int64')
        # cumprod turns everything after the first 0 into 0, so the sum is the
        # number of leading passes. Unlike `argmin(...) - 1`, this also handles
        # a row where every entry passes (argmin of an all-ones row is 0, which
        # used to decode the highest grade as -1).
        leading_passes = tf.reduce_sum(tf.math.cumprod(passed, axis=-1), axis=-1)
        return leading_passes - 1

    def update_state(self, y_true, y_pred, sample_weight=None):
        """Accumulate hit and sample counts for one batch.

        ``sample_weight`` is accepted for API compatibility but is not used.
        """
        pred_idx = self._decode(y_pred)
        true_idx = self._decode(y_true)
        distance_bools = (tf.abs(pred_idx - true_idx) <= self.k)
        correct = tf.reduce_sum(tf.cast(distance_bools, 'float32'))
        incorrect = tf.reduce_sum(tf.cast(tf.logical_not(distance_bools), 'float32'))
        self.total.assign_add(correct)
        self.count.assign_add(correct + incorrect)

    def result(self):
        """Return correct / seen over everything accumulated so far."""
        return self.total / self.count

    def get_config(self):
        """Include ``k`` and ``threshold`` so the metric can be re-created from config."""
        config = super().get_config()
        config.update({'k': self.k, 'threshold': self.threshold})
        return config

    
# --- Hyper-parameters for the feed-forward model -----------------------------
# α = 0.3
p = .4                                            # dropout rate; 0 disables dropout
input_shape = moon.problem.Problem.GRID_SHAPE     # shape of one input sample (no batch axis)
hiddens = [16, 16]                                # units per hidden Dense layer, in order
hidden_activation = 'swish'
output_shape = moon.problem.Grade.N_GRADES        # number of output units
output_activation = 'sigmoid'
loss = 'mse'
adam_lr = 1e-4

# Accuracy within 0, 1 and 2 ordinal steps of the true grade.
metrics = [OrdinalAccuracy(name='acc0', k=0), OrdinalAccuracy(name='acc1', k=1), OrdinalAccuracy(name='acc2', k=2)]
# `learning_rate` is the supported keyword; `lr` is a deprecated alias that
# newer Keras releases reject.
optim = tf.keras.optimizers.Adam(learning_rate=adam_lr)

In [23]:
# Flatten each input sample, then stack Dense(+Dropout) stages as listed in `hiddens`.
in_x = layers.Input(shape=input_shape)
features = layers.Flatten()(in_x)
for nodes in hiddens:
    features = layers.Dense(units=nodes, activation=hidden_activation)(features)
    # features = layers.ReLU(α)(features)
    if p > 0:
        # Dropout is skipped entirely when the rate is zero (or negative).
        features = layers.Dropout(rate=p)(features)
out_y = layers.Dense(units=output_shape, activation=output_activation)(features)

model = tf.keras.Model(inputs=in_x, outputs=out_y, name='Feedforward_classifier')
model.compile(optimizer=optim, loss=loss, metrics=metrics)
# model.summary()

In [17]:
# Hold out a fixed fraction of the problems as a test/validation set.
split = 0.2       # fraction of samples held out
split_seed = 0    # fixed seed so the partition (and the logged metrics) are reproducible

# A seeded generator replaces the unseeded global `np.random.shuffle`, which
# produced a different split on every kernel restart.
indices = np.random.default_rng(split_seed).permutation(len(probs))
n_test = int(len(probs) * split)

test_idx, train_idx = indices[:n_test], indices[n_test:]
x_train, y_train = x[train_idx], y[train_idx]
x_test, y_test = x[test_idx], y[test_idx]

# Every sample must land in exactly one of the two partitions, with x/y rows aligned.
assert len(x_train) + len(x_test) == len(x)
assert len(x_train) == len(y_train) and len(x_test) == len(y_test)
print(x_train.shape, x_test.shape)

(24793, 18, 11) (6198, 18, 11)


In [24]:
# Stop once validation loss has not improved for 4 consecutive epochs.
callbacks = [
    tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=4, verbose=1),
]
batch_size = 64
max_epochs = 300

# The held-out split doubles as the validation set that drives early stopping.
history = model.fit(
    x_train,
    y_train,
    batch_size=batch_size,
    epochs=max_epochs,
    validation_data=(x_test, y_test),
    callbacks=callbacks,
)

# [256, 128, 64], p=.4, flattened, 28e
# loss: 0.0396 - acc0: 0.4144 - acc1: 0.7597 - acc2: 0.9229 - val_loss: 0.0414 - val_acc0: 0.3925 - val_acc1: 0.7375 - val_acc2: 0.9140

# [16, 16], p=.4, flattened, 76e
# loss: 0.0446 - acc0: 0.3766 - acc1: 0.7139 - acc2: 0.8944 - val_loss: 0.0416 - val_acc0: 0.3879 - val_acc1: 0.7438 - val_acc2: 0.9138

# [16, 16], p=.4, flattened, lr 1e-4, 202e
# loss: 0.0463 - acc0: 0.3726 - acc1: 0.6968 - acc2: 0.8863 - val_loss: 0.0429 - val_acc0: 0.3987 - val_acc1: 0.7257 - val_acc2: 0.9037

# [32], p=.4, flattened, 31e
# loss: 0.0431 - acc0: 0.3896 - acc1: 0.7303 - acc2: 0.9021 - val_loss: 0.0416 - val_acc0: 0.4055 - val_acc1: 0.7385 - val_acc2: 0.9096

# [256, 128, 64], p=.4, 3d, 24e
# loss: 0.0409 - acc0: 0.4075 - acc1: 0.7475 - acc2: 0.9123 - val_loss: 0.0421 - val_acc0: 0.3945 - val_acc1: 0.7389 - val_acc2: 0.9076

# [64, 64], p=.4, 3d, 32e
# loss: 0.0420 - acc0: 0.4018 - acc1: 0.7372 - acc2: 0.9087 - val_loss: 0.0419 - val_acc0: 0.3992 - val_acc1: 0.7396 - val_acc2: 0.9080

# [64], p=.4, 3d, 44e
# loss: 0.0405 - acc0: 0.4103 - acc1: 0.7505 - acc2: 0.9152 - val_loss: 0.0416 - val_acc0: 0.4016 - val_acc1: 0.7402 - val_acc2: 0.9092

# [32], p=.4, 3d, 59e
# loss: 0.0418 - acc0: 0.4037 - acc1: 0.7396 - acc2: 0.9084 - val_loss: 0.0420 - val_acc0: 0.4045 - val_acc1: 0.7314 - val_acc2: 0.9093

Epoch 1/300
Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 7/300
Epoch 8/300
Epoch 9/300
Epoch 10/300
Epoch 11/300
Epoch 12/300
Epoch 13/300
Epoch 14/300
Epoch 15/300
Epoch 16/300
Epoch 17/300
Epoch 18/300
Epoch 19/300
Epoch 20/300
Epoch 21/300
Epoch 22/300
Epoch 23/300
Epoch 24/300
Epoch 25/300
Epoch 26/300
Epoch 27/300
Epoch 28/300
Epoch 29/300
Epoch 30/300
Epoch 31/300
Epoch 32/300
Epoch 33/300
Epoch 34/300
Epoch 35/300
Epoch 36/300
Epoch 37/300
Epoch 38/300
Epoch 39/300
Epoch 40/300
Epoch 41/300


Epoch 42/300
Epoch 43/300
Epoch 44/300
Epoch 45/300
Epoch 46/300
Epoch 47/300
Epoch 48/300
Epoch 49/300
Epoch 50/300
Epoch 51/300
Epoch 52/300
Epoch 53/300
Epoch 54/300
Epoch 55/300
Epoch 56/300
Epoch 57/300
Epoch 58/300
Epoch 59/300
Epoch 60/300
Epoch 61/300
Epoch 62/300
Epoch 63/300
Epoch 64/300
Epoch 65/300
Epoch 66/300
Epoch 67/300
Epoch 68/300
Epoch 69/300
Epoch 70/300
Epoch 71/300
Epoch 72/300
Epoch 73/300
Epoch 74/300
Epoch 75/300
Epoch 76/300
Epoch 77/300
Epoch 78/300
Epoch 79/300
Epoch 80/300
Epoch 81/300


Epoch 82/300
Epoch 83/300
Epoch 84/300
Epoch 85/300
Epoch 86/300
Epoch 87/300
Epoch 88/300
Epoch 89/300
Epoch 90/300
Epoch 91/300
Epoch 92/300
Epoch 93/300
Epoch 94/300
Epoch 95/300
Epoch 96/300
Epoch 97/300
Epoch 98/300
Epoch 99/300
Epoch 100/300
Epoch 101/300
Epoch 102/300
Epoch 103/300
Epoch 104/300
Epoch 105/300
Epoch 106/300
Epoch 107/300
Epoch 108/300
Epoch 109/300
Epoch 110/300
Epoch 111/300
Epoch 112/300
Epoch 113/300
Epoch 114/300
Epoch 115/300
Epoch 116/300
Epoch 117/300
Epoch 118/300
Epoch 119/300
Epoch 120/300
Epoch 121/300


Epoch 122/300
Epoch 123/300
Epoch 124/300
Epoch 125/300
Epoch 126/300
Epoch 127/300
Epoch 128/300
Epoch 129/300
Epoch 130/300
Epoch 131/300
Epoch 132/300
Epoch 133/300
Epoch 134/300
Epoch 135/300
Epoch 136/300
Epoch 137/300
Epoch 138/300
Epoch 139/300
Epoch 140/300
Epoch 141/300
Epoch 142/300
Epoch 143/300
Epoch 144/300
Epoch 145/300
Epoch 146/300
Epoch 147/300
Epoch 148/300
Epoch 149/300
Epoch 150/300
Epoch 151/300
Epoch 152/300
Epoch 153/300
Epoch 154/300
Epoch 155/300
Epoch 156/300
Epoch 157/300
Epoch 158/300
Epoch 159/300
Epoch 160/300
Epoch 161/300


Epoch 162/300
Epoch 163/300
Epoch 164/300
Epoch 165/300
Epoch 166/300
Epoch 167/300
Epoch 168/300
Epoch 169/300
Epoch 170/300
Epoch 171/300
Epoch 172/300
Epoch 173/300
Epoch 174/300
Epoch 175/300
Epoch 176/300
Epoch 177/300
Epoch 178/300
Epoch 179/300
Epoch 180/300
Epoch 181/300
Epoch 182/300
Epoch 183/300
Epoch 184/300
Epoch 185/300
Epoch 186/300
Epoch 187/300
Epoch 188/300
Epoch 189/300
Epoch 190/300
Epoch 191/300
Epoch 192/300
Epoch 193/300
Epoch 194/300
Epoch 195/300
Epoch 196/300
Epoch 197/300
Epoch 198/300
Epoch 199/300
Epoch 200/300
Epoch 201/300


Epoch 202/300
Epoch 00202: early stopping


In [78]:
# Single-sample sanity check (disabled):
# print(np.round(model.predict(x_test[99][np.newaxis, :]) * 100))
# print(y_test[99])

# A handful of held-out rows used to walk through the metric by hand below.
sample_rows = [2, 4, 8, 16, 32]
y_true = y_test[sample_rows]
y_pred = model.predict(x_test[sample_rows])

In [79]:
# Step-by-step walkthrough of the ordinal accuracy computation on the sample rows.
pred_thresh = tf.cast(y_pred >= .5, 'int8')
true_thresh = tf.cast(y_true >= .5, 'int8')
print(pred_thresh, true_thresh)

# Count the leading 1s of each row: cumprod zeroes everything after the first 0,
# so the row sum is the number of thresholds passed before the first failure.
# Subtracting 1 gives the index of the last passed threshold.
# Note that this is -1 if the network doesn't make any prediction at all.
# (`tf.argmin(...) - 1` gave the same answer except for an all-ones row, where
# argmin returns 0 and the top grade was wrongly decoded as -1.)
pred_idx = tf.reduce_sum(tf.math.cumprod(tf.cast(pred_thresh, 'int64'), axis=-1), axis=-1) - 1
true_idx = tf.reduce_sum(tf.math.cumprod(tf.cast(true_thresh, 'int64'), axis=-1), axis=-1) - 1
print(pred_idx, true_idx)

# Exact-match accuracy (distance 0) over the sampled rows.
distance_bools = tf.abs(pred_idx - true_idx) <= 0
correct = tf.reduce_sum(tf.cast(distance_bools, 'float32'))
incorrect = tf.reduce_sum(tf.cast(tf.logical_not(distance_bools), 'float32'))
print(distance_bools, correct, incorrect)

print(correct / (correct+incorrect))

tf.Tensor(
[[1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0]
 [1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0]
 [1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0]
 [1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0]
 [1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0]], shape=(5, 17), dtype=int8) tf.Tensor(
[[1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0]
 [1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0]
 [1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0]
 [1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0]
 [1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0]], shape=(5, 17), dtype=int8)
tf.Tensor([7 8 7 8 6], shape=(5,), dtype=int64) tf.Tensor([8 9 6 7 6], shape=(5,), dtype=int64)
tf.Tensor([False False False False  True], shape=(5,), dtype=bool) tf.Tensor(1.0, shape=(), dtype=float32) tf.Tensor(4.0, shape=(), dtype=float32)
tf.Tensor(0.2, shape=(), dtype=float32)
