In [1]:
# python 3.6.9
import numpy as np
from tensorflow import keras
from tensorflow.keras import layers
import tensorflow as tf

In [2]:
def create_model():
    model = keras.Sequential(
        [
            keras.Input(shape=input_shape),
            layers.Conv2D(32, kernel_size=(3, 3), activation="relu"),
            layers.MaxPooling2D(pool_size=(2, 2)),
            layers.Conv2D(64, kernel_size=(3, 3), activation="relu"),
            layers.MaxPooling2D(pool_size=(2, 2)),
            layers.Flatten(),
            layers.Dropout(0.5),
            layers.Dense(num_classes, activation="softmax"),
        ]
    )
    model.compile(
        loss="categorical_crossentropy",
        optimizer="adam",
        metrics=["accuracy"]
    )
    return model

In [3]:
def get_most_uncertain(model, x_arr, y_arr, k):
    predictions = model.predict(x_arr, batch_size=128)
    probs = tf.nn.softmax(predictions).numpy()
    diffs = np.array([])
    for p in probs:
        p.sort()
        diffs = np.append(diffs, abs(p[-1] - p[-2]))

    smallest_margins_idx = diffs.argsort()[:k]
    return smallest_margins_idx

In [19]:
# model / data parameters
num_classes = 10
input_shape = (28, 28, 1)
batch_size = 128
epochs = 5
n = 5000                                # number of trainnig samples (max 60000)
init_n = 100                            # number of samples used to initial training
n_queries = 10                           # number of al iterations
n_oracle = 50                           # number of samples provided by the oracle (per iteration)
n_pl = init_n + n_queries * n_oracle    # nunber of samples used to passive learnig
epochs_pl = 10

In [20]:
# the data, split between train and test sets
(X_TRAIN, Y_TRAIN), (X_TEST, Y_TEST) = keras.datasets.mnist.load_data()

In [21]:
# data preprocessing
x_train = X_TRAIN[:n]
x_init, x_train = np.split(x_train, (init_n,))
y_train = Y_TRAIN[:n]
y_init, y_train = np.split(y_train, (init_n,))

# Scale images to the [0, 1] range
x_init = x_init.astype("float32") / 255.0
x_train = x_train.astype("float32") / 255.0
x_test = X_TEST.astype("float32") / 255.0
# Make sure images have shape (28, 28, 1)
x_init = np.expand_dims(x_init, -1)
x_train = np.expand_dims(x_train, -1)
x_test = np.expand_dims(x_test, -1)

# convert class vectors to binary class matrices
y_init = keras.utils.to_categorical(y_init, num_classes)
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(Y_TEST, num_classes)

print("x_train shape:", x_train.shape)
print(x_init.shape[0], "init samples")
print(x_train.shape[0], "unlabelled samples")

x_train shape: (4900, 28, 28, 1)
100 init samples
4900 unlabelled samples


In [22]:
model = create_model()

In [23]:
# initial training
if init_n:
    model.fit(x_init, y_init, batch_size=batch_size, epochs=epochs)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [24]:
# active learning 
for k in range(n_queries):
    score = model.evaluate(x_train, y_train)
    print(f"Number of samples used to trainig: {x_init.shape[0]}")
    print(f"{k}. Test loss:", score[0])
    print("Test accuracy:", score[1])
    idxs = get_most_uncertain(model, x_train, y_train, n_oracle)
    x_init = np.append(x_init, x_train[idxs], axis=0)
    y_init = np.append(y_init, y_train[idxs], axis=0)
    x_train = np.delete(x_train, idxs, axis=0)
    y_train = np.delete(y_train, idxs, axis=0)

    model.fit(x_init, y_init, batch_size=batch_size, epochs=epochs)

Number of samples used to trainig: 100
0. Test loss: 2.212908983230591
Test accuracy: 0.19816327095031738
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Number of samples used to trainig: 150
1. Test loss: 2.0465331077575684
Test accuracy: 0.45010310411453247
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Number of samples used to trainig: 200
2. Test loss: 1.4410851001739502
Test accuracy: 0.637291669845581
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Number of samples used to trainig: 250
3. Test loss: 1.0048842430114746
Test accuracy: 0.723578929901123
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Number of samples used to trainig: 300
4. Test loss: 0.6563941240310669
Test accuracy: 0.7997872233390808
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Number of samples used to trainig: 350
5. Test loss: 0.46009698510169983
Test accuracy: 0.8623656034469604
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Number of samples used to trainig: 400
6. Test loss: 0.322687625

In [25]:
# lets use the same model and train it without AL 
model_pl = create_model()

In [26]:
# data preprocessing
x_train = X_TRAIN[:n_pl]
y_train = Y_TRAIN[:n_pl]

# Scale images to the [0, 1] range
x_train = x_train.astype("float32") / 255.0
# Make sure images have shape (28, 28, 1)
x_train = np.expand_dims(x_train, -1)

# convert class vectors to binary class matrices
y_train = keras.utils.to_categorical(y_train, num_classes)

print("x_train shape:", x_train.shape)
print(x_train.shape[0], "training samples")

x_train shape: (600, 28, 28, 1)
600 training samples


In [27]:
# train model
model_pl.fit(x_train, y_train, batch_size=batch_size, epochs=epochs_pl)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7f00fc1ae860>

In [28]:
print("Final evaluation")
print(f"Number of samples used to train: {x_init.shape[0]}")
print(f"Number of test samples: {x_test.shape[0]}")

score = model.evaluate(x_test, y_test)
print("AL model")
print("Test loss:", score[0])
print("Test accuracy:", score[1])

score = model_pl.evaluate(x_test, y_test)
print("\nStandard CNN model")
print("Test loss:", score[0])
print("Test accuracy:", score[1])

Final evaluation
Number of samples used to train: 600
Number of test samples: 10000
AL model
Test loss: 0.16382446885108948
Test accuracy: 0.954200029373169

Standard CNN model
Test loss: 0.5297361612319946
Test accuracy: 0.8345000147819519
