# Learning Word Meta-Embeddings by Autoencoding

https://www.aclweb.org/anthology/C18-1140/

Danushka Bollegala, Cong Bao

In [None]:
# Load the pre-computed encodings for the selected model from disk.
print(f"Now reading ./Encoded/{model_to_use}.npy...")
encoded = np.load(f'./Encoded/{model_to_use}.npy')
config = AutoConfig.from_pretrained(MODEL_TO_USE)

# The last `config.dim` columns of every row are the embeddings at the input
# (the [CLS] input embedding before it is fed into the network), so they are
# the same for every row. Drop them, then make sure the data is float32.
encoded = encoded[:, :-config.dim].astype(dtype=np.float32, copy=False)

# Split off a validation set (test_size=0.15).
X_train, X_valid = train_test_split(encoded, test_size=0.15)

# Each pipeline pairs every row with itself, (x, x), so the same vector is
# served as both input and target, then shuffles and batches.
dataset_train = (
    tf.data.Dataset.from_tensor_slices(X_train)
    .map(lambda x: (x, x))
    .shuffle(10000)
    .batch(batch_size)
)

dataset_valid = (
    tf.data.Dataset.from_tensor_slices(X_valid)
    .map(lambda x: (x, x))
    .shuffle(10000)
    .batch(batch_size)
)


# From Aurélien Géron's "Hands-On Machine Learning with Scikit-Learn, Keras, and TensorFlow" (2nd ed.): https://github.com/ageron/handson-ml2/blob/master/17_autoencoders_and_gans.ipynb
class DenseTranspose(keras.layers.Layer):
    """Layer that multiplies its input by the transpose of another layer's first weight.

    The weight matrix is not owned by this layer: it is read from
    ``dense.weights[0]`` (the kernel, for a ``keras.layers.Dense``) on every
    call, so the two layers share it. Only the bias vector is created here.

    Args:
        dense: The layer whose first weight is reused (transposed).
        activation: Anything accepted by ``keras.activations.get``;
            ``None`` by default.
        **kwargs: Forwarded to ``keras.layers.Layer.__init__``.
    """

    def __init__(self, dense, activation=None, **kwargs):
        self.dense = dense
        self.activation = keras.activations.get(activation)
        super().__init__(**kwargs)

    def build(self, batch_input_shape):
        """Create the bias vector, sized to the wrapped layer's input width."""
        # The output of this layer has as many units as `dense` has inputs.
        output_units = self.dense.input_shape[-1]
        self.biases = self.add_weight(
            name="bias",
            shape=[output_units],
            initializer="zeros",
        )
        super().build(batch_input_shape)

    def call(self, inputs):
        """Return ``activation(inputs @ W^T + bias)`` with ``W`` taken from `dense`."""
        shared_kernel = self.dense.weights[0]
        projected = tf.matmul(inputs, shared_kernel, transpose_b=True)
        return self.activation(projected + self.biases)

In [None]:
# Start from a clean Keras state before building the model.
keras.backend.clear_session()

# Encoder layers, created in order: 3072 -> 2048 -> 1024 units, all SELU.
dense_1, dense_2, dense_3 = (
    keras.layers.Dense(units, activation="selu") for units in (3072, 2048, 1024)
)

# NOTE(review): `d_train` / `d_valid` are not defined in the visible cells;
# presumably they are the batched (input, target) datasets -- confirm.
# Per-example feature shape: the input spec's shape without the batch axis.
feature_shape = d_train.element_spec[0].shape[1:]

encoder = keras.models.Sequential([
    keras.layers.InputLayer(input_shape=feature_shape),
    dense_1,
    dense_2,
    dense_3,
])

# The decoder mirrors the encoder in reverse, each stage reusing the
# matching encoder layer through DenseTranspose.
# NOTE(review): the last stage uses a sigmoid, which bounds reconstructions
# to (0, 1) -- confirm the inputs are scaled to that range.
tied_decoder = keras.models.Sequential([
    DenseTranspose(shared_layer, activation=activation_name)
    for shared_layer, activation_name in (
        (dense_3, "selu"),
        (dense_2, "selu"),
        (dense_1, "sigmoid"),
    )
])

tied_ae = keras.models.Sequential([encoder, tied_decoder])

# Multiply the learning rate by 0.1 once val_loss has not improved for 3 epochs.
reduce_lr_on_plateau = keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.1,
    patience=3,
)
callback_list = [reduce_lr_on_plateau]

# NOTE(review): "accuracy" is tracked next to an MSE reconstruction loss; it is
# unlikely to be informative for real-valued targets -- consider e.g. "mae".
tied_ae.compile(
    optimizer=keras.optimizers.Adagrad(),
    loss="mse",
    metrics=["accuracy"],
)

hist = tied_ae.fit(
    d_train,
    epochs=10,
    validation_data=d_valid,
    callbacks=callback_list,
)