<a href="https://colab.research.google.com/github/fagonzalezo/qmc/blob/master/examples/Multilayer_QMC_Mnist.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Install qmc if running in Google Colab

try:
  import google.colab
  IN_COLAB = True
except:
  IN_COLAB = False

if IN_COLAB:
    !pip install --upgrade  git+https://github.com/fagonzalezo/qmc.git
else:
    import sys
    sys.path.insert(0, "../")

In [3]:
import tensorflow as tf
import numpy as np
import qmc.tf.layers as layers
import qmc.tf.models as models

In [4]:
from tensorflow.keras.datasets import mnist
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder

# Load MNIST and flatten every image into a single float32 feature vector.
(X_train, y_train), (X_test, y_test) = mnist.load_data()

# Derive the sample count from the arrays instead of hard-coding 60000/10000,
# so the cell keeps working if the data is subsampled for a quick run.
X_train = X_train.reshape((X_train.shape[0], -1)).astype('float32')
X_test = X_test.reshape((X_test.shape[0], -1)).astype('float32')

# Fit the scaler on the training split only and reuse it for the test split,
# so no test statistics leak into preprocessing.
scaler = MinMaxScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# One-hot encode the training labels as a dense array.
# scikit-learn renamed `sparse` to `sparse_output` (added in 1.2, old name
# removed in 1.4); try the new keyword first and fall back for old releases.
try:
    transformer = OneHotEncoder(sparse_output=False)
except TypeError:
    transformer = OneHotEncoder(sparse=False)
y_train_bin = transformer.fit_transform(y_train[:, np.newaxis])

print("shape X_train : ", X_train.shape)
print("shape y_train : ", y_train.shape)
print("shape X_test : ", X_test.shape)
print("shape y_test : ", y_test.shape)

shape X_train :  (60000, 784)
shape y_train :  (60000,)
shape X_test :  (10000, 784)
shape y_test :  (10000,)


## Baseline

In [5]:
BATCH_SIZE = 256
EPOCHS = 10

# Dense baseline: one hidden ReLU layer followed by a 10-way softmax.
# X_train was already flattened to 784 features per sample, so the model must
# declare a (784,) input. The previous Flatten(input_shape=(28, 28)) did not
# match the data actually passed to fit(), which only triggers a warning on
# some Keras versions and is a shape error on others.
model = tf.keras.models.Sequential([
    tf.keras.Input(shape=(784,)),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(10),
    tf.keras.layers.Softmax(),
])
model.compile(
    optimizer=tf.keras.optimizers.Adam(0.001),
    # Targets are one-hot rows (y_train_bin) and the model ends in a softmax,
    # hence categorical cross-entropy on probabilities.
    loss=tf.keras.losses.CategoricalCrossentropy(),
    metrics=[tf.keras.metrics.CategoricalAccuracy()],
)

# The last 20% of the training data is held out for validation.
history = model.fit(X_train, y_train_bin, batch_size=BATCH_SIZE,
                    epochs=EPOCHS, validation_split=0.2)

Epoch 1/10
  1/188 [..............................] - ETA: 26s - loss: 2.4091 - categorical_accuracy: 0.0781

2021-11-17 20:35:12.092996: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:185] None of the MLIR Optimization Passes are enabled (registered 2)
2021-11-17 20:35:12.095529: W tensorflow/core/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


## Full training

In [6]:
# Two stacked QMClassifSDecompFDMatrix layers trained end to end.
BATCH_SIZE = 256
input_dim = 784
num_rff = 512
gamma = 2**-5
n_comp = 80
random_state = 0
dim_h = 30
EPOCHS = 10

# `input_dim` drives both the Keras input and the feature map, so changing it
# in one place is enough (previously the literal 784 was repeated and
# `input_dim` was ignored).
inputs = tf.keras.Input(shape=(input_dim,))
fm_x1 = layers.QFeatureMapRFF(input_dim, dim=num_rff, gamma=gamma, random_state=random_state)
psi_x = fm_x1(inputs)

# Prepend a column of ones to the feature vector and add a trailing axis of
# size 1 before feeding the first classifier layer.
# NOTE(review): presumably the leading entry is the weight of a single
# component and the rest is its vector -- confirm against the qmc layer docs.
ones = tf.ones_like(inputs[:, 0:1])
rho_x = tf.keras.layers.concatenate((ones, psi_x), axis=1)
rho_x = tf.expand_dims(rho_x, axis=-1)

qmdmc1 = layers.QMClassifSDecompFDMatrix(dim_x=num_rff, dim_y=dim_h, n_comp=n_comp)
rho_h = qmdmc1(rho_x)
qmdmc2 = layers.QMClassifSDecompFDMatrix(dim_x=dim_h, dim_y=10, n_comp=n_comp)
rho_y = qmdmc2(rho_h)

# Split the output along axis 1: the first row is used as per-component
# weights, the remaining rows as per-component vectors.
y_w = rho_y[:, 0, :]
y_v = rho_y[:, 1:, :]
# probs[..., i] = sum_j y_w[..., j] * y_v[..., i, j] * conj(y_v[..., i, j])
probs = tf.einsum('...j,...ij,...ij->...i', y_w, y_v, tf.math.conj(y_v))

qmdmclf2 = tf.keras.Model(inputs=inputs, outputs=probs)

# To keep the random Fourier feature map fixed during training, set
# `fm_x1.trainable = False` here, i.e. before compile().
opt = tf.keras.optimizers.Adam(learning_rate=0.001)
qmdmclf2.compile(loss=tf.keras.losses.CategoricalCrossentropy(), optimizer=opt, metrics=['accuracy'])

history = qmdmclf2.fit(X_train, y_train_bin, batch_size=BATCH_SIZE,
                    epochs=EPOCHS, validation_split=0.2)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


## Layerwise training

In [7]:
# Stage 1 of layerwise training: a single QMClassifSDecompFDMatrix layer on
# top of the random Fourier feature map, trained on its own. The layers
# `fm_x` and `qmdmc` built here are reused by the following cell.
BATCH_SIZE = 256
input_dim = 784
component_dim = 128
gamma = 2**-5
n_comp = 80
random_state = 0
EPOCHS = 10

# `input_dim` drives both the Keras input and the feature map (previously the
# literal 784 was repeated and `input_dim` was ignored).
inputs = tf.keras.Input(shape=(input_dim,))
fm_x = layers.QFeatureMapRFF(input_dim, dim=component_dim, gamma=gamma, random_state=random_state)
psi_x = fm_x(inputs)

# Prepend a column of ones to the feature vector and add a trailing axis of
# size 1 before feeding the classifier layer.
ones = tf.ones_like(inputs[:, 0:1])
rho_x = tf.keras.layers.concatenate((ones, psi_x), axis=1)
rho_x = tf.expand_dims(rho_x, axis=-1)

qmdmc = layers.QMClassifSDecompFDMatrix(dim_x=component_dim, dim_y=10, n_comp=n_comp)
rho_y = qmdmc(rho_x)

# Split the output along axis 1: the first row is used as per-component
# weights, the remaining rows as per-component vectors.
y_w = rho_y[:, 0, :]
y_v = rho_y[:, 1:, :]
# probs[..., i] = sum_j y_w[..., j] * y_v[..., i, j] * conj(y_v[..., i, j])
probs = tf.einsum('...j,...ij,...ij->...i', y_w, y_v, tf.math.conj(y_v))

qmdmclf = tf.keras.Model(inputs=inputs, outputs=probs)
opt = tf.keras.optimizers.Adam(learning_rate=0.001)
qmdmclf.compile(loss=tf.keras.losses.CategoricalCrossentropy(), optimizer=opt, metrics=['accuracy'])

history = qmdmclf.fit(X_train, y_train_bin, batch_size=BATCH_SIZE,
                    epochs=EPOCHS, validation_split=0.2)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [8]:
# Stage 2 of layerwise training: rebuild the stage-1 stack, append a second
# QMClassifSDecompFDMatrix layer, copy the stage-1 weights into the first two
# layers, freeze them, and train only the new layer.
# Relies on `input_dim`, `component_dim`, `gamma`, `n_comp`, `random_state`,
# `BATCH_SIZE`, `fm_x` and `qmdmc` from the previous cell.
inputs = tf.keras.Input(shape=(input_dim,))
fm_x1 = layers.QFeatureMapRFF(input_dim, dim=component_dim, gamma=gamma, random_state=random_state)
psi_x = fm_x1(inputs)
ones = tf.ones_like(inputs[:, 0:1])
rho_x = tf.keras.layers.concatenate((ones, psi_x), axis=1)
rho_x = tf.expand_dims(rho_x, axis=-1)
qmdmc1 = layers.QMClassifSDecompFDMatrix(dim_x=component_dim, dim_y=10, n_comp=n_comp)
rho_h = qmdmc1(rho_x)
qmdmc2 = layers.QMClassifSDecompFDMatrix(dim_x=10, dim_y=10, n_comp=n_comp)
rho_y = qmdmc2(rho_h)

# Split the output along axis 1: the first row is used as per-component
# weights, the remaining rows as per-component vectors.
y_w = rho_y[:, 0, :]
y_v = rho_y[:, 1:, :]
# probs[..., i] = sum_j y_w[..., j] * y_v[..., i, j] * conj(y_v[..., i, j])
probs = tf.einsum('...j,...ij,...ij->...i', y_w, y_v, tf.math.conj(y_v))
qmdmclf2 = tf.keras.Model(inputs=inputs, outputs=probs)

# We freeze the weights of the first layer and train the second.
# Both layers have already been called above, so their weights exist and can
# be overwritten. The `trainable` flags are set BEFORE compile(): Keras
# documents that the trainable state is fixed when the model is compiled, so
# flipping it afterwards is not guaranteed to take effect.
fm_x1.set_weights(fm_x.get_weights())
fm_x1.trainable = False
qmdmc1.set_weights(qmdmc.get_weights())
qmdmc1.trainable = False

opt = tf.keras.optimizers.Adam(learning_rate=0.001)
qmdmclf2.compile(loss=tf.keras.losses.CategoricalCrossentropy(), optimizer=opt, metrics=['accuracy'])

EPOCHS = 5

history = qmdmclf2.fit(X_train, y_train_bin, batch_size=BATCH_SIZE,
                    epochs=EPOCHS, validation_split=0.2)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [9]:
# We free all the weights and fine tune

fm_x1.trainable = True
qmdmc1.trainable = True

# Changing `trainable` only takes effect once the model is compiled again;
# without this the cached training step may keep treating the two layers as
# frozen and the "fine-tuning" would silently update the last layer only.
# A fresh optimizer is created so it tracks the newly trainable variables.
opt = tf.keras.optimizers.Adam(learning_rate=0.001)
qmdmclf2.compile(loss=tf.keras.losses.CategoricalCrossentropy(), optimizer=opt, metrics=['accuracy'])

EPOCHS = 10
history = qmdmclf2.fit(X_train, y_train_bin, batch_size=BATCH_SIZE,
                    epochs=EPOCHS, validation_split=0.2)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
