<a href="https://colab.research.google.com/github/fagonzalezo/qmc/blob/master/examples/QMC_Multimodal_Mnist.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Install qmc if running in Google Colab

try:
  import google.colab
  IN_COLAB = True
except:
  IN_COLAB = False

if IN_COLAB:
    !pip install --upgrade  git+https://github.com/fagonzalezo/qmc.git
else:
    import sys
    sys.path.insert(0, "../")

In [2]:
import tensorflow as tf
import numpy as np
import qmc.tf.layers as layers
import qmc.tf.models as models

In [3]:
from tensorflow.keras.datasets import mnist
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder

(X_train, y_train), (X_test, y_test) = mnist.load_data()

# Flatten every image to a 784-dimensional row. -1 lets NumPy infer the number
# of samples instead of hard-coding 60000 / 10000.
X_train = X_train.reshape((-1, 784)).astype('float32')
X_test = X_test.reshape((-1, 784)).astype('float32')

# Scaling statistics come from the training set only and are then applied
# unchanged to the test set.
scaler = MinMaxScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# `sparse` was renamed to `sparse_output` in scikit-learn 1.2 and later removed;
# fall back to the old keyword on releases that predate the rename.
try:
    transformer = OneHotEncoder(sparse_output=False)
except TypeError:
    transformer = OneHotEncoder(sparse=False)

# Fit the label encoding on the training labels only. The test labels are merely
# transformed, so both matrices are guaranteed to share the same column layout.
y_train_bin = transformer.fit_transform(y_train[:, np.newaxis])
y_test_bin = transformer.transform(y_test[:, np.newaxis])

print("shape X_train : ", X_train.shape)
print("shape y_train : ", y_train.shape)
print("shape X_test : ", X_test.shape)
print("shape y_test : ", y_test.shape)

shape X_train :  (60000, 784)
shape y_train :  (60000,)
shape X_test :  (10000, 784)
shape y_test :  (10000,)


## Baseline

In [4]:
# Baseline: a one-hidden-layer dense network trained on the full 784-pixel input.
EPOCHS = 10
BATCH_SIZE = 256

# Layers are appended one by one, in the same order as before:
# Dense(128, relu) -> Dense(10) -> Softmax.
model = tf.keras.models.Sequential()
model.add(tf.keras.layers.Dense(128, activation='relu'))
model.add(tf.keras.layers.Dense(10))
model.add(tf.keras.layers.Softmax())

model.compile(
    loss=tf.keras.losses.CategoricalCrossentropy(),
    optimizer=tf.keras.optimizers.Adam(0.001),
    metrics=[tf.keras.metrics.CategoricalAccuracy()],
)

# 20% of the training arrays are held out for validation.
history = model.fit(
    X_train,
    y_train_bin,
    validation_split=0.2,
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


## Half model

In [32]:
BATCH_SIZE = 256


def _build_rho_graph(input_dim, output_dim, num_rff, gamma, n_comp, random_state):
    """Build the graph shared by ``create_model`` and ``create_model_rho``.

    The input is passed through ``layers.QFeatureMapRFF``, a column of ones is
    prepended to the result, a trailing axis of size 1 is added, and the tensor
    is fed to ``layers.QMClassifSDecompFDMatrix``.

    Returns:
        A tuple ``(inputs, rho_y)``: the Keras input tensor and the raw output
        of the ``QMClassifSDecompFDMatrix`` layer.
    """
    inputs = tf.keras.Input(shape=(input_dim,))
    fm_x1 = layers.QFeatureMapRFF(input_dim, dim=num_rff, gamma=gamma, random_state=random_state)
    psi_x = fm_x1(inputs)
    # ones has shape (batch, 1); it becomes the first column of rho_x.
    # NOTE(review): presumably this is the weight of a single-component state,
    # since the outputs are later split into row 0 and rows 1: — confirm
    # against qmc.tf.layers.
    ones = tf.ones_like(inputs[:, 0:1])
    rho_x = tf.keras.layers.concatenate((ones, psi_x), axis=1)
    rho_x = tf.expand_dims(rho_x, axis=-1)
    qmdmc = layers.QMClassifSDecompFDMatrix(dim_x=num_rff, dim_y=output_dim, n_comp=n_comp)
    rho_y = qmdmc(rho_x)
    return inputs, rho_y


def create_model(input_dim: int,
                 output_dim: int,
                 num_rff: int = 512,
                 gamma: float = 2**-5,
                 n_comp: int = 80,
                 random_state: int = 0) -> tf.keras.Model:
    """Build a classifier that maps an input vector to per-class scores.

    Args:
        input_dim: Number of input features.
        output_dim: Passed as ``dim_y`` to ``QMClassifSDecompFDMatrix``.
        num_rff: Passed as ``dim`` to ``QFeatureMapRFF`` and as ``dim_x`` to
            ``QMClassifSDecompFDMatrix``.
        gamma: Passed to ``QFeatureMapRFF``.
        n_comp: Passed to ``QMClassifSDecompFDMatrix``.
        random_state: Passed to ``QFeatureMapRFF``.

    Returns:
        An uncompiled ``tf.keras.Model`` whose output is
        ``sum_j y_w[..., j] * y_v[..., i, j] * conj(y_v[..., i, j])``.
    """
    inputs, rho_y = _build_rho_graph(input_dim, output_dim, num_rff,
                                     gamma, n_comp, random_state)
    # Row 0 of rho_y is used as the weights and the remaining rows as vectors.
    y_w = rho_y[:, 0, :]   # original note: shape (b, d_in)
    y_v = rho_y[:, 1:, :]  # original note: shape (b, dim_x, d_in)
    probs = tf.einsum('...j,...ij,...ij->...i', y_w, y_v, tf.math.conj(y_v), optimize='optimal')
    qmdmclf = tf.keras.Model(inputs=inputs, outputs=probs)
    return qmdmclf


def create_model_rho(input_dim: int,
                     output_dim: int,
                     num_rff: int = 512,
                     gamma: float = 2**-5,
                     n_comp: int = 80,
                     random_state: int = 0) -> tf.keras.Model:
    """Build the same network as ``create_model`` but return the raw ``rho_y``.

    The layers and their creation order match ``create_model``, so
    ``get_weights()`` of one model can be passed to ``set_weights()`` of the
    other. Parameters have the same meaning as in ``create_model``.

    Returns:
        An uncompiled ``tf.keras.Model`` whose output is the unreduced output
        of the ``QMClassifSDecompFDMatrix`` layer.
    """
    inputs, rho_y = _build_rho_graph(input_dim, output_dim, num_rff,
                                     gamma, n_comp, random_state)
    qmdmclf = tf.keras.Model(inputs=inputs, outputs=rho_y)
    return qmdmclf


In [36]:
# `opt` is bound here and is also referenced by later cells.
opt = tf.keras.optimizers.Adam(learning_rate=0.001)

EPOCHS = 10
BATCH_SIZE = 256

# Classifier trained on the first 392 columns (the first half of each flattened image).
qmdmclf_1 = create_model(784 // 2, 10, num_rff=256, n_comp=20)
qmdmclf_1.compile(
    optimizer=opt,
    loss=tf.keras.losses.CategoricalCrossentropy(),
    metrics=['accuracy'],
)

history = qmdmclf_1.fit(
    X_train[:, :784 // 2],
    y_train_bin,
    validation_split=0.2,
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [37]:
# Classifier trained on the last 392 columns (the second half of each flattened image).
qmdmclf_2 = create_model(784 // 2, 10, num_rff=256, n_comp=20)

# This model gets its own Adam instance rather than the shared `opt`. An
# optimizer keeps a step counter and per-variable state, so reusing one that
# already trained another model carries that state over. Newer Keras releases
# may also reject an optimizer built for a different set of variables.
qmdmclf_2.compile(
    loss=tf.keras.losses.CategoricalCrossentropy(),
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
    metrics=['accuracy'],
)

history = qmdmclf_2.fit(X_train[:, 784 // 2:], y_train_bin, batch_size=BATCH_SIZE,
                        epochs=EPOCHS, validation_split=0.2)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


## Multimodal model

In [55]:
# Multimodal classifier: each half of the 784-pixel input goes through its own
# create_model_rho sub-model, the two outputs are combined with DMCrossProduct,
# and a final QMClassifSDecompFDMatrix layer produces the class scores.
# NOTE: later cells address layers of this model by position
# (qm_multim.layers[3], [4], [6]), so the statement order here must not change.
inputs = tf.keras.Input(shape=(784,))
clf_1 = create_model_rho(784 // 2, 10, num_rff=256, n_comp=20)
clf_2 = create_model_rho(784 // 2, 10, num_rff=256, n_comp=20)
# First 392 columns feed clf_1, the remaining 392 feed clf_2.
rho_1 = clf_1(inputs[:, :784 // 2])
rho_2 = clf_2(inputs[:, 784 // 2:])
prod = layers.DMCrossProduct()([rho_1, rho_2])
# dim_x=100 is presumably 10 * 10, the product of the two branches' dim_y — TODO confirm.
qmdmc = layers.QMClassifSDecompFDMatrix(dim_x=100, dim_y=10, n_comp=40)
rho_y = qmdmc(prod)
# Row 0 of rho_y is used as the weights, the remaining rows as the vectors.
y_w = rho_y[:, 0, :] # shape (b, d_in)
y_v = rho_y[:, 1:, :] # shape (b, dim_x, d_in)
# probs[..., i] = sum_j y_w[..., j] * y_v[..., i, j] * conj(y_v[..., i, j])
probs = tf.einsum('...j,...ij,...ij->...i', y_w, y_v, tf.math.conj(y_v), optimize='optimal')
qm_multim = tf.keras.Model(inputs=inputs, outputs=probs)



In [56]:
# A fresh Adam instance is used instead of the shared `opt`, which has already
# trained the half-image models. This keeps the step counter and per-variable
# state from leaking into this run.
qm_multim.compile(
    loss=tf.keras.losses.CategoricalCrossentropy(),
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
    metrics=['accuracy'],
)

EPOCHS = 10
history = qm_multim.fit(X_train, y_train_bin, batch_size=BATCH_SIZE,
                        epochs=EPOCHS, validation_split=0.2)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [10]:
# Print the layer table of the multimodal model. Layer positions in this model
# are used by later cells when copying weights.
qm_multim.summary()

Model: "model_4"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_3 (InputLayer)            [(None, 784)]        0                                            
__________________________________________________________________________________________________
tf.__operators__.getitem_8 (Sli (None, 392)          0           input_3[0][0]                    
__________________________________________________________________________________________________
tf.__operators__.getitem_9 (Sli (None, 392)          0           input_3[0][0]                    
__________________________________________________________________________________________________
model_2 (Functional)            (None, 11, 10)       103278      tf.__operators__.getitem_8[0][0] 
____________________________________________________________________________________________

## Noisy dataset

In [57]:
# Replace the first half of every test image with uniform noise in [0, 1).
# A seeded generator makes the corrupted set, and therefore the metrics reported
# below, reproducible. Only the half that is overwritten is sampled.
NOISE_SEED = 0
noise_rng = np.random.default_rng(NOISE_SEED)

X_test_noise = np.array(X_test)  # copy, so X_test itself stays clean
X_test_noise[:, :784 // 2] = noise_rng.uniform(size=(X_test.shape[0], 784 // 2))
model.evaluate(X_test_noise, y_test_bin)



[9.70030689239502, 0.22439999878406525]

In [58]:
# Score the multimodal model on the noise-corrupted test set.
qm_multim.evaluate(x=X_test_noise, y=y_test_bin)



[1.6450669765472412, 0.39660000801086426]

## Partial model

In [60]:
# Partial model: only the first half of the image goes through a learned branch.
# The second branch is replaced by a constant input, so its output does not
# depend on the data. The next cell addresses layers of this model by position,
# so the statement order here must not change.
inputs = tf.keras.Input(shape=(784,))
clf_1 = create_model_rho(784 // 2, 10, num_rff=256, n_comp=20)
rho_1 = clf_1(inputs[:, :784 // 2])
# rho_in stacks a row of 256 entries equal to 1/256 on top of a 256x256 identity,
# giving shape (257, 256); expand_dims then adds a leading axis of size 1.
# NOTE(review): presumably a uniform mixture over the 256 basis vectors, standing
# in for an unobserved second modality — confirm against qmc.tf.layers.
rho_in = tf.keras.layers.concatenate((tf.ones((1, 256)) / 256., tf.eye(256)), axis = 0)
rho_in = tf.expand_dims(rho_in, axis=0)
clf_2 = layers.QMClassifSDecompFDMatrix(dim_x=256, dim_y=10, n_comp=20, name="clf_2")
# Called once before set_weights, presumably so the layer creates its variables.
clf_2(rho_in)
# Copy the weights of the last layer inside qm_multim.layers[4].
clf_2.set_weights(qm_multim.layers[4].layers[-1].get_weights())
rho_2 = clf_2(rho_in)
#rho_2 = tf.keras.layers.concatenate((rho_2, tf.zeros((11, 10))), axis=1)
# Repeat the constant rho_2 so that it matches the run-time shape of rho_1.
rho_2 = tf.broadcast_to(rho_2, shape=tf.shape(rho_1))
prod = layers.DMCrossProduct()([rho_1, rho_2])
qmdmc = layers.QMClassifSDecompFDMatrix(dim_x=100, dim_y=10, n_comp=40)
rho_y = qmdmc(prod)
# Row 0 of rho_y is used as the weights, the remaining rows as the vectors.
y_w = rho_y[:, 0, :] # shape (b, d_in)
y_v = rho_y[:, 1:, :] # shape (b, dim_x, d_in)
# probs[..., i] = sum_j y_w[..., j] * y_v[..., i, j] * conj(y_v[..., i, j])
probs = tf.einsum('...j,...ij,...ij->...i', y_w, y_v, tf.math.conj(y_v), optimize='optimal')
qm_partial = tf.keras.Model(inputs=inputs, outputs=probs)
qm_partial.compile(loss=tf.keras.losses.CategoricalCrossentropy(), optimizer=opt, metrics=['accuracy'])

In [61]:
# Display the layer list of qm_partial, to look up the positions of its layers.
qm_partial.layers

[<tensorflow.python.keras.engine.input_layer.InputLayer at 0x7fb4168e4280>,
 <tensorflow.python.keras.layers.core.SlicingOpLambda at 0x7fb4168e45b0>,
 <tensorflow.python.keras.engine.functional.Functional at 0x7fb3f127e850>,
 <tensorflow.python.keras.layers.core.TFOpLambda at 0x7fb41688c130>,
 <tensorflow.python.keras.layers.core.TFOpLambda at 0x7fb41688c940>,
 <qmc.tf.layers.DMCrossProduct at 0x7fb3f1283af0>,
 <qmc.tf.layers.QMClassifSDecompFDMatrix at 0x7fb416892970>,
 <tensorflow.python.keras.layers.core.SlicingOpLambda at 0x7fb416cfd400>,
 <tensorflow.python.keras.layers.core.SlicingOpLambda at 0x7fb416cf4610>,
 <tensorflow.python.keras.layers.core.TFOpLambda at 0x7fb416cfd7c0>,
 <tensorflow.python.keras.layers.core.TFOpLambda at 0x7fb41688c8b0>]

In [62]:
# Read the trained weights out of the multimodal model first ...
branch_weights = qm_multim.layers[3].get_weights()
head_weights = qm_multim.layers[6].get_weights()

# ... then load them into the layers at positions 2 and 6 of the partial model.
qm_partial.layers[2].set_weights(branch_weights)
qm_partial.layers[6].set_weights(head_weights)

In [63]:
# Score the partial model on the clean test set.
qm_partial.evaluate(x=X_test, y=y_test_bin)



[1.2016475200653076, 0.4797999858856201]

## Multimodal with fixed layers

In [64]:
# Multimodal model whose two half-image branches are initialised from the
# separately trained classifiers and then frozen; only the layers after them
# are trained. Later cells address layers by position, so the graph is built in
# the same order as the earlier multimodal cell.
inputs = tf.keras.Input(shape=(784,))
clf_1 = create_model_rho(784 // 2, 10, num_rff=256, n_comp=20)
clf_2 = create_model_rho(784 // 2, 10, num_rff=256, n_comp=20)
rho_1 = clf_1(inputs[:, :784 // 2])
rho_2 = clf_2(inputs[:, 784 // 2:])
prod = layers.DMCrossProduct()([rho_1, rho_2])
qmdmc = layers.QMClassifSDecompFDMatrix(dim_x=100, dim_y=10, n_comp=40)
rho_y = qmdmc(prod)
# Row 0 of rho_y is used as the weights, the remaining rows as the vectors.
y_w = rho_y[:, 0, :] # shape (b, d_in)
y_v = rho_y[:, 1:, :] # shape (b, dim_x, d_in)
# probs[..., i] = sum_j y_w[..., j] * y_v[..., i, j] * conj(y_v[..., i, j])
probs = tf.einsum('...j,...ij,...ij->...i', y_w, y_v, tf.math.conj(y_v), optimize='optimal')
qm_multim = tf.keras.Model(inputs=inputs, outputs=probs)

# layers[3] and layers[4] are addressed as the two branch sub-models, matching
# the summary() output shown earlier for the same architecture.
qm_multim.layers[3].set_weights(qmdmclf_1.get_weights())
qm_multim.layers[3].trainable = False
qm_multim.layers[4].set_weights(qmdmclf_2.get_weights())
qm_multim.layers[4].trainable = False

# Compile only after the branches are frozen: Keras documents that changes to
# `trainable` take effect on the next compile(). A fresh optimizer is used so
# that state from earlier training runs is not carried over through `opt`.
qm_multim.compile(
    loss=tf.keras.losses.CategoricalCrossentropy(),
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
    metrics=['accuracy'],
)


In [65]:

# Train qm_multim for three epochs, with 20% of the training arrays held out
# for validation.
EPOCHS = 3
history = qm_multim.fit(
    X_train,
    y_train_bin,
    validation_split=0.2,
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
)

Epoch 1/3
Epoch 2/3
Epoch 3/3


In [66]:
# Partial model, rebuilt against the current qm_multim: only the first half of
# the image goes through a learned branch. The second branch is replaced by a
# constant input. The next cell addresses layers of this model by position, so
# the statement order here must not change.
inputs = tf.keras.Input(shape=(784,))
clf_1 = create_model_rho(784 // 2, 10, num_rff=256, n_comp=20)
rho_1 = clf_1(inputs[:, :784 // 2])
# rho_in stacks a row of 256 entries equal to 1/256 on top of a 256x256 identity,
# giving shape (257, 256); expand_dims then adds a leading axis of size 1.
# NOTE(review): presumably a uniform mixture over the 256 basis vectors, standing
# in for an unobserved second modality — confirm against qmc.tf.layers.
rho_in = tf.keras.layers.concatenate((tf.ones((1, 256)) / 256., tf.eye(256)), axis = 0)
rho_in = tf.expand_dims(rho_in, axis=0)
clf_2 = layers.QMClassifSDecompFDMatrix(dim_x=256, dim_y=10, n_comp=20, name="clf_2")
# Called once before set_weights, presumably so the layer creates its variables.
clf_2(rho_in)
# Copy the weights of the last layer inside qm_multim.layers[4].
clf_2.set_weights(qm_multim.layers[4].layers[-1].get_weights())
rho_2 = clf_2(rho_in)
#rho_2 = tf.keras.layers.concatenate((rho_2, tf.zeros((11, 10))), axis=1)
# Repeat the constant rho_2 so that it matches the run-time shape of rho_1.
rho_2 = tf.broadcast_to(rho_2, shape=tf.shape(rho_1))
prod = layers.DMCrossProduct()([rho_1, rho_2])
qmdmc = layers.QMClassifSDecompFDMatrix(dim_x=100, dim_y=10, n_comp=40)
rho_y = qmdmc(prod)
# Row 0 of rho_y is used as the weights, the remaining rows as the vectors.
y_w = rho_y[:, 0, :] # shape (b, d_in)
y_v = rho_y[:, 1:, :] # shape (b, dim_x, d_in)
# probs[..., i] = sum_j y_w[..., j] * y_v[..., i, j] * conj(y_v[..., i, j])
probs = tf.einsum('...j,...ij,...ij->...i', y_w, y_v, tf.math.conj(y_v), optimize='optimal')
qm_partial = tf.keras.Model(inputs=inputs, outputs=probs)
qm_partial.compile(loss=tf.keras.losses.CategoricalCrossentropy(), optimizer=opt, metrics=['accuracy'])

In [67]:
# Read the trained weights out of the multimodal model first ...
branch_weights = qm_multim.layers[3].get_weights()
head_weights = qm_multim.layers[6].get_weights()

# ... then load them into the layers at positions 2 and 6 of the partial model.
qm_partial.layers[2].set_weights(branch_weights)
qm_partial.layers[6].set_weights(head_weights)

In [71]:
# Score the rebuilt partial model on the clean test set.
qm_partial.evaluate(x=X_test, y=y_test_bin)



[1.0419251918792725, 0.6604999899864197]

In [69]:
# Score the current multimodal model on the noise-corrupted test set.
qm_multim.evaluate(x=X_test_noise, y=y_test_bin)



[0.868797779083252, 0.7153000235557556]