<a href="https://colab.research.google.com/github/fagonzalezo/qmc/blob/master/examples/QMC_Multimodal_Mnist.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Install qmc if running in Google Colab

try:
  import google.colab
  IN_COLAB = True
except:
  IN_COLAB = False

if IN_COLAB:
    !pip install --upgrade  git+https://github.com/fagonzalezo/qmc.git
else:
    import sys
    sys.path.insert(0, "../")

In [2]:
import tensorflow as tf
import numpy as np
import qmc.tf.layers as layers
import qmc.tf.models as models

In [44]:
from tensorflow.keras.datasets import mnist
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder

# Load MNIST and flatten every image into one feature vector per sample.
(X_train, y_train), (X_test, y_test) = mnist.load_data()

# Take the sample count from the arrays themselves rather than hard-coding
# 60000 / 10000; -1 lets NumPy infer the flattened width.
X_train = X_train.reshape((X_train.shape[0], -1)).astype('float32')
X_test = X_test.reshape((X_test.shape[0], -1)).astype('float32')

# Scale each pixel column to [0, 1] using statistics from the training set only.
scaler = MinMaxScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# One-hot encode the labels as a dense array. The keyword for dense output was
# renamed from `sparse` to `sparse_output` in newer scikit-learn releases, so
# try the new name first and fall back to the old one.
try:
    transformer = OneHotEncoder(sparse_output=False)
except TypeError:
    transformer = OneHotEncoder(sparse=False)
y_train_bin = transformer.fit_transform(y_train[:, np.newaxis])
# Reuse the categories learned from the training labels. Re-fitting on the
# test labels could yield a different column layout if a class were missing.
y_test_bin = transformer.transform(y_test[:, np.newaxis])

print("shape X_train : ", X_train.shape)
print("shape y_train : ", y_train.shape)
print("shape X_test : ", X_test.shape)
print("shape y_test : ", y_test.shape)

shape X_train :  (60000, 784)
shape y_train :  (60000,)
shape X_test :  (10000, 784)
shape y_test :  (10000,)


## Baseline

In [42]:
# Baseline: a small fully connected network trained on the whole flattened image.
BATCH_SIZE = 256
EPOCHS = 10

model = tf.keras.models.Sequential(
    [
        tf.keras.layers.Dense(128, activation='relu'),
        tf.keras.layers.Dense(10),
        # Separate softmax layer turns the 10 logits into class probabilities.
        tf.keras.layers.Softmax(),
    ]
)
model.compile(
    loss=tf.keras.losses.CategoricalCrossentropy(),
    optimizer=tf.keras.optimizers.Adam(0.001),
    metrics=[tf.keras.metrics.CategoricalAccuracy()],
)

# The last 20% of the training samples are held out for validation.
history = model.fit(
    X_train,
    y_train_bin,
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    validation_split=0.2,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


## Half model

In [5]:
BATCH_SIZE = 256


def _build_rho_graph(input_dim, output_dim, num_rff, gamma, n_comp, random_state):
    """Build the feature-map + classifier graph shared by both factories.

    Returns:
        A tuple ``(inputs, rho_y)``: the Keras input tensor of shape
        ``(batch, input_dim)`` and the raw output tensor of the
        ``QMClassifSDecompFDMatrix`` layer.
    """
    inputs = tf.keras.Input(shape=(input_dim,))
    fm_x1 = layers.QFeatureMapRFF(input_dim, dim=num_rff, gamma=gamma, random_state=random_state)
    psi_x = fm_x1(inputs)
    # Prepend a column of ones to the feature vector and add a trailing axis of
    # size 1. Presumably this encodes a single-component mixture as
    # [weight; vector], the input format of the classifier layer - TODO confirm
    # against qmc.tf.layers.
    ones = tf.ones_like(inputs[:, 0:1])
    rho_x = tf.keras.layers.concatenate((ones, psi_x), axis=1)
    rho_x = tf.expand_dims(rho_x, axis=-1)
    qmdmc = layers.QMClassifSDecompFDMatrix(dim_x=num_rff, dim_y=output_dim, n_comp=n_comp)
    rho_y = qmdmc(rho_x)
    return inputs, rho_y


def create_model(input_dim,
               output_dim,
               num_rff = 512,
               gamma = 2**-5,
               n_comp = 80,
               random_state = 0):
    """Create a classifier model that outputs one score per class.

    Args:
        input_dim: Number of input features.
        output_dim: Passed as ``dim_y`` to the classifier layer.
        num_rff: Feature-map dimension (``dim`` of ``QFeatureMapRFF`` and
            ``dim_x`` of the classifier layer).
        gamma: ``gamma`` argument of ``QFeatureMapRFF``.
        n_comp: ``n_comp`` argument of the classifier layer.
        random_state: ``random_state`` argument of ``QFeatureMapRFF``.

    Returns:
        An uncompiled ``tf.keras.Model`` mapping ``(batch, input_dim)`` inputs
        to the contracted classifier output described below.
    """
    inputs, rho_y = _build_rho_graph(input_dim, output_dim, num_rff, gamma, n_comp, random_state)
    # Row 0 along axis 1 holds one weight per component (index j); the
    # remaining rows hold the vectors (indices i, j). Presumably the full
    # tensor is (batch, 1 + output_dim, n_comp) - TODO confirm.
    y_w = rho_y[:, 0, :]
    y_v = rho_y[:, 1:, :]
    # probs[i] = sum_j y_w[j] * y_v[i, j] * conj(y_v[i, j])
    probs = tf.einsum('...j,...ij,...ij->...i', y_w, y_v, tf.math.conj(y_v), optimize='optimal')
    qmdmclf = tf.keras.Model(inputs=inputs, outputs=probs)
    return qmdmclf


def create_model_rho(input_dim,
               output_dim,
               num_rff = 512,
               gamma = 2**-5,
               n_comp = 80,
               random_state = 0):
    """Create the same network as ``create_model`` without the final contraction.

    Takes the same arguments as ``create_model``. The returned uncompiled
    ``tf.keras.Model`` outputs the raw tensor of the classifier layer, so it
    can be fed into further layers.
    """
    inputs, rho_y = _build_rho_graph(input_dim, output_dim, num_rff, gamma, n_comp, random_state)
    qmdmclf = tf.keras.Model(inputs=inputs, outputs=rho_y)
    return qmdmclf



In [21]:
# First half-image classifier: trained on the first 392 flattened pixels only.
EPOCHS = 10
BATCH_SIZE = 256

opt = tf.keras.optimizers.Adam(learning_rate=0.001)
qmdmclf_1 = create_model(784 // 2, 10, num_rff=256, n_comp=20)
qmdmclf_1.compile(
    optimizer=opt,
    loss=tf.keras.losses.CategoricalCrossentropy(),
    metrics=['accuracy'],
)

history = qmdmclf_1.fit(
    X_train[:, 0:784 // 2],
    y_train_bin,
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    validation_split=0.2,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [22]:
# Second half-image classifier: trained on the last 392 flattened pixels only.
qmdmclf_2 = create_model(784 // 2, 10, num_rff=256, n_comp=20)
# Give this model its own optimizer. `opt` has already trained qmdmclf_1, so
# reusing it would carry that model's step count and moment estimates into
# this run, and recent Keras optimizers refuse to update variables they were
# not built with.
qmdmclf_2.compile(
    loss=tf.keras.losses.CategoricalCrossentropy(),
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
    metrics=['accuracy'],
)

history = qmdmclf_2.fit(X_train[:, 784 // 2:], y_train_bin, batch_size=BATCH_SIZE,
                    epochs=EPOCHS, validation_split=0.2)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


## Multimodal model

In [117]:
# Multimodal classifier: the flattened image is split into two 392-pixel
# halves, each half is processed by its own sub-model, and the two outputs are
# combined before a final classifier layer.
# NOTE: a later cell addresses this model's layers by position
# (qm_multim.layers[3] and [6]), so the order of the statements matters.
inputs = tf.keras.Input(shape=(784,))
# create_model_rho returns models that output the raw classifier-layer tensor.
clf_1 = create_model_rho(784 // 2, 10, num_rff=256, n_comp=10)
clf_2 = create_model_rho(784 // 2, 10, num_rff=256, n_comp=10)
rho_1 = clf_1(inputs[:, :784 // 2])
rho_2 = clf_2(inputs[:, 784 // 2:])
# Combine the two sub-model outputs; the exact semantics of DMCrossProduct
# live in qmc.tf.layers (presumably a tensor product - TODO confirm).
prod = layers.DMCrossProduct()([rho_1, rho_2])
# dim_x=100 is presumably 10 * 10, the product of the two sub-models'
# output_dim - TODO confirm.
qmdmc = layers.QMClassifSDecompFDMatrix(dim_x=100, dim_y=10, n_comp=40)
rho_y = qmdmc(prod)
y_w = rho_y[:, 0, :] # row 0 along axis 1: one weight per component (index j)
y_v = rho_y[:, 1:, :] # remaining rows: the vectors (indices i, j)
# probs[i] = sum_j y_w[j] * y_v[i, j] * conj(y_v[i, j])
probs = tf.einsum('...j,...ij,...ij->...i', y_w, y_v, tf.math.conj(y_v), optimize='optimal')
qm_multim = tf.keras.Model(inputs=inputs, outputs=probs)



In [118]:
# Compile with a fresh optimizer. `opt` has already been used to train the
# half-image models, so reusing it would carry over their step count and
# moment estimates, and recent Keras optimizers refuse to update variables
# they were not built with.
qm_multim.compile(
    loss=tf.keras.losses.CategoricalCrossentropy(),
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
    metrics=['accuracy'],
)

EPOCHS = 3
history = qm_multim.fit(X_train, y_train_bin, batch_size=BATCH_SIZE,
                    epochs=EPOCHS, validation_split=0.2)

Epoch 1/3
Epoch 2/3
Epoch 3/3


In [29]:
# Print the layer-by-layer structure of the multimodal model.
qm_multim.summary()

Model: "model_23"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_23 (InputLayer)           [(None, 784)]        0                                            
__________________________________________________________________________________________________
tf.__operators__.getitem_47 (Sl (None, 392)          0           input_23[0][0]                   
__________________________________________________________________________________________________
tf.__operators__.getitem_48 (Sl (None, 392)          0           input_23[0][0]                   
__________________________________________________________________________________________________
model_21 (Functional)           (None, 11, 10)       103278      tf.__operators__.getitem_47[0][0]
___________________________________________________________________________________________

## Noisy dataset

In [46]:
# Replace the first half of every test vector with uniform noise in [0, 1)
# and evaluate the dense baseline on the corrupted data.
n_rows, n_cols = X_test.shape
n_noisy = n_cols // 2  # number of leading columns to overwrite
X_test_noise = np.array(X_test)  # copy, so the clean test set stays intact
# Draw only the block that is actually overwritten instead of a full-size
# array that is then sliced in half.
X_test_noise[:, :n_noisy] = np.random.uniform(size=(n_rows, n_noisy))
model.evaluate(X_test_noise, y_test_bin)



[9.556869506835938, 0.23489999771118164]

In [51]:
# Evaluate the multimodal model on the test set whose first half of the
# columns was replaced with uniform noise.
qm_multim.evaluate(X_test_noise, y_test_bin)



[1.4350522756576538, 0.47040000557899475]

## Partial model

In [120]:
# Partial model: same architecture as the multimodal model, but only the first
# half of the image is read. The second modality is replaced by a constant.
# NOTE: the next cell addresses this model's layers by position
# (qm_partial.layers[2] and [6]), so the order of the statements matters.
inputs = tf.keras.Input(shape=(784,))
clf_1 = create_model_rho(784 // 2, 10, num_rff=256, n_comp=10)
rho_1 = clf_1(inputs[:, :784 // 2])
# Constant stand-in for the second sub-model's output: row 0 is ten equal
# weights of 1/10 and rows 1..10 are the 10x10 identity. Presumably this is a
# uniform mixture over the basis vectors, i.e. "no information" - TODO confirm.
rho_2 = tf.keras.layers.concatenate((tf.ones((1, 10)) / 10., tf.eye(10)), axis = 0)
# Repeat the constant along the batch axis so it matches rho_1's shape.
rho_2 = tf.broadcast_to(rho_2, shape=tf.shape(rho_1))
prod = layers.DMCrossProduct()([rho_1, rho_2])
qmdmc = layers.QMClassifSDecompFDMatrix(dim_x=100, dim_y=10, n_comp=40)
rho_y = qmdmc(prod)
y_w = rho_y[:, 0, :] # row 0 along axis 1: one weight per component (index j)
y_v = rho_y[:, 1:, :] # remaining rows: the vectors (indices i, j)
# probs[i] = sum_j y_w[j] * y_v[i, j] * conj(y_v[i, j])
probs = tf.einsum('...j,...ij,...ij->...i', y_w, y_v, tf.math.conj(y_v), optimize='optimal')
qm_partial = tf.keras.Model(inputs=inputs, outputs=probs)
qm_partial.compile(loss=tf.keras.losses.CategoricalCrossentropy(), optimizer=opt, metrics=['accuracy'])

In [121]:
# Copy trained weights from the multimodal model into the partial model.
# Layers are addressed by position as (partial index, multimodal index).
# Presumably the first pair is the first-half sub-model and the second pair is
# the final classifier layer - verify against both model summaries.
for partial_idx, multim_idx in ((2, 3), (6, 6)):
    trained_weights = qm_multim.layers[multim_idx].get_weights()
    qm_partial.layers[partial_idx].set_weights(trained_weights)

In [122]:
# Evaluate the partial model on the clean test set. Its graph reads only the
# first 784 // 2 input columns; the second modality is a constant.
qm_partial.evaluate(X_test, y_test_bin)



[1.6514054536819458, 0.35740000009536743]

In [6]:
# Single-stage QMC classifier on the full image. `fm_x` and `qmdmc` stay
# available at notebook level because a later cell copies their weights.
BATCH_SIZE = 256
EPOCHS = 10
input_dim = 784
component_dim = 128
gamma = 2**-5
n_comp = 80
random_state = 0

inputs = tf.keras.Input(shape=(input_dim,))
fm_x = layers.QFeatureMapRFF(
    input_dim, dim=component_dim, gamma=gamma, random_state=random_state
)
psi_x = fm_x(inputs)
# Prepend a column of ones to the features, then add a trailing axis of size 1.
ones = tf.ones_like(inputs[:, 0:1])
rho_x = tf.expand_dims(
    tf.keras.layers.concatenate((ones, psi_x), axis=1), axis=-1
)
qmdmc = layers.QMClassifSDecompFDMatrix(
    dim_x=component_dim, dim_y=10, n_comp=n_comp
)
rho_y = qmdmc(rho_x)
y_w = rho_y[:, 0, :]   # row 0 along axis 1: one weight per component (index j)
y_v = rho_y[:, 1:, :]  # remaining rows: the vectors (indices i, j)
# probs[i] = sum_j y_w[j] * y_v[i, j] * conj(y_v[i, j])
probs = tf.einsum('...j,...ij,...ij->...i', y_w, y_v, tf.math.conj(y_v))
qmdmclf = tf.keras.Model(inputs=inputs, outputs=probs)

opt = tf.keras.optimizers.Adam(learning_rate=0.001)
qmdmclf.compile(
    optimizer=opt,
    loss=tf.keras.losses.CategoricalCrossentropy(),
    metrics=['accuracy'],
)

history = qmdmclf.fit(
    X_train,
    y_train_bin,
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    validation_split=0.2,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [7]:
# Two-stage model: the feature map and first classifier layer from the
# previously trained single-stage model, followed by a second classifier layer.
inputs = tf.keras.Input(shape=(784,))
fm_x1 = layers.QFeatureMapRFF(784, dim=component_dim , gamma=gamma, random_state=random_state)
psi_x = fm_x1(inputs)
# Prepend a column of ones to the features, then add a trailing axis of size 1.
ones = tf.ones_like(inputs[:, 0:1])
rho_x = tf.keras.layers.concatenate((ones, psi_x), axis=1)
rho_x = tf.expand_dims(rho_x, axis=-1)
qmdmc1 = layers.QMClassifSDecompFDMatrix(dim_x=component_dim, dim_y=10, n_comp=n_comp)
rho_h = qmdmc1(rho_x)
qmdmc2 = layers.QMClassifSDecompFDMatrix(dim_x=10, dim_y=10, n_comp=n_comp)
rho_y = qmdmc2(rho_h)
y_w = rho_y[:, 0, :] # row 0 along axis 1: one weight per component (index j)
y_v = rho_y[:, 1:, :] # remaining rows: the vectors (indices i, j)
# probs[i] = sum_j y_w[j] * y_v[i, j] * conj(y_v[i, j])
probs = tf.einsum('...j,...ij,...ij->...i', y_w, y_v, tf.math.conj(y_v))
qmdmclf2 = tf.keras.Model(inputs=inputs, outputs=probs)

# We freeze the weights of the first layer and train the second.
# The weights are copied and `trainable` is cleared BEFORE compile(): Keras
# fixes the set of trainable weights at compile time, so freezing afterwards
# is not guaranteed to be honoured.
fm_x1.set_weights(fm_x.get_weights())
fm_x1.trainable = False
qmdmc1.set_weights(qmdmc.get_weights())
qmdmc1.trainable = False

opt = tf.keras.optimizers.Adam(learning_rate=0.001)
qmdmclf2.compile(loss=tf.keras.losses.CategoricalCrossentropy(), optimizer=opt, metrics=['accuracy'])

EPOCHS = 5

history = qmdmclf2.fit(X_train, y_train_bin, batch_size=BATCH_SIZE,
                    epochs=EPOCHS, validation_split=0.2)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [8]:
# We free all the weights and fine tune

fm_x1.trainable = True
qmdmc1.trainable = True
# Recompile so the change to `trainable` takes effect. Without this the
# training step built for the frozen model is reused and the newly unfrozen
# layers are never updated. A fresh optimizer is used because the previous one
# was built for the smaller, frozen set of variables.
qmdmclf2.compile(
    loss=tf.keras.losses.CategoricalCrossentropy(),
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
    metrics=['accuracy'],
)
EPOCHS = 10
history = qmdmclf2.fit(X_train, y_train_bin, batch_size=BATCH_SIZE,
                    epochs=EPOCHS, validation_split=0.2)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
