In [43]:
import numpy as np
import tensorflow as tf
import classifier, training
#import experiments.MPS_classifier.MPSMNIST as mm
# Reload edited project modules (e.g. `classifier`, `training`) automatically
# before each cell runs, so changes to the .py files take effect without
# restarting the kernel.
%load_ext autoreload
%autoreload 2
# TensorFlow 1.x API: switch to eager execution. According to the TensorFlow
# documentation this has to be called once at program startup, before other
# TensorFlow operations are created.
tf.enable_eager_execution()

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# Data preprocessing

First, let's load the MNIST dataset of handwritten digits using `tensorflow`.

In [38]:
# NOTE(review): `mm` is not defined anywhere in the visible notebook -- its
# import in the first cell is commented out -- so this cell raises NameError on
# a fresh kernel. The `x_train` / `y_train` built here are also reassigned by
# the later Keras loading cell. TODO confirm whether this cell is still needed.

# Load the train / validation / test splits through the project's `mm` helper.
train,valid,test=mm.load_MNIST('data')
# Shuffle the training inputs, then pass the returned `new_order` to the
# validation and test shuffles (presumably so that all splits share the same
# ordering -- verify against mm.shuffle).
train[0],new_order = mm.shuffle(train[0])
valid[0],_ = mm.shuffle(valid[0],new_order)
test[0],_ = mm.shuffle(test[0],new_order)
n_batches=50
# Split the training set into `n_batches` mapped batches with one-hot labels.
data, labs = mm.generate_mapped_MNIST_batches(train[0],train[1],n_batches=n_batches,which='one_hot',scaling=1.0,  
                                              shuffle_pixels=False)
# The validation set is kept as a single batch.
valid_data, valid_labels = mm.generate_mapped_MNIST_batches(valid[0], valid[1],n_batches=1)
# Size of the last axis of the first batch (presumably the number of pixels -- TODO confirm).
N = data[0].shape[2]
# Only the first batch is used below; its last two axes are swapped.
x_train = np.transpose(data[0],(0,2,1))
y_train = labs[0]

loading MNIST data


In [39]:
# Sanity check: show the dimensions of the current training array.
print(np.shape(x_train))

(1000, 784, 2)


In [44]:
# Fetch MNIST through the Keras dataset helper; `path` is the cache file name
# handed to `load_data`.
mnist = tf.keras.datasets.mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data(path='mnist.npz')

# Report the raw array shapes of both splits (inputs first, then labels).
print(f"{x_train.shape} {y_train.shape}")
print(f"{x_test.shape} {y_test.shape}")

(60000, 28, 28) (60000,)
(10000, 28, 28) (10000,)


Next, let's encode the data using the feature map $\Phi (p) = (1-p, p)^T$, where $p \in [0, 1]$ is the normalized pixel intensity, and transform the labels to one-hot format.

In [45]:
def data_encoder(data):
  """Map every value p to the two-component feature vector (1 - p, p).

  The original implementation only accepted 2-D input because of its
  hard-coded axis permutation. Stacking along a new trailing axis gives the
  same result for 2-D input and also works for any other rank.

  Args:
    data: Array-like of numeric values with any shape.

  Returns:
    np.ndarray of shape `data.shape + (2,)`; index 0 of the last axis holds
    `1 - p` and index 1 holds `p` for each input value `p`.
  """
  data = np.asarray(data)
  # The new last axis indexes the two feature components.
  return np.stack([1 - data, data], axis=-1)

def to_one_hot(labels, n_labels=10):
  """Turn a sequence of integer class labels into one-hot rows.

  Args:
    labels: 1-D sequence of integer class indices.
    n_labels: Number of columns (classes) in the output.

  Returns:
    Float array of shape `(len(labels), n_labels)` that is zero everywhere
    except for a 1 at column `labels[i]` of row `i`.
  """
  n_samples = len(labels)
  encoded = np.zeros((n_samples, n_labels))
  # Pair every row index with its label so one entry per row is switched on.
  row_ids = np.arange(n_samples)
  encoded[row_ids, labels] = 1
  return encoded

# This cell rebinds x_train / x_test / y_train / y_test to their encoded
# versions, so it must run exactly once after the loading cell. Fail early
# instead of silently re-encoding already-encoded data on an accidental re-run.
if np.ndim(y_train) != 1 or np.ndim(y_test) != 1:
  raise RuntimeError('Labels are already one-hot encoded; '
                     're-run the data loading cell before this one.')

# Number of distinct classes found in the training labels.
n_labels = len(np.unique(y_train))

# Flatten and normalize: one row per image, values divided by 255.0
# (assumes 8-bit pixel intensities).
x_train = x_train.reshape((len(x_train), np.prod(x_train.shape[1:]))) / 255.0
x_test = x_test.reshape((len(x_test), np.prod(x_test.shape[1:]))) / 255.0
# Encode
x_train = data_encoder(x_train)
x_test = data_encoder(x_test)
# Pass the measured class count explicitly so both label arrays agree with
# `n_labels` rather than relying on the helper's default of 10.
y_train = to_one_hot(y_train, n_labels)
y_test = to_one_hot(y_test, n_labels)

print(n_labels)
print(x_train.shape, y_train.shape)
print(x_test.shape, y_test.shape)

10
(60000, 784, 2) (60000, 10)
(10000, 784, 2) (10000, 10)


# Define MPS classifier

Note that our MPS has one more site than the data because of the label tensor. We also have to set the bond dimension, which is a hyperparameter and remains constant during training. In a more sophisticated implementation, the bond dimension could be changed adaptively according to the complexity of the training data by performing SVD steps. This is currently not implemented but could be added in a future version.

In [46]:
# Build the classifier: one site per encoded input position plus one extra
# site for the label tensor; the bond dimension is a fixed hyperparameter.
mps = classifier.MatrixProductState(
    n_sites=x_train.shape[1] + 1,
    n_labels=n_labels,
    d_phys=x_train.shape[2],
    d_bond=12,
)

# Example training

We can train the `mps` object we created using the `training.fit` function. Here we perform a quick training run on a small portion of the data, without validation.

In [50]:
# TensorFlow 1.x Adam optimizer with a small step size.
optimizer = tf.train.AdamOptimizer(learning_rate=1e-5)

# Quick demonstration run on the first 1000 training samples only.
# NOTE(review): the result is bound back to `mps`, so re-running this cell
# presumably continues from the already-trained model rather than from a
# fresh one -- confirm against training.fit.
mps, history = training.fit(
    mps,
    optimizer,
    x_train[:1000],
    y_train[:1000],
    n_epochs=5,
    batch_size=50,
    n_message=1,
)


Epoch: 0
Time: 4.81847882270813
Loss: 0.007125909440219402
Accuracy: 0.999

Epoch: 1
Time: 9.556517839431763
Loss: 0.0010913354344666004
Accuracy: 1.0

Epoch: 2
Time: 14.318383932113647
Loss: 0.0003265396226197481
Accuracy: 1.0

Epoch: 3
Time: 19.09405493736267
Loss: 0.00016412910190410912
Accuracy: 1.0

Epoch: 4
Time: 23.891597747802734
Loss: 0.00010434159776195884
Accuracy: 1.0
