In [4]:
from pond.tensor import NativeTensor, PrivateEncodedTensor, PublicEncodedTensor
from pond.nn import Dense, Sigmoid, Reveal, Diff, Softmax, CrossEntropy, Sequential, DataLoader

In [5]:
import numpy as np
from datetime import datetime

# Feature extraction

In [6]:
import keras
from keras.utils import to_categorical

In [7]:
def preprocess_data(dataset, num_classes=5):
    """Turn raw image/label splits into arrays ready for the Keras model.

    Args:
        dataset: ``((x_train, y_train), (x_test, y_test))``. The ``x`` arrays
            must be reshapeable to ``(N, 28, 28, 1)``; the ``y`` arrays hold
            integer class labels.
        num_classes: Width of the one-hot label encoding. Defaults to 5,
            the number of digits in each half of the public/private split.

    Returns:
        ``((x_train, y_train), (x_test, y_test))`` where the images are
        float32 arrays of shape ``(N, 28, 28, 1)`` divided by 255 and the
        labels are one-hot encoded with ``num_classes`` columns.
    """

    def _prepare_images(images):
        # NOTE: this is the shape used by Tensorflow; other backends may differ
        # Reshape to (B, H, W, C)
        images = images.reshape(images.shape[0], 28, 28, 1)
        # astype returns a new array, so the in-place division below never
        # touches the caller's data
        images = images.astype('float32')
        # Normalise to [0,1] (greyscale images)
        images /= 255
        return images

    (x_train, y_train), (x_test, y_test) = dataset

    x_train = _prepare_images(x_train)
    x_test = _prepare_images(x_test)

    y_train = to_categorical(y_train, num_classes)
    y_test = to_categorical(y_test, num_classes)

    return (x_train, y_train), (x_test, y_test)

def load_data():
    """Load MNIST and split it by digit into a public and a private dataset.

    Digits 0-4 form the public dataset used for pre-training. Digits 5-9 form
    the private dataset used for fine-tuning, with labels shifted down by 5 so
    that both datasets use labels 0-4.

    Returns:
        ``(public, private)``, each the result of ``preprocess_data`` applied
        to ``((x_train, y_train), (x_test, y_test))`` for that subset.
    """
    (train_images, train_digits), (test_images, test_digits) = keras.datasets.mnist.load_data()

    # Boolean selectors, computed once per split
    train_low, train_high = train_digits < 5, train_digits >= 5
    test_low, test_high = test_digits < 5, test_digits >= 5

    # Examples with target digit <5 become the public training data
    public_dataset = (
        (train_images[train_low], train_digits[train_low]),
        (test_images[test_low], test_digits[test_low]),
    )

    # Remaining examples are kept for private fine-tuning (labels re-based to 0-4)
    private_dataset = (
        (train_images[train_high], train_digits[train_high] - 5),
        (test_images[test_high], test_digits[test_high] - 5),
    )

    return preprocess_data(public_dataset), preprocess_data(private_dataset)

## Pre-train on public data

In [8]:
# Both datasets come back preprocessed: normalised images and one-hot labels
public_dataset, private_dataset = load_data()

# Uses Keras layers that have equivalents among the MPC CNN layers,
# i.e. sigmoid instead of ReLU and average pooling instead of max pooling
feature_layers = [
    keras.layers.Conv2D(32, (3, 3), padding='same', input_shape=(28, 28, 1)),
    keras.layers.Activation('sigmoid'),
    keras.layers.Conv2D(32, (3, 3), padding='same'),
    keras.layers.Activation('sigmoid'),
    keras.layers.AveragePooling2D(pool_size=(2,2)),
    keras.layers.Dropout(.25),
    keras.layers.Flatten()
]

# Fully-connected head; only used for pre-training here. The feature layers
# above are what is reused later as the extractor.
classification_layers = [
    keras.layers.Dense(128),
    keras.layers.Activation('sigmoid'),
    keras.layers.Dropout(.50),
    keras.layers.Dense(5),
    keras.layers.Activation('softmax')
]

# Kept as two lists so the feature part can be identified separately
model = keras.models.Sequential(feature_layers + classification_layers)

model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'])

# Public split only; the private split is not touched during pre-training
(x_train, y_train), (x_test, y_test) = public_dataset

# Pre-train model on public data
model.fit(
    x_train, y_train,
    epochs=5,
    batch_size=32,
    verbose=1,
    validation_data=(x_test, y_test))

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<tensorflow.python.keras.callbacks.History at 0x1ee87213df0>

## Extract features from private data (unencrypted for now)

In [9]:
# Print the layer-by-layer architecture of the pre-trained model
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 28, 28, 32)        320       
_________________________________________________________________
activation (Activation)      (None, 28, 28, 32)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 28, 28, 32)        9248      
_________________________________________________________________
activation_1 (Activation)    (None, 28, 28, 32)        0         
_________________________________________________________________
average_pooling2d (AveragePo (None, 14, 14, 32)        0         
_________________________________________________________________
dropout (Dropout)            (None, 14, 14, 32)        0         
_________________________________________________________________
flatten (Flatten)            (None, 6272)              0

In [11]:
# The feature extractor ends at the last feature layer (Flatten), i.e. the
# last layer before the classification layers. Its position is derived from
# feature_layers instead of being hard-coded, so it follows any change to the
# feature stack.
flatten_layer = model.get_layer(index=len(feature_layers) - 1)
# Check we're taking the correct layer before building the extractor
assert flatten_layer.name.startswith('flatten'), \
    "expected a Flatten layer, got '%s'" % flatten_layer.name

# Feature extractor (layers up to FC layers)
extractor = keras.models.Model(
    inputs=model.input,
    outputs=flatten_layer.output
)

In [12]:
# Unpack the private split. Note that y_train / y_test are rebound here and
# now refer to the private labels (already one-hot encoded by preprocess_data),
# no longer to the public labels used for pre-training.
(x_train_images, y_train), (x_test_images, y_test) = private_dataset

# Get features from private dataset (unencrypted)
x_train_features = extractor.predict(x_train_images)
x_test_features  = extractor.predict(x_test_images)

## Save extracted features

In [13]:
# Persist the extracted features and their labels so the fine-tuning part of
# the notebook can be run without repeating the Keras pre-training.
for target_path, array in (
    ('x_train_features.npy', x_train_features),
    ('y_train.npy', y_train),
    ('x_test_features.npy', x_test_features),
    ('y_test.npy', y_test),
):
    np.save(target_path, array)

## Load extracted features

In [14]:
# Read the cached arrays back from disk, in the same order they were written
x_train_features, y_train, x_test_features, y_test = (
    np.load(source_path)
    for source_path in (
        'x_train_features.npy',
        'y_train.npy',
        'x_test_features.npy',
        'y_test.npy',
    )
)

# Sanity check: one shape per array, printed on a single line
print(*(loaded.shape for loaded in (x_train_features, y_train, x_test_features, y_test)))

(29404, 6272) (29404, 5) (4861, 6272) (4861, 5)


# Fine-tune

In [15]:
# Using MPC-defined Sequential and layers

classifier = Sequential([
    # Same head shape as the Keras classification layers used for pre-training:
    # 6272 extracted features -> 128 hidden units -> 5 classes.
    # NOTE(review): Dense appears to take (units, input features) in that
    # order — confirm against pond.nn.
    Dense(128, 6272),
    Sigmoid(),
    # Dropout(.5),
    Dense(5, 128),
    # Reveal sits before Softmax, so the softmax is applied to revealed values
    Reveal(),
    Softmax()
])

In [16]:
def accuracy(classifier, x, y, verbose=0, wrapper=NativeTensor):
    """Return the fraction of samples in ``x`` that ``classifier`` labels correctly.

    Args:
        classifier: Model exposing ``predict``.
        x: Input features; wrapped in a ``DataLoader`` using ``wrapper``.
        y: One-hot encoded target labels, one row per sample.
        verbose: Passed through to ``classifier.predict``.
        wrapper: Tensor type handed to the ``DataLoader`` for ``x``.

    Returns:
        Number of matching class indices divided by the number of samples.
    """
    # Forward pass, revealed, then reduced to a class index per sample
    inputs = DataLoader(x, wrapper)
    predictions = classifier.predict(inputs, verbose=verbose)
    predicted_labels = predictions.reveal().argmax(axis=1)

    # Ground-truth class index per sample, taken from the one-hot rows
    true_labels = NativeTensor(y).argmax(axis=1)

    hits = predicted_labels.unwrap() == true_labels.unwrap()
    return sum(hits) / len(hits)

## ... using NativeTensor

In [17]:
# (Re-)initialise the classifier for NativeTensor values, passing the shape of
# the training feature matrix.
# NOTE(review): NativeTensor is presumably the plain, unencoded baseline — confirm against pond.tensor.
classifier.initialize(x_train_features.shape, NativeTensor)

# Wall-clock timing of the whole fit
start = datetime.now()
classifier.fit(
    DataLoader(x_train_features, wrapper=NativeTensor), 
    DataLoader(y_train, wrapper=NativeTensor), 
    loss=CrossEntropy(), 
    epochs=3,
    verbose=1
)
stop = datetime.now()

print("Elapsed:", stop - start)

2020-06-27 18:00:55.141997 Epoch 1/3

AttributeError: 'NoneType' object has no attribute 'shape'

In [29]:
# Evaluate the fitted classifier on both private splits (default NativeTensor wrapper)
print("Train accuracy:", accuracy(classifier, x_train_features, y_train))
print("Test accuracy:",  accuracy(classifier, x_test_features,  y_test))

Train accuracy: 0.9066793633519249
Test accuracy: 0.9094836453404649


## ... using PublicEncodedTensor

In [19]:
# Same experiment as the NativeTensor run, but with inputs and labels wrapped
# as PublicEncodedTensor; the classifier is re-initialised for that tensor type.
classifier.initialize(x_train_features.shape, PublicEncodedTensor)

# Wall-clock timing of the whole fit
start = datetime.now()
classifier.fit(
    DataLoader(x_train_features, wrapper=PublicEncodedTensor), 
    DataLoader(y_train, wrapper=PublicEncodedTensor),
    loss=CrossEntropy(), 
    epochs=3,
    verbose=1
)
stop = datetime.now()

print("Elapsed:", stop - start)

2020-06-27 18:02:42.334865 Epoch 1/3


  return PublicEncodedTensor.from_values(1. / decode(x.elements))


OverflowError: Python int too large to convert to C long

In [31]:
# verbose=2 is forwarded to classifier.predict.
# NOTE(review): accuracy() is called with its default wrapper (NativeTensor),
# not PublicEncodedTensor — confirm this is intended for this section.
print("Train accuracy:", accuracy(classifier, x_train_features, y_train, verbose=2))
print("Test accuracy:",  accuracy(classifier, x_test_features,  y_test,  verbose=2))

2018-06-13 18:06:29.814125 Batch 0
2018-06-13 18:06:32.826425 Batch 1
2018-06-13 18:06:35.846949 Batch 2
2018-06-13 18:06:38.829803 Batch 3
2018-06-13 18:06:41.808488 Batch 4
2018-06-13 18:06:44.766767 Batch 5
2018-06-13 18:06:47.721123 Batch 6
2018-06-13 18:06:50.668613 Batch 7
2018-06-13 18:06:53.589826 Batch 8
2018-06-13 18:06:56.528399 Batch 9
2018-06-13 18:06:59.497149 Batch 10
2018-06-13 18:07:02.436153 Batch 11
2018-06-13 18:07:05.411810 Batch 12
2018-06-13 18:07:08.310117 Batch 13
2018-06-13 18:07:11.218999 Batch 14
2018-06-13 18:07:14.101841 Batch 15


KeyboardInterrupt: 

## ... using PrivateEncodedTensor

In [None]:
# Re-initialise the classifier for PrivateEncodedTensor values. The input shape
# and tensor type are passed explicitly, exactly as in the NativeTensor and
# PublicEncodedTensor runs, so the parameters are created with the tensor type
# used for training below.
classifier.initialize(x_train_features.shape, PrivateEncodedTensor)

# Wall-clock timing of the whole fit
start = datetime.now()
classifier.fit(
    DataLoader(x_train_features, wrapper=PrivateEncodedTensor),
    DataLoader(y_train, wrapper=PrivateEncodedTensor),
    loss=CrossEntropy(),
    epochs=3,
    verbose=2
)
stop = datetime.now()

print("Elapsed:", stop - start)

In [None]:
# Computed in a separate cell from the printing below, so the results can be
# shown again without re-running the predictions
train_accuracy = accuracy(classifier, x_train_features, y_train, verbose=2)
test_accuracy  = accuracy(classifier, x_test_features,  y_test,  verbose=2)

In [None]:
# Report the accuracies computed in the previous cell
print("Train accuracy:", train_accuracy)
print("Test accuracy:", test_accuracy)

In [None]:
# Persist both shares of every trained parameter. Layers 0 and 2 are the two
# Dense layers of the classifier (layer 1 is the Sigmoid in between).
first_dense = classifier.layers[0]
second_dense = classifier.layers[2]

share_files = (
    ('layer0_weights_0.npy', first_dense.weights.shares0),
    ('layer0_weights_1.npy', first_dense.weights.shares1),
    ('layer0_bias_0.npy', first_dense.bias.shares0),
    ('layer0_bias_1.npy', first_dense.bias.shares1),
    ('layer2_weights_0.npy', second_dense.weights.shares0),
    ('layer2_weights_1.npy', second_dense.weights.shares1),
    ('layer2_bias_0.npy', second_dense.bias.shares0),
    ('layer2_bias_1.npy', second_dense.bias.shares1),
)

for share_path, share in share_files:
    np.save(share_path, share)