In [1]:
#Import libraries
import os
import tensorflow as tf
import numpy as np
from sklearn.model_selection import StratifiedShuffleSplit

# Load the CIFAR-10 dataset through Keras: a (train, test) pair, each holding
# an (images, labels) tuple.
train_split, test_split = tf.keras.datasets.cifar10.load_data()
x_train, y_train = train_split
x_test, y_test = test_split

In [2]:
# Convert the pixel data to float64 and rescale it from 0-255 to 0.0-1.0.
# NOTE: this rebinds x_train / x_test, so re-running the cell without
# reloading the data divides by 255 a second time.
PIXEL_MAX = 255.0
x_train = x_train.astype(np.float64) / PIXEL_MAX
x_test = x_test.astype(np.float64) / PIXEL_MAX

In [3]:
# Weights applied to the R, G and B channels when collapsing to one channel.
LUMA_WEIGHTS = [0.299, 0.587, 0.114]


def _to_gray(images):
    """Return `images` reduced to one weighted channel, kept as a trailing axis.

    The first three entries of the last axis are combined with LUMA_WEIGHTS,
    then an axis of length 1 is appended so the result stays 4-D
    (e.g. (N, 32, 32, 3) -> (N, 32, 32, 1)).
    """
    gray = np.dot(images[..., :3], LUMA_WEIGHTS)
    # add empty color dimension
    return gray[..., np.newaxis]


x_train = _to_gray(x_train)
x_test = _to_gray(x_test)

In [4]:
# Carve a stratified validation set (1/6 of the samples) out of the training
# data. The splitter is configured for five splits, but only the first one
# it yields is consumed here.
splitter = StratifiedShuffleSplit(n_splits=5, test_size=1/6, random_state=0)
train_index, valid_index = next(splitter.split(x_train, y_train))

# Take the validation subset first: x_train / y_train are rebound below and
# the indices refer to the full, un-split arrays.
x_valid = x_train[valid_index]
y_valid = y_train[valid_index]
x_train = x_train[train_index]
y_train = y_train[train_index]
print(x_train.shape, x_valid.shape, x_test.shape)

(41666, 32, 32, 1) (8334, 32, 32, 1) (10000, 32, 32, 1)


In [5]:
# Load the saved TensorFlow/Keras model from disk and print its layer summary.
model_path = 'my_model.h5'
tf_model = tf.keras.models.load_model(model_path)
tf_model.summary()

Model: "sequential_8"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_19 (Conv2D)           (None, 30, 30, 32)        320       
_________________________________________________________________
activation_21 (Activation)   (None, 30, 30, 32)        0         
_________________________________________________________________
conv2d_20 (Conv2D)           (None, 28, 28, 32)        9248      
_________________________________________________________________
activation_22 (Activation)   (None, 28, 28, 32)        0         
_________________________________________________________________
max_pooling2d_16 (MaxPooling (None, 14, 14, 32)        0         
_________________________________________________________________
dropout_21 (Dropout)         (None, 14, 14, 32)        0         
_________________________________________________________________
conv2d_21 (Conv2D)           (None, 12, 12, 64)       

In [6]:
# Run the TensorFlow model over the whole test set (progress output silenced).
score = tf_model.evaluate(x_test, y_test, verbose=0)

# Report the second entry of the result as the accuracy.
# NOTE(review): presumably score[0] is the loss and score[1] the accuracy
# metric the model was compiled with -- confirm against the training code.
test_accuracy = score[1]
print('\n', 'Test accuracy:', test_accuracy)


 Test accuracy: 0.7405


In [7]:
%%time
N = 1000
tf_y = tf_model.predict(x_test[:N])
tf_y_args = np.argmax(tf_y, -1)
scores = np.zeros(N)
for i in range(N):
    scores[i] = int(tf_y_args[i] == y_test[i])

print('My acc:', np.sum(scores) / N)

My acc: 0.752
CPU times: user 610 ms, sys: 8.75 ms, total: 619 ms
Wall time: 359 ms


In [28]:
# Create a udnn model with the same layer sequence as the TensorFlow model
# and load the TensorFlow weights into it.
import udnn

# Element type shared by every udnn layer, the weight cast below, and the
# input cast in the prediction cell.
dtype = "int8"
model = udnn.Model()

# (layer name, udnn layer class, extra constructor arguments after dtype).
# NOTE(review): for Conv2D the extras are presumably (kernel size, filters),
# for MaxPooling the pool size and for Dense the output width -- confirm
# against the udnn API.
layer_specs = [
    ("conv0", udnn.Conv2D, (3, 32)),
    ("relu0", udnn.ReLu, ()),
    ("conv1", udnn.Conv2D, (3, 32)),
    ("relu1", udnn.ReLu, ()),
    ("maxpool1", udnn.MaxPooling, (2,)),

    ("conv2", udnn.Conv2D, (3, 64)),
    ("relu2", udnn.ReLu, ()),
    ("maxpool2", udnn.MaxPooling, (2,)),

    ("flatten3", udnn.Flatten, ()),
    ("dense3", udnn.Dense, (512,)),
    ("relu3", udnn.ReLu, ()),
    ("dense4", udnn.Dense, (10,)),
    ("sigmoid4", udnn.Sigmoid, ()),
]

# The first layer takes the per-image input shape; every later layer takes the
# first three output dimensions of the layer added just before it.
in_shape = x_train.shape[1:]
for layer_idx, (layer_name, layer_cls, extra_args) in enumerate(layer_specs):
    model.add_layer(layer_name, layer_cls(in_shape, dtype, *extra_args))
    in_shape = model.get_layer(layer_idx).out.shape[:3]

# Cast the TensorFlow weights to the element type used by the udnn layers.
# The cast follows `dtype` rather than a hard-coded tf.int8, so the two cannot
# drift apart.
# NOTE(review): casting float weights to an integer type truncates them. If the
# trained weights mostly lie in (-1, 1), nearly all of them become 0, which
# would explain the chance-level accuracy printed by the evaluation cell.
# Consider scaling weights (and inputs) before the cast, or a float dtype --
# confirm what udnn supports.
weights_cast = [tf.cast(w, dtype) for w in tf_model.weights]
model.load_weights(weights_cast)

In [29]:
%%time
N = 1000

ys = np.zeros_like(tf_y)
for idx, x in enumerate(x_test[:N]):
    ys[idx] = np.array(model.predict(x.astype(dtype))).flatten()

ys_args = np.argmax(ys, -1)
scores = np.zeros(N)
for i in range(N):
    scores[i] = int(ys_args[i] == y_test[i])

print('My acc:', np.sum(scores) / N)

My acc: 0.103
CPU times: user 31.9 s, sys: 31.6 ms, total: 32 s
Wall time: 32.2 s
