## Conversion from convolution filter to binary filter

In [1]:
import tensorflow as tf
import numpy as np
from tensorflow import keras
from tensorflow.keras import layers


2021-12-20 01:37:46.880314: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2021-12-20 01:37:46.880358: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


In [2]:
assert tf.__version__ >= "2.7.0"

In [11]:
tf.config.run_functions_eagerly(True)
tf.executing_eagerly()
tf.data.experimental.enable_debug_mode()


In [12]:
# Model / data parameters
num_classes = 20
input_shape = (32, 32, 3)

# the data, split between train and test sets
(x_train, y_train), (x_test, y_test) = keras.datasets.cifar100.load_data(label_mode='coarse')

# Scale images to the [0, 1] range
x_train = x_train.astype("float32") / 255
x_test = x_test.astype("float32") / 255

print("x_train shape:", x_train.shape)
print(x_train.shape[0], "train samples")
print(x_test.shape[0], "test samples")


# convert class vectors to one-hot representations
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

x_train shape: (50000, 32, 32, 3)
50000 train samples
10000 test samples


In [13]:
@tf.custom_gradient
def generate_B(W,u): # generate B given weight W and a specified u
  def grad(upstream): # define gradient by ourselves (dc/dW, dc/du)
    dB_dW = 1
    return upstream * dB_dW, 1
  std_W = tf.math.reduce_std(W) # calculate std(W)
  mean_W =  tf.math.reduce_mean(W) # calculate mean(W)
  Bi = tf.math.sign(W - mean_W + u * mean_W) #calculate Bi
  return Bi, grad

In [14]:
from sklearn.linear_model import LinearRegression
def generate_alpha(W,list_of_B): # generate alpha given weight W and list of B containing all Bi
  W = tf.reshape(W, [-1]).numpy() #vectorize W
  list_of_reshape_B = []
  for i in list_of_B: # vectorize Bi
    list_of_reshape_B.append(tf.reshape(i, [-1]))
  B = tf.stack(list_of_reshape_B, axis=1).numpy() # put Bi together into a large matrix
  reg = LinearRegression(fit_intercept=False).fit(B, W) # linear regression
  alpha = reg.coef_ # coefficient of linear regression, which is alpha
  return alpha



In [15]:
@tf.custom_gradient
def H_v(R, v): # calculate H_v given input R and shift parameter v
  def grad(upstream):  # define gradient by ourselves (dc/dR, dc/dv)
    return upstream, tf.math.reduce_sum(upstream)
  return 2 * tf.cast(tf.math.greater(tf.math.add(R, v), tf.constant(0.5)), tf.float32) - 1, grad # calculate H_v

Initialization of ABCConv class for M=3,N=3

In [28]:
import tensorflow as tf
import tensorflow.keras as keras
import keras.layers as layers
import numpy as np

# number of binary activations
N = 3
class ABCConv(layers.Conv2D): # subclass of layers.Conv2D
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.betas = self.add_weight(
            shape=(N,), initializer="random_normal", trainable=True
        )
        self.vs = self.add_weight(shape=(N,), initializer="random_normal", trainable=True)

    def convolution_op(self, inputs, kernel): # override convolution_op
        ## Multiple binary activations and bitwise convolution
        
        # given beta's and v's, get all binary activations
        binary_activations = []
        list_of_A = []
        for i in range(0,N):
          list_of_A.append(H_v(inputs,self.vs[i]))
        

        # get all B
        M = 3
        list_of_u = []
        for i in range(1,M+1): # all u
          list_of_u.append(-1 + (i -1.0) * 2 / (M-1))
        list_of_B = []
        for i in range(1,M+1): # all B
          list_of_B.append(generate_B(kernel, list_of_u[i-1]))
       
        
        
        # get alphas 
        alphas = generate_alpha(kernel,list_of_B)
        # call individual convs
        convs = []
        for j in range(N):
          for i in range(M):
            convs.append(alphas[i]* self.betas[j] * tf.nn.conv2d(
                list_of_A[j],
                list_of_B[i],
                padding="VALID",
                strides=list(self.strides),
                name=self.__class__.__name__ + "conv" + str(i) + str(j),
              ))
        return tf.add_n(convs) # linear combination of all convolution results
        

In [29]:
model_LeNet_3_3 = keras.Sequential(
    [
        keras.Input(shape=input_shape),

        
        ABCConv(6, kernel_size=(5, 5), activation=None),
        layers.MaxPooling2D(pool_size=(2, 2)),
        layers.BatchNormalization(),
        

        ABCConv(16, kernel_size=(5, 5), activation=None),
        layers.MaxPooling2D(pool_size=(2, 2)),
        layers.BatchNormalization(),
        
        
        
        layers.Flatten(),
        layers.Dense(512, activation="relu"),
        layers.Dense(128, activation="relu"),
        layers.Dense(num_classes, activation="softmax"),
    ]
)

model_LeNet_3_3.summary()



Model: "sequential_9"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 abc_conv_16 (ABCConv)       (None, 28, 28, 6)         462       
                                                                 
 max_pooling2d_18 (MaxPoolin  (None, 14, 14, 6)        0         
 g2D)                                                            
                                                                 
 batch_normalization_18 (Bat  (None, 14, 14, 6)        24        
 chNormalization)                                                
                                                                 
 abc_conv_17 (ABCConv)       (None, 10, 10, 16)        2422      
                                                                 
 max_pooling2d_19 (MaxPoolin  (None, 5, 5, 16)         0         
 g2D)                                                            
                                                      

In [None]:
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)

batch_size = 64
epochs = 5

model_LeNet_3_3.compile(loss="categorical_crossentropy", optimizer="adam", metrics=[tf.keras.metrics.CategoricalAccuracy(),tf.keras.metrics.TopKCategoricalAccuracy(k=3)],run_eagerly=True)

model_LeNet_3_3.fit(x_train, y_train, batch_size=batch_size, epochs=epochs, validation_split=0.1)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7f97123709d0>

In [33]:

# Evaluate the model on the test data using `evaluate`
print("Evaluate on test data")
results = model_LeNet_3_3.evaluate(x_test, y_test, batch_size=128)
print("test loss, test acc:", results)

Evaluate on test data
test loss, test acc: [2.349271774291992, 0.3244999945163727, 0.5701000094413757]
