In [None]:
##### Copyright 2020 Google LLC
#
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Codebook based quantization

Codebook based quantization is a non-uniform quantization technique that maps each weight or activation value to the index of a value in the codebook. This allows us to compress weights/activations even further with negligible loss in performance. We will demonstrate this by training an object classification model and applying codebook quantization to the activation with the most values.

In [None]:
import os

import numpy as np
from tensorflow.keras.regularizers import *
from tensorflow.keras.layers import *
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import *
from tensorflow.keras.datasets import *
from tensorflow.keras.utils import to_categorical

from qkeras import *
from qkeras.codebook import *


def get_data(name, sample_size=1.0):
  """Loads a dataset, standardizes the images and one-hot encodes the labels.

  Arguments:
    name: name of an object in this notebook's global namespace that exposes
      `load_data()` returning `(x_train, y_train), (x_test, y_test)`
      (e.g. "cifar10", brought in by the `tensorflow.keras.datasets` import).
    sample_size: fraction of the training set to keep. Any value other than
      1.0 keeps a random subset of `int(n_train * sample_size)` examples;
      the test set is never subsampled.

  Returns:
    `(x_train, y_train_c), (x_test, y_test_c)` where the images are float32
    arrays standardized with the training-set mean/std and the labels are
    one-hot matrices that share the same number of columns.
  """
  (x_train, y_train), (x_test, y_test) = globals()[name].load_data()

  # Images without a channel axis get a trailing singleton axis so that the
  # rest of the pipeline can assume 4-D input.
  if x_train.ndim == 3:
    x_train = x_train[..., np.newaxis]
    x_test = x_test[..., np.newaxis]

  x_train = x_train.astype("float32")
  x_test = x_test.astype("float32")

  # Global (scalar) statistics taken from the training set only; the same
  # values are applied to the test set.
  mean = np.mean(x_train, axis=(0, 1, 2, 3))
  std = np.std(x_train, axis=(0, 1, 2, 3))
  x_train = (x_train - mean) / (std + 1e-7)
  x_test = (x_test - mean) / (std + 1e-7)

  # Use one class count for both splits so the one-hot matrices always have
  # matching widths, even if a split happens to miss the highest label.
  # Converting to a Python int first avoids overflow on narrow label dtypes.
  num_classes = max(int(np.max(y_train)), int(np.max(y_test))) + 1
  y_train_c = to_categorical(y_train, num_classes)
  y_test_c = to_categorical(y_test, num_classes)

  if sample_size != 1.0:
    n_train = x_train.shape[0]
    keep = np.random.permutation(n_train)[:int(n_train * sample_size)]

    x_train = x_train[keep]
    y_train_c = y_train_c[keep]

  return (x_train, y_train_c), (x_test, y_test_c)


def _apply_activation(x, quantizer, name):
  """Appends a plain Keras or a QKeras activation layer to tensor `x`.

  The four standard activation names are routed to `Activation`; any other
  string is passed to `QActivation`.
  """
  if quantizer in ["linear", "relu", "softmax", "sigmoid"]:
    return Activation(quantizer, name=name)(x)
  return QActivation(quantizer, name=name)(x)


def get_model(
  name, X_train, y_train, X_test, y_test,
  blocks=[[32], [64], [128]],
  quantizer_list=[
      "quantized_relu_po2(4,4)",
      "quantized_relu_po2(4,4)"
  ],
  use_stochastic_rounding=0,
  l1v=None,
  epochs=10,
  load_weights=True):
  """Builds a quantized conv classifier, then loads cached weights or trains.

  Arguments:
    name: base name of the weights file; weights are read from / written to
      `name + ".h5"`.
    X_train, y_train: training data; `X_train.shape[1:]` is the input shape
      and `y_train.shape[1]` the number of output units.
    X_test, y_test: validation data passed to `fit`.
    blocks: list of blocks, each a list of filter counts for its strided
      3x3 `QConv2D` layers. (The list defaults are never mutated here.)
    quantizer_list: activation per block. `quantizer_list[b]` is read for
      every block except the last one, and also for the last block if it
      holds more than one conv layer.
    use_stochastic_rounding: value interpolated into the kernel, bias and
      final activation quantizer strings.
    l1v: per-block L1 regularization strength; defaults to all zeros. A
      non-zero value for block b also inserts a dropout layer in front of
      block b + 1.
    epochs: number of training epochs when training is needed.
    load_weights: if True and `name + ".h5"` exists, load it instead of
      training.

  Returns:
    The compiled Keras `Model`, with loaded or freshly trained weights.
  """
  if l1v is None:
    l1v = [0.0] * len(blocks)

  x_i = x = Input(X_train.shape[1:])

  last_block = len(blocks) - 1
  for b, block in enumerate(blocks):
    # we are assuming we want to quantize the block that has sparsity
    # so let's add dropout to the next layer
    if b >= 1 and l1v[b-1] != 0.0:
      x = Dropout(0.3, name=f"drop{b}")(x)

    last_layer = len(block) - 1
    for i, filters in enumerate(block):
      x = QConv2D(
          filters, kernel_size=(3,3), strides=(2,2), padding="same",
          kernel_quantizer=f"quantized_bits(4, use_stochastic_rounding={use_stochastic_rounding})",
          bias_quantizer=f"quantized_po2(4, use_stochastic_rounding={use_stochastic_rounding})",
          kernel_regularizer=l1(l1v[b]) if l1v[b] != 0.0 else None,
          name=f"d{b}_{i}")(x)
      if i != last_layer:
        x = _apply_activation(x, quantizer_list[b], f"a{b}_{i}")
      else:
        # The last conv of a block is batch-normalized; its activation is
        # added after the inner loop.
        x = QBatchNormalization(name=f"bn{b}_{i}")(x)

    if b < last_block:
      x = _apply_activation(x, quantizer_list[b], f"a{b}_{last_layer}")
    else:
      # Classification head, attached once after the final block.
      if len(block) > 0:
        x = QActivation(f"quantized_relu(6,2, use_stochastic_rounding={use_stochastic_rounding})",
                        name=f"a{b}_{last_layer}")(x)
      x = Flatten(name="flatten")(x)
      x = QDense(
          y_train.shape[1], name=f"d{last_block}_{len(block)}")(x)
      x = Activation("softmax", name=f"a{last_block}_{len(block)}")(x)

  model = Model(inputs=x_i, outputs=x)
  model.summary()

  model.compile(loss="categorical_crossentropy", optimizer=Adam(0.001), metrics=["acc"])

  weights_path = name + ".h5"
  weights_loaded = False
  if load_weights and os.path.isfile(weights_path):
    print('Found file...')
    # Only the load is guarded: a stale or incompatible weights file falls
    # back to training, while errors raised during training (or a manual
    # interrupt) propagate instead of silently starting a second full run.
    try:
      model.load_weights(weights_path)
      weights_loaded = True
    except Exception as err:
      print(f"Could not load weights from {weights_path}: {err!r}; training instead.")

  if not weights_loaded:
    model.fit(X_train, y_train, validation_data=(X_test, y_test),
              batch_size=128, epochs=epochs, verbose=2)
    model.save_weights(weights_path)

  return model


# Load the full CIFAR-10 dataset and build the baseline quantized model.
# get_model reuses "cifar10.h5" when it exists, otherwise it trains for
# 50 epochs and writes that file.
name = "cifar10"
train_split, test_split = get_data(name, sample_size=1)
X_train, y_train = train_split
X_test, y_test = test_split

model = get_model(
  name,
  X_train,
  y_train,
  X_test,
  y_test,
  blocks=[[32, 32], [64, 64], [128]],
  quantizer_list=["quantized_relu(6,2)", "quantized_relu(6,2)"],
  epochs=50,
  load_weights=True,
)

In [None]:
from qkeras.codebook import *

# Apply codebook quantization to one activation of the trained model.
# NOTE(review): the positional arguments are assumed to be the compile
# settings, the list of layer indexes to compress and the codebook bit
# width -- confirm against qkeras.codebook.activation_compression.
# Index 2 is presumably the activation following the first conv layer.
compile_config = {'loss' : "categorical_crossentropy", 'metrics' : ["acc"]}
activation_indexes = [2]
activation_bits = 3

cb_tables, models, km_models = activation_compression(
  model,
  compile_config,
  activation_indexes,
  activation_bits,
  X_train, y_train,
  X_test, y_test,
  sample_size=0.3,
)

In [None]:
# Inspect the first (and only) codebook: the pair of lookup tables and the
# quantizer attached to the last layer of the first returned model.
in_table, out_table = cb_tables[0]
q = models[0].layers[-1].quantizer

print(q)
print('in_table:', in_table)
print('out_table:', out_table)

In [None]:
# For each value in the quantizer's range, print it next to the codebook
# value it is mapped to: out_table gives the codebook index and in_table
# turns that index back into a value.
for position, original_value in enumerate(q.range()):
  codebook_value = in_table[out_table[position]]
  print(f'{original_value:8}, {codebook_value:6}')

# Weight compression using codebook quantization

In [None]:
# Compress the kernel of layer 1 (presumably the first QConv2D, "d0_0")
# with a 3-bit codebook computed along axis 3 of the kernel.
conv_layer = model.layers[1]

conv_weights = conv_layer.weights[0].numpy()
print(conv_weights.shape)

quantizer = conv_layer.kernel_quantizer_internal
print(quantizer)

axis = 3
bits = 3
index_table, codebook_table = weight_compression(
  conv_weights, bits, axis, quantizer)

In [None]:
# codebook_table is indexed by position along `axis`; show its shape and the
# codebook values belonging to slice 0.
print(codebook_table.shape)
codebook_table[0]

In [None]:
# index_table holds, for each weight, the index of its entry in the matching
# codebook; show its shape and the indices for slice 0 of the last axis.
print(index_table.shape)
index_table[:,:,:,0]

In [None]:
# Rebuild a dense kernel from the compressed representation: for every slice
# along the last axis, look each stored index up in that slice's codebook.
new_conv_weights = np.zeros(conv_weights.shape)
n_slices = conv_weights.shape[axis]
for channel in range(n_slices):
  channel_codebook = codebook_table[channel]
  channel_indices = index_table[:, :, :, channel]
  new_conv_weights[:, :, :, channel] = channel_codebook[channel_indices]

In [None]:
# Reconstructed (codebook-decoded) weights for slice 0 of the last axis.
new_conv_weights[:,:,:,0]

In [None]:
# Original weights for the same slice, for side-by-side comparison.
conv_weights[:,:,:,0]

In [None]:
# Swap the reconstructed kernel into layer 1 while keeping its current bias.
# This modifies `model` in place.
target_layer = model.layers[1]
bias = target_layer.weights[1].numpy()
target_layer.set_weights([new_conv_weights, bias])

In [None]:
# Evaluate on the test set with the codebook-reconstructed kernel in place.
# NOTE(review): no baseline evaluation is run before the weight swap in this
# notebook, so compare against the validation metrics logged during training.
model.evaluate(X_test, y_test)