In [1]:
import os
import sys
import math
import random
import numpy as np
import tensorflow as tf
from tensorflow import keras
from datetime import datetime

In [2]:
base_path = os.environ['HOME'] + '/cifar100/AdaNet'
if os.path.isdir(base_path):
  pass
else:
  os.mkdir(base_path)

In [3]:
data = tf.keras.datasets.cifar100.load_data()

train = data[0]
test = data[1]

train_image, train_label = train[0].astype(np.float32), train[1]
test_image, test_label= test[0].astype(np.float32), test[1]

train_label = np.reshape(train_label, newshape=[-1])
test_label = np.reshape(test_label, newshape=[-1])

In [4]:
# Generate Dataset obj   
dataset_obj = tf.data.Dataset.from_tensors(
    {'image': train_image, 'label': train_label})
#dataset_obj = dataset_obj.shuffle(50000)
dataset_obj = dataset_obj.unbatch()

# split train-validation dataset
train_dataset = dataset_obj.take(40000)
val_dataset = dataset_obj.skip(40000).take(10000)

test_dataset = tf.data.Dataset.from_tensors(
  {'image': test_image, 'label': test_label})
test_dataset = test_dataset.shuffle(10000).unbatch()

def _preprocessing(dataset, train_mode):
  """
  While train steps, image will be padded random crop and filped(horizontaly)
  And entire steps, per-pixel mean subtracted will be required.
  Args:
    dataset: 'tf.data.Dataset'
    train_mode: 'bool'
  Returns:
    'tf.data.Dataset'
  """
  if train_mode:
    image = dataset['image']
    pad = tf.constant([[2, 2], [2, 2], [0, 0]])
    image = tf.pad(
      tensor=image, paddings=pad)
    image = tf.image.random_crop(
      value=image, size=[32, 32, 3])
    image = tf.image.random_flip_left_right(image=image)
  else:
    image = dataset['image']
    
  image = tf.math.subtract(
    x=image,
    y=tf.reshape(
      tf.math.reduce_mean(image, axis=2), shape=[32, 32, 1]))
  label = dataset['label']
  return (image, label)

train_dataset = train_dataset.map(
  lambda x: _preprocessing(x, train_mode=True))
val_dataset = val_dataset.map(
  lambda x: _preprocessing(x, train_mode=False))
test_dataset = test_dataset.map(
  lambda x: _preprocessing(x, train_mode=False))


train_dataset = train_dataset.shuffle(10000)
val_dataset = val_dataset.shuffle(10000)

In [5]:
class ResidualBlock(keras.layers.Layer):
  def __init__(self, constraint, **kwargs):
    """
    Args:
      constraint(int): Max constraint for variable. (upper lambda)
    """
    super(ResidualBlock, self).__init__()
    self.conv_idt = keras.layers.Conv2D(
      filters=3, kernel_size=[1, 1],
      strides=1, padding='same', name='ConvIdt',
      kernel_constraint=keras.constraints.MaxNorm(constraint))
    self.bn_idt = keras.layers.BatchNormalization()
    self.relu_idt = keras.layers.ReLU()
    
    self.conv_1 = keras.layers.Conv2D(
      filters=64, kernel_size=[1, 1],
      strides=1, padding='same', name='Conv1',
      kernel_constraint=keras.constraints.MaxNorm(constraint))
    self.bn_1 = keras.layers.BatchNormalization()
    self.relu_1 = keras.layers.ReLU()
    
    self.conv_2 = keras.layers.Conv2D(
      filters=64, kernel_size=[1, 1],
      strides=1, padding='same', name='Conv2',
      kernel_constraint=keras.constraints.MaxNorm(constraint))
    self.bn_2 = keras.layers.BatchNormalization()
    self.relu_2 = keras.layers.ReLU()
    
    self.conv_3 = keras.layers.Conv2D(
      filters=3, kernel_size=[1, 1],
      strides=1, padding='same', name='Conv3',
      kernel_constraint=keras.constraints.MaxNorm(constraint))
    self.bn_3 = keras.layers.BatchNormalization()
    self.relu_3 = keras.layers.ReLU()
    
    self.add_last = keras.layers.Add()
    self.relu_last = keras.layers.ReLU()
    
  def call(self, inputs):
    idt_x = self.conv_idt(inputs)
    idt_x = self.bn_idt(idt_x)
    idt_x = self.relu_idt(idt_x)
    x = self.conv_1(inputs)
    x = self.bn_1(x)
    x = self.relu_1(x)
    x = self.conv_2(x)
    x = self.bn_2(x)
    x = self.relu_2(x)
    x = self.conv_3(x)
    x = self.bn_3(x)
    x = self.relu_3(x)
    x = self.add_last([x, idt_x])
    return self.relu_last(x)
  
  
class AdaNetLoss(keras.losses.Loss):
  def __init__(self,
               #weight,
               #num_outputs, 
               #batch_size, 
               #num_classes,
               name='weighted_loss'):
    super(AdaNetLoss, self).__init__()
    self.weight = weight
    #self.num_outputs = num_outputs
    #self.batch_size = batch_size
    #self.num_classes = num_classes
  
  @tf.function
  def call(self, y_true, y_pred):
    object_function = tf.math.subtract(
      tf.constant(1, dtype=tf.float32),
      tf.math.multiply(
        tf.cast(y_pred, dtype=tf.float32), tf.cast(y_true, dtype=tf.float32)))

    #regularization_term = Rademacher Complexity+
    object_function = tf.math.log(
      x=tf.math.add(
        tf.constant(1, dtype=tf.float32),
        tf.math.exp(object_function)))

    #object_function = keras.metrics.Mean()(object_function)
    return object_function

In [6]:
# model subclassing에서 받는 __init__ args 는
# 1. layer 갯수 

#call output은 
# 1. output_list (각 layer 별 output)


class ResAdaNet(keras.Model):
  
  def __init__(self, num_layers, num_classes, name='resadanet'):
    """
    Args:
      num_layers(int): 
      weight(tf.Variable): Trainable. Weight for outputs list.
      num_classes(int):
    """
    super(ResAdaNet, self).__init__()
    self.num_layers = num_layers
    self.residual_block = ResidualBlock(constraint=2)
    self.num_classes = num_classes
    
  def call(self, inputs):
    if self.num_layers == 0: 
      x = self.residual_block(inputs)
      _output = keras.layers.GlobalAveragePooling2D()(x)
      _output = keras.layers.Dense(
        self.num_classes, name="first_iter_output")(_output)
      output_list = [_output]
    else:
      output_list = [
        "output_" + str(x) for x in list(range(1, self.num_layers+1))]
      x = self.residual_block(inputs)
      for i in range(self.num_layers):
        x = self.residual_block(x)
        _output = keras.layers.GlobalAveragePooling2D()(x)
        _output = keras.layers.Dense(
          self.num_classes, name=output_list[i])(_output)
        output_list[i] = _output
        
    """    
    # weight * 
    weighted_pred = tf.tensordot(
      tf.reshape(self.weight, shape=[self.num_layers, 1]),
      tf.reshape(
        output_list, shape=[self.num_layers, -1, self.num_classes]),
      axes=[[0], [0]])
    weighted_pred_mean = tf.math.multiply(
      weighted_pred, 1/self.num_layers)
    """
    outputs = tf.reshape(
      output_list, shape=[self.num_layers, -1, self.num_classes])
    return outputs

In [7]:
# PARAMS
NUM_LAYERS = 10
NUM_CLASSES = 100
EPOCHS = 3
BATCH_SIZE = 30

In [8]:
weight = tf.Variable(
  tf.ones(shape=[NUM_LAYERS, 1], dtype=tf.float32),
  trainable=True, name='Weight')

In [9]:
optimizer = tf.keras.optimizers.Adam()

In [10]:
model = ResAdaNet(
  num_layers=NUM_LAYERS,
  num_classes=NUM_CLASSES)

In [11]:
for epoch in range(EPOCHS):
  print("Start of epoch: {}".format(epoch))
  
  for step, train in enumerate(train_dataset.batch(BATCH_SIZE)):
    with tf.GradientTape() as tape:
      
      trainable_list = model.trainable_weights
      trainable_list.append(weight)
      
      tape.watch(trainable_list)
      
      train_data = train[0]
      #train_label = tf.one_hot(train[1], depth=100)
      train_label = train[1]
      
      pred = model(train_data) # (num_layer, batch, num_classes)
      weighted_pred = tf.tensordot(
        weight, pred, axes=[[0], [0]])
      weighted_pred = tf.reshape(
        weighted_pred, shape=[BATCH_SIZE, NUM_CLASSES])
      #loss = AdaNetLoss()(y_pred=weighted_pred, y_true=train_label)
      loss = keras.losses.SparseCategoricalCrossentropy()(y_pred=weighted_pred, y_true=train_label)
      
    #trainable_list = model.trainable_weights
    #trainable_list.append(weight)
    
    grads = tape.gradient(
      target=loss, sources=trainable_list)
    #print(grads[-1])
    optimizer.apply_gradients(zip(grads, trainable_list))
    
    if step % 100 == 0:
      print("Step{}: Loss {}".format(step, loss))
      print(train[1][:10])
      print("predicted:", tf.argmax(weighted_pred)[:10])

Start of epoch: 0
Step0: Loss 12.73680305480957
tf.Tensor([93 18 11 62 45 52 36 64 95 73], shape=(10,), dtype=int64)
predicted: tf.Tensor([11  2  4 26  4 26  2  2  9 11], shape=(10,), dtype=int64)
Step100: Loss 5.334727764129639
tf.Tensor([79 17 76 78 40 17 48  7 38 94], shape=(10,), dtype=int64)
predicted: tf.Tensor([17 17  0  8  0  0  0  0 17 17], shape=(10,), dtype=int64)
Step200: Loss 4.605170249938965
tf.Tensor([24 25 72 39 26 68 11 28  5 54], shape=(10,), dtype=int64)
predicted: tf.Tensor([0 0 0 0 0 0 0 0 0 0], shape=(10,), dtype=int64)
Step300: Loss 4.605170249938965
tf.Tensor([29 58 48 61 11  3 14 25 68 73], shape=(10,), dtype=int64)
predicted: tf.Tensor([0 0 0 0 0 0 0 0 0 0], shape=(10,), dtype=int64)
Step400: Loss 4.605170249938965
tf.Tensor([56 85 21 14 23 63 49 49 72 40], shape=(10,), dtype=int64)
predicted: tf.Tensor([0 0 0 0 0 0 0 0 0 0], shape=(10,), dtype=int64)


KeyboardInterrupt: 