In [1]:
!pip install tensorflow_datasets



In [2]:
import os
# Expose only the CUDA device with index 2 to this process. This cell runs
# before the cell that imports TensorFlow.
os.environ["CUDA_VISIBLE_DEVICES"]="2"

In [3]:
import tensorflow as tf

from tensorflow.keras.applications import vgg16
from tensorflow.keras.applications.vgg16 import preprocess_input
import matplotlib.pyplot as plt
import numpy as np
import tensorflow_datasets as tfds
# Require at least one visible GPU and enable memory growth on the first one.
physical_devices = tf.config.experimental.list_physical_devices('GPU')
assert len(physical_devices) > 0, "Not enough GPU hardware devices available"
tf.config.experimental.set_memory_growth(physical_devices[0], True)
#tf.config.experimental.set_memory_growth(physical_devices[1], True)


# Workaround (per the original author): call preprocess_input once on a dummy
# batch here to fix a bug in the Keras preprocessing scope.
temp = tf.zeros([4, 32, 32, 3])  # dummy batch shaped (4, 32, 32, 3)
preprocess_input(temp)
print("processed")

processed


In [4]:
# Size passed to tf.image.resize for every image.
IMAGE_SIZE = (224, 224)

# Example counts used to derive the number of steps per epoch / evaluation.
TRAIN_SIZE = 50_000
VALIDATION_SIZE = 10_000

# Batching: the per-GPU batch is multiplied by the GPU count (1 here).
BATCH_SIZE_PER_GPU = 96
global_batch_size = BATCH_SIZE_PER_GPU * 1

# Depth of the one-hot label vectors.
NUM_CLASSES = 10

Dataset code

In [5]:
def flip(x: tf.Tensor) -> tf.Tensor:
    """Random mirror augmentation.

    Args:
        x: Image to flip

    Returns:
        The image after a random left/right flip followed by a random
        up/down flip.
    """
    mirrored = tf.image.random_flip_left_right(x)
    return tf.image.random_flip_up_down(mirrored)

def color(x: tf.Tensor) -> tf.Tensor:
    """Random color jitter augmentation.

    Applies, in this order: random hue, random saturation, random brightness
    and random contrast.

    Args:
        x: Image

    Returns:
        Augmented image
    """
    jittered = tf.image.random_hue(x, max_delta=0.08)
    jittered = tf.image.random_saturation(jittered, lower=0.6, upper=1.6)
    jittered = tf.image.random_brightness(jittered, max_delta=0.05)
    return tf.image.random_contrast(jittered, lower=0.7, upper=1.3)

def rotate(x: tf.Tensor) -> tf.Tensor:
    """Random quarter-turn rotation augmentation.

    Args:
        x: Image

    Returns:
        The image passed through `tf.image.rot90` with a randomly drawn
        number of quarter turns.
    """
    # Integer number of quarter turns drawn with minval=0, maxval=4.
    quarter_turns = tf.random.uniform(shape=[], minval=0, maxval=4, dtype=tf.int32)
    return tf.image.rot90(x, quarter_turns)

def zoom(x: tf.Tensor) -> tf.Tensor:
    """Random central-crop ("zoom") augmentation.

    With the cropping branch taken when a uniform draw is >= 0.5, the image is
    cropped to a centered square box at a randomly chosen scale and resized to
    IMAGE_SIZE; otherwise it is returned unchanged.

    Args:
        x: Image

    Returns:
        Augmented image
    """
    # Candidate scales 0.80, 0.81, ... (step 0.01, stop 1.0 excluded): nominally
    # 20 settings, i.e. crops of roughly 20% down to 1%.
    scales = np.arange(0.8, 1.0, 0.01)
    num_scales = len(scales)

    # One centered box per scale; both axes use the same lower/upper bound.
    lower = 0.5 - (0.5 * scales)
    upper = 0.5 + (0.5 * scales)
    boxes = np.stack([lower, lower, upper, upper], axis=1)

    def random_crop(img):
        # Produce every candidate crop of the single image, then keep one at random.
        crops = tf.image.crop_and_resize([img], boxes=boxes, box_indices=np.zeros(num_scales), crop_size=IMAGE_SIZE)
        picked = tf.random.uniform(shape=[], minval=0, maxval=num_scales, dtype=tf.int32)
        return crops[picked]

    # Only apply cropping 50% of the time.
    keep_original = tf.random.uniform(()) < 0.5
    return tf.cond(keep_original, lambda: x, lambda: random_crop(x))

def normalize(input_image):
  """Run the VGG16 `preprocess_input` transform on an image and return the result."""
  normalized = preprocess_input(input_image)
  return normalized

@tf.function
def load_image_train(datapoint):
  """Turn one dataset example into an augmented (image, one-hot label) pair.

  The image is resized to IMAGE_SIZE, then each of flip, color, zoom and
  rotate is applied independently whenever a fresh uniform draw exceeds 0.75,
  and finally the image is passed through `normalize`.

  Args:
    datapoint: Mapping with an "image" and a 'label' entry.

  Returns:
    Tuple of (preprocessed image, label one-hot encoded with depth NUM_CLASSES).
  """
  input_image = tf.image.resize(datapoint["image"], IMAGE_SIZE)
  label = datapoint['label']

  for augment in (flip, color, zoom, rotate):
    should_apply = tf.random.uniform(()) > 0.75
    input_image = tf.cond(should_apply, lambda: augment(input_image), lambda: input_image)

  return normalize(input_image), tf.one_hot(label, depth=NUM_CLASSES)

@tf.function
def load_image_test(datapoint):
  """Turn one dataset example into an (image, one-hot label) pair without augmentation.

  Args:
    datapoint: Mapping with an "image" and a 'label' entry.

  Returns:
    Tuple of (image resized to IMAGE_SIZE and passed through `normalize`,
    label one-hot encoded with depth NUM_CLASSES).
  """
  resized = tf.image.resize(datapoint["image"], IMAGE_SIZE)
  preprocessed = normalize(resized)
  return preprocessed, tf.one_hot(datapoint['label'], depth=NUM_CLASSES)

In [6]:
# Load CIFAR-10 through TensorFlow Datasets; `with_info=True` makes the call
# also return the dataset metadata, bound to `info`.
dataset, info = tfds.load('cifar10', with_info=True)

In [7]:
# Sanity check: take a single training example and print its label.
for example in dataset['train'].take(1):
  print(example["label"])

tf.Tensor(3, shape=(), dtype=int64)


In [8]:
info

tfds.core.DatasetInfo(
    name='cifar10',
    version=1.0.2,
    description='The CIFAR-10 dataset consists of 60000 32x32 colour images in 10 classes, with 6000 images per class. There are 50000 training images and 10000 test images.',
    homepage='https://www.cs.toronto.edu/~kriz/cifar.html',
    features=FeaturesDict({
        'image': Image(shape=(32, 32, 3), dtype=tf.uint8),
        'label': ClassLabel(shape=(), dtype=tf.int64, num_classes=10),
    }),
    total_num_examples=60000,
    splits={
        'test': 10000,
        'train': 50000,
    },
    supervised_keys=('image', 'label'),
    citation="""@TECHREPORT{Krizhevsky09learningmultiple,
        author = {Alex Krizhevsky},
        title = {Learning multiple layers of features from tiny images},
        institution = {},
        year = {2009}
    }""",
    redistribution_info=,
)

Build the upscaled CIFAR-10 training and test datasets

In [9]:
AUTOTUNE = tf.data.experimental.AUTOTUNE

# Training pipeline: augment/preprocess -> shuffle -> batch -> repeat -> prefetch.
train = dataset['train'].map(load_image_train, num_parallel_calls=AUTOTUNE)
train_dataset = (
    train
    .shuffle(buffer_size=1000)
    .batch(global_batch_size)
    .repeat()
    .prefetch(buffer_size=AUTOTUNE)
)

# Test pipeline: preprocess only (no augmentation, no shuffling).
test_dataset = (
    dataset['test']
    .map(load_image_test, num_parallel_calls=AUTOTUNE)
    .batch(global_batch_size)
    .repeat()
    .prefetch(buffer_size=AUTOTUNE)
)


In [10]:
# Make float32 the default Keras float type.
tf.keras.backend.set_floatx('float32')

In [11]:
# Load the saved base model from `base_model_cifar10_vgg16.h5` in the current directory.
model = tf.keras.models.load_model('./base_model_cifar10_vgg16.h5')

In [12]:
# Compile the loaded model with Nesterov-momentum SGD, an 'mse' loss and an accuracy metric.
# NOTE(review): the combined model built in a later cell is compiled with
# 'categorical_crossentropy', so the loss values reported by the two evaluate()
# calls are on different scales; only the accuracy values are comparable.
model.compile(optimizer=tf.optimizers.SGD(learning_rate=.01, momentum=.9, nesterov=True), loss='mse', metrics=['acc'])

In [13]:
# Baseline: evaluate the loaded model on the test pipeline for
# VALIDATION_SIZE // global_batch_size batches.
model.evaluate(test_dataset, steps=VALIDATION_SIZE//global_batch_size)



[0.009073598161474085, 0.93990386]

In [14]:
# Print each layer's index and name; the indices are used below to pick the
# layers whose outputs are extracted.
for i, layer in enumerate(model.layers):
  print(f'{i} layer {layer.name}')

0 layer input_2
1 layer block1_conv1
2 layer block1_conv2
3 layer block1_pool
4 layer block2_conv1
5 layer block2_conv2
6 layer block2_pool
7 layer block3_conv1
8 layer block3_conv2
9 layer block3_conv3
10 layer block3_pool
11 layer block4_conv1
12 layer block4_conv2
13 layer block4_conv3
14 layer block4_pool
15 layer block5_conv1
16 layer block5_conv2
17 layer block5_conv3
18 layer block5_pool
19 layer global_average_pooling2d_2
20 layer dense_2
21 layer predictions


build training output

In [15]:
# Auxiliary model that maps the original input to two intermediate activations:
# the outputs of model.layers[16] and model.layers[17] (listed as block5_conv2
# and block5_conv3 in the layer printout).
get_output = tf.keras.Model(inputs=model.input, outputs=[model.layers[16].output, model.layers[17].output])

generator for training replacement blocks

In [16]:
import math  # no longer used by these classes; kept once in case other cells rely on it


class _LayerActivationSequence(tf.keras.utils.Sequence):
    """Shared implementation of LayerBatch and LayerTest.

    Wraps an (assumed endlessly repeating) dataset and, for every requested
    batch, runs `input_model` on the next dataset element and returns the
    model's two outputs as an (X, y) pair.

    Args:
        input_model: Callable model returning exactly two outputs.
        dataset: Iterable dataset; its elements are either image batches or
            (images, labels) tuples/lists.
    """

    def __init__(self, input_model, dataset):
        self.input_model = input_model
        self.dataset = iter(dataset)

    def __getitem__(self, index):
        # `index` is ignored: batches are consumed sequentially from the
        # dataset iterator, so this sequence does not support random access.
        batch = next(self.dataset)
        # Feed only the images to the model. Passing the whole
        # (images, labels) tuple to a single-input model relies on Keras
        # silently discarding the extra element.
        images = batch[0] if isinstance(batch, (tuple, list)) else batch
        X, y = self.input_model(images)
        return X, y


class LayerBatch(_LayerActivationSequence):
    """Sequence of training pairs; one epoch is TRAIN_SIZE // global_batch_size batches."""

    def __len__(self):
        # Floor division, matching the `steps_per_epoch` passed to fit().
        return TRAIN_SIZE // global_batch_size


class LayerTest(_LayerActivationSequence):
    """Sequence of validation pairs; one pass is VALIDATION_SIZE // global_batch_size batches."""

    def __len__(self):
        # Floor division, matching the `validation_steps` passed to fit().
        return VALIDATION_SIZE // global_batch_size

In [17]:
def build_replacement(get_output):
    """Build a two-stage separable-convolution block as a Keras model.

    The block's input shape is taken from `get_output.output[0]` (batch
    dimension dropped) and its final channel count from the last dimension of
    `get_output.output[1]`. The first stage uses a quarter of that channel
    count; its result is concatenated with the block input before the second
    stage. Layer names are numbered with the function attribute
    `build_replacement.counter`, which is incremented once per stage.

    Args:
        get_output: Model whose `output` has at least two entries.

    Returns:
        A `tf.keras.Model` mapping the block input to the second stage's output.
    """
    out_channels = get_output.output[1].shape[-1]

    def sep_conv_stage(tensor, filters):
        # SeparableConv2D -> BatchNormalization -> ReLU, all named with the current counter.
        idx = build_replacement.counter
        tensor = tf.keras.layers.SeparableConv2D(name=f'sep_conv_{idx}',
                                                 filters=filters,
                                                 kernel_size=(3, 3),
                                                 padding='Same')(tensor)
        tensor = tf.keras.layers.BatchNormalization(name=f'batch_norm_{idx}')(tensor)
        tensor = tf.keras.layers.ReLU(name=f'relu_{idx}')(tensor)
        build_replacement.counter += 1
        return tensor

    inputs = tf.keras.Input(shape=get_output.output[0].shape[1::])
    squeezed = sep_conv_stage(inputs, out_channels // 4)
    merged = tf.keras.layers.concatenate([inputs, squeezed])
    expanded = sep_conv_stage(merged, out_channels)

    return tf.keras.Model(inputs=inputs, outputs=expanded)


# Running index used to give the generated layers unique names.
build_replacement.counter = 0

In [18]:
# Build the replacement block, and wrap the train/test pipelines so that every
# batch is converted by `get_output` into a pair of intermediate activations.
replacement_layers = build_replacement(get_output)
replacement_len = len(replacement_layers.layers)
layer_train_gen = LayerBatch(get_output, train_dataset)
layer_test_gen = LayerTest(get_output, test_dataset)



In [19]:
replacement_layers.summary()

Model: "model_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, None, None,  0                                            
__________________________________________________________________________________________________
sep_conv_0 (SeparableConv2D)    (None, None, None, 1 70272       input_1[0][0]                    
__________________________________________________________________________________________________
batch_norm_0 (BatchNormalizatio (None, None, None, 1 512         sep_conv_0[0][0]                 
__________________________________________________________________________________________________
relu_0 (ReLU)                   (None, None, None, 1 0           batch_norm_0[0][0]               
____________________________________________________________________________________________

In [20]:

# Compile the replacement block with a mean-squared-error loss and
# Nesterov-momentum SGD.
MSE = tf.losses.MeanSquaredError()

optimizer=tf.keras.optimizers.SGD(.1, momentum=.9, nesterov=True)
replacement_layers.compile(loss=MSE, optimizer=optimizer)

In [21]:
# Callbacks (both use their default monitored quantity):
#   - reduce_lr multiplies the learning rate by 0.1 after 5 epochs without
#     improvement, down to a floor of 1e-4;
#   - early_stop ends training after 15 epochs without an improvement of at
#     least 0.001 and restores the best weights.
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(patience=5, min_lr=.0001, factor=.1, verbose=1)
early_stop = tf.keras.callbacks.EarlyStopping(patience=15, min_delta=.001, restore_best_weights=True, verbose=1)
# Fit the replacement block on the activation pairs produced by the generators.
# Note that `layer_test_gen` is built from the CIFAR-10 'test' split, so that
# split drives both callbacks.
history = replacement_layers.fit(x=layer_train_gen,
                               epochs=100,
                               steps_per_epoch=TRAIN_SIZE // global_batch_size,
                               validation_data=layer_test_gen,
                               shuffle=False,
                               callbacks=[reduce_lr, early_stop],
                               validation_steps=VALIDATION_SIZE // global_batch_size,
                               verbose=1)

  ...
    to  
  ['...']


  ...
    to  
  ['...']


  ...
    to  
  ['...']


  ...
    to  
  ['...']


Train for 520 steps, validate for 104 steps
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epo

In [35]:
history.history

{'loss': [10.193644351170361,
  6.364497742286095,
  5.37923432543095,
  4.706355944318854,
  4.191736459655615,
  3.770851502551525,
  3.445032045450605,
  3.1920309036960584,
  2.998863816681715,
  2.823746967789606,
  2.6934787549816597,
  2.574781317403585,
  2.4835431972502438,
  2.385934670562353,
  2.3069602714665773,
  2.2342140129568793,
  2.172081383652854,
  2.1152277866515186,
  2.0658672317628413,
  2.012770573043334,
  1.961366357349287,
  1.9240873448548312,
  1.8820963194065272,
  1.8497439897683385,
  1.8033256279653982,
  1.7746948584336124,
  1.7426526348879032,
  1.7208951885557282,
  1.6908925383012239,
  1.6624276414824128,
  1.6343427203182381,
  1.6117270438477718,
  1.5908818035486532,
  1.5690961963054881,
  1.5447566268076383,
  1.5259250807204128,
  1.4984132422205345,
  1.4872284041035058,
  1.4669781968011883,
  1.453260058009804,
  1.4398650827221322,
  1.4226717244894622,
  1.4042315526282263,
  1.3875509101483026,
  1.3763052924927628,
  1.3607990508203

In [23]:

# Top of the model: the original network truncated at layers[16].
# NOTE: this rebinds `get_output`, which earlier exposed two outputs
# (layers[16] and layers[17]). Cells that expect the two-output version must
# not be re-run after this cell without re-creating it first.
get_output = tf.keras.Model(inputs=model.input, outputs=[model.layers[16].output])
# add in replacement layers
print('building middle of model with replacement layers')
new_joint = tf.keras.Model(inputs=get_output.input, outputs=replacement_layers(get_output.output))

# build bottom of model: reuse every original layer that follows layers[17]
bottom_half = tf.keras.Sequential()
for layer in model.layers[17 + 1::]:
    bottom_half.add(layer)

print('building bottom of model')
bottom_half.build(input_shape=new_joint.output.shape)
bottom_half.summary()
print('combining model')
combined = tf.keras.Model(inputs=new_joint.input, outputs=bottom_half(new_joint.output))

# Freeze the last layer of the combined model (presumably the `bottom_half`
# Sequential holding the reused original layers).
combined.layers[-1].trainable=False
# `learning_rate` is the supported argument name; `lr` is only a deprecated alias.
opt = tf.keras.optimizers.RMSprop(learning_rate=0.00005, decay=1e-6)
combined.compile(loss='categorical_crossentropy',
          optimizer=opt,
          metrics=['accuracy'])

building middle of model with replacement layers


To change all layers to have dtype float16 by default, call `tf.keras.backend.set_floatx('float16')`. To change just this layer, pass dtype='float16' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.





To change all layers to have dtype float16 by default, call `tf.keras.backend.set_floatx('float16')`. To change just this layer, pass dtype='float16' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.





To change all layers to have dtype float16 by default, call `tf.keras.backend.set_floatx('float16')`. To change just this layer, pass dtype='float16' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.





To change all layers to have dtype float16 by default, call `tf.keras.backend.set_floatx('float16')`. To change just this layer, pass dtype='float16' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



building bottom of model
Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
block5_pool (MaxPooling2D)   (None, None, None, 512)   0         
_________________________________________________________________
global_average_pooling2d_2 ( (None, 512)               0         
_________________________________________________________________
dense_2 (Dense)              (None, 10)                5130      
_________________________________________________________________
predictions (Activation)     (None, 10)                0         
Total params: 5,130
Trainable params: 5,130
Non-trainable params: 0
_________________________________________________________________
combining model


I found this code on Stack Overflow, but it has some problems, even though I think it is mostly on the right track.

In [24]:
# Evaluate the combined model (original layers plus the replacement block) on
# the test pipeline for VALIDATION_SIZE // global_batch_size batches.
combined.evaluate(test_dataset, steps=VALIDATION_SIZE // global_batch_size)



[0.19514808212765133, 0.9415064]

## 100 epochs

In [27]:
# Re-evaluate the combined model on the test pipeline for
# VALIDATION_SIZE // global_batch_size batches.
combined.evaluate(test_dataset, steps=VALIDATION_SIZE // global_batch_size)



[0.1972726293338033, 0.93840146]

In [28]:
# Accuracy drop: base model (0.9399) minus combined model in the "100 epoch" run (0.9384).
0.9399 - 0.9384

0.0014999999999999458

In [25]:
combined.summary()

Model: "model_4"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         [(None, None, None, 3)]   0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, None, None, 64)    1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, None, None, 64)    36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, None, None, 64)    0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, None, None, 128)   73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, None, None, 128)   147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, None, None, 128)   0   