In [5]:
import matplotlib.pyplot as plt
import numpy as np
import os
import PIL
import tensorflow as tf

from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential

In [4]:
print(tf.__version__)

2.15.0


In [21]:
physical_devices = tf.config.list_physical_devices('GPU')
try:
  tf.config.experimental.set_memory_growth(physical_devices[0], True)
except:
  # Invalid device or cannot modify virtual devices once initialized.
  pass

has GPU


In [22]:
model_save_folder = "./joint_ae"

In [23]:
import glob
img_folder = "./val2017/"
img_paths = sorted(glob.glob(img_folder+'/*'))
# print(img_paths)
print("Number of imgs in the folder:", len(img_paths))

Number of imgs in the folder: 5


In [24]:
# label
label_path = './data/ImageNetLabels.txt'
with open(label_path, "r", encoding="UTF8") as lbfile:
    labels = lbfile.read().splitlines()

# ground truths
gt_path = './data/caffe_clsloc_validation_ground_truth.txt'
with open(gt_path,"r") as lbfile:
    lines = lbfile.readlines()
    gts = []
    for x in lines:
        gts.append(int(x.split(' ')[1].splitlines()[0]))
# gts = np.array(gts) + 1
gts = np.array(gts)

## Autoencoder

### Regularizer

In [25]:
import tensorflow_addons as tfa

C = 1e-4

def orthogonal_reg(w):  # 1703.01827
  units = w.shape[-1]
  w = tf.reshape(w, (-1, units))
  w = tf.transpose(w) @ w
  
  return (C/2)*tf.linalg.norm(w - tf.eye(units))

### Prepare Autoencoder

In [27]:
img_height, img_width = 224,224

In [29]:
print(img_height, img_width)

encoder_input = layers.Input(shape=(img_height, img_width, 3))

# Encoder

initializer = tf.keras.initializers.Orthogonal()
encoder_x = layers.Conv2D(16, (9, 9), 
                strides=7, 
                activation="relu", 
                padding="same", 
                kernel_initializer=initializer
                )(encoder_input)

encoder_x = layers.Conv2D(10, (3, 3), 
                strides=1,
                activation="relu", 
                padding="same", 
                kernel_initializer=initializer,
                name='encoder_out'
                )(encoder_x)

encoder_model = keras.Model(encoder_input, encoder_x,  name='enocder')
encoder_model.summary()

224 224
Model: "enocder"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_5 (InputLayer)        [(None, 224, 224, 3)]     0         
                                                                 
 conv2d_6 (Conv2D)           (None, 32, 32, 16)        3904      
                                                                 
 encoder_out (Conv2D)        (None, 32, 32, 10)        1450      
                                                                 
Total params: 5354 (20.91 KB)
Trainable params: 5354 (20.91 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [37]:
def dropout_tail(X):
    total_dim = tf.shape(X)[-1]
    tail_len = tf.random.uniform([1,], minval=0, maxval=total_dim, dtype=tf.int32)
    tail_len = tf.math.minimum(tail_len, total_dim)
    head_len = total_dim - tail_len
    mask = tf.concat((tf.ones([tf.shape(X)[1], tf.shape(X)[2], head_len[0]]), tf.zeros((tf.shape(X)[1], tf.shape(X)[2],tail_len[0]))), axis=-1)
    X = X*mask
    return X

In [38]:
# Decoder
_,w,h,c = encoder_model.get_layer('encoder_out').output_shape
decoder_input = layers.Input(shape=(w,h,c))
decoder_x = layers.Conv2DTranspose(64, (9, 9), 
                                strides=7, 
                                activation="relu", 
                                padding="same",
                                name="decoder_input"
                                )(decoder_input)
decoder_x = layers.Conv2D(64, (5, 5), strides=1, activation="relu",padding="same")(decoder_x) + decoder_x

decoder_x = layers.Conv2D(64, (5, 5), strides=1, activation="relu",padding="same")(decoder_x)

decoder_x = layers.Conv2D(64, (5, 5), strides=1, activation="relu",padding="same")(decoder_x) + decoder_x

decoder_out7 = layers.Conv2D(3, (3, 3),  padding="same")(decoder_x)
decoder_out7 = tf.clip_by_value(decoder_out7, clip_value_min=0, clip_value_max=1)  

# Autoencoder
decoder_model = keras.Model(decoder_input, decoder_out7,  name='decoder')
decoder_model.summary()

Model: "decoder"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_8 (InputLayer)        [(None, 32, 32, 10)]         0         []                            
                                                                                                  
 decoder_input (Conv2DTrans  (None, 224, 224, 64)         51904     ['input_8[0][0]']             
 pose)                                                                                            
                                                                                                  
 conv2d_11 (Conv2D)          (None, 224, 224, 64)         102464    ['decoder_input[0][0]']       
                                                                                                  
 tf.__operators__.add_4 (TF  (None, 224, 224, 64)         0         ['conv2d_11[0][0]',     

In [39]:
class CustomTrainStep(tf.keras.Model):
    def __init__(self, n_gradients, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.n_gradients = tf.constant(n_gradients, dtype=tf.int32)
        self.n_acum_step = tf.Variable(0, dtype=tf.int32, trainable=False)
        self.gradient_accumulation = [tf.Variable(tf.zeros_like(v, dtype=tf.float32), trainable=False) for v in self.trainable_variables]

    def train_step(self, data):
        self.n_acum_step.assign_add(1)

        x, y = data
        # Gradient Tape
        with tf.GradientTape() as tape:
            y_pred = self(x, training=True)
            loss = self.compiled_loss(y, y_pred, regularization_losses=self.losses)
        # Calculate batch gradients
        gradients = tape.gradient(loss, self.trainable_variables)
        # Accumulate batch gradients
        for i in range(len(self.gradient_accumulation)):
            self.gradient_accumulation[i].assign_add(gradients[i])
 
        # If n_acum_step reach the n_gradients then we apply accumulated gradients to update the variables otherwise do nothing
        tf.cond(tf.equal(self.n_acum_step, self.n_gradients), self.apply_accu_gradients, lambda: None)

        # update metrics
        self.compiled_metrics.update_state(y, y_pred)
        return {m.name: m.result() for m in self.metrics}

    def apply_accu_gradients(self):
        # apply accumulated gradients
        self.optimizer.apply_gradients(zip(self.gradient_accumulation, self.trainable_variables))

        # reset
        self.n_acum_step.assign(0)
        for i in range(len(self.gradient_accumulation)):
            self.gradient_accumulation[i].assign(tf.zeros_like(self.trainable_variables[i], dtype=tf.float32))


In [40]:
encoder_model.load_weights(model_save_folder + "/best_model_save_encoder/variables/variables")
encoder_model.load_weights(model_save_folder + "/best_model_save_decoder/variables/variables")

<tensorflow.python.checkpoint.checkpoint.CheckpointLoadStatus at 0x2900a3f10>

In [41]:
input_ae = layers.Input(shape=(img_height, img_width, 3))

e_out = encoder_model(input_ae)
e_out = dropout_tail(e_out)
d_out = decoder_model(e_out)

autoencoder_model = keras.Model(inputs=[input_ae], outputs=[d_out], name="ae_model")
# autoencoder_model = CustomTrainStep(n_gradients=5, inputs=[input_ae], outputs=[d_out], name="ae_model")
autoencoder_model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001), loss='mse')
autoencoder_model.summary()



Model: "ae_model"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_9 (InputLayer)        [(None, 224, 224, 3)]        0         []                            
                                                                                                  
 enocder (Functional)        (None, 32, 32, 10)           5354      ['input_9[0][0]']             
                                                                                                  
 tf.compat.v1.shape_10 (TFO  (4,)                         0         ['enocder[2][0]']             
 pLambda)                                                                                         
                                                                                                  
 tf.__operators__.getitem_1  ()                           0         ['tf.compat.v1.shape_10

## Split the Encoder and Decoder

In [42]:
def cut_encoder_decoder(autoencoder_model, layerName = "decoder", verbose=False):
    decoder_input_index = None
#     layerName = layerName
    for idx, layer in enumerate(autoencoder_model.layers):
        if layer.name == layerName:
            decoder_input_index = idx
            break

    if verbose: print("Decoder index:", decoder_input_index,"\n---")

    # encoder = keras.Model(autoencoder_tail_model.get_layer("input_4").input, autoencoder_tail_model.get_layer("encoder").output, name='encoder1')

    encoder = tf.keras.Sequential(name='encoder1')
    for layer in autoencoder_model.layers[:2]:
        encoder.add(layer)

    # encoder.compile()
    if verbose: encoder.summary()

    decoder = tf.keras.Sequential(name='decoder1')
    for layer in autoencoder_model.layers[decoder_input_index:]:
        decoder.add(layer)


    # encoder.compile()
    if verbose: decoder.summary()
    return encoder, decoder

In [43]:
encoder_pnc, decoder_pnc = cut_encoder_decoder(autoencoder_model, verbose=True)

Decoder index: 21 
---
Model: "encoder1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 enocder (Functional)        (None, 32, 32, 10)        5354      
                                                                 
Total params: 5354 (20.91 KB)
Trainable params: 5354 (20.91 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
Model: "decoder1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 decoder (Functional)        (None, 224, 224, 3)       361027    
                                                                 
Total params: 361027 (1.38 MB)
Trainable params: 361027 (1.38 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
