In [18]:
import glob
import os
import numpy as np

# Relative path that climbs two directory levels from the working directory.
BASE_DIR = os.path.join(os.pardir, os.pardir)

In [19]:
import tensorflow as tf
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Input, TimeDistributed, Activation
from tensorflow.keras.layers import Conv2D, MaxPool2D, Flatten, Dense
from tensorflow.keras.layers import Concatenate
from tensorflow.keras.layers import GRU as RNN

# Number of units in each per-view embedding vector.
EMBEDDING_UNITS = 1024 // 8
# Width of the final per-frame classification layer.
N_CLASSES = 2

# Dtype name used wherever tensors are cast explicitly.
FLOATX = 'float32'
# Make Keras treat the last axis of image tensors as the channel axis.
tf.keras.backend.set_image_data_format('channels_last')

# Keras API
We will use Keras, as bundled with TensorFlow (`tf.keras`), to create our models. Keras offers two APIs for building models: sequential and functional. Since our models will have multiple inputs and outputs, we will use the functional API.

# Embedding TPA

In [20]:
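# build_TPA_embedding is defined in the "Conclusion" section at the end of this
# notebook; run that cell first, otherwise the cells below fail with a NameError.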

Now let's build embeddings for TPA with view-id "dummy":

In [21]:
# Build one embedding branch tagged "dummy" and show its symbolic endpoints.
dummy_input, dummy_output = build_TPA_embedding(view_id="dummy")
print([dummy_input, dummy_output])

[<tf.Tensor 'TPAdummy_input:0' shape=(None, None, 32, 32, 1) dtype=float32>, <tf.Tensor 'TPAdummy_dense/Identity:0' shape=(None, None, 128) dtype=float32>]


Now let's connect input and output to create a Keras model and see its summary.

In [22]:
# Wrap the dummy branch's input/output tensors in a Keras model for inspection.
model = Model(dummy_input, dummy_output)
print(model)
# Model.summary() prints the layer table itself and returns None, so wrapping it
# in print() would only append a stray "None" line to the output.
model.summary()

<tensorflow.python.keras.engine.training.Model object at 0x7f5af016f278>
Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
TPAdummy_input (InputLayer)  [(None, None, 32, 32, 1)] 0         
_________________________________________________________________
TPAdummy_b1c1 (TimeDistribut (None, None, 32, 32, 64)  640       
_________________________________________________________________
TPAdummy_b1c2 (TimeDistribut (None, None, 32, 32, 64)  36928     
_________________________________________________________________
TPAdummy_b1c3 (TimeDistribut (None, None, 32, 32, 64)  36928     
_________________________________________________________________
TPAdummy_b2m1 (TimeDistribut (None, None, 16, 16, 64)  0         
_________________________________________________________________
TPAdummy_b2c1 (TimeDistribut (None, None, 16, 16, 128) 73856     
______________________________________________________

Let's now build 3 embeddings for TPAs: TPA1, TPA2, TPA3:

In [23]:
# One (input, output) embedding pair per view id.
TPA_model_ids = [1, 2, 3]
io_TPAs = [build_TPA_embedding(view_id) for view_id in TPA_model_ids]
io_TPA1, io_TPA2, io_TPA3 = io_TPAs

In [24]:
# Summarise the first branch on its own; the pair is (input tensor, output tensor).
Model(inputs=io_TPA1[0], outputs=io_TPA1[1], name="TPA1").summary()

Model: "TPA1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
TPA1_input (InputLayer)      [(None, None, 32, 32, 1)] 0         
_________________________________________________________________
TPA1_b1c1 (TimeDistributed)  (None, None, 32, 32, 64)  640       
_________________________________________________________________
TPA1_b1c2 (TimeDistributed)  (None, None, 32, 32, 64)  36928     
_________________________________________________________________
TPA1_b1c3 (TimeDistributed)  (None, None, 32, 32, 64)  36928     
_________________________________________________________________
TPA1_b2m1 (TimeDistributed)  (None, None, 16, 16, 64)  0         
_________________________________________________________________
TPA1_b2c1 (TimeDistributed)  (None, None, 16, 16, 128) 73856     
_________________________________________________________________
TPA1_b2c2 (TimeDistributed)  (None, None, 16, 16, 128) 147584 

# Merging TPA embeddings

We will first merge 3 TPA embeddings by concatenation.

In [25]:
# Split the (input, output) pairs into two parallel lists.
i_TPAs = [branch_in for branch_in, _ in io_TPAs]
o_TPAs = [branch_out for _, branch_out in io_TPAs]
# Join the per-view embeddings along the last (feature) axis.
TPA_merged = Concatenate(axis=-1, name='view_concat')(list(o_TPAs))

In [26]:
# Recurrent layer over the merged embeddings; return_sequences=True keeps one
# output vector per time step instead of only the last one.
rnn = RNN(
    units=EMBEDDING_UNITS * len(io_TPAs),
    activation='tanh',
    recurrent_activation='sigmoid',
    return_sequences=True,
    name="TPA_GRU",
)(TPA_merged)

In [27]:
# Per-time-step linear projection to N_CLASSES values, followed by a sigmoid.
TPA_dense = TimeDistributed(
    Dense(units=N_CLASSES, activation=None),
    name="TPA_dense",
)(rnn)
TPA_classification = Activation('sigmoid', name='TPA_classification')(TPA_dense)

In [None]:
# Assemble the full classifier from the three branch inputs to the sigmoid output.
classifier = Model(i_TPAs, TPA_classification, name="Model_3xTPA")
# Model.summary() prints the table itself and returns None; print() around it
# would only add a stray "None" line.
classifier.summary()

# Loss
We will start with the exponential loss (EL) introduced by Jain et al., but keep in mind that the FRAMES and FRAME_SHIFT parameters of our training will vary between models. We will therefore implement a class `Losses_Keras` that is initialized for each training run with the parameters to pass to the loss functions.

In [None]:
FRAMES_N, FRAME_SHIFT_N = 100, 0
# Bind the sequence parameters once, then fetch the loss callable used by compile().
losses = Losses_Keras(frames=FRAMES_N, frame_shift=FRAME_SHIFT_N)
exponential_loss = losses.get_exponential_loss()

In [None]:
type(exponential_loss)

# Compile 

In [None]:
# Attach the custom loss, the Adam optimizer and an accuracy metric.
classifier.compile(
    optimizer="adam",
    loss=exponential_loss,
    metrics=['accuracy'],
)

# Test

In [59]:
FRAMES_N = 50
FRAME_SHIFT_N = 0

BATCH_SIZE = 32
H, W = 32, 32
FRAMES = FRAMES_N
BATCH_SHAPE = (BATCH_SIZE, FRAMES, H, W, 1)
FRAME_BATCH_SHAPE = (BATCH_SIZE, H, W, 1)

# Fixed seed so the smoke test is fed identical data on every run.
SEED = 0
rng = np.random.RandomState(SEED)

# One random float32 batch of shape BATCH_SHAPE per view (three views).
data = [rng.rand(*BATCH_SHAPE).astype(np.float32) for _ in range(3)]
# Random binary label per (sample, frame); the negative channel is its complement,
# so the two channels of y_true always sum to 1.
y_pos = (rng.rand(BATCH_SIZE, FRAMES) > 0.5) * 1
y_neg = 1 - y_pos
y_true = np.stack([y_neg, y_pos], axis=-1)
y_true.shape

(32, 50, 2)

In [60]:
# Rebuild loss, branches and classifier in a single cell so that everything is
# created with the FRAMES_N / FRAME_SHIFT_N values currently in scope.
losses = Losses_Keras(frames=FRAMES_N, frame_shift=FRAME_SHIFT_N)
exponential_loss = losses.get_exponential_loss()
TPA_model_ids = [1, 2, 3]
# Loop variable is `view_id` rather than `id`, which shadows the builtin.
io_TPAs = [build_TPA_embedding(view_id) for view_id in TPA_model_ids]
io_TPA1, io_TPA2, io_TPA3 = io_TPAs
i_TPAs = [x[0] for x in io_TPAs]
o_TPAs = [x[1] for x in io_TPAs]
TPA_merged = Concatenate(name='view_concat', axis=-1)([*o_TPAs])
rnn = RNN(EMBEDDING_UNITS*len(io_TPAs), activation='tanh', recurrent_activation='sigmoid', return_sequences=True, name = "TPA_GRU")(TPA_merged)
TPA_dense = TimeDistributed(Dense(N_CLASSES, activation=None), name="TPA_dense")(rnn)
TPA_classification = Activation(activation='sigmoid', name='TPA_classification')(TPA_dense)
classifier = Model(i_TPAs, TPA_classification, name="Model_3xTPA")
classifier.compile(loss=exponential_loss, optimizer="adam", metrics=['accuracy'])
# Model.summary() prints the table itself and returns None; print() around it
# would only add a stray "None" line.
classifier.summary()

Model: "Model_3xTPA"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
TPA1_input (InputLayer)         [(None, None, 32, 32 0                                            
__________________________________________________________________________________________________
TPA2_input (InputLayer)         [(None, None, 32, 32 0                                            
__________________________________________________________________________________________________
TPA3_input (InputLayer)         [(None, None, 32, 32 0                                            
__________________________________________________________________________________________________
TPA1_b1c1 (TimeDistributed)     (None, None, 32, 32, 640         TPA1_input[0][0]                 
________________________________________________________________________________________

In [61]:
# Smoke test: fit on the random batch to check that model and loss run end to end.
classifier.fit(x=data, y=y_true)

Train on 32 samples


<tensorflow.python.keras.callbacks.History at 0x7f5a4419a860>

In [62]:
print("OK")

OK


# Conclusion

In [56]:
import tensorflow as tf
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Input, TimeDistributed, Activation
from tensorflow.keras.layers import Conv2D, MaxPool2D, Flatten, Dense
from tensorflow.keras.layers import Concatenate
from tensorflow.keras.layers import GRU as RNN

# Number of units in each per-view embedding vector (Dense layer of build_TPA_embedding).
EMBEDDING_UNITS = 1024 // 8
# Width of the final per-frame classification layer.
N_CLASSES = 2

# Dtype name used by the loss functions for explicit casts.
FLOATX = 'float32'
# Make Keras treat the last axis of image tensors as the channel axis.
tf.keras.backend.set_image_data_format('channels_last')

import numpy as np

def build_TPA_embedding(view_id):
    """Build one CNN embedding branch that is applied to every time step.

    Layer order (b=block, c=conv, m=maxpool):
    input > b1c1 > b1c2 > b1c3 > b2m1 > b2c1 > b2c2 > b2c3 > b3m1 > flatten > fc

    Modelled on VGG-16 with dims scaled by 1/7 and only 3 blocks, see
    https://towardsdatascience.com/step-by-step-vgg16-implementation-in-keras-for-beginners-a833c686ae6c
    FUTURE: think about filters -> skipping cncnts.

    Args:
        view_id: Value formatted into every layer name (``TPA<view_id>_<layer>``).

    Returns:
        Tuple ``(embedding_input, embedding_output)``: the symbolic input with
        shape ``(batch, time, 32, 32, 1)`` and the output of the final dense
        layer, which has EMBEDDING_UNITS features per time step.
    """
    def per_step(layer, name_template, tensor):
        # Apply `layer` independently to every time step, named for this view.
        return TimeDistributed(layer, name=name_template.format(view_id))(tensor)

    def conv3x3(n_filters):
        # 3x3 same-padded ReLU convolution, the only conv flavour used here.
        return Conv2D(filters=n_filters, kernel_size=(3, 3), padding="same", activation="relu")

    def halve():
        # 2x2 max-pooling with stride 2 (halves height and width).
        return MaxPool2D(pool_size=(2, 2), strides=(2, 2))

    embedding_input = Input(shape=(None, 32, 32, 1), name='TPA{}_input'.format(view_id))

    # block1: three 64-filter convolutions
    features = embedding_input
    for name_template in ('TPA{}_b1c1', 'TPA{}_b1c2', 'TPA{}_b1c3'):
        features = per_step(conv3x3(64), name_template, features)

    # block2: pool, then three 128-filter convolutions
    features = per_step(halve(), 'TPA{}_b2m1', features)
    for name_template in ('TPA{}_b2c1', 'TPA{}_b2c2', 'TPA{}_b2c3'):
        features = per_step(conv3x3(128), name_template, features)

    # block3: pool only
    features = per_step(halve(), 'TPA{}_b3m1', features)

    # FC (author's note: flatten/fc = 6.125)
    features = per_step(Flatten(), 'TPA{}_flat', features)
    embedding_output = per_step(
        Dense(units=EMBEDDING_UNITS, activation="relu"), 'TPA{}_dense', features
    )
    return embedding_input, embedding_output

class Losses_Keras:
    """Factory for Keras-compatible loss functions tied to one training setup.

    Args:
        frames: Number of frames per training sequence. Sets the decay scale of
            the exponential frame weighting (``k = frames / 5``).
        frame_shift: Frame shift of the training setup. Stored on the instance;
            the exponential loss does not read it.
    """

    def __init__(self, frames, frame_shift):
        self.frames = frames
        self.frame_shift = frame_shift
        # The attribute used to be misspelled; keep the old name as an alias so
        # any code that reads it continues to work.
        self.frame_shfit = frame_shift

    def get_exponential_loss(self, from_logits=False):
        """Return the exponential loss (EL) callable to pass to ``Model.compile``.

        Args:
            from_logits: Default for the returned function's ``from_logits``
                argument. When true, ``y_pred`` is passed through a sigmoid
                before the loss is computed.

        Returns:
            A function ``exponential_loss(y_true, y_pred, from_logits=...)``
            that returns a scalar: the frame-weighted positive log-loss plus the
            unweighted negative log-loss, summed over all elements.
        """
        def exponential_loss(y_true, y_pred, from_logits=from_logits):
            # [B, F, 2], [B, F, 2]
            # TODO ADD JAIN
            # L_p = sigma_t(-exp())
            # L_n = softmax_cross_entropy
            # EL = L_p + L_n
            # https://stackoverflow.com/questions/39192380/tensorflow-one-class-classification
            # https://github.com/keras-team/keras/blob/7a39b6c62d43c25472b2c2476bd2a8983ae4f682/keras/backend/cntk_backend.py#L1065
            if not tf.is_tensor(y_pred):
                y_pred = tf.constant(y_pred, dtype=FLOATX)
            y_true = tf.cast(y_true, y_pred.dtype)
            if from_logits:
                y_pred = tf.keras.activations.sigmoid(y_pred)
            # Keep probabilities away from 0 and 1 so both logs stay finite.
            eps = tf.keras.backend.epsilon()
            y_pred = tf.clip_by_value(y_pred, eps, 1. - eps)
            log_pos = y_true * tf.math.log(y_pred)
            log_neg = (1.0 - y_true) * tf.math.log(1.0 - y_pred)
            # ONLY IF THE ACTION HAPPENS AT THE LAST FRAME
            # The weighting runs over the frame axis, which is axis -2 of the
            # [B, F, 2] tensors (axis -1 is the class axis and always has size 2).
            n_frames = tf.cast(tf.shape(y_true)[-2], FLOATX)
            k = tf.cast(self.frames / 5, FLOATX)
            # XXX 20?
            steps_to_end = n_frames - tf.range(n_frames) - 1
            arg = tf.cast(tf.math.exp(-steps_to_end / k), FLOATX)
            # NOTE(review): exp() is applied a second time below, so the weights
            # lie in (1, e] rather than (0, 1] as in a plain exp(-(T - t) / k)
            # weighting. Kept as-is; confirm whether the double exp is intended.
            frame_weights = tf.expand_dims(tf.math.exp(arg), axis=-1)  # [F, 1]
            # [F, 1] broadcasts against [B, F, 2]: one weight per frame, shared
            # by all samples and both classes.
            positive_loss = -tf.reduce_sum(frame_weights * log_pos)
            negative_loss = -tf.reduce_sum(log_neg)
            total_loss = positive_loss + negative_loss
            return total_loss
        return exponential_loss

In [30]:
a = tf.constant([[0,1],[1,0], [0,1]], tf.int32) 

In [31]:
print(a)

tf.Tensor(
[[0 1]
 [1 0]
 [0 1]], shape=(3, 2), dtype=int32)


In [41]:
# Insert a length-1 axis after the first one: shape (3, 2) -> (3, 1, 2).
a = tf.expand_dims(
    tf.constant([[0, 1], [1, 0], [0, 1]], dtype=tf.int32),
    axis=1,
)

In [42]:
print(a)

tf.Tensor(
[[[0 1]]

 [[1 0]]

 [[0 1]]], shape=(3, 1, 2), dtype=int32)


In [45]:
# Repeat every row 50 times along a new middle axis:
# shape (3, 2) -> (3, 1, 2) -> (3, 50, 2).
a = tf.constant([[0, 1], [1, 0], [0, 1]], dtype=tf.int32)
a = tf.expand_dims(a, axis=1)
a = tf.tile(a, multiples=tf.constant([1, 50, 1], dtype=tf.int32))

In [46]:
print(a)

tf.Tensor(
[[[0 1]
  [0 1]
  [0 1]
  [0 1]
  [0 1]
  [0 1]
  [0 1]
  [0 1]
  [0 1]
  [0 1]
  [0 1]
  [0 1]
  [0 1]
  [0 1]
  [0 1]
  [0 1]
  [0 1]
  [0 1]
  [0 1]
  [0 1]
  [0 1]
  [0 1]
  [0 1]
  [0 1]
  [0 1]
  [0 1]
  [0 1]
  [0 1]
  [0 1]
  [0 1]
  [0 1]
  [0 1]
  [0 1]
  [0 1]
  [0 1]
  [0 1]
  [0 1]
  [0 1]
  [0 1]
  [0 1]
  [0 1]
  [0 1]
  [0 1]
  [0 1]
  [0 1]
  [0 1]
  [0 1]
  [0 1]
  [0 1]
  [0 1]]

 [[1 0]
  [1 0]
  [1 0]
  [1 0]
  [1 0]
  [1 0]
  [1 0]
  [1 0]
  [1 0]
  [1 0]
  [1 0]
  [1 0]
  [1 0]
  [1 0]
  [1 0]
  [1 0]
  [1 0]
  [1 0]
  [1 0]
  [1 0]
  [1 0]
  [1 0]
  [1 0]
  [1 0]
  [1 0]
  [1 0]
  [1 0]
  [1 0]
  [1 0]
  [1 0]
  [1 0]
  [1 0]
  [1 0]
  [1 0]
  [1 0]
  [1 0]
  [1 0]
  [1 0]
  [1 0]
  [1 0]
  [1 0]
  [1 0]
  [1 0]
  [1 0]
  [1 0]
  [1 0]
  [1 0]
  [1 0]
  [1 0]
  [1 0]]

 [[0 1]
  [0 1]
  [0 1]
  [0 1]
  [0 1]
  [0 1]
  [0 1]
  [0 1]
  [0 1]
  [0 1]
  [0 1]
  [0 1]
  [0 1]
  [0 1]
  [0 1]
  [0 1]
  [0 1]
  [0 1]
  [0 1]
  [0 1]
  [0 1]
  [0 1]
  [0 1]
 