In [1]:
# Load the TensorBoard notebook extension.
%load_ext tensorboard
from datetime import datetime
from packaging import version
import tensorboard

import tqdm
import random
import pathlib
import itertools
import collections

import os
import cv2
import numpy as np
import pandas as pd
from sklearn.metrics import multilabel_confusion_matrix, accuracy_score
#import remotezip as rz

import tensorflow as tf
import tensorflow_hub as hub

# Some modules to display an animation using imageio.
import imageio
from IPython import display
from urllib import request
import shutil
# pip install git+https://github.com/tensorflow/docs
#from tensorflow_docs.vis import embed

In [2]:
metadata = pd.read_csv('metadata.csv')
metadata.head()

Unnamed: 0,bbox,fps,frame_end,frame_start,instance_id,signer_id,source,split,url,variation_id,video_id,gloss
0,"[385, 37, 885, 720]",25,-1,1,0,118,aslbrick,train,http://aslbricks.org/New/ASL-Videos/book.mp4,0,69241,book
1,"[190, 25, 489, 370]",25,-1,1,1,90,aslsignbank,train,https://aslsignbank.haskins.yale.edu/dictionar...,0,65225,book
2,"[262, 1, 652, 480]",25,-1,1,2,110,valencia-asl,train,https://www.youtube.com/watch?v=0UsjUE-TXns,0,68011,book
3,"[123, 19, 516, 358]",25,60,1,3,113,lillybauer,train,https://www.youtube.com/watch?v=1QOYOZ3g-aY,0,68208,book
4,"[95, 0, 1180, 720]",25,-1,1,4,109,valencia-asl,train,https://www.youtube.com/watch?v=aGtIHKEdCds,0,68012,book


In [3]:
DATA_PATH = os.path.join('experiment_subset')
actions = np.array(['hello', 'love', 'thank you'])
splits = metadata['split'].unique()

In [4]:
def frames_from_video_file(video_path, n_frames, output_size = (172,172)):
  """ Creates frames from each video file present for each category.

    Args:
      video_path: File path to the video.
      n_frames: Number of frames to be created per video file.
      output_size: Pixel size of the output frame image.

    Return:
      An NumPy array of frames in the shape of (n_frames, height, width, channels).
  """
  # Read each frame by frame
  result = []
  src = cv2.VideoCapture(str(video_path))  

  video_length = src.get(cv2.CAP_PROP_FRAME_COUNT)

  # If the number of frames wanted is greater than the length of the video, then start from beginning
  if n_frames > video_length:
    start = 0
  else:
    # Otherwise, start at another random point within the video
    max_start = video_length - n_frames
    start = random.randint(0, max_start)

  src.set(cv2.CAP_PROP_POS_FRAMES, start)

  for _ in range(n_frames):
    ret, frame = src.read()
    if ret:
      frame = tf.image.convert_image_dtype(frame, tf.float32)
      frame = tf.image.resize_with_pad(frame, *output_size)
      result.append(frame)
    else:
      result.append(np.zeros_like(result[0]))
  src.release()
  # Ensure that the color scheme is not inverted
  result = np.array(result)[..., [2, 1, 0]]

  return result

In [5]:
subset_paths = {'train': pathlib.Path(os.path.join(DATA_PATH, 'train')),
                'val': pathlib.Path(os.path.join(DATA_PATH, 'val')),
                'test': pathlib.Path(os.path.join(DATA_PATH, 'test'))}
print(subset_paths)

{'train': PosixPath('experiment_subset/train'), 'val': PosixPath('experiment_subset/val'), 'test': PosixPath('experiment_subset/test')}


In [6]:
class FrameGenerator:
  def __init__(self, path, n_frames):
    """ Returns a set of frames with their associated label. 

      Args:
        path: Video file paths.
        classes: List of labels for classification.
    """
    self.path = path
    self.n_frames = n_frames
    self.class_names = sorted(set(p.name for p in self.path.iterdir() if p.is_dir()))
    self.class_ids_for_name = dict((name, idx) for idx, name in enumerate(self.class_names))

  def get_files_and_class_names(self):
    video_paths = list(self.path.glob('*/*.mp4'))
    classes = [p.parent.name for p in video_paths] 
    return video_paths, classes

  def __call__(self):
    video_paths, classes = self.get_files_and_class_names()

    pairs = list(zip(video_paths, classes))

    random.shuffle(pairs)

    for path, name in pairs:
      video_frames = frames_from_video_file(path, self.n_frames) 
      #label = tf.reshape(tf.one_hot(self.class_ids_for_name[name], 3), (1,3)) # Encode labels
      label = tf.one_hot(self.class_ids_for_name[name], 3)
      #label = self.class_ids_for_name[name]
      yield video_frames, label

In [7]:
fg = FrameGenerator(subset_paths['train'], 8)

frames, label = next(fg())

print(f"Shape: {frames.shape}")
print(f"Label: {label}")
print(tf.cast(label, tf.int32))

Shape: (8, 172, 172, 3)
Label: [0. 0. 1.]
tf.Tensor([0 0 1], shape=(3,), dtype=int32)


In [8]:
# Create the training set
output_signature = (tf.TensorSpec(shape = (None, None, None, 3), dtype = tf.float32),
                    tf.TensorSpec(shape = (None,), dtype = tf.uint8))
train_ds = tf.data.Dataset.from_generator(FrameGenerator(subset_paths['train'], 8),
                                          output_signature = output_signature)

# Create the validation set
val_ds = tf.data.Dataset.from_generator(FrameGenerator(subset_paths['val'], 8),
                                        output_signature = output_signature)

AUTOTUNE = tf.data.AUTOTUNE

train_ds = train_ds.cache().shuffle(1000).prefetch(buffer_size = AUTOTUNE)
val_ds = val_ds.cache().shuffle(1000).prefetch(buffer_size = AUTOTUNE)

train_ds = train_ds.batch(1)
val_ds = val_ds.batch(1)

train_frames, train_labels = next(iter(train_ds))
print(f'Shape of training set of frames: {train_frames.shape}')
print(f'Shape of training labels: {train_labels.shape}')

val_frames, val_labels = next(iter(val_ds))
print(f'Shape of validation set of frames: {val_frames.shape}')
print(f'Shape of validation labels: {val_labels.shape}')

2022-10-18 22:22:36.372491: W tensorflow/core/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz


Shape of training set of frames: (1, 8, 172, 172, 3)
Shape of training labels: (1, 3)
Shape of validation set of frames: (1, 8, 172, 172, 3)
Shape of validation labels: (1, 3)


In [9]:
# # Create the training set
# output_signature = (tf.TensorSpec(shape = (None, None, None, 3), dtype = tf.float32),
#                     tf.TensorSpec(shape = (), dtype = tf.uint8))
# train_ds = tf.data.Dataset.from_generator(FrameGenerator(subset_paths['train'], 8),
#                                           output_signature = output_signature)

# # Create the validation set
# val_ds = tf.data.Dataset.from_generator(FrameGenerator(subset_paths['val'], 8),
#                                         output_signature = output_signature)

In [10]:
# #list(train_ds.as_numpy_iterator())
# for i, j in train_ds:
#     print(j)

In [11]:
# hub_url = "https://tfhub.dev/tensorflow/movinet/a0/stream/kinetics-600/classification/3"

# encoder = hub.KerasLayer(hub_url, trainable=False)

# # Define the image (video) input
# image_input = tf.keras.layers.Input(
#     shape=[None, None, None, 3],
#     dtype=tf.float32,
#     name='image')

# # Define the state inputs, which is a dict that maps state names to tensors.
# init_states_fn = encoder.resolved_object.signatures['init_states']
# state_shapes = {
#     name: ([s if s > 0 else None for s in state.shape], state.dtype)
#     for name, state in init_states_fn(tf.constant([0, 0, 0, 0, 3])).items()
# }
# states_input = {
#     name: tf.keras.Input(shape[1:], dtype=dtype, name=name)
#     for name, (shape, dtype) in state_shapes.items()
# }

# # The inputs to the model are the states and the video
# inputs = {**states_input, 'image': image_input}

# #x = layers.Dense(3, activation="relu")(x)
# #out = tf.keras.layers.Dense(3, activation='softmax')(inputs)

# outputs = encoder(inputs)

# #concatted = tf.keras.layers.Concatenate()(outputs)

# #out = tf.keras.layers.Dense(3, activation='softmax')(outputs)

# base_model = tf.keras.Model(inputs, outputs, name='movinet')

# #model.layers[-1].trainable = True

In [46]:
from official.projects.movinet.modeling import movinet
from official.projects.movinet.modeling import movinet_model

model_id = 'a0'

tf.keras.backend.clear_session()

# Create backbone and model.
use_positional_encoding = model_id in {'a3', 'a4', 'a5'}
backbone = movinet.Movinet(
    model_id=model_id,
    causal=True,
    conv_type='2plus1d',
    se_type='2plus3d',
    activation='hard_swish',
    gating_activation='hard_sigmoid',
    use_positional_encoding=use_positional_encoding,
    use_external_states=True,
)

# # Create a movinet classifier using this backbone.
# model = movinet_model.MovinetClassifier(
# 	backbone,
# 	num_classes=600,
# 	output_states=True)

model = movinet_model.MovinetClassifier(
    backbone=backbone,
    num_classes=3,
    output_states=True)


movinet_hub_url = f'https://tfhub.dev/tensorflow/movinet/{model_id}/stream/kinetics-600/classification/3'

movinet_hub_model = hub.KerasLayer(movinet_hub_url, trainable=True)


# Input layer for the frame sequence
image_input = tf.keras.layers.Input(
    shape=[None, None, None, 3],
    dtype=tf.float32,
    name='image')

# Input layers for the different model states.
init_states_fn = movinet_hub_model.resolved_object.signatures['init_states']


state_shapes = {
    name: ([s if s > 0 else None for s in state.shape], state.dtype)
    for name, state in init_states_fn(tf.constant([0, 0, 0, 0, 3])).items()
}

states_input = {
    name: tf.keras.Input(shape[1:], dtype=dtype, name=name)
    for name, (shape, dtype) in state_shapes.items()
}

# Wrap the Movinet model in a Keras model so that it can be finetuned.

inputs = {**states_input, 'image': image_input}

outputs = model(inputs)

In [61]:
test_model = movinet_model.MovinetClassifier(
    backbone=backbone,
    num_classes=3,
    output_states=True)

In [66]:
backbone

<official.projects.movinet.modeling.movinet.Movinet at 0x337affa90>

In [47]:
# This custom training step ignores the updated states during training as they are only important during inference.
class CustomModel(tf.keras.Model):
    def train_step(self, data):
        # Unpack the data. Its structure depends on your model and
        # on what you pass to `fit()`.
        if len(data) == 3:
            x, y, sample_weight = data
        else:
            sample_weight = None
            x, y = data
            print(y)
            #print(x)
        with tf.GradientTape() as tape:
            pred, states = self(x, training=True)  # Forward pass
            print(pred)

            # Compute the loss value
            # (the loss function is configured in `compile()`)
            loss = self.compiled_loss(y, pred, regularization_losses=self.losses, sample_weight=sample_weight)

        # Compute gradients
        trainable_vars = self.trainable_variables
        gradients = tape.gradient(loss, trainable_vars)
        
        # Update weights
        self.optimizer.apply_gradients(zip(gradients, trainable_vars))
        
        # Update metrics (includes the metric that tracks the loss)
        self.compiled_metrics.update_state(y, pred, sample_weight=sample_weight)
        
        # Return a dict mapping metric names to current value
        return {m.name: m.result() for m in self.metrics}

In [48]:
model = CustomModel(inputs, outputs, name='movinet')

In [49]:
# model = tf.keras.Model(inputs, outputs, name='movinet')

In [50]:
for layer in model.layers[:-1]:
    layer.trainable = False
model.layers[-1].trainable = True

In [51]:
init_states = init_states_fn(tf.shape(tf.ones([1, 8, 172, 172, 3])))

def add_states(video, label, stream_states=init_states):
    return ({**stream_states, "image": video} , label)

train = train_ds.map(add_states)
val = val_ds.map(add_states)

In [None]:
# hub_url = "https://tfhub.dev/tensorflow/movinet/a3/base/kinetics-600/classification/3"

# encoder = hub.KerasLayer(hub_url, trainable=False)

# inputs = tf.keras.layers.Input(
#     shape=[None, None, None, 3],
#     dtype=tf.float32,
#     name='image')

# # [batch_size, 600]
# outputs = encoder(dict(image=inputs))

# base_model = tf.keras.Model(inputs, outputs, name='movinet')

In [None]:
# model = tf.keras.Sequential([
#     base_model,
#     tf.keras.layers.Concatenate(128),
#     #tf.keras.layers.Dense(64, activation='relu'),
#     #tf.keras.layers.Dense(3, activation='softmax'),
    
# ], name="devins_movinet")

In [None]:
# model.compile(
#     optimizer=tf.keras.optimizers.Adam(),
#     #loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
#     loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True, label_smoothing=0.1),
#     metrics=['accuracy']
# )

In [None]:
# init_states = init_states_fn(tf.shape(tf.ones([1, 8, 172, 172, 3])))

# def add_states(video, label, stream_states=init_states):
#     return {**stream_states, "image": video} , label

# train = train_ds.map(add_states)

In [None]:
# model.fit(train)

In [None]:
# from official.projects.movinet.modeling import movinet
# from official.projects.movinet.modeling import movinet_model

# model_id = 'a0'

# tf.keras.backend.clear_session()

# # Create backbone and model.
# use_positional_encoding = model_id in {'a3', 'a4', 'a5'}
# backbone = movinet.Movinet(
#     model_id=model_id,
#     causal=True,
#     conv_type='2plus1d',
#     se_type='2plus3d',
#     activation='hard_swish',
#     gating_activation='hard_sigmoid',
#     use_positional_encoding=use_positional_encoding,
#     use_external_states=True,
# )

# # Create a movinet classifier using this backbone.
# model = movinet_model.MovinetClassifier(
#     backbone,
#     num_classes=600,
#     output_states=True
# )

# model.build([1, 1, 1, 1, 3])

# # Load pretrained weights
# !wget https://storage.googleapis.com/tf_model_garden/vision/movinet/movinet_a0_base.tar.gz -O movinet_a0_base.tar.gz -q
# !tar -xvf movinet_a0_base.tar.gz

# checkpoint_dir = 'movinet_a0_stream'
# checkpoint_path = tf.train.latest_checkpoint(checkpoint_dir)
# checkpoint = tf.train.Checkpoint(model=model)
# status = checkpoint.restore(checkpoint_path)
# #status.assert_existing_objects_matched()


In [None]:
# num_classes = 3
# batch_size = 1
# num_frames = 8
# resolution = 172
# num_channels = 3

# def build_classifier(backbone, num_classes, freeze_backbone=False, stream=True):
#     """Builds a classifier on top of a backbone model."""
#     model = movinet_model.MovinetClassifier(
#         backbone=backbone,
#         num_classes=num_classes,
#         output_states=stream
#     )
#     model.build([batch_size, num_frames, resolution, resolution, num_channels])

#     if freeze_backbone:
#         for layer in model.layers[:-1]:
#             layer.trainable = False
#         model.layers[-1].trainable = True

#     return model

# model = build_classifier(backbone, num_classes, freeze_backbone=True)

In [None]:
# init_states = model.init_states(tf.shape(tf.ones([8, 10, 172, 172, 3])))

# def add_states(video, label, stream_states=init_states):
#     return ({**stream_states, "image": video} , label)

# train = train_ds.map(add_states)
# val = val_ds.map(add_states)

In [52]:
model.summary()

Model: "movinet"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 image (InputLayer)             [(None, None, None,  0           []                               
                                 None, 3)]                                                        
                                                                                                  
 state/b0/l0/pool_buffer (Input  [(None, 1, 1, 1, 24  0          []                               
 Layer)                         )]                                                                
                                                                                                  
 state/b0/l0/pool_frame_count (  [(None,)]           0           []                               
 InputLayer)                                                                                

                                                                                                  
 state/b3/l2/pool_buffer (Input  [(None, 1, 1, 1, 18  0          []                               
 Layer)                         4)]                                                               
                                                                                                  
 state/b3/l2/pool_frame_count (  [(None,)]           0           []                               
 InputLayer)                                                                                      
                                                                                                  
 state/b3/l2/stream_buffer (Inp  [(None, 2, None, No  0          []                               
 utLayer)                       ne, 184)]                                                         
                                                                                                  
 state/b3/

                                r2_stream_buffer':               [0]',                            
                                (None, None, None,                'state/b2/l2/stream_buffer[0][0]
                                None, 80),                       ',                               
                                 'state_block1_laye               'state/b3/l0/pool_buffer[0][0]',
                                r2_pool_buffer': (N               'state/b3/l0/pool_frame_count[0]
                                one, None, 1, 1, 80              [0]',                            
                                ),                                'state/b3/l0/stream_buffer[0][0]
                                 'state_block1_laye              ',                               
                                r2_pool_frame_count               'state/b3/l1/pool_buffer[0][0]',
                                ': (None,),                       'state/b3/l1/pool_frame_count[0]
          

                                4),                                                               
                                 'state_block3_laye                                               
                                r3_pool_frame_count                                               
                                ': (None,),                                                       
                                 'state_block4_laye                                               
                                r0_stream_buffer':                                                
                                (None, None, None,                                                
                                None, 384),                                                       
                                 'state_block4_laye                                               
                                r0_pool_buffer': (N                                               
          

In [53]:
num_epochs = 3

train_steps = 10#len(train_dataset_df) // batch_size
total_train_steps = train_steps * num_epochs
test_steps = 1#(len(valid_dataset_df) // batch_size) or 1

loss_obj = tf.keras.losses.CategoricalCrossentropy(
    from_logits=True,
    label_smoothing=0.1
)

metrics = [
    tf.keras.metrics.TopKCategoricalAccuracy(
        k=1, name='top_1', dtype=tf.float32)#,
    #tf.keras.metrics.TopKCategoricalAccuracy(
        #k=5, name='top_5', dtype=tf.float32),
]

initial_learning_rate = 0.01
learning_rate = tf.keras.optimizers.schedules.CosineDecay(
    initial_learning_rate, decay_steps=total_train_steps,
)
optimizer = tf.keras.optimizers.RMSprop(
    learning_rate, rho=0.9, momentum=0.9, epsilon=1.0, clipnorm=1.0)

model.compile(loss=loss_obj, optimizer=optimizer, metrics=metrics)

checkpoint_filepath = "/movinet_checkpoints_stream"

model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_filepath,
    save_weights_only=True,
    monitor='val_top_1',
    mode='max',
    save_best_only=True
)

callbacks = [
    tf.keras.callbacks.TensorBoard()#,
    #model_checkpoint_callback
]

In [54]:
train_dataset = train
valid_dataset = val

#results = model_wrapped.fit(
results = model.fit(
#results = model_movinet.fit(
    train_dataset,
    validation_data=valid_dataset,
    epochs=20,
    #steps_per_epoch=train_steps,
    validation_steps=test_steps,
    callbacks=callbacks,
    validation_freq=1,
    verbose=1
)

Epoch 1/20
Tensor("IteratorGetNext:44", shape=(None, None), dtype=uint8)


ValueError: in user code:

    File "/Users/dev/Sign-Language-Translator/env/lib/python3.8/site-packages/keras/engine/training.py", line 1160, in train_function  *
        return step_function(self, iterator)
    File "/Users/dev/Sign-Language-Translator/env/lib/python3.8/site-packages/keras/engine/training.py", line 1146, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "/Users/dev/Sign-Language-Translator/env/lib/python3.8/site-packages/keras/engine/training.py", line 1135, in run_step  **
        outputs = model.train_step(data)
    File "/var/folders/sz/byw7vv3s0yb0nppz6svp8lq80000gn/T/ipykernel_20367/2123478485.py", line 14, in train_step
        pred, states = self(x, training=True)  # Forward pass
    File "/Users/dev/Sign-Language-Translator/env/lib/python3.8/site-packages/keras/utils/traceback_utils.py", line 70, in error_handler
        raise e.with_traceback(filtered_tb) from None
    File "/Users/dev/Sign-Language-Translator/env/lib/python3.8/site-packages/keras/engine/input_spec.py", line 197, in assert_input_compatibility
        raise ValueError(

    ValueError: Exception encountered when calling layer "movinet" "                 f"(type CustomModel).
    
    Missing data for input "state_block0_layer0_pool_buffer". You passed a data dictionary with keys ['state/b0/l0/pool_buffer', 'state/b0/l0/pool_frame_count', 'state/b1/l0/pool_buffer', 'state/b1/l0/pool_frame_count', 'state/b1/l0/stream_buffer', 'state/b1/l1/pool_buffer', 'state/b1/l1/pool_frame_count', 'state/b1/l1/stream_buffer', 'state/b1/l2/pool_buffer', 'state/b1/l2/pool_frame_count', 'state/b1/l2/stream_buffer', 'state/b2/l0/pool_buffer', 'state/b2/l0/pool_frame_count', 'state/b2/l0/stream_buffer', 'state/b2/l1/pool_buffer', 'state/b2/l1/pool_frame_count', 'state/b2/l1/stream_buffer', 'state/b2/l2/pool_buffer', 'state/b2/l2/pool_frame_count', 'state/b2/l2/stream_buffer', 'state/b3/l0/pool_buffer', 'state/b3/l0/pool_frame_count', 'state/b3/l0/stream_buffer', 'state/b3/l1/pool_buffer', 'state/b3/l1/pool_frame_count', 'state/b3/l1/stream_buffer', 'state/b3/l2/pool_buffer', 'state/b3/l2/pool_frame_count', 'state/b3/l2/stream_buffer', 'state/b3/l3/pool_buffer', 'state/b3/l3/pool_frame_count', 'state/b3/l3/stream_buffer', 'state/b4/l0/pool_buffer', 'state/b4/l0/pool_frame_count', 'state/b4/l0/stream_buffer', 'state/b4/l1/pool_buffer', 'state/b4/l1/pool_frame_count', 'state/b4/l2/pool_buffer', 'state/b4/l2/pool_frame_count', 'state/b4/l3/pool_buffer', 'state/b4/l3/pool_frame_count', 'state/head/pool_buffer', 'state/head/pool_frame_count', 'image']. Expected the following keys: ['image', 'state_block0_layer0_pool_buffer', 'state_block0_layer0_pool_frame_count', 'state_block1_layer0_pool_buffer', 'state_block1_layer0_pool_frame_count', 'state_block1_layer0_stream_buffer', 'state_block1_layer1_pool_buffer', 'state_block1_layer1_pool_frame_count', 'state_block1_layer1_stream_buffer', 'state_block1_layer2_pool_buffer', 'state_block1_layer2_pool_frame_count', 'state_block1_layer2_stream_buffer', 'state_block2_layer0_pool_buffer', 'state_block2_layer0_pool_frame_count', 'state_block2_layer0_stream_buffer', 'state_block2_layer1_pool_buffer', 'state_block2_layer1_pool_frame_count', 'state_block2_layer1_stream_buffer', 'state_block2_layer2_pool_buffer', 'state_block2_layer2_pool_frame_count', 'state_block2_layer2_stream_buffer', 'state_block3_layer0_pool_buffer', 'state_block3_layer0_pool_frame_count', 'state_block3_layer0_stream_buffer', 'state_block3_layer1_pool_buffer', 'state_block3_layer1_pool_frame_count', 'state_block3_layer1_stream_buffer', 'state_block3_layer2_pool_buffer', 'state_block3_layer2_pool_frame_count', 'state_block3_layer2_stream_buffer', 'state_block3_layer3_pool_buffer', 'state_block3_layer3_pool_frame_count', 'state_block3_layer3_stream_buffer', 'state_block4_layer0_pool_buffer', 'state_block4_layer0_pool_frame_count', 'state_block4_layer0_stream_buffer', 'state_block4_layer1_pool_buffer', 'state_block4_layer1_pool_frame_count', 'state_block4_layer2_pool_buffer', 'state_block4_layer2_pool_frame_count', 'state_block4_layer3_pool_buffer', 'state_block4_layer3_pool_frame_count', 'state_head_pool_buffer', 'state_head_pool_frame_count']
    
    Call arguments received by layer "movinet" "                 f"(type CustomModel):
      • inputs={'state/b0/l0/pool_buffer': 'tf.Tensor(shape=(1, 1, 1, 1, 24), dtype=float32)', 'state/b0/l0/pool_frame_count': 'tf.Tensor(shape=(1,), dtype=int32)', 'state/b1/l0/pool_buffer': 'tf.Tensor(shape=(1, 1, 1, 1, 80), dtype=float32)', 'state/b1/l0/pool_frame_count': 'tf.Tensor(shape=(1,), dtype=int32)', 'state/b1/l0/stream_buffer': 'tf.Tensor(shape=(1, 2, 22, 22, 80), dtype=float32)', 'state/b1/l1/pool_buffer': 'tf.Tensor(shape=(1, 1, 1, 1, 80), dtype=float32)', 'state/b1/l1/pool_frame_count': 'tf.Tensor(shape=(1,), dtype=int32)', 'state/b1/l1/stream_buffer': 'tf.Tensor(shape=(1, 2, 22, 22, 80), dtype=float32)', 'state/b1/l2/pool_buffer': 'tf.Tensor(shape=(1, 1, 1, 1, 80), dtype=float32)', 'state/b1/l2/pool_frame_count': 'tf.Tensor(shape=(1,), dtype=int32)', 'state/b1/l2/stream_buffer': 'tf.Tensor(shape=(1, 2, 22, 22, 80), dtype=float32)', 'state/b2/l0/pool_buffer': 'tf.Tensor(shape=(1, 1, 1, 1, 184), dtype=float32)', 'state/b2/l0/pool_frame_count': 'tf.Tensor(shape=(1,), dtype=int32)', 'state/b2/l0/stream_buffer': 'tf.Tensor(shape=(1, 4, 11, 11, 184), dtype=float32)', 'state/b2/l1/pool_buffer': 'tf.Tensor(shape=(1, 1, 1, 1, 112), dtype=float32)', 'state/b2/l1/pool_frame_count': 'tf.Tensor(shape=(1,), dtype=int32)', 'state/b2/l1/stream_buffer': 'tf.Tensor(shape=(1, 2, 11, 11, 112), dtype=float32)', 'state/b2/l2/pool_buffer': 'tf.Tensor(shape=(1, 1, 1, 1, 184), dtype=float32)', 'state/b2/l2/pool_frame_count': 'tf.Tensor(shape=(1,), dtype=int32)', 'state/b2/l2/stream_buffer': 'tf.Tensor(shape=(1, 2, 11, 11, 184), dtype=float32)', 'state/b3/l0/pool_buffer': 'tf.Tensor(shape=(1, 1, 1, 1, 184), dtype=float32)', 'state/b3/l0/pool_frame_count': 'tf.Tensor(shape=(1,), dtype=int32)', 'state/b3/l0/stream_buffer': 'tf.Tensor(shape=(1, 4, 11, 11, 184), dtype=float32)', 'state/b3/l1/pool_buffer': 'tf.Tensor(shape=(1, 1, 1, 1, 184), dtype=float32)', 'state/b3/l1/pool_frame_count': 'tf.Tensor(shape=(1,), dtype=int32)', 'state/b3/l1/stream_buffer': 'tf.Tensor(shape=(1, 2, 11, 11, 184), dtype=float32)', 'state/b3/l2/pool_buffer': 'tf.Tensor(shape=(1, 1, 1, 1, 184), dtype=float32)', 'state/b3/l2/pool_frame_count': 'tf.Tensor(shape=(1,), dtype=int32)', 'state/b3/l2/stream_buffer': 'tf.Tensor(shape=(1, 2, 11, 11, 184), dtype=float32)', 'state/b3/l3/pool_buffer': 'tf.Tensor(shape=(1, 1, 1, 1, 184), dtype=float32)', 'state/b3/l3/pool_frame_count': 'tf.Tensor(shape=(1,), dtype=int32)', 'state/b3/l3/stream_buffer': 'tf.Tensor(shape=(1, 2, 11, 11, 184), dtype=float32)', 'state/b4/l0/pool_buffer': 'tf.Tensor(shape=(1, 1, 1, 1, 384), dtype=float32)', 'state/b4/l0/pool_frame_count': 'tf.Tensor(shape=(1,), dtype=int32)', 'state/b4/l0/stream_buffer': 'tf.Tensor(shape=(1, 4, 6, 6, 384), dtype=float32)', 'state/b4/l1/pool_buffer': 'tf.Tensor(shape=(1, 1, 1, 1, 280), dtype=float32)', 'state/b4/l1/pool_frame_count': 'tf.Tensor(shape=(1,), dtype=int32)', 'state/b4/l2/pool_buffer': 'tf.Tensor(shape=(1, 1, 1, 1, 280), dtype=float32)', 'state/b4/l2/pool_frame_count': 'tf.Tensor(shape=(1,), dtype=int32)', 'state/b4/l3/pool_buffer': 'tf.Tensor(shape=(1, 1, 1, 1, 344), dtype=float32)', 'state/b4/l3/pool_frame_count': 'tf.Tensor(shape=(1,), dtype=int32)', 'state/head/pool_buffer': 'tf.Tensor(shape=(1, 1, 1, 1, 480), dtype=float32)', 'state/head/pool_frame_count': 'tf.Tensor(shape=(1,), dtype=int32)', 'image': 'tf.Tensor(shape=(None, None, None, None, 3), dtype=float32)'}
      • training=True
      • mask=None
