In [74]:
import tensorflow as tf
import pandas as pd
import numpy as np
import json

# Get model architecture

In [2]:
model = tf.saved_model.load("pretrained_model/saved_model_with_signatures")

# Collecting signature details
signatures_data = []

# Iterate over each signature in the model
for key, signature in model.signatures.items():
    # Initialize dictionaries for current signature's inputs and outputs
    inputs_dict = {}
    outputs_dict = {}

    # Iterate over inputs and outputs, filling in the dictionaries
    for input_key, input_val in signature.structured_input_signature[1].items():
        inputs_dict[input_key] = str(input_val.dtype.name)

    for output_key, output_val in signature.structured_outputs.items():
        outputs_dict[output_key] = str(output_val.dtype.name)

    # Append the current signature's details to the list
    signatures_data.append({
        "Signature Key": key,
        "Inputs": inputs_dict,
        "Outputs": outputs_dict
    })

# Convert list of signature data into a DataFrame for visualization
df_signatures = pd.DataFrame(signatures_data)

df_inputs = df_signatures["Inputs"].apply(pd.Series)
df_outputs = df_signatures["Outputs"].apply(pd.Series)
df_expanded = pd.concat([df_signatures.drop(['Inputs', 'Outputs'], axis=1), df_inputs, df_outputs], axis=1)
df_expanded.to_csv("pretrained_model/model_architecture.csv", index=False)

df_expanded

Unnamed: 0,Signature Key,embedding_sample,target_seq_len,inp_pos,target_pos,inp_embeddings,input_seq_len,input_stroke,pen,seq_len,stroke,pi,sigma,embedding_sample.1,mu,position_sample
0,decode_stroke,float32,int32,,,,,,float32,int32,float32,,,,,
1,predict_embedding,,,float32,float32,float32,,,,,,float32,float32,float32,float32,
2,predict_position,,,float32,,float32,,,,,,float32,float32,,float32,float32
3,encode_stroke,,,,,,int32,float32,,,,,,float32,,
4,forward_pass,,int32,,,,int32,float32,float32,int32,float32,,,float32,,


# Preprocesses raw drawings and get the right input format for `encode_stroke`

In [136]:
# Define functions to adjusts all drawings to have a consistent scale or size
def get_bounding_box(drawing):
    minx = 99999
    miny = 99999
    maxx = 0
    maxy = 0

    for s in drawing:
      minx = min(minx, min(s[0]))
      maxx = max(maxx, max(s[0]))
      miny = min(miny, min(s[1]))
      maxy = max(maxy, max(s[1]))
    return (minx, miny, maxx, maxy)

def size_normalization(drawing):
  bb = get_bounding_box(drawing)
  width, height = bb[2] - bb[0], bb[3] - bb[1]
  offset_x, offset_y = bb[0], bb[1]
  if height < 1e-6:
    height = 1

  size_normalized_drawing = [[[(x - offset_x) / height for x in stroke[0]],
                              [(y - offset_y) / height for y in stroke[1]],
                              [t for t in stroke[2]]]
                             for stroke in drawing]

  return size_normalized_drawing

# Define a function to resample the ink to have uniform time steps
# (Ensure that each point is separated by a constant time step)
def resample_ink(drawing, timestep=20):
    resampled_drawing = []
    
    for stroke in drawing:
        # Initialize with the first point
        resampled_stroke = [[stroke[0][0], stroke[1][0], stroke[2][0]]]  
        
        for i in range(1, len(stroke[0])):
            x0, y0, t0 = stroke[0][i-1], stroke[1][i-1], stroke[2][i-1]
            x1, y1, t1 = stroke[0][i], stroke[1][i], stroke[2][i]
            distance = np.sqrt((x1 - x0)**2 + (y1 - y0)**2)
            if distance == 0:
                continue
            else:
                new_points = max(1, int(distance / timestep))
                for j in range(1, new_points + 1):
                    new_point = [x0 + j * (x1 - x0) / new_points,
                                y0 + j * (y1 - y0) / new_points,
                                t0 + j * (t1 - t0) / new_points]
                    resampled_stroke.append(new_point)
        
        resampled_drawing.append(resampled_stroke)

    return resampled_drawing

Here I try with the first drawing in quick_draw_Eiffel_Tower:

In [90]:
ndjson_file_path = 'data_dir/quick_draw/raw_Eiffel_Tower.ndjson'

with open(ndjson_file_path, 'r') as f:
    first_line = f.readline().strip()
    first_row = json.loads(first_line)

sample_drawing = first_row["drawing"]
print(f"There are totally {len(sample_drawing)} strokes in the sample drawing")

There are totally 5 strokes in the sample drawing


In [89]:
# Sequence length for each stroke
[len(i[0]) for i in sample_drawing]

[41, 95, 76, 23, 14]

In [137]:
# Preprocess the drawing
drawing_normalized = size_normalization(sample_drawing)
drawing_resampled = resample_ink(drawing_normalized)

In [149]:
drawing_normalized

[[[0.0,
   0.05771836099577235,
   0.09480277360999947,
   0.12053340190025204,
   0.13908714026701963,
   0.1612243812842494,
   0.1884583022101865,
   0.21878779516224942,
   0.2471063333367372,
   0.2750094658465666,
   0.30659171523755563,
   0.34314535128716145,
   0.387723137808792,
   0.4311141156786563,
   0.47286661046432926,
   0.5077290454427539,
   0.5356750616204352,
   0.561590328623709,
   0.582830848252029,
   0.6048494109192533,
   0.6230569862827093,
   0.6421843597302388,
   0.6654688583331367,
   0.6886874091488746,
   0.7100235555837479,
   0.7336376925156002,
   0.7580661412541236,
   0.778535257895336,
   0.8022450216337415,
   0.8251470633138497,
   0.8472480732908796,
   0.8730215349455613,
   0.8990621077747813,
   0.9255048061650668,
   0.9502431239874235,
   0.9768342174746746,
   1.0062307328459166,
   1.0355316717140282,
   1.064321628414351,
   1.0916148570756519,
   1.1014752333865023],
  [0.941734350341105,
   0.9453244050123739,
   0.9520694903554002,


Convert the input format and shape:

In [138]:
# Define a function to formalize the input shape to be used in `encode_stroke` signature
# Specifically, the shape of `input_seq_len` is (None, ) and the shape of input_stroke is (None, None, 3) 
def formalize_input(stroke):
    input_stroke_tensor = tf.convert_to_tensor([stroke], dtype=tf.float32) # Add an extra dimension for batch size
    input_seq_len_tensor = tf.convert_to_tensor([len(stroke)], dtype=tf.int32)

    return input_seq_len_tensor, input_stroke_tensor

# Use pretrained model for prediction

## Try the encode_stroke signature

In [139]:
encode_stroke = model.signatures["encode_stroke"]
encode_stroke

<ConcreteFunction (*, input_seq_len: TensorSpec(shape=(None,), dtype=tf.int32, name='input_seq_len'), input_stroke: TensorSpec(shape=(None, None, 3), dtype=tf.float32, name='input_stroke')) -> Dict[['embedding_sample', TensorSpec(shape=(None, 8), dtype=tf.float32, name='embedding_sample')]] at 0x2F1DCF9A0>

In [140]:
# Lists to collect embeddings and positions
embedding_sample_list = []
inp_pos_list = []

# Process only the first two strokes
for stroke in drawing_resampled[:2]:
    input_seq_len_tensor, input_stroke_tensor = formalize_input(stroke)
    output = encode_stroke(input_seq_len=input_seq_len_tensor, 
                           input_stroke=input_stroke_tensor)
    embedding_sample = output["embedding_sample"]
    
    # Extract the first point's coordinates as position
    inp_pos = [[stroke[0][0], stroke[1][0]]]
    inp_pos_list.extend(inp_pos)
    embedding_sample_list.append(embedding_sample)

# Convert lists to tensors with the right shape
inp_pos_tensor = tf.convert_to_tensor(inp_pos_list, dtype=tf.float32)  # Shape: (num_strokes, 2)
inp_embeddings_tensor = tf.concat(embedding_sample_list, axis=0)  # Shape: (num_strokes, 8)

# Add batch dimension
inp_pos_tensor = tf.expand_dims(inp_pos_tensor, axis=0)  # Shape: (1, num_strokes, 2)
inp_embeddings_tensor = tf.expand_dims(inp_embeddings_tensor, axis=0)  # Shape: (1, num_strokes, 8)

## Try the predict_position signature

In [141]:
predict_position = model.signatures["predict_position"]
predict_position

<ConcreteFunction (*, inp_pos: TensorSpec(shape=(None, None, 2), dtype=tf.float32, name='inp_pos'), inp_embeddings: TensorSpec(shape=(None, None, 8), dtype=tf.float32, name='inp_embeddings')) -> Dict[['position_sample', TensorSpec(shape=(None, 2), dtype=tf.float32, name='position_sample')], ['pi', TensorSpec(shape=(None, 10), dtype=tf.float32, name='pi')], ['sigma', TensorSpec(shape=(None, 1, 10, 2), dtype=tf.float32, name='sigma')], ['mu', TensorSpec(shape=(None, 1, 10, 2), dtype=tf.float32, name='mu')]] at 0x2F3B70A30>

In [142]:
predict_position_result = predict_position(inp_pos=inp_pos_tensor, 
                                           inp_embeddings=inp_embeddings_tensor)
predict_position_result

{'position_sample': <tf.Tensor: shape=(1, 2), dtype=float32, numpy=array([[ 6.838819 , -1.4229445]], dtype=float32)>,
 'pi': <tf.Tensor: shape=(1, 10), dtype=float32, numpy=
 array([[3.6635324e-08, 1.1230591e-07, 1.7465905e-07, 5.0777880e-09,
         2.5925906e-03, 2.6572111e-06, 9.4617978e-02, 3.7509776e-07,
         9.0278608e-01, 5.3436561e-10]], dtype=float32)>,
 'sigma': <tf.Tensor: shape=(1, 1, 10, 2), dtype=float32, numpy=
 array([[[[ 9.514327  ,  3.9390216 ],
          [ 2.0930994 ,  4.950522  ],
          [ 5.1820283 ,  1.2394203 ],
          [28.916815  ,  3.907423  ],
          [ 0.23929727,  0.6537428 ],
          [ 1.112262  ,  0.4390132 ],
          [18.509413  ,  1.1268417 ],
          [33.872047  ,  2.2025366 ],
          [21.131636  ,  5.791792  ],
          [ 3.5448177 ,  1.1053672 ]]]], dtype=float32)>,
 'mu': <tf.Tensor: shape=(1, 1, 10, 2), dtype=float32, numpy=
 array([[[[-0.25956464,  2.251717  ],
          [ 0.11476465,  1.4058719 ],
          [ 0.04891471,  0.

In [143]:
target_pos = predict_position_result['position_sample']
target_pos_tensor = tf.expand_dims(target_pos, axis=0)

# Try predict_embedding signature

In [144]:
predict_embedding = model.signatures["predict_embedding"]
predict_embedding

<ConcreteFunction (*, inp_pos: TensorSpec(shape=(None, None, 2), dtype=tf.float32, name='inp_pos'), target_pos: TensorSpec(shape=(None, 1, 2), dtype=tf.float32, name='target_pos'), inp_embeddings: TensorSpec(shape=(None, None, 8), dtype=tf.float32, name='inp_embeddings')) -> Dict[['pi', TensorSpec(shape=(None, 10), dtype=tf.float32, name='pi')], ['sigma', TensorSpec(shape=(None, 1, 10, 8), dtype=tf.float32, name='sigma')], ['embedding_sample', TensorSpec(shape=(None, 8), dtype=tf.float32, name='embedding_sample')], ['mu', TensorSpec(shape=(None, 1, 10, 8), dtype=tf.float32, name='mu')]] at 0x30F5F9A00>

In [145]:
predict_embedding_result = predict_embedding(inp_pos = inp_pos_tensor, 
                                             target_pos=target_pos_tensor,
                                             inp_embeddings=inp_embeddings_tensor)
predict_embedding_result

{'pi': <tf.Tensor: shape=(1, 10), dtype=float32, numpy=
 array([[0.1571875 , 0.10592218, 0.07599134, 0.07578772, 0.07643081,
         0.03154781, 0.05140919, 0.30794448, 0.0118671 , 0.10591192]],
       dtype=float32)>,
 'sigma': <tf.Tensor: shape=(1, 1, 10, 8), dtype=float32, numpy=
 array([[[[0.3948015 , 0.22166586, 0.12100061, 0.27087203, 0.17374739,
           0.16457334, 0.33858636, 0.18659966],
          [0.35628676, 0.2805058 , 0.32534322, 0.26516148, 0.23701735,
           0.33111304, 0.36097148, 0.26723394],
          [0.34547696, 0.39308548, 0.26500902, 0.46635893, 0.48166293,
           0.42072284, 0.45070317, 0.21530402],
          [0.46005574, 0.35597256, 0.24866836, 0.55371296, 0.2796112 ,
           0.21513234, 0.5900684 , 0.41165432],
          [0.23593482, 0.18024899, 0.18565036, 0.3458232 , 0.18754844,
           0.4380323 , 0.17873028, 0.20960614],
          [0.35366374, 0.3891411 , 0.250027  , 0.41915718, 0.461722  ,
           0.37093452, 0.4005495 , 0.2734264 ],
 

# Try the decode_stroke signature

In [146]:
decode_stroke = model.signatures["decode_stroke"]
decode_stroke

<ConcreteFunction (*, embedding_sample: TensorSpec(shape=(None, 8), dtype=tf.float32, name='embedding_sample'), target_seq_len: TensorSpec(shape=(), dtype=tf.int32, name='target_seq_len')) -> Dict[['pen', TensorSpec(shape=(None, None, 1), dtype=tf.float32, name='pen')], ['seq_len', TensorSpec(shape=(None,), dtype=tf.int32, name='seq_len')], ['stroke', TensorSpec(shape=(None, None, 2), dtype=tf.float32, name='stroke')]] at 0x2F01A62B0>

In [147]:
# Use the (real) next stroke to determine the target_seq_len for the decoder
target_seq_len_tensor = tf.convert_to_tensor(len(drawing_resampled[2]), dtype=tf.int32)

decode_stroke_result = decode_stroke(embedding_sample = predict_embedding_result["embedding_sample"],
                                     target_seq_len = target_seq_len_tensor)

decode_stroke_result["stroke"]
# This decoded stroke with pen state and stroke are used as the new input 
# for the encoder, whose starting position and embeddings will be used
# for auto-regressive for the remaining stokes

<tf.Tensor: shape=(1, 76, 2), dtype=float32, numpy=
array([[[ -0.28265634,  -0.2638566 ],
        [ -0.15693945,  -0.02932076],
        [ -0.15595943,  -0.10539629],
        [ -0.43970346,  -0.18493247],
        [ -0.6928278 ,  -0.48688692],
        [ -0.9503494 ,  -1.1250703 ],
        [ -1.4586362 ,  -1.9195957 ],
        [ -2.0512776 ,  -2.8667626 ],
        [ -2.7315304 ,  -4.113566  ],
        [ -3.5267816 ,  -5.344566  ],
        [ -6.230611  ,  -9.002508  ],
        [ -6.9633293 , -10.03128   ],
        [ -7.7016783 , -11.002031  ],
        [ -8.42174   , -11.986371  ],
        [ -9.10001   , -13.03823   ],
        [ -9.756411  , -13.865174  ],
        [-10.327514  , -14.504483  ],
        [-10.806738  , -14.917845  ],
        [-11.2465515 , -15.183992  ],
        [-11.576551  , -15.289197  ],
        [-11.713126  , -15.258755  ],
        [-11.656347  , -15.121748  ],
        [-11.376671  , -14.850074  ],
        [-11.012295  , -14.446777  ],
        [-10.598457  , -14.021563  ]

In [135]:
def denormalize_stroke(stroke, drawing):
    denormalized_stroke = []

    minx, miny, maxx, maxy = get_bounding_box(drawing)
    width, height = maxx - minx, maxy - miny


    for x_norm, y_norm in stroke:
        x = (x_norm * width) + minx
        y = (y_norm * height) + miny
        denormalized_stroke.append([x, y])
    return denormalized_stroke


stroke_original = denormalize_stroke(decode_stroke_result["stroke"][0], sample_drawing)

stroke_original

[[<tf.Tensor: shape=(), dtype=float32, numpy=-81.79892>,
  <tf.Tensor: shape=(), dtype=float32, numpy=-15.237579>],
 [<tf.Tensor: shape=(), dtype=float32, numpy=-37.231617>,
  <tf.Tensor: shape=(), dtype=float32, numpy=63.567722>],
 [<tf.Tensor: shape=(), dtype=float32, numpy=-51.91903>,
  <tf.Tensor: shape=(), dtype=float32, numpy=56.075348>],
 [<tf.Tensor: shape=(), dtype=float32, numpy=-143.01344>,
  <tf.Tensor: shape=(), dtype=float32, numpy=22.645748>],
 [<tf.Tensor: shape=(), dtype=float32, numpy=-220.10928>,
  <tf.Tensor: shape=(), dtype=float32, numpy=-63.165787>],
 [<tf.Tensor: shape=(), dtype=float32, numpy=-286.36203>,
  <tf.Tensor: shape=(), dtype=float32, numpy=-246.46031>],
 [<tf.Tensor: shape=(), dtype=float32, numpy=-407.363>,
  <tf.Tensor: shape=(), dtype=float32, numpy=-523.14746>],
 [<tf.Tensor: shape=(), dtype=float32, numpy=-577.9726>,
  <tf.Tensor: shape=(), dtype=float32, numpy=-824.4982>],
 [<tf.Tensor: shape=(), dtype=float32, numpy=-759.6229>,
  <tf.Tensor: sh

# Try forward pass signature

In [116]:
forward_pass = model.signatures["forward_pass"]
forward_pass

<ConcreteFunction (*, input_seq_len: TensorSpec(shape=(None,), dtype=tf.int32, name='input_seq_len'), input_stroke: TensorSpec(shape=(None, None, 3), dtype=tf.float32, name='input_stroke'), target_seq_len: TensorSpec(shape=(), dtype=tf.int32, name='target_seq_len')) -> Dict[['pen', TensorSpec(shape=(None, None, 1), dtype=tf.float32, name='pen')], ['embedding_sample', TensorSpec(shape=(None, 8), dtype=tf.float32, name='embedding_sample')], ['seq_len', TensorSpec(shape=(None,), dtype=tf.int32, name='seq_len')], ['stroke', TensorSpec(shape=(None, None, 2), dtype=tf.float32, name='stroke')]] at 0x30E37E970>

In [125]:
input_seq_len_list = []
input_stroke_list = []

# Process only the first two strokes
for stroke in drawing_resampled[:2]:
    input_seq_len_tensor, input_stroke_tensor = formalize_input(stroke)
    input_seq_len_list.append(input_seq_len_tensor)
    input_stroke_list.append(input_stroke_tensor)

# Convert lists to tensors with the right shape
input_seq_len_tensor = tf.convert_to_tensor(input_seq_len_list, dtype=tf.int32)
input_stroke_tensor = tf.concat(input_stroke_list, axis=0)

# Add batch dimension
input_seq_len_tensor = tf.expand_dims(input_seq_len_tensor, axis=0)  # Shape: (1, num_strokes, 2)
# inp_embeddings_tensor = tf.expand_dims(inp_embeddings_tensor, axis=0)  # Shape: (1, num_strokes, 8)


# forward_pass(input_seq_len=input_seq_len_tensor, input_stroke=input_stroke_tensor,
#              target_seq_len = target_seq_len_tensor)

input_seq_len_tensor

InvalidArgumentError: {{function_node __wrapped__ConcatV2_N_2_device_/job:localhost/replica:0/task:0/device:CPU:0}} ConcatOp : Dimension 1 in both shapes must be equal: shape[0] = [1,41,3] vs. shape[1] = [1,95,3] [Op:ConcatV2] name: concat

# Random Try Code

In [None]:
# def process_drawing_for_encode_stroke(drawing):
#     flat_strokes = []
#     total_points = 0
#     for stroke in drawing:
#         for i in range(len(stroke[0])):  # Iterate through points in the stroke
#             x = stroke[0][i]
#             y = stroke[1][i]
#             # Assuming the third list contains timing information, not directly used here
#             # If there's a specific "pen state" value needed, adjust accordingly
#             flat_strokes.append([x, y, 1])  # Use '1' as a placeholder for pen state
#         total_points += len(stroke[0])
    
#     # Convert to tensors
#     input_stroke = tf.constant(flat_strokes, dtype=tf.float32)
#     input_seq_len = tf.constant([total_points], dtype=tf.int32)
    
#     return input_stroke, input_seq_len

# # Find the maximum sequence length across all processed drawings
# max_seq_len = max(input_stroke.shape[0] for input_stroke, _ in processed_drawings)

# # Pad each drawing sequence to the maximum length
# padded_drawings = []
# for input_stroke, input_seq_len in processed_drawings:
#     # Calculate the padding amounts
#     padding = [[0, max_seq_len - tf.shape(input_stroke)[0]], [0, 0]]  # Pad the sequence length to max_seq_len
    
#     # Pad the stroke data
#     padded_stroke = tf.pad(input_stroke, padding, "CONSTANT")
    
#     # Append the padded stroke and original sequence length
#     padded_drawings.append((padded_stroke, input_seq_len))

# padded_drawings

# import json

# processed_drawings = []

# # Draw a sample of 320 sketches
# max_rows = 320
# current_row = 0

# with open("data_dir/quick_draw/raw_Eiffel_Tower.ndjson", 'r') as f:
#     for line in f:
#         if current_row < max_rows:
#             drawing_data = json.loads(line)
#             drawing = drawing_data["drawing"]
#             processed_drawing = process_drawing_for_encode_stroke(drawing)
#             processed_drawings.append(processed_drawing)
#             current_row += 1
#         else:
#             break
# stroke_tensors = [x[0] for x in padded_drawings]
# seq_len_tensors = [x[1] for x in padded_drawings]

# stroke_dataset = tf.data.Dataset.from_tensor_slices(stroke_tensors)
# seq_len_dataset = tf.data.Dataset.from_tensor_slices(seq_len_tensors)

# # Combine into a single dataset
# dataset = tf.data.Dataset.zip((stroke_dataset, seq_len_dataset))

# dataset

# # Set your desired batch size
# batch_size = 128

# # Batch the dataset. No need to specify padding values or shapes here because
# # your tensors within each dataset element already have a uniform shape after padding.
# batched_dataset = dataset.batch(batch_size)

In [None]:
# # Define the features of importance for input formatting
# feature_description = {
#     'ink': tf.io.VarLenFeature(tf.float32),
# }

# # For now, I use a very small sample of dataset
# file_paths = "data_dir/quick_draw/training/raw_Eiffel_Tower-00000-of-00010"
# dataset = tf.data.TFRecordDataset(file_paths)

# # for raw_record in dataset.take(1):
# #   example = tf.train.Example()
# #   example.ParseFromString(raw_record.numpy())
# #   print(example)

# # Parse a Tensorflow Example proto 
# parsed_dataset = dataset.map(lambda x: tf.io.parse_single_example(x, feature_description)) 

# # Define a function to get the `input_seq_len` and `input_stroke` arguments 
# # for fitting the pretrained model (specifically, `encode_stroke` signature)
# max_length_threshold = 201

# def get_input_arguments(parsed_record):
#     # Reshape and extract the first three dimensions from parsed_record['ink']
#     # (x coordinate, y coordinate, and pen state)
#     ink = tf.sparse.to_dense(parsed_record['ink'])
#     input_seq_len = tf.shape(ink)[0] // 4
#     ink_reshaped = tf.reshape(ink, (input_seq_len, 4))
#     # Expand the input_stroke to three dimensions for batching 
#     input_stroke = tf.expand_dims(ink_reshaped[:, :3], axis=0)
    
#     # Make sure it matches the input shape 
#     input_seq_len = tf.reshape(input_seq_len, [1]) 

#     return input_seq_len, input_stroke
    
# preprocessed_dataset = parsed_dataset.map(get_input_arguments)