<a href="https://colab.research.google.com/github/Vinaypatil-Ev/vinEvPy-GoCoLab/blob/main/Tensorflow/TensorflowPrac8_RaggedTensor.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Ragged Tensor
### Ragged tensors are tensors that hold nested lists of variable length

In [None]:
import tensorflow as tf
import math

## 1. tf.ragged.constant()

In [None]:
# Each inner list is one row; rows may have different lengths, including zero.
digits = tf.ragged.constant([[3, 2, 3, 1], [3, 3, 3], [], [1, 2, 3, 4]])
digits

<tf.RaggedTensor [[3, 2, 3, 1], [3, 3, 3], [], [1, 2, 3, 4]]>

In [None]:
string = tf.ragged.constant([["abcdef", "xyzaaa", "uvwaa"], [], ["abbgv", "bcaddd"]])
string

<tf.RaggedTensor [[b'abcdef', b'xyzaaa', b'uvwaa'], [], [b'abbgv', b'bcaddd']]>

### operation on ragged tensor
### Ragged tensors are supported by more than a hundred TensorFlow operations, such as:
1. tf.add()
2. tf.concat()
3. tf.tile()
4. tf.reduce_mean()
5. tf.strings.substr()
6. tf.map_fn()
7. ...and many more

In [None]:
tf.add(digits, 5)

<tf.RaggedTensor [[8, 7, 8, 6], [8, 8, 8], [], [6, 7, 8, 9]]>

In [None]:
# Mean of each row; the empty third row has nothing to average, hence the nan.
tf.reduce_mean(digits, axis=1)

<tf.Tensor: shape=(4,), dtype=float64, numpy=array([2.25, 3.  ,  nan, 2.5 ])>

In [None]:
tf.concat([digits, [[5, 5]]], axis=0)

<tf.RaggedTensor [[3, 2, 3, 1], [3, 3, 3], [], [1, 2, 3, 4], [5, 5]]>

In [None]:
tf.tile(digits, [1, 2])

<tf.RaggedTensor [[3, 2, 3, 1, 3, 2, 3, 1], [3, 3, 3, 3, 3, 3], [], [1, 2, 3, 4, 1, 2, 3, 4]]>

In [None]:
tf.strings.substr(string, 0, 2)

<tf.RaggedTensor [[b'ab', b'xy', b'uv'], [], [b'ab', b'bc']]>

In [None]:
tf.map_fn(tf.math.square, digits)

<tf.RaggedTensor [[9, 4, 9, 1], [9, 9, 9], [], [1, 4, 9, 16]]>

In [None]:
def square_plus_one(x):
  """Return ``x**2 + 1``."""
  return x**2 + 1

# Apply the function to the flat values while keeping the row partitioning.
tf.ragged.map_flat_values(square_plus_one, digits)

<tf.RaggedTensor [[10, 5, 10, 2], [10, 10, 10], [], [2, 5, 10, 17]]>

In [None]:
decode = tf.io.decode_base64(tf.ragged.constant([["ok"]]))
decode

<tf.RaggedTensor [[b'\xa2']]>

In [None]:
import sys

In [None]:
# NOTE: sys.getsizeof only reports the shallow size of the Python object it is
# given, so this does not measure the memory held by the tensor's data.
sys.getsizeof(decode)

56

In [None]:
sys.getsizeof(tf.ragged.constant([["oi"]]))

56

In [None]:
tf.io.encode_base64(decode)

<tf.RaggedTensor [[b'og']]>

### construction of ragged tensor
1. *tf.ragged.constant()*
2. *tf.RaggedTensor.from_value_rowids()*
3. *tf.RaggedTensor.from_row_lengths()*
4. *tf.RaggedTensor.from_row_splits()*



In [None]:
# value_rowids[i] is the row that values[i] belongs to. No value is assigned to
# row 1, so that row comes out empty.
tf.RaggedTensor.from_value_rowids(
    values=[1, 2, 3, 4, 5, 6, 7],
    value_rowids=[0, 0, 2, 2, 3, 3, 4]
    )

<tf.RaggedTensor [[1, 2], [], [3, 4], [5, 6], [7]]>

In [None]:
# row_lengths gives the number of values in each row: 4 + 3 + 2 = 9 values.
tf.RaggedTensor.from_row_lengths(
    values=[3, 3, 3, 3, 5, 6, 7, 8, 9],
    row_lengths=[4, 3, 2]
    )

<tf.RaggedTensor [[3, 3, 3, 3], [5, 6, 7], [8, 9]]>

In [None]:
# Row i holds values[row_splits[i]:row_splits[i + 1]]; repeated split points
# (4, 4 and 7, 7) produce empty rows.
tf.RaggedTensor.from_row_splits(
    values=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
    row_splits=[0, 4, 4, 7, 7, 10]
)

<tf.RaggedTensor [[0, 1, 2, 3], [], [4, 5, 6], [], [7, 8, 9]]>

## Use cases of ragged tensor

In [None]:
queries = tf.ragged.constant([["I", "am", "vinay", "patil"],
                              ["I", "like", "voilet", "color"],
                              ["How", "do", "you", "about", "myself"],
                              ["I", "love", "ai"]
                          ])

In [None]:
queries

<tf.RaggedTensor [[b'I', b'am', b'vinay', b'patil'], [b'I', b'like', b'voilet', b'color'], [b'How', b'do', b'you', b'about', b'myself'], [b'I', b'love', b'ai']]>

In [None]:
num_buckets = 1024
embedding_size = 4
# Randomly initialised [num_buckets, embedding_size] lookup table. The fixed
# op-level seed makes the initial values repeatable when the notebook is
# re-run from a fresh kernel.
embedding_table = tf.Variable(
    tf.random.truncated_normal([num_buckets, embedding_size],
                               stddev=1.0 / math.sqrt(embedding_size),
                               seed=42))

In [None]:
w_b = tf.strings.to_hash_bucket_fast(queries, num_buckets)
w_b

<tf.RaggedTensor [[68, 492, 3, 302], [68, 731, 413, 521], [135, 494, 924, 254, 812], [68, 999, 222]]>

In [None]:
w_eb = tf.nn.embedding_lookup(embedding_table, w_b)
w_eb

<tf.RaggedTensor [[[-0.3074694275856018, 0.1387438327074051, -0.5223070979118347, 0.18394097685813904], [0.5005806684494019, -0.11261317133903503, -0.4146071672439575, -0.9394912719726562], [-0.20241416990756989, -0.37953367829322815, -0.2397526353597641, -0.4732277989387512], [0.47632691264152527, 0.6406905055046082, 0.518401026725769, -0.5452863574028015]], [[-0.3074694275856018, 0.1387438327074051, -0.5223070979118347, 0.18394097685813904], [-0.535005509853363, 0.034555207937955856, 0.0879291370511055, 0.04749893769621849], [0.5486158728599548, -0.09561555832624435, 0.3532993793487549, 0.43734487891197205], [-0.06668850779533386, -0.2533300518989563, -0.21676753461360931, 0.08614395558834076]], [[-0.5930306315422058, 0.28678014874458313, -0.12970347702503204, -0.602933406829834], [-0.4331868886947632, -0.2768113911151886, -0.22818784415721893, 0.4611639380455017], [0.24436143040657043, -0.19970650970935822, -0.22812137007713318, -0.478665828704834], [0.6626963019371033, -0.035777207

In [None]:
# One "#" marker per query row, shaped [nrows, 1] so it can be concatenated
# with the queries along axis 1.
marker = tf.fill([queries.nrows(), 1], "#")
marker

<tf.Tensor: shape=(4, 1), dtype=string, numpy=
array([[b'#'],
       [b'#'],
       [b'#'],
       [b'#']], dtype=object)>

In [None]:
padding = tf.concat([marker, queries, marker], axis=1)
padding

<tf.RaggedTensor [[b'#', b'I', b'am', b'vinay', b'patil', b'#'], [b'#', b'I', b'like', b'voilet', b'color', b'#'], [b'#', b'How', b'do', b'you', b'about', b'myself', b'#'], [b'#', b'I', b'love', b'ai', b'#']]>

In [None]:
# Pair every token with its successor: padding[:,:-1] drops the last token of
# each row, padding[:,1:] drops the first, and join combines them as "a+b".
bigrams = tf.strings.join([padding[:,:-1], padding[:,1:]], separator="+")
bigrams

<tf.RaggedTensor [[b'#+I', b'I+am', b'am+vinay', b'vinay+patil', b'patil+#'], [b'#+I', b'I+like', b'like+voilet', b'voilet+color', b'color+#'], [b'#+How', b'How+do', b'do+you', b'you+about', b'about+myself', b'myself+#'], [b'#+I', b'I+love', b'love+ai', b'ai+#']]>

In [None]:
bigrams_bucket = tf.strings.to_hash_bucket_fast(bigrams, num_buckets)
bigrams_bucket

<tf.RaggedTensor [[915, 799, 831, 637, 521], [915, 423, 500, 869, 520], [852, 342, 454, 510, 938, 769], [915, 45, 232, 954]]>

In [None]:
bigrams_embedding = tf.nn.embedding_lookup(embedding_table, bigrams_bucket)
bigrams_embedding

<tf.RaggedTensor [[[-0.08664236217737198, 0.4958704113960266, -0.23709915578365326, -0.08385498076677322], [0.18282854557037354, 0.5458505749702454, -0.0706811174750328, 0.48843133449554443], [0.09522805362939835, 0.1081816703081131, 0.24982547760009766, -0.058480288833379745], [0.31604552268981934, 0.3032344877719879, -0.33055636286735535, -0.518042802810669], [-0.06668850779533386, -0.2533300518989563, -0.21676753461360931, 0.08614395558834076]], [[-0.08664236217737198, 0.4958704113960266, -0.23709915578365326, -0.08385498076677322], [-0.22734308242797852, -0.21734048426151276, -0.3664870858192444, 0.1948409229516983], [-0.8057729601860046, 0.744895339012146, -0.4131231904029846, 0.3355291783809662], [0.8800925016403198, -0.06627708673477173, -0.4496087431907654, -0.16952036321163177], [0.5177002549171448, -0.0284085962921381, -0.05992792174220085, 0.44094544649124146]], [[0.2527144253253937, -0.17588211596012115, 0.21114814281463623, 0.45934146642684937], [-0.1251469999551773, 0.376

In [None]:
all_embedding = tf.concat([w_eb, bigrams_embedding], axis=1)
all_embedding

<tf.RaggedTensor [[[-0.3074694275856018, 0.1387438327074051, -0.5223070979118347, 0.18394097685813904], [0.5005806684494019, -0.11261317133903503, -0.4146071672439575, -0.9394912719726562], [-0.20241416990756989, -0.37953367829322815, -0.2397526353597641, -0.4732277989387512], [0.47632691264152527, 0.6406905055046082, 0.518401026725769, -0.5452863574028015], [-0.08664236217737198, 0.4958704113960266, -0.23709915578365326, -0.08385498076677322], [0.18282854557037354, 0.5458505749702454, -0.0706811174750328, 0.48843133449554443], [0.09522805362939835, 0.1081816703081131, 0.24982547760009766, -0.058480288833379745], [0.31604552268981934, 0.3032344877719879, -0.33055636286735535, -0.518042802810669], [-0.06668850779533386, -0.2533300518989563, -0.21676753461360931, 0.08614395558834076]], [[-0.3074694275856018, 0.1387438327074051, -0.5223070979118347, 0.18394097685813904], [-0.535005509853363, 0.034555207937955856, 0.0879291370511055, 0.04749893769621849], [0.5486158728599548, -0.0956155583

In [None]:
avg_eb = tf.reduce_mean(all_embedding, axis=1)

In [None]:
avg_eb

<tf.Tensor: shape=(4, 4), dtype=float32, numpy=
array([[ 0.10086613,  0.16523273, -0.14039384, -0.20665193],
       [-0.00916813,  0.083677  , -0.20267694,  0.1636521 ],
       [ 0.0540271 , -0.01928747, -0.17934328, -0.10588329],
       [ 0.00318101, -0.04714555, -0.22341633, -0.06630367]],
      dtype=float32)>

## Sparse tensor

In [None]:
ragged_str = tf.ragged.constant([["hi", "by"],
                                 ["no", "never", "last", "road"]])
ragged_str

<tf.RaggedTensor [[b'hi', b'by'], [b'no', b'never', b'last', b'road']]>

In [None]:
lasts = ragged_str.to_sparse()

In [None]:
print(lasts)

SparseTensor(indices=tf.Tensor(
[[0 0]
 [0 1]
 [1 0]
 [1 1]
 [1 2]
 [1 3]], shape=(6, 2), dtype=int64), values=tf.Tensor([b'hi' b'by' b'no' b'never' b'last' b'road'], shape=(6,), dtype=string), dense_shape=tf.Tensor([2 4], shape=(2,), dtype=int64))


## Ragged tensor in keras
### You can pass a RaggedTensor as input to a Keras model by setting 
#### *ragged=True* on 
#### *tf.keras.Input* or 
#### *tf.keras.layers.Input*

In [None]:
question = tf.constant(["do you know about this comic?", 
                        "yes i Know.",
                        "Whats your Name?",
                        "xyz"])
isquestion = tf.constant([True, False, True, False])

In [None]:
words = tf.strings.split(question, " ")
words

<tf.RaggedTensor [[b'do', b'you', b'know', b'about', b'this', b'comic?'], [b'yes', b'i', b'Know.'], [b'Whats', b'your', b'Name?'], [b'xyz']]>

In [None]:
hash_words = tf.strings.to_hash_bucket_fast(words, 1000)
hash_words

<tf.RaggedTensor [[134, 668, 881, 62, 19, 49], [307, 100, 276], [223, 37, 572], [420]]>

In [None]:
# Sequence classifier over hashed word ids. The input is declared ragged so
# each example may contain a different number of words. The embedding size of
# 1000 matches the bucket count used when hashing the words above. The final
# Dense(1) layer has no activation.
model = tf.keras.Sequential([
                             tf.keras.layers.Input(shape=[None], dtype=tf.int64, ragged=True),
                             tf.keras.layers.Embedding(1000, 16),
                             tf.keras.layers.LSTM(32, use_bias=False),
                             tf.keras.layers.Dense(32, activation="relu"),
                             tf.keras.layers.Dense(1)
])

In [None]:
# The model's last layer is Dense(1) with no activation, so it emits raw
# logits. The loss must be told so (from_logits=True); the plain
# "binary_crossentropy" string would treat those logits as probabilities.
model.compile(
    optimizer="adam",
    loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
)

In [None]:
model.fit(hash_words, isquestion, epochs=10)

Epoch 1/10


  "shape. This may consume a large amount of memory." % value)


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7f14d2d2e5f8>

In [None]:
model.predict(hash_words)

array([[0.04485743],
       [0.02749132],
       [0.03765893],
       [0.02229887]], dtype=float32)

### *tf.Example()*

In [None]:
import google.protobuf.text_format as pbtext

In [None]:
tf.io.RaggedFeature(tf.string)

RaggedFeature(dtype=tf.string, value_key=None, partitions=(), row_splits_dtype=tf.int32, validate=False)

In [None]:
tf.io.RaggedFeature(tf.int64)

RaggedFeature(dtype=tf.int64, value_key=None, partitions=(), row_splits_dtype=tf.int32, validate=False)

In [None]:
def build_tf_example(s):
  """Parse the text-format proto ``s`` into a tf.train.Example and return it serialized."""
  example = tf.train.Example()
  # Merge fills ``example`` in place from the text representation.
  pbtext.Merge(s, example)
  return example.SerializeToString()

In [None]:
example_batch = [
                 build_tf_example(r'''
                 features{
                   feature {key: "colors" value {bytes_list {value : ["red", "blue"]}}}
                   feature {key: "lengths" value {int64_list {value: [7, 9]}}}
                 }'''),
                 build_tf_example(r'''
                 features{
                   feature {key: "colors" value {bytes_list {value : ["orange"]}}}
                   feature {key: "lengths" value {int64_list {value: [7]}}}
                 }''')
]

In [None]:
example_batch[0]

b'\n,\n\x11\n\x07lengths\x12\x06\x1a\x04\n\x02\x07\t\n\x17\n\x06colors\x12\r\n\x0b\n\x03red\n\x04blue'

In [None]:
features = {
    "colors" : tf.io.RaggedFeature(tf.string),
    "lengths" : tf.io.RaggedFeature(tf.int64)
}

In [None]:
feature_tensor = tf.io.parse_example(example_batch, features)
feature_tensor

{'colors': <tf.RaggedTensor [[b'red', b'blue'], [b'orange']]>,
 'lengths': <tf.RaggedTensor [[7, 9], [7]]>}

In [None]:
feature_tensor.get("colors")

<tf.RaggedTensor [[b'red', b'blue'], [b'orange']]>

## *tf.data*
### This API lets you build complex input pipelines from simple, reusable pieces

### *tf.data.Dataset()*

In [None]:
def print_dataset(data):
  """Print each element of ``data`` as a numbered header followed by ``key = value`` lines.

  Every element of ``data`` must provide ``.items()`` (for example a dict of
  features).
  """
  for index, element in enumerate(data):
    print(f"feature {index}: ")
    for name, value in element.items():
      print(f"{name} = {value}")

In [None]:
dataset = tf.data.Dataset.from_tensor_slices(feature_tensor)

In [None]:
print_dataset(dataset)

feature 0: 
colors = [b'red' b'blue']
lengths = [7 9]
feature 1: 
colors = [b'orange']
lengths = [7]


### batching and unbatching dataset

In [None]:
batch_dataset = dataset.batch(1)
print_dataset(batch_dataset)

feature 0: 
colors = <tf.RaggedTensor [[b'red', b'blue']]>
lengths = <tf.RaggedTensor [[7, 9]]>
feature 1: 
colors = <tf.RaggedTensor [[b'orange']]>
lengths = <tf.RaggedTensor [[7]]>


In [None]:
# Undo the batching: unbatch() splits every batch back into its individual
# elements, so this prints the same rows as the un-batched dataset.
unbatch_dataset = batch_dataset.unbatch()
print_dataset(unbatch_dataset)

feature 0: 
colors = [b'red' b'blue']
lengths = [7 9]
feature 1: 
colors = [b'orange']
lengths = [7]


### *tf.function()* with ragged tensor

In [None]:
@tf.function
def make_palindrome(tensor, axis=1):
  """Concatenate ``tensor`` with a copy of itself reversed along ``axis``."""
  mirrored = tf.reverse(tensor, [axis])
  return tf.concat([tensor, mirrored], axis)

In [None]:
make_palindrome(tf.ragged.constant([[1, 2], [3], [4, 5, 6]]), axis=1)

<tf.RaggedTensor [[1, 2, 2, 1], [3, 3], [4, 5, 6, 6, 5, 4]]>

### *tf.saved_model*

In [None]:
import tempfile

In [None]:
path = tempfile.mkdtemp()
path

'/tmp/tmpe4ro1s8b'

In [None]:
tf.saved_model.save(model, path)

INFO:tensorflow:Assets written to: /tmp/tmpe4ro1s8b/assets


In [None]:
import_model = tf.saved_model.load(path)

In [None]:
import_model(hash_words)

<tf.Tensor: shape=(4, 1), dtype=float32, numpy=
array([[0.04485743],
       [0.02749131],
       [0.03765893],
       [0.02229887]], dtype=float32)>

### Operators

In [None]:
x = tf.ragged.constant([[3, 3, 5], [4], [4, 3, 4]])

In [None]:
x + 3

<tf.RaggedTensor [[6, 6, 8], [7], [7, 6, 7]]>

In [None]:
x / 2

<tf.RaggedTensor [[1.5, 1.5, 2.5], [2.0], [2.0, 1.5, 2.0]]>

In [None]:
x - 9

<tf.RaggedTensor [[-6, -6, -4], [-5], [-5, -6, -5]]>

### indexing

In [None]:
x[1]

<tf.Tensor: shape=(1,), dtype=int32, numpy=array([4], dtype=int32)>

In [None]:
x[2, 2]

<tf.Tensor: shape=(), dtype=int32, numpy=4>

In [None]:
x[2:]

<tf.RaggedTensor [[4, 3, 4]]>

In [None]:
x[:,:-1]

<tf.RaggedTensor [[3, 3], [], [4, 3]]>

In [None]:
x[:,1:-1]

<tf.RaggedTensor [[3], [], [3]]>

### type conversion

In [None]:
x.to_tensor()

<tf.Tensor: shape=(3, 3), dtype=int32, numpy=
array([[3, 3, 5],
       [4, 0, 0],
       [4, 3, 4]], dtype=int32)>

In [None]:
x.to_tensor(default_value=-1)

<tf.Tensor: shape=(3, 3), dtype=int32, numpy=
array([[ 3,  3,  5],
       [ 4, -1, -1],
       [ 4,  3,  4]], dtype=int32)>

In [None]:
tf.RaggedTensor.from_tensor(x.to_tensor(default_value=-1))

<tf.RaggedTensor [[3, 3, 5], [4, -1, -1], [4, 3, 4]]>

In [None]:
# With padding=-1 the trailing -1 entries of each row are treated as padding
# and dropped, which recovers the original ragged rows.
tf.RaggedTensor.from_tensor(x.to_tensor(default_value=-1), padding=-1)

<tf.RaggedTensor [[3, 3, 5], [4], [4, 3, 4]]>

In [None]:
print(x.to_sparse())

SparseTensor(indices=tf.Tensor(
[[0 0]
 [0 1]
 [0 2]
 [1 0]
 [2 0]
 [2 1]
 [2 2]], shape=(7, 2), dtype=int64), values=tf.Tensor([3 3 5 4 4 3 4], shape=(7,), dtype=int32), dense_shape=tf.Tensor([3 3], shape=(2,), dtype=int64))


### Evaluating Ragged Tensor

In [None]:
x.to_list()

[[3, 3, 5], [4], [4, 3, 4]]

In [None]:
x.numpy()

  return np.array(rows)


array([array([3, 3, 5], dtype=int32), array([4], dtype=int32),
       array([4, 3, 4], dtype=int32)], dtype=object)

In [None]:
x.row_lengths()

<tf.Tensor: shape=(3,), dtype=int64, numpy=array([3, 1, 3])>

In [None]:
x.row_splits

<tf.Tensor: shape=(4,), dtype=int64, numpy=array([0, 3, 4, 7])>

### Broadcasting


In [None]:
x + 3

<tf.RaggedTensor [[6, 6, 8], [7], [7, 6, 7]]>

### Ragged Tensor Encoding

In [None]:
x

<tf.RaggedTensor [[3, 3, 5], [4], [4, 3, 4]]>

In [None]:
x.row_splits

<tf.Tensor: shape=(4,), dtype=int64, numpy=array([0, 3, 4, 7])>

In [None]:
x.value_rowids()

<tf.Tensor: shape=(7,), dtype=int64, numpy=array([0, 0, 0, 1, 2, 2, 2])>

In [None]:
x.row_lengths()

<tf.Tensor: shape=(3,), dtype=int64, numpy=array([3, 1, 3])>

In [None]:
# This cell shows no output because the property evaluates to None here
# (the rows of x have different lengths).
x.uniform_row_length

In [None]:
x.values

<tf.Tensor: shape=(7,), dtype=int32, numpy=array([3, 3, 5, 4, 4, 3, 4], dtype=int32)>