## Lab: Using Padding

### Overview
Learn how to pad tokenized text sequences to a uniform length using Keras `pad_sequences`.

### Runtime
~15 minutes

## Step 1 - Start with Tokenizer

In [1]:
from tensorflow.keras.preprocessing.text import Tokenizer

# Small training corpus used to build the vocabulary.
sentences = ['I like apples', 'I really like bananas']

# The tokenizer reserves the '<OOV>' token for words it has not seen.
tokenizer = Tokenizer(oov_token='<OOV>', num_words=100)

# Build the vocabulary: every distinct word gets its own integer id.
tokenizer.fit_on_texts(sentences)

word_index = tokenizer.word_index
print("word index:", word_index)
print("word count:", tokenizer.word_counts)

word index: {'<OOV>': 1, 'i': 2, 'like': 3, 'apples': 4, 'really': 5, 'bananas': 6}
word count: OrderedDict([('i', 2), ('like', 2), ('apples', 1), ('really', 1), ('bananas', 1)])


## Step 2 - Text to Sequences

In [2]:
# Sentences to encode; 'love', 'yellow', 'brown' and 'cow' were not part of
# the text the tokenizer was fitted on in Step 1.
test_sentences = ['I like apples', 'I love yellow bananas', 'brown cow']

# Each sentence becomes a list of integer word ids; words outside the
# vocabulary are encoded with the '<OOV>' id.
sequences = tokenizer.texts_to_sequences(test_sentences)

for sentence, encoded in zip(test_sentences, sequences):
    print(sentence, '-->', encoded)

I like apples --> [2, 3, 4]
I love yellow bananas --> [2, 1, 1, 6]
brown cow --> [1, 1]


## Step 3 - Padding

In [3]:
from tensorflow.keras.preprocessing.sequence import pad_sequences

# With no extra arguments, shorter sequences get zeros added at the front
# until they match the length of the longest sequence.
padded = pad_sequences(sequences)

print("With padding:")
for sentence, row in zip(test_sentences, padded):
    print(sentence, '-->', row)

With padding:
I like apples --> [0 2 3 4]
I love yellow bananas --> [2 1 1 6]
brown cow --> [0 0 1 1]


## Step 4 - Pad the End of the Sentence

In [4]:
from tensorflow.keras.preprocessing.sequence import pad_sequences

# padding='post' places the zeros after the word ids instead of before them.
padded = pad_sequences(sequences, padding='post')

print("With padding:")
for idx in range(len(test_sentences)):
    print(test_sentences[idx], '-->', padded[idx])

With padding:
I like apples --> [2 3 4 0]
I love yellow bananas --> [2 1 1 6]
brown cow --> [1 1 0 0]


## Step 5 - Padding Length

In [5]:
from tensorflow.keras.preprocessing.sequence import pad_sequences

# maxlen sets the width of every row: here each sequence is zero-padded
# at the end until it has 10 entries.
padded = pad_sequences(sequences, maxlen=10, padding='post')

print("With padding:")
for sentence, row in zip(test_sentences, padded):
    print(sentence, '-->', row)

With padding:
I like apples --> [2 3 4 0 0 0 0 0 0 0]
I love yellow bananas --> [2 1 1 6 0 0 0 0 0 0]
brown cow --> [1 1 0 0 0 0 0 0 0 0]
