In [4]:
import numpy as np
import re
import string
from IPython.display import display, HTML

import tensorflow as tf
from tensorflow.keras import layers, models, losses, callbacks

In [17]:
# Hyperparameters and run configuration shared by the cells below.
VOCAB_SIZE = 50000  # vocabulary cap for the vectoriser; also the embedding table and softmax size
MAX_LEN = 128  # tokens per training example (the vectoriser emits MAX_LEN + 1 so x/y can be shifted)
EMBEDDING_DIM = 256  # width of the token and position embeddings
KEY_DIM = 256  # per-head key/query size passed to MultiHeadAttention
N_HEADS = 4  # number of attention heads
FEED_FORWARD_DIM = 256  # hidden width of the block's feed-forward sub-layer
SEED = 42
BATCH_SIZE = 10
EPOCHS = 5

# SEED used to be defined but never applied. Seeding Python, NumPy and
# TensorFlow here makes weight initialisation, dataset shuffling and the
# np.random.choice sampling in the text generator repeatable on
# Restart & Run All.
tf.keras.utils.set_random_seed(SEED)

In [18]:
# Load the corpus: one list entry per line of the file, trailing newline kept.
with open("sample.txt", 'r', encoding='utf-8') as f:
    recipes = f.readlines()

In [19]:
def pad_punctuation(s):
    """Surround punctuation and newlines with spaces so they become separate 'words'.

    Every character in ``string.punctuation`` and every newline is wrapped in
    single spaces, then runs of spaces are collapsed to one.

    Args:
        s: The text to pad.

    Returns:
        The padded text. A leading or trailing space remains when the input
        starts or ends with a padded character.
    """
    # re.escape keeps ']', '\\', '^' and '-' literal inside the character class.
    # Interpolating string.punctuation unescaped turned its "\]" into an escaped
    # ']' and silently dropped the backslash from the set being padded.
    s = re.sub(f"([{re.escape(string.punctuation)}\n])", r" \1 ", s)
    s = re.sub(" +", " ", s)
    return s


# Apply the punctuation padding to every line of the corpus.
text_data = list(map(pad_punctuation, recipes))

In [20]:
# Wrap the padded strings in a tf.data pipeline.
# Batching happens before shuffling, so the 1000-element shuffle buffer
# reorders whole batches rather than individual lines.
text_ds = tf.data.Dataset.from_tensor_slices(text_data).batch(BATCH_SIZE).shuffle(1000)

In [21]:
# Text -> integer-id layer: lower-cases the input, keeps at most VOCAB_SIZE
# tokens, and emits MAX_LEN + 1 ids per example so that inputs and targets
# can later be taken as two windows offset by one token.
vectorize_layer = layers.TextVectorization(
    max_tokens=VOCAB_SIZE,
    standardize="lower",
    output_sequence_length=MAX_LEN + 1,
    output_mode="int",
)

In [22]:
# Adapt the layer to the training set (builds the vocabulary from text_ds)
vectorize_layer.adapt(text_ds)
# Index -> token list; position i holds the token for integer id i.
# TextGenerator inverts it into a word -> index mapping.
vocab = vectorize_layer.get_vocabulary()

In [23]:
def prepare_inputs(text):
    """Turn a batch of strings into (input, target) next-token training pairs.

    The batch gets a trailing axis, is run through ``vectorize_layer``, and the
    resulting id matrix is split into two windows offset by one position.

    Args:
        text: A batch of strings from ``text_ds``.

    Returns:
        ``(x, y)`` where ``x`` is every token id except the last and ``y`` is
        every token id except the first, so ``y[:, t]`` is the token that
        follows ``x[:, t]``.
    """
    token_ids = vectorize_layer(tf.expand_dims(text, axis=-1))
    return token_ids[:, :-1], token_ids[:, 1:]


# Map every batch of raw strings to its (input ids, shifted target ids) pair.
train_ds = text_ds.map(prepare_inputs)

In [24]:
def causal_attention_mask(batch_size, n_dest, n_src, dtype):
    """Build a per-example causal mask of shape ``[batch_size, n_dest, n_src]``.

    Entry ``[b, i, j]`` is set when ``i >= j - n_src + n_dest``. For
    ``n_dest == n_src`` this is the lower triangle including the diagonal,
    so each position can see itself and earlier positions only.

    Args:
        batch_size: Scalar tensor giving the number of copies to tile.
        n_dest: Number of destination (query) positions.
        n_src: Number of source (key) positions.
        dtype: Dtype the boolean comparison result is cast to.

    Returns:
        The mask tiled ``batch_size`` times along a new leading axis.
    """
    dest_idx = tf.range(n_dest)[:, None]
    src_idx = tf.range(n_src)
    allowed = tf.cast(dest_idx >= src_idx - n_src + n_dest, dtype)
    single_mask = tf.reshape(allowed, [1, n_dest, n_src])
    # Tile multiples: [batch_size, 1, 1] repeats the mask along the batch axis only.
    repeats = tf.concat(
        [tf.expand_dims(batch_size, -1), tf.constant([1, 1], dtype=tf.int32)], 0
    )
    return tf.tile(single_mask, repeats)

class TransformerBlock(layers.Layer):
    """Single Transformer block with causal self-attention.

    The block applies masked multi-head self-attention and a two-layer
    feed-forward network. Each sub-layer is followed by dropout, a residual
    connection and layer normalisation.

    Args:
        num_heads: Number of attention heads.
        key_dim: Key/query size passed to ``MultiHeadAttention``.
        embed_dim: Width of the block's input and output features.
        ff_dim: Hidden width of the feed-forward network.
        dropout_rate: Dropout rate used after both sub-layers.
        **kwargs: Standard ``Layer`` arguments (``name``, ``dtype``,
            ``trainable``, ...). Accepting them lets ``from_config`` rebuild
            the layer from ``get_config()``, which includes those base keys.
    """

    def __init__(
        self, num_heads, key_dim, embed_dim, ff_dim, dropout_rate=0.2, **kwargs
    ):
        super().__init__(**kwargs)
        self.num_heads = num_heads
        self.key_dim = key_dim
        self.embed_dim = embed_dim
        self.ff_dim = ff_dim
        self.dropout_rate = dropout_rate
        self.attn = layers.MultiHeadAttention(
            num_heads, key_dim, output_shape=embed_dim
        )
        self.dropout_1 = layers.Dropout(self.dropout_rate)
        self.ln_1 = layers.LayerNormalization(epsilon=1e-6)
        self.ffn_1 = layers.Dense(self.ff_dim, activation="relu")
        self.ffn_2 = layers.Dense(self.embed_dim)
        self.dropout_2 = layers.Dropout(self.dropout_rate)
        self.ln_2 = layers.LayerNormalization(epsilon=1e-6)

    def call(self, inputs):
        """Run the block on ``inputs``.

        Returns:
            A tuple ``(output, attention_scores)``: the normalised block output
            and the scores returned by the attention layer.
        """
        input_shape = tf.shape(inputs)
        batch_size = input_shape[0]
        seq_len = input_shape[1]
        # Query and key are the same sequence, so the mask is square.
        causal_mask = causal_attention_mask(
            batch_size, seq_len, seq_len, tf.bool
        )
        attention_output, attention_scores = self.attn(
            inputs,
            inputs,
            attention_mask=causal_mask,
            return_attention_scores=True,
        )
        attention_output = self.dropout_1(attention_output)
        # Residual connection around attention, then layer norm.
        out1 = self.ln_1(inputs + attention_output)
        ffn_1 = self.ffn_1(out1)
        ffn_2 = self.ffn_2(ffn_1)
        ffn_output = self.dropout_2(ffn_2)
        # Residual connection around the feed-forward network, then layer norm.
        return (self.ln_2(out1 + ffn_output), attention_scores)

    def get_config(self):
        """Return the base layer config extended with this block's constructor arguments."""
        config = super().get_config()
        config.update(
            {
                "key_dim": self.key_dim,
                "embed_dim": self.embed_dim,
                "num_heads": self.num_heads,
                "ff_dim": self.ff_dim,
                "dropout_rate": self.dropout_rate,
            }
        )
        return config

class TokenAndPositionEmbedding(layers.Layer):
    """Sum of a learned token embedding and a learned position embedding.

    Args:
        max_len: Number of rows in the position-embedding table.
        vocab_size: Number of rows in the token-embedding table.
        embed_dim: Width of both embeddings.
        **kwargs: Standard ``Layer`` arguments (``name``, ``dtype``,
            ``trainable``, ...). Accepting them lets ``from_config`` rebuild
            the layer from ``get_config()``, which includes those base keys.
    """

    def __init__(self, max_len, vocab_size, embed_dim, **kwargs):
        super().__init__(**kwargs)
        self.max_len = max_len
        self.vocab_size = vocab_size
        self.embed_dim = embed_dim
        self.token_emb = layers.Embedding(
            input_dim=vocab_size, output_dim=embed_dim
        )
        self.pos_emb = layers.Embedding(input_dim=max_len, output_dim=embed_dim)

    def call(self, x):
        """Embed token ids ``x`` and add the embedding of each position index."""
        # Position indices 0 .. L-1, where L is the size of the last axis of x.
        # The position table has max_len rows, so L is expected to stay within max_len.
        maxlen = tf.shape(x)[-1]
        positions = tf.range(start=0, limit=maxlen, delta=1)
        positions = self.pos_emb(positions)
        x = self.token_emb(x)
        return x + positions

    def get_config(self):
        """Return the base layer config extended with this layer's constructor arguments."""
        config = super().get_config()
        config.update(
            {
                "max_len": self.max_len,
                "vocab_size": self.vocab_size,
                "embed_dim": self.embed_dim,
            }
        )
        return config

In [25]:
# Assemble the model: token ids -> token + position embeddings ->
# one Transformer block -> softmax over VOCAB_SIZE ids.
inputs = layers.Input(shape=(None,), dtype=tf.int32)
x = TokenAndPositionEmbedding(MAX_LEN, VOCAB_SIZE, EMBEDDING_DIM)(inputs)
x, attention_scores = TransformerBlock(
    N_HEADS, KEY_DIM, EMBEDDING_DIM, FEED_FORWARD_DIM
)(x)
outputs = layers.Dense(VOCAB_SIZE, activation="softmax")(x)
# The attention scores are exposed as a second model output so they can be
# inspected at generation time.
model = models.Model(inputs=inputs, outputs=[outputs, attention_scores])
# One loss entry per output: cross-entropy for the token predictions, None for
# the attention scores.
model.compile("adam", loss=[losses.SparseCategoricalCrossentropy(), None])




In [26]:
# Print the layer-by-layer summary of the assembled model.
model.summary()

In [2]:
class TextGenerator(callbacks.Callback):
    """Keras callback that samples text from the model after every epoch.

    Args:
        index_to_word: Sequence mapping a token id (its position) to the token
            string, e.g. the vectoriser's vocabulary list.
        top_k: Accepted for backward compatibility; sampling does not use it.
    """

    def __init__(self, index_to_word, top_k=10):
        # Initialise the base Callback so its own attributes exist even before
        # Keras attaches a model through fit().
        super().__init__()
        self.index_to_word = index_to_word
        self.word_to_index = {
            word: index for index, word in enumerate(index_to_word)
        }

    def sample_from(self, probs, temperature):
        """Sample one index from ``probs`` after temperature scaling.

        The probabilities are raised to ``1 / temperature`` and renormalised,
        so values below 1 sharpen the distribution and values above 1 flatten it.

        Returns:
            ``(sampled_index, rescaled_probs)``.
        """
        probs = probs ** (1 / temperature)
        probs = probs / np.sum(probs)
        return np.random.choice(len(probs), p=probs), probs

    def generate(self, start_prompt, max_tokens, temperature):
        """Extend ``start_prompt`` one sampled token at a time and print the result.

        Generation stops once the sequence holds ``max_tokens`` tokens or token
        id 0 is sampled.

        Args:
            start_prompt: Whitespace-separated prompt; words missing from the
                vocabulary are mapped to id 1.
            max_tokens: Upper bound on the total number of tokens, prompt included.
            temperature: Sampling temperature forwarded to ``sample_from``.

        Returns:
            A list with one dict per generated token, holding the prompt so far
            (``"prompt"``), the rescaled next-token distribution
            (``"word_probs"``) and a slice of the attention scores (``"atts"``).
        """
        start_tokens = [
            self.word_to_index.get(x, 1) for x in start_prompt.split()
        ]
        sample_token = None
        info = []
        vocab_len = len(self.index_to_word)
        while len(start_tokens) < max_tokens and sample_token != 0:
            x = np.array([start_tokens])
            y, att = self.model.predict(x, verbose=0)
            # Only the distribution at the last position predicts the next token.
            sample_token, probs = self.sample_from(y[0][-1], temperature)
            info.append(
                {
                    "prompt": start_prompt,
                    "word_probs": probs,
                    # First example, last query position; presumably the axes
                    # are (batch, heads, query, key) - TODO confirm.
                    "atts": att[0, :, -1, :],
                }
            )
            start_tokens.append(sample_token)
            # The softmax can be wider than the fitted vocabulary, so a sampled
            # id may have no word; show a placeholder instead of raising IndexError.
            if sample_token < vocab_len:
                next_word = self.index_to_word[sample_token]
            else:
                next_word = "[UNK]"
            start_prompt = start_prompt + " " + next_word
        print(f"\ngenerated text:\n{start_prompt}\n")
        return info

    def on_epoch_end(self, epoch, logs=None):
        """Print a sample generated from a fixed prompt at the end of each epoch."""
        self.generate("recipe for", max_tokens=MAX_LEN, temperature=0.7)

NameError: name 'callbacks' is not defined

In [28]:
# Create a model save checkpoint (weights only, written at the end of each epoch)
model_checkpoint_callback = callbacks.ModelCheckpoint(
    filepath="./checkpoint/checkpoint.weights.h5",
    save_weights_only=True,
    save_freq="epoch",
    verbose=0,
)

# Write training logs for TensorBoard under ./logs
tensorboard_callback = callbacks.TensorBoard(log_dir="./logs")

# Build the text-generation callback from the vectoriser's vocabulary
# (index -> word list); it prints a sample after every epoch.
text_generator = TextGenerator(vocab)

In [29]:
# Train for EPOCHS passes over train_ds. The callbacks checkpoint the weights,
# log to TensorBoard and print a generated sample after each epoch.
model.fit(
    train_ds,
    epochs=EPOCHS,
    callbacks=[model_checkpoint_callback, tensorboard_callback, text_generator],
)

Epoch 1/5


[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1s/step - loss: 6.1330
generated text:
recipe for [ " , " 1 eggs " , " 1 / 2 c . " 1 lb . flour " , " , " 1 / 4 c . salt " 1 c . " , " , " , " , " , " 1 c . water " 1 / 4 c . " 1 c . " , " 1 / 2 c . vinegar " , " , " , " , " , " , " 1 / 2 c . onion , " , " , " , " , " , " , " , " , " 1 / 2 hours . " 1 c . chopped " ] [ " , " 1 / 2 c .

[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m146s[0m 1s/step - loss: 6.1139
Epoch 2/5
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1s/step - loss: 2.4798
generated text:
recipe for 2 ' s onion soup [ " , " 1 / 2 c . sugar " 1 - inch ) " 1 / 2 c . lemon juice " , " , " , " , " 1 / 2 c . salt " 1 / 2 c . baking powder " , " , " , " 1 / 4 c . salt " 1 / 2 c . lemon juice " , " 1 / 2 c . minced onions " 1 / 2 c . peppers , " , " 1 / 2 tsp . pepper " , " , " 1 / 2 tsp . salt " 1 / 2 c . soda " , " ] [ " add milk and

[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━

<keras.src.callbacks.history.History at 0x282f3bfdb40>

In [32]:
# Persist the trained model to disk.
# NOTE(review): the ".h5" suffix presumably selects the legacy HDF5 format, and
# reloading would need the custom layers passed as custom_objects - confirm.
model.save("./models/model.h5")




In [33]:
# Sample a recipe for a dessert prompt; `info` keeps the per-token sampling records.
info = text_generator.generate(
    start_prompt="recipe for vanilla whipped cream parfaits with berries |",
    max_tokens=MAX_LEN,
    temperature=0.7,
)


generated text:
recipe for vanilla whipped cream parfaits with berries | " , " 1 can condensed milk " , " 2 c . crushed pineapple " , " 1 / 2 c . chopped fresh apple " , " 1 / 4 tsp . water " ] [ " , " cream together . " , " , " , " cream cheese and marshmallows and velveeta . " use a little saver and cheese on fruit of nutmeg . " , " , " , " , " , " 1 box of mayonnaise , " 2 eggs , " 1 / 2 tbsp . " cook and cheese and salt and refrigerate . " add eggs , " bake at 350 \ u00b0 for 1 1 hour



In [1]:
# Sample a recipe for a cookie prompt; requires text_generator from the training cells.
info = text_generator.generate(
    start_prompt="recipe for chocolate chip cookies |",
    max_tokens=MAX_LEN,
    temperature=0.7,
)

NameError: name 'text_generator' is not defined

In [35]:
# Sample a recipe for a soup prompt.
info = text_generator.generate(
    start_prompt="recipe for chicken noodle soup |",
    max_tokens=MAX_LEN,
    temperature=0.7,
)


generated text:
recipe for chicken noodle soup | [ " , " 1 large can cream of chicken soup " , " 1 c . chicken soup " , " 1 can cream of chicken pieces " , " 1 / 2 chicken bouillon " , " 1 / 2 c . chicken soup " ] [ " , " , " preheat oven to 1 / 2 cake mix all ingredients . " , " mix well . " , " , " 1 / 4 to 1 / 2 cups of baking dish with pecans . " wrap . " , " , " sprinkle bacon mixture of cheese soup , " bake at 350 \ u00b0 for 1 1 / 2 to 10 minutes . " ,



In [38]:
# Sample a recipe for a short, single-ingredient prompt.
info = text_generator.generate(
    start_prompt="recipe for egg |",
    max_tokens=MAX_LEN,
    temperature=0.7,
)


generated text:
recipe for egg | [ " 1 / 2 c . unsweetened chocolate " , " 1 / 4 c . sugar " , " 1 c . sugar " , " 1 / 4 tsp . salt " , " 1 / 2 stick butter " , " 1 / 2 tsp . salt " , " 1 / 2 tsp . vanilla " ] [ " melt margarine , " preheat oven to a skillet . " and beat cream together . " bake at 350 \ u00b0 . " , " , " , " , " toss well . " , " , " , " add side of milk , " add vanilla . " , " ] 



In [20]:
# Sample a recipe for a grilled-cheese prompt.
info = text_generator.generate(
    start_prompt="recipe for grilled cheese |",
    max_tokens=MAX_LEN,
    temperature=0.7,
)


generated text:
recipe for grilled cheese | mix all ingredients together and press onto bottom and up side of a greased grill pan . bake at 350 degrees for 15 to 20 minutes . 



In [21]:
# Sample a recipe for a chicken-breast prompt.
info = text_generator.generate(
    start_prompt="recipe for chicken breast |",
    max_tokens=MAX_LEN,
    temperature=0.7,
)


generated text:
recipe for chicken breast | place chicken breast in a large pot over medium heat , add the olive oil , season with salt , pepper , cayenne pepper and cumin . cover and cook over low heat , stirring occasionally , until the chicken is cooked through and the water is absorbed , about 15 minutes . remove from the heat . add the wine and stir to combine . cover the pan , and cook until the chicken is cooked through , about 10 minutes . stir in the wine , salt , and pepper . cover and simmer until the chicken is tender , about 20 minutes . ( this will take about 15 minutes ) . serve with lemon - lime juice , lemon

