# Mazu Talk
Mazu Talk is a GPT-style, Transformer-based decoder. The code is adapted from two sources:
* the [GPT tutorial](https://keras.io/examples/generative/text_generation_with_miniature_gpt/) by Apoorv Nandan available on the Keras website.
* Generative Deep Learning, 2nd edition, by David Foster (O’Reilly), 2023.

## Install libraries and dependencies

In [None]:
!pip install -U deep-translator
!poetry add deep-translator   # for poetry usage
!pip install gTTS

from deep_translator import GoogleTranslator
from gtts import gTTS

In [1]:
%load_ext autoreload
%autoreload 2
import glob
import numpy as np
import json
import re
import string
from IPython.display import display, HTML
import os

import tensorflow as tf
import keras
from tensorflow.keras import layers, models, losses, callbacks, saving

2024-04-13 06:23:36.120555: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# Set min. log level for TF to mute warnings
# NOTE(review): this runs after `import tensorflow` in the previous cell, and INFO
# ("I ...") lines still show up in later cell outputs of this notebook, so the
# setting does not seem to take effect here -- presumably it has to be set before
# TensorFlow is imported; TODO confirm and move it above the import.
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "1"

## Parameters

In [3]:
VOCAB_SIZE = 100000  # max_tokens of the TextVectorization layer and width of the output softmax
MAX_LEN = 80  # tokens per training sequence; also the number of learned position embeddings
# EMBEDDING_DIM = 256
EMBEDDING_DIM = 512  # width of the token and position embeddings
# KEY_DIM = 256
KEY_DIM = 512  # key_dim passed to MultiHeadAttention
N_HEADS = 4  # number of attention heads
# FEED_FORWARD_DIM = 256
FEED_FORWARD_DIM = 512  # units of the first Dense layer inside the transformer block
VALIDATION_SPLIT = 0.2  # NOTE(review): not referenced anywhere in the visible notebook
SEED = 42  # NOTE(review): defined, but no RNG is seeded with it in the visible notebook
LOAD_MODEL = False  # meant to control whether checkpoint weights are loaded before training
BATCH_SIZE = 32  # batch size of the tf.data pipeline
EPOCHS = 1  # epochs passed to gpt.fit()
DATASET_REPETITIONS = 1  # argument of Dataset.repeat() when building train_ds

## Load the data
Chinese Poems are sourced from:
* https://www.kaggle.com/datasets/qianboao/chinesepoetrydataset
* https://github.com/chinese-poetry/chinese-poetry

### Helper functions

In [None]:
# Clean a text from special characters
def clean_text(text):
    """Strip bracketed asides, line breaks, tabs and curly quotes from ``text``.

    Applied in this order:
      1. every ``(...)`` span is dropped (non-greedy; a span is only matched
         when both brackets sit on the same line),
      2. newline and tab characters are deleted (no space is inserted),
      3. hyphens become spaces,
      4. curly double quotes are deleted.

    Returns the cleaned string.
    """
    # Single-character edits, applied in one pass after the bracket removal
    char_edits = str.maketrans(
        {'\n': None, '\t': None, '-': ' ', "“": None, "”": None}
    )
    without_brackets = re.sub(r'\(.*?\)', '', text)
    return without_brackets.translate(char_edits)

### Load Chinese poems from file

In [None]:
# Open file containing Chinese poetry.
# The encoding is passed explicitly because the file holds Chinese text and open()
# would otherwise fall back to the platform's locale encoding.
# Assumes the file is UTF-8 encoded -- TODO confirm.
with open('/app/data/chinese-poetry/chinese_poems.txt', 'r', encoding='utf-8') as f:
    zh_poems = f.readlines()

# Peek at the first poems and the corpus size
print(zh_poems[:5])
print(len(zh_poems))

### Translate Chinese poems to Swedish and save as files

In [None]:
# Automated workflow for translating Chinese poems.
# Poems are handled in slices of 1000; every slice is written to its own JSON file.

# Instantiate the Google Translator
translator = GoogleTranslator(source='zh-CN', target='sv')

for i in range(7000, 8000, 1000):
    print(f"translating batch {i}")
    # Create a List to store the translations in
    zh_poems_sv = []

    for counter, poem in enumerate(zh_poems[i: i + 1000]):
        # Inform progress
        if counter % 200 == 0:
            print(f"{counter} translations done!")
        # Clean the text
        poem = clean_text(poem)
        try:
            # Send the poem to the translator and append the result to the list
            zh_poems_sv.append(translator.translate(poem))
        except Exception as exc:
            # Best effort: the poem is skipped, but the reason is reported.
            # Catching Exception instead of a bare `except:` keeps
            # KeyboardInterrupt able to stop a long run.
            print(f"Error: Could not translate a poem. ({exc!r})")
    # Save the batch as a json file
    with open("/app/data/zh_poems_sv/zh_poems_sv_%000006d_%000006d.json" % (i, i + 1000), 'w') as f:
        json.dump(zh_poems_sv, f)
    print("Done!")

### Load Swedish translations of Chinese poems from saved files

In [4]:
# Find all the files. glob returns paths in arbitrary order, so they are sorted to
# make the order of the loaded poems the same on every run.
file_list = sorted(glob.glob("/app/data/zh_poems_sv/*.json"))
print(f"Found {len(file_list)} files")

# Put the file contents in a list
translations_sv = []
for file in file_list:
    with open(file, 'r', encoding='utf-8') as f:
        translations_sv.extend(json.load(f))

# Print some examples of the list
print(f"Found {len(translations_sv)} poems")
translations_sv[:2]

Found 8 files
Found 8000 poems


['Vem känner inte till våren när en tjänsteman degraderas? Han kan fortfarande vara full efter att ha lämnat Guo. Silverpennan jagar Bao Xie och Ximen skriver en mening för att imitera molnet. Dongyuan vågar läsa Bai Pengxi och Nanmu borde arbeta med tusentals par. Det finns nya dikter kvar att tigga. Jag är inte alls ond, jag är rädd att jag hörs över hela himlen genom att slå på mitt horn.',
 'Glaset utanför bambun är tio hektar brett, med glaserade plattor ristade högt och lågt. Höstvinden blåser genom den kalldoftande jianjian, ensam och vacker, den svala månen är kall i gryningen. Den glada atmosfären är lika hög som att gå ut av samhället, men vem kan se charmen och sederna hos Yi. Jag skämtar om att jag kysser min moster idag, och Taihua sjunger högt. Han räknas inte med.']

### Load PhD, English version

In [None]:
# Read the thesis line by line. The encoding is explicit so the result does not
# depend on the platform's locale; assumes the file is UTF-8 -- TODO confirm.
with open('/app/data/stjernholm-texts/phd_thesis.txt', 'r', encoding='utf-8') as f:
    phd = f.readlines()
phd[0:3]

In [None]:
# Remove headings and short paragraphs
def filter_long_strings(input_list, min_length=40):
    """
    Keeps only the strings that are at least min_length characters long.

    Args:
        input_list (list): List of strings.
        min_length (int, optional): Minimum length for strings to keep. Defaults to 40.

    Returns:
        list: New list containing only strings with length greater than or equal
            to min_length, in their original order.
    """
    # `>=` (not `>`): a string of exactly min_length characters is kept, as documented.
    # The loop variable is not called `string`, so it cannot be mistaken for the
    # imported `string` module.
    return [text for text in input_list if len(text) >= min_length]

# Example usage:
shorter_phd = filter_long_strings(phd)
shorter_phd[:2]

In [None]:
cleaned_phd = [clean_text(x) for x in shorter_phd]
cleaned_phd[:2]

### Translate PhD to Swedish and save as files

In [None]:
# Automated workflow for translating PhD to Swedish

# Instantiate the Google Translator
translator = GoogleTranslator(source='en', target='sv')

# Create a List to store the translations in
phd_sv = []

for section in cleaned_phd:
    # Clean the text
    section = clean_text(section)
    try:
        # Send the section to the translator and append the result to the list
        phd_sv.append(translator.translate(section))
    except Exception as exc:
        # Best effort: the section is skipped, but the reason is reported.
        # Catching Exception instead of a bare `except:` keeps KeyboardInterrupt
        # able to stop the run.
        print(f"Error: Could not translate a section. ({exc!r})")

# Save as a json file
with open("/app/data/stjernholm-texts/phd_thesis_sv.json", "w") as f:
    json.dump(phd_sv, f)
print("Done!")

### Load PhD Swedish version from file

In [5]:
# Load the Swedish translation of the thesis; explicit encoding keeps the read
# independent of the platform's locale.
with open("/app/data/stjernholm-texts/phd_thesis_sv.json", 'r', encoding='utf-8') as f:
    translation_phd = json.load(f)

print(type(translation_phd))
print(len(translation_phd))
translation_phd[:2]

<class 'list'>
603


['Denna avhandling undersöker sambanden mellan subjektiv perception och dansrörelser. Dessa primära relationer innebär i sin tur ett brett spektrum av tvärvetenskapliga sekundära relationer, relaterade till förkroppsligande, minne, prestation, subjektivitet, olika former av representation, medling, observation, teori och koreografiska praktiker. Uppfattningen i fråga hänvisar till ett fenomenologiskt inducerat tillvägagångssätt, huvudsakligen härrörande från Ernst Cassirer, Maurice Merleau Ponty och Gaston Bachelard. Min tillämpning av fenomenologisk teori kännetecknas vidare av en särskild läsning av de tre begreppen performativitet, virtualitet och abstraktion. Dansrörelserna i min studie exemplifieras huvudsakligen av två korta utdrag ur Merce Cunninghams verk BIPED, representerade som subjektiva upplevelser av liveframträdanden, videoinspelningar, textrepresentationer och minnen. Dessutom överväger jag en textstudie om koreografiska praktiker av Susan Foster, och element från Willi

### Load the Databricks Dataset

In [None]:
# Collect the records of the JSON-lines file that have no "context".
data = []

# Explicit encoding keeps the read independent of the platform's locale.
with open("/app/data/databricks/databricks-dolly-15k.jsonl", encoding="utf-8") as file:
    for line in file:
        # Tolerate blank lines (e.g. a trailing newline at the end of the file)
        if not line.strip():
            continue

        feature = json.loads(line)

        # Skip records that come with a non-empty "context"
        if feature["context"]:
            continue

        data.append(feature)

data[:2]

In [None]:
# Build one string per record: the instruction, a space, then the response.
# Records where either field is None are left out.
filtered_data = [
    record["instruction"] + " " + record["response"]
    for record in data
    if not (record["instruction"] is None or record["response"] is None)
]

print(len(filtered_data))
filtered_data[:3]

### Translate Databricks dataset to Swedish

In [None]:
# Automated workflow for translating Databricks dataset to Swedish

# Instantiate the Google Translator
translator = GoogleTranslator(source='en', target='sv')

# Create a List to store the translations in
bricks_sv = []

for counter, section in enumerate(filtered_data):
    # Show the progress for every 1000 section
    if counter % 1000 == 0:
        print(f"Translated {counter} sections")
    # Clean the text
    section = clean_text(section)
    try:
        # Send the section to the translator and append the result to the list
        bricks_sv.append(translator.translate(section))
    except Exception as exc:
        # Best effort: the section is skipped, but the reason is reported.
        # Catching Exception instead of a bare `except:` keeps KeyboardInterrupt
        # able to stop a long run.
        print(f"Error: Could not translate a section. ({exc!r})")

# Save as a json file
with open("/app/data/databricks/databricks-dolly-15k-sv.json", "w") as f:
    json.dump(bricks_sv, f)
print("Done!")

### Load Swedish Databricks dataset from file

In [6]:
# Load the Swedish translation of the Databricks dataset; explicit encoding keeps
# the read independent of the platform's locale.
with open("/app/data/databricks/databricks-dolly-15k-sv.json", 'r', encoding='utf-8') as f:
    translation_bricks = json.load(f)

In [None]:
# Look at the dataset
print(type(translation_bricks))
print(len(translation_bricks))
translation_bricks[:5]

### Concatenate the data

In [7]:
complete_data = translations_sv + translation_phd + translation_bricks
len(complete_data)

19001

## Tokenize the data

In [8]:
# Pad the punctuation, to treat them as separate 'words'
def pad_punctuation(s):
    """Put a space on both sides of every punctuation mark, then squeeze space runs.

    Every character matched by the pattern (punctuation and line breaks) ends up
    surrounded by spaces, so it can be split off as a token of its own; runs of
    several spaces are collapsed into a single space afterwards.
    """
    separators = re.compile(f"([{string.punctuation}, '\n'])")
    spaced_out = separators.sub(r" \1 ", s)
    return re.sub(" +", " ", spaced_out)

text_data = [pad_punctuation(x) for x in complete_data]

In [None]:
# Display one example of the punctuation-padded texts (index 25)
example_data = text_data[25]
example_data

In [9]:
# Convert to a Tensorflow Dataset
# NOTE(review): .shuffle() is applied after .batch(), so whole batches are shuffled
# rather than individual texts, and no seed is passed -- confirm this is intended.
text_ds = (
    tf.data.Dataset.from_tensor_slices(text_data)
    .batch(BATCH_SIZE)
    .shuffle(1000)
)

2024-04-13 06:24:05.199221: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:984] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-04-13 06:24:05.204463: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:984] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-04-13 06:24:05.204492: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:984] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-04-13 06:24:05.206999: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:984] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-04-13 06:24:05.207029: I external/local_xla/xla/stream_executor

In [10]:
# Create a vectorisation layer
# - standardize="lower": texts are only lower-cased by the layer
# - output_sequence_length=MAX_LEN + 1: one extra token, so that an input and a
#   target of MAX_LEN tokens each can be cut from the same sequence, shifted by one
vectorize_layer = layers.TextVectorization(
    standardize="lower",
    max_tokens=VOCAB_SIZE,
    output_mode="int",
    output_sequence_length=MAX_LEN + 1,
)

In [11]:
# Adapt the layer to the training set
vectorize_layer.adapt(text_ds)
vocab = vectorize_layer.get_vocabulary()

2024-04-13 06:24:23.110105: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


In [None]:
# Display some token:word mappings
for i, word in enumerate(vocab[:10]):
    print(f"{i}: {word}")

In [None]:
# Display the same example converted to ints
example_tokenised = vectorize_layer(example_data)
print(example_tokenised.numpy())

## Create the Training Set

In [12]:
# Create the training set: each text and the same text shifted by one token
def prepare_inputs(text):
    """Tokenise a batch of strings into (input, target) pairs.

    The target holds the same token ids as the input, moved one position to the
    left: the input drops the last token, the target drops the first one.
    """
    token_ids = vectorize_layer(tf.expand_dims(text, -1))
    return token_ids[:, :-1], token_ids[:, 1:]


# train_ds = text_ds.map(prepare_inputs)
train_ds = text_ds.map(prepare_inputs).repeat(DATASET_REPETITIONS)

In [None]:
example_input_output = train_ds.take(1).get_single_element()
# Example Input
example_input_output[0][0]

In [None]:
# Example Output (shifted by one token)
example_input_output[1][0]

## Create the Causal Attention Mask function

In [13]:
def causal_attention_mask(batch_size, n_dest, n_src, dtype):
    """Build the causal attention mask, repeated once per batch item.

    Entry [b, i, j] is truthy when ``i >= j - n_src + n_dest``; for
    ``n_dest == n_src`` this is simply ``i >= j``, i.e. destination position i
    may attend to source positions up to and including i.

    Returns a tensor of shape [batch_size, n_dest, n_src] cast to ``dtype``.
    """
    dest_idx = tf.range(n_dest)[:, None]
    src_idx = tf.range(n_src)
    allowed = dest_idx >= src_idx - n_src + n_dest
    single_mask = tf.reshape(tf.cast(allowed, dtype), [1, n_dest, n_src])
    # Tile factors: batch_size copies along axis 0, one copy along the other two
    repeats = tf.concat(
        [tf.expand_dims(batch_size, -1), tf.constant([1, 1], dtype=tf.int32)], 0
    )
    return tf.tile(single_mask, repeats)


np.transpose(causal_attention_mask(1, 10, 10, dtype=tf.int32)[0])

array([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
       [0, 1, 1, 1, 1, 1, 1, 1, 1, 1],
       [0, 0, 1, 1, 1, 1, 1, 1, 1, 1],
       [0, 0, 0, 1, 1, 1, 1, 1, 1, 1],
       [0, 0, 0, 0, 1, 1, 1, 1, 1, 1],
       [0, 0, 0, 0, 0, 1, 1, 1, 1, 1],
       [0, 0, 0, 0, 0, 0, 1, 1, 1, 1],
       [0, 0, 0, 0, 0, 0, 0, 1, 1, 1],
       [0, 0, 0, 0, 0, 0, 0, 0, 1, 1],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 1]], dtype=int32)

## Create a Transformer Block layer

In [29]:
@keras.saving.register_keras_serializable()
class TransformerBlock(layers.Layer):
    """Decoder block: causal multi-head self-attention followed by a feed-forward net.

    Both sub-layers are followed by dropout, a residual connection and layer
    normalisation.

    Args:
        num_heads: Number of attention heads.
        key_dim: Size of each attention head for query and key.
        embed_dim: Width of the block's input and output.
        ff_dim: Units of the hidden feed-forward layer.
        dropout_rate: Dropout rate used after the attention and the feed-forward
            sub-layer. Defaults to 0.1.
        **kwargs: Base-layer arguments (e.g. name, dtype, trainable), forwarded to
            ``layers.Layer``.

    Serialisation: ``get_config`` returns the constructor arguments as plain values
    together with the base-layer config, and the inherited ``from_config`` rebuilds
    the layer with ``cls(**config)``. Passing the values by keyword avoids any
    dependence on the positional order of the constructor arguments.
    """

    def __init__(self, num_heads, key_dim, embed_dim, ff_dim, dropout_rate=0.1, **kwargs):
        # Forward base-layer arguments so a layer rebuilt from its config keeps
        # its name, dtype and trainable flag.
        super().__init__(**kwargs)
        self.num_heads = num_heads
        self.key_dim = key_dim
        self.embed_dim = embed_dim
        self.ff_dim = ff_dim
        self.dropout_rate = dropout_rate
        self.attn = layers.MultiHeadAttention(
            num_heads, key_dim, output_shape=embed_dim
        )
        self.dropout_1 = layers.Dropout(self.dropout_rate)
        self.ln_1 = layers.LayerNormalization(epsilon=1e-6)
        self.ffn_1 = layers.Dense(self.ff_dim, activation="relu")
        self.ffn_2 = layers.Dense(self.embed_dim)
        self.dropout_2 = layers.Dropout(self.dropout_rate)
        self.ln_2 = layers.LayerNormalization(epsilon=1e-6)

    def call(self, inputs):
        """Apply the block to ``inputs``.

        The first two dimensions of ``inputs`` are read as batch size and sequence
        length when the causal mask is built.

        Returns:
            A tuple ``(output, attention_scores)``.
        """
        input_shape = tf.shape(inputs)
        batch_size = input_shape[0]
        seq_len = input_shape[1]
        causal_mask = causal_attention_mask(
            batch_size, seq_len, seq_len, tf.bool
        )
        # Self-attention: the inputs serve as both query and value
        attention_output, attention_scores = self.attn(
            inputs,
            inputs,
            attention_mask=causal_mask,
            return_attention_scores=True,
        )
        attention_output = self.dropout_1(attention_output)
        out1 = self.ln_1(inputs + attention_output)
        ffn_1 = self.ffn_1(out1)
        ffn_2 = self.ffn_2(ffn_1)
        ffn_output = self.dropout_2(ffn_2)
        return (self.ln_2(out1 + ffn_output), attention_scores)

    def get_config(self):
        """Return the constructor arguments on top of the base-layer config."""
        config = super().get_config()
        config.update(
            {
                "num_heads": self.num_heads,
                "key_dim": self.key_dim,
                "embed_dim": self.embed_dim,
                "ff_dim": self.ff_dim,
                "dropout_rate": self.dropout_rate,
            }
        )
        return config

## Create Token and Position Embedding

In [30]:
@keras.saving.register_keras_serializable()
class TokenAndPositionEmbedding(layers.Layer):
    """Sum of a token embedding and a learned position embedding.

    Args:
        max_len: Number of positions the position embedding covers.
        vocab_size: Number of token ids the token embedding covers.
        embed_dim: Width of both embeddings.
        **kwargs: Base-layer arguments (e.g. name, dtype, trainable), forwarded to
            ``layers.Layer``.

    Serialisation: ``get_config`` returns the constructor arguments as plain values
    together with the base-layer config, and the inherited ``from_config`` rebuilds
    the layer with ``cls(**config)``, so base-layer settings such as the layer name
    are restored as well.
    """

    def __init__(self, max_len, vocab_size, embed_dim, **kwargs):
        super().__init__(**kwargs)
        self.max_len = max_len
        self.vocab_size = vocab_size
        self.embed_dim = embed_dim
        self.token_emb = layers.Embedding(
            input_dim=vocab_size, output_dim=embed_dim
        )
        self.pos_emb = layers.Embedding(input_dim=max_len, output_dim=embed_dim)

    def call(self, x):
        """Embed the token ids in ``x`` and add the embedding of each position.

        The size of the last dimension of ``x`` is taken as the sequence length.
        """
        maxlen = tf.shape(x)[-1]
        positions = tf.range(start=0, limit=maxlen, delta=1)
        positions = self.pos_emb(positions)
        x = self.token_emb(x)
        return x + positions

    def get_config(self):
        """Return the constructor arguments on top of the base-layer config."""
        config = super().get_config()
        config.update(
            {
                "max_len": self.max_len,
                "vocab_size": self.vocab_size,
                "embed_dim": self.embed_dim,
            }
        )
        return config

## Build the Transformer Model

In [31]:
# Integer token ids in; the sequence length is left open
inputs = layers.Input(shape=(None,), dtype=tf.int32)
x = TokenAndPositionEmbedding(MAX_LEN, VOCAB_SIZE, EMBEDDING_DIM)(inputs)
# The block returns both the transformed sequence and its attention scores
x, attention_scores = TransformerBlock(
    N_HEADS, KEY_DIM, EMBEDDING_DIM, FEED_FORWARD_DIM
)(x)
# Softmax over the vocabulary for every position
outputs = layers.Dense(VOCAB_SIZE, activation="softmax")(x)
# Two model outputs: the softmax output and the attention scores
gpt = models.Model(inputs=inputs, outputs=[outputs, attention_scores])
# A loss is given for the first output only; the attention scores get None
gpt.compile("adam", loss=[losses.SparseCategoricalCrossentropy(), None])

In [32]:
gpt.summary()

In [33]:
# Load the checkpoint when LOAD_MODEL is set, or whenever a checkpoint file already
# exists. A fresh environment without a checkpoint can then still run this cell,
# while LOAD_MODEL = True with a missing file fails loudly.
checkpoint_path = "./checkpoint/checkpoint.weights.h5"
if LOAD_MODEL or os.path.exists(checkpoint_path):
    gpt.load_weights(checkpoint_path)

## Train the Transformer

In [34]:
# Create a TextGenerator checkpoint
class TextGenerator(callbacks.Callback):
    """Callback that prints a text sampled from the model after every epoch.

    Args:
        index_to_word: Sequence mapping a token id (its index) to the token string.
        top_k: Stored on the instance but not used by the sampling code; kept so
            existing calls that pass it keep working.
    """

    def __init__(self, index_to_word, top_k=10):
        # Let the Keras base class set up its own state
        super().__init__()
        self.index_to_word = index_to_word
        self.word_to_index = {
            word: index for index, word in enumerate(index_to_word)
        }
        self.top_k = top_k

    def sample_from(self, probs, temperature):
        """Rescale ``probs`` by ``temperature`` and draw one index from the result.

        Returns:
            A tuple ``(sampled_index, rescaled_probs)``.
        """
        probs = probs ** (1 / temperature)
        probs = probs / np.sum(probs)
        return np.random.choice(len(probs), p=probs), probs

    def _prompt_token(self, word):
        """Map one prompt word to its token id.

        An exact match wins; otherwise the lower-cased word is tried, so that a
        capitalised prompt word still hits a lower-cased vocabulary. Words found in
        neither form map to id 1 (presumably the vectoriser's unknown-word token --
        TODO confirm).
        """
        if word in self.word_to_index:
            return self.word_to_index[word]
        return self.word_to_index.get(word.lower(), 1)

    def generate(self, start_prompt, max_tokens, temperature):
        """Extend ``start_prompt`` token by token and print the resulting text.

        Sampling stops once ``max_tokens`` tokens are reached or token id 0 is
        drawn (presumably the padding token -- TODO confirm).

        Returns:
            A list with one dict per generated token, holding the prompt so far
            ("prompt"), the sampling distribution ("word_probs") and the attention
            scores of the last position ("atts").
        """
        start_tokens = [self._prompt_token(x) for x in start_prompt.split()]
        sample_token = None
        info = []
        while len(start_tokens) < max_tokens and sample_token != 0:
            x = np.array([start_tokens])
            y, att = self.model.predict(x, verbose=0)
            # Only the distribution predicted for the last position is sampled
            sample_token, probs = self.sample_from(y[0][-1], temperature)
            info.append(
                {
                    "prompt": start_prompt,
                    "word_probs": probs,
                    "atts": att[0, :, -1, :],
                }
            )
            start_tokens.append(sample_token)
            start_prompt = start_prompt + " " + self.index_to_word[sample_token]
        print(f"\ngenerated text:\n{start_prompt}\n")
        return info

    def on_epoch_end(self, epoch, logs=None):
        """Print a sample that starts with the prompt "Vatten"."""
        self.generate("Vatten", max_tokens=80, temperature=1.0)

In [35]:
# Create a model save checkpoint: weights only, written at the end of every epoch
model_checkpoint_callback = callbacks.ModelCheckpoint(
    filepath="./checkpoint/checkpoint.weights.h5",
    save_weights_only=True,
    save_freq="epoch",
    verbose=0,
)

# Write training logs for TensorBoard to ./logs
tensorboard_callback = callbacks.TensorBoard(log_dir="./logs")

# Text-sampling callback built on the vocabulary of the vectorisation layer
text_generator = TextGenerator(vocab)

In [36]:
gpt.fit(
    train_ds,
    epochs=EPOCHS,
    callbacks=[model_checkpoint_callback, tensorboard_callback, text_generator],
)

I0000 00:00:1712990210.568459    7169 service.cc:145] XLA service 0x7f1424012b50 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1712990210.568513    7169 service.cc:153]   StreamExecutor device (0): NVIDIA RTX A2000 8GB Laptop GPU, Compute Capability 8.6
2024-04-13 06:36:50.615815: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:268] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
W0000 00:00:1712990210.704922    7169 assert_op.cc:38] Ignoring Assert operator compile_loss/sparse_categorical_crossentropy/SparseSoftmaxCrossEntropyWithLogits/assert_equal_1/Assert/Assert
2024-04-13 06:36:50.842635: I external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:465] Loaded cuDNN version 8906




















I0000 00:00:1712990231.761907    7169 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m136/594[0m [32m━━━━[0m[37m━━━━━━━━━━━━━━━━[0m [1m1:16[0m 167ms/step - loss: 0.4016

W0000 00:00:1712990254.852151    7171 assert_op.cc:38] Ignoring Assert operator compile_loss/sparse_categorical_crossentropy/SparseSoftmaxCrossEntropyWithLogits/assert_equal_1/Assert/Assert























[1m594/594[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 199ms/step - loss: 0.4327



































































































































generated text:
Vatten är inte skattedagen den 15 april ? inkomstskatter i allmänhet måste i allmänhet lämnas in senast den 15 april . men när den 15 april infaller på en helg eller helgdag flyttas anmälningstiden till nästa arbetsdag . 

[1m594/594[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m226s[0m 341ms/step - loss: 0.4327


<keras.src.callbacks.history.History at 0x7f14e5101e90>

### Save the entire model

In [37]:
# Save model
gpt.save("./models/gpt.keras")

In [39]:
# Load model
# NOTE(review): in the recorded run this call raised the RuntimeError shown in the
# cell output below, i.e. reloading the saved model did not work at that point.
new_model = saving.load_model(
    "./models/gpt.keras",
    custom_objects={
        'TokenAndPositionEmbedding': TokenAndPositionEmbedding,
        'TransformerBlock': TransformerBlock,
    },
)



RuntimeError: Exception encountered when calling TokenAndPositionEmbedding.call().

[1mCould not automatically infer the output shape / dtype of 'token_and_position_embedding_6' (of type TokenAndPositionEmbedding). Either the `TokenAndPositionEmbedding.call()` method is incorrect, or you need to implement the `TokenAndPositionEmbedding.compute_output_spec() / compute_output_shape()` method. Error encountered:

Variable transformer_block_5/multi_head_attention_5/query/kernel is already initialized.[0m

Arguments received by TokenAndPositionEmbedding.call():
  • args=('<KerasTensor shape=(None, None), dtype=int32, sparse=False, name=input_layer_4>',)
  • kwargs=<class 'inspect._empty'>

# Text to Speech

In [None]:
tts = gTTS('Jag heter Johan', lang='sv', slow=True)
tts.save('hello.mp3')

# Generate Text

In [41]:
info = text_generator.generate(
    "Meningen med livet", max_tokens=80, temperature=0.5
)


generated text:
Meningen med livet och det finns inget sätt att bo i val , men det finns många sätt att bo i en dröm . en bra person kan ses som ett nytt hus . den här listan är några förslag på att tala i byggde den dåliga stora pyramiden för av kanalen i den här världen : * höjd kryper här är några enkla stora snacks , utmärkta kollektivtrafik , * kollektivtrafik , kollektivtrafiken eller transporter . 

