In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import numpy as np
import re

In [None]:
class TransformerModel:
    """Builder for a small causally-masked Transformer language model.

    The instance only stores hyper-parameters; `create_model` assembles and
    compiles the Keras functional model.

    Args:
        vocab_size: Number of distinct token ids (size of the softmax output).
        embedding_dim: Width of the token/position embeddings and of each
            block's output.
        num_blocks: Number of stacked Transformer blocks.
        num_heads: Number of attention heads in each block.
        ff_dim: Width of the hidden feed-forward layer inside each block.
        max_len: Fixed input sequence length the model is built for.
        dropout_rate: Dropout rate applied after attention and feed-forward.
    """

    def __init__(self, vocab_size, embedding_dim=256, num_blocks = 1, num_heads = 2, ff_dim=256, max_len=20, dropout_rate=0.1):
        self.vocab_size = vocab_size
        self.embedding_dim = embedding_dim
        self.num_blocks = num_blocks
        self.num_heads = num_heads
        self.ff_dim = ff_dim
        self.max_len = max_len
        self.dropout_rate = dropout_rate

    @staticmethod
    def _layer_name(base, block_idx):
        """Return a layer name that is unique per block.

        Block 0 keeps the bare name (so a single-block model has the same
        layer names as before); later blocks get a `_<index>` suffix.
        """
        return base if block_idx == 0 else f"{base}_{block_idx}"

    def TransformerBlock(self, inputs, block_idx=0):
        """Apply one Transformer block (self-attention + feed-forward).

        Args:
            inputs: Symbolic tensor of shape (batch, seq_len, embedding_dim).
            block_idx: Position of this block in the stack. Used only to keep
                layer names unique when several blocks are stacked.

        Returns:
            Symbolic tensor with the same shape as `inputs`.
        """
        def name(base):
            return self._layer_name(base, block_idx)

        # Causal self-attention: each position may only attend to itself and
        # to earlier positions.
        attention = layers.MultiHeadAttention(
            num_heads=self.num_heads,
            key_dim=self.embedding_dim,
            name=name("mha"),
        )(inputs, inputs, use_causal_mask=True)
        attention = layers.Dropout(rate=self.dropout_rate, name=name("dropout1"))(attention)

        # Residual connection followed by layer normalisation.
        residual1 = layers.Add(name=name("residual1"))([inputs, attention])
        norm1 = layers.LayerNormalization(epsilon=1e-6, name=name("norm1"))(residual1)

        # Position-wise feed-forward network.
        # NOTE(review): the second projection keeps the original ReLU; the
        # reference Transformer uses a linear projection here - confirm intent.
        fc1 = layers.Dense(self.ff_dim, activation="relu", name=name("fc1"))(norm1)
        fc2 = layers.Dense(self.embedding_dim, activation="relu", name=name("fc2"))(fc1)
        feed_forward = layers.Dropout(rate=self.dropout_rate, name=name("dropout2"))(fc2)

        residual2 = layers.Add(name=name("residual2"))([norm1, feed_forward])
        return layers.LayerNormalization(epsilon=1e-6, name=name("norm2"))(residual2)

    def EmbeddingLayer(self, inputs):
        """Embed token ids and add a learned positional embedding.

        Args:
            inputs: Symbolic tensor of token ids, shape (batch, max_len).

        Returns:
            Symbolic tensor of shape (batch, max_len, embedding_dim).
        """
        word_embedding = layers.Embedding(
            self.vocab_size, self.embedding_dim, name="word_embedding"
        )(inputs)

        # Derive the position ids from the model input so that the positional
        # Embedding is called on a symbolic tensor and therefore becomes part
        # of the model graph (calling it on a constant tf.range keeps it
        # outside the graph). Shape: (batch, max_len), values 0..max_len-1.
        # Caveat: a Lambda wrapping a Python lambda limits model serialisation.
        max_len = self.max_len
        position_ids = layers.Lambda(
            lambda tokens: tf.zeros_like(tokens, dtype=tf.int32) + tf.range(max_len),
            name="position_ids",
        )(inputs)
        pos_embedding = layers.Embedding(
            self.max_len, self.embedding_dim, name="pos_embedding"
        )(position_ids)

        # Both operands now have the same rank, so Add needs no broadcasting.
        return layers.Add(name="add1")([word_embedding, pos_embedding])

    def create_model(self):
        """Build and compile the full model.

        Returns:
            A compiled `keras.Model` mapping integer token ids of shape
            (batch, max_len) to per-position softmax distributions of shape
            (batch, max_len, vocab_size). It is compiled with Adam and sparse
            categorical cross-entropy, so targets are integer ids of shape
            (batch, max_len).
        """
        inputs = layers.Input(shape=(self.max_len,), dtype="int32", name='inputs')

        hidden = self.EmbeddingLayer(inputs)
        for block_idx in range(self.num_blocks):
            hidden = self.TransformerBlock(hidden, block_idx=block_idx)

        # Dense acts on the last axis, giving one next-token distribution per
        # position. No Flatten: flattening would mix all positions and produce
        # a single (batch, vocab_size) output.
        outputs = layers.Dense(self.vocab_size, activation='softmax', name='outputs')(hidden)

        model = keras.Model(inputs=inputs, outputs=outputs)
        model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
        return model




In [None]:
import numpy as np

class DataSet:
    """Character-level next-token dataset built from a text file.

    Args:
        filename: Path of the text file to read.
        sequence_length: Number of tokens in every input/target window.
        encoding: Text encoding passed to `open`. `None` (default) keeps the
            platform default encoding.
    """

    def __init__(self, filename, sequence_length, encoding=None):
        with open(filename, 'r', encoding=encoding) as file:
            self.text = file.read()
        self.sequence_length = sequence_length

    def prep_text(self):
        """Strip every character that is neither a word character nor whitespace."""
        self.text = re.sub(r'[^\w\s]', '', self.text)

    def tokenize_text(self):
        """Build the character vocabulary and replace `self.text` by token ids.

        Sets `vocab` (sorted unique characters), `vocab_size`, `char_to_idx`
        and `idx_to_char`; afterwards `self.text` is a 1-D integer array.
        """
        self.vocab = np.unique(list(self.text))
        self.vocab_size = len(self.vocab)
        self.char_to_idx = {char: idx for idx, char in enumerate(self.vocab)}
        self.idx_to_char = {idx: char for char, idx in self.char_to_idx.items()}
        # Explicit integer dtype so that an empty text still yields an int array.
        self.text = np.array([self.char_to_idx[c] for c in self.text], dtype=np.int64)

    def create_dataset(self):
        """Cut the tokenised text into overlapping windows.

        Returns:
            A tuple `(x, y, vocab)`. `x` and `y` are integer arrays of shape
            (num_windows, sequence_length); row i of `y` is row i of `x`
            shifted one token to the right. `vocab` is the array of unique
            characters. When the text has no more than `sequence_length`
            tokens, `x` and `y` have zero rows.

        Raises:
            ValueError: If `sequence_length` is smaller than 1.
        """
        if self.sequence_length < 1:
            raise ValueError("sequence_length must be at least 1")

        # tokenize_text turns self.text into an array; skip the preparation
        # when it has already run so the method can be called repeatedly.
        if not isinstance(self.text, np.ndarray):
            self.prep_text()
            self.tokenize_text()

        tokens = self.text
        # Every start i with i + sequence_length < len(tokens) has a complete
        # target window, so there are len(tokens) - sequence_length windows.
        num_windows = max(len(tokens) - self.sequence_length, 0)
        starts = np.arange(num_windows)[:, None]
        offsets = np.arange(self.sequence_length)[None, :]
        window_index = starts + offsets

        x = tokens[window_index]
        y = tokens[window_index + 1]
        return x, y, self.vocab

In [None]:
class GenerateText():
    """Greedy, character-level text generation with a trained model.

    Args:
        model: Object with a Keras-style `predict(batch, verbose=0)` method
            and, optionally, an `input_shape` attribute whose second entry is
            the fixed input length.
        vocab: Sequence of characters; the position of a character in this
            sequence is its token id.
    """

    def __init__(self, model, vocab):
        self.vocab = vocab
        self.model = model

    def _window_length(self):
        """Return the model's fixed input length, or None when it has none."""
        shape = getattr(self.model, "input_shape", None)
        if shape is not None and len(shape) > 1:
            return shape[1]
        return None

    def generate_text(self, start_string, num_generate=100):
        """Extend `start_string` by `num_generate` greedily chosen characters.

        Each step feeds the most recent tokens to the model and appends the
        arg-max token. Windows shorter than the model input are right-padded
        with token id 0. For a per-position output of rank 3 the distribution
        at the last real token is used; for a rank-2 output the single
        distribution is used.

        Args:
            start_string: Non-empty prompt; every character must be in the
                vocabulary.
            num_generate: Number of characters to generate.

        Returns:
            The prompt followed by the generated characters, as one string.

        Raises:
            ValueError: If the prompt is empty or contains a character that is
                not in the vocabulary.
        """
        token_of = {str(symbol): idx for idx, symbol in enumerate(self.vocab)}

        unknown = sorted({ch for ch in start_string if ch not in token_of})
        if unknown:
            raise ValueError(f"characters not in vocabulary: {unknown!r}")
        tokens = [token_of[ch] for ch in start_string]
        if not tokens:
            raise ValueError("start_string must contain at least one character")

        window_len = self._window_length()
        for _ in range(num_generate):
            # Keep only as much context as the model can take.
            context = tokens if window_len is None else tokens[-window_len:]
            num_real = len(context)
            padding = 0 if window_len is None else window_len - num_real
            # Shape (1, window): a batch holding a single sequence.
            batch = np.asarray([list(context) + [0] * padding])

            prediction = np.asarray(self.model.predict(batch, verbose=0))
            if prediction.ndim == 3:
                scores = prediction[0, num_real - 1]
            else:
                scores = prediction[0]
            tokens.append(int(np.argmax(scores)))

        return ''.join(str(self.vocab[idx]) for idx in tokens)

    def generate_random_text(self, num_generate=100):
        """Generate text starting from one randomly chosen vocabulary entry."""
        start_string = str(np.random.choice(self.vocab))
        return self.generate_text(start_string, num_generate)

In [None]:
def train_model(model, vocab, x, y, epochs=50, verbose=1):
    """Fit `model` on the given inputs and targets.

    Args:
        model: Object with a Keras-style `fit` method.
        vocab: Currently unused; kept so existing callers keep working.
        x: Training inputs. Lists/tuples are converted to one NumPy array so
            that a list of per-sample arrays is passed as a single batch
            input.
        y: Training targets, converted the same way as `x`.
        epochs: Number of epochs passed to `fit`.
        verbose: Truthy (default) logs one line per epoch; falsy silences
            `fit`.

    Returns:
        Whatever `model.fit` returns.
    """
    if isinstance(x, (list, tuple)):
        x = np.asarray(x)
    if isinstance(y, (list, tuple)):
        y = np.asarray(y)

    # `use_multiprocessing` is deliberately not passed: the inputs here are
    # in-memory arrays, not a generator.
    history = model.fit(x, y, verbose=2 if verbose else 0, epochs=epochs)
    return history

In [None]:
# Read the lyrics file and cut it into windows of 20 tokens:
# x holds the input windows, y the matching targets, vocab the symbol table.
dataset = DataSet(filename='beatles.txt', sequence_length=20)
x, y, vocab = dataset.create_dataset()


In [None]:
# Build the network for exactly the window length the dataset was cut into,
# instead of relying on the default max_len happening to match.
transformer = TransformerModel(len(vocab), max_len=dataset.sequence_length)
model = transformer.create_model()

# Train the model. The value returned by train_model is kept under its own
# name so that `model` keeps referring to the network itself.
history = train_model(model=model, vocab=vocab, x=x, y=y, epochs=1, verbose=1)