# Project 4
## Students:
 > Austin Houston,
 > Alexander Krneta
 
 

In [5]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import numpy as np
import re

print(tf.__version__)  # Show the installed TensorFlow version; you may want to upgrade to 2.10.0


2.9.0


## Task 1

In [7]:
class TransformerModel(keras.Model):
    """Decoder-style Transformer language model over integer token ids.

    The forward pass is: token + position embedding -> ``num_TransformerBlocks``
    causal self-attention blocks -> per-position softmax over the vocabulary.

    Args:
        vocab_size: Number of distinct token ids. Ids fed to the model must be
            in ``[0, vocab_size)``; this is also the size of the softmax output.
        num_TransformerBlocks: Number of transformer blocks to stack.
        embed_dim: Width of the embeddings and of every block's output.
        num_heads: Number of attention heads in each block.
        num_blocks: Stored on the instance but not used to size the model;
            the stack depth is controlled by ``num_TransformerBlocks``.
        ff_dim: Width of the hidden feed-forward Dense layer in each block.
        maxlen: Size of the position-embedding table; input sequences must not
            be longer than this.
        dropout_rate: Rate used by every Dropout layer.
    """

    def __init__(self, vocab_size, num_TransformerBlocks, embed_dim=256, num_heads=2, num_blocks=1, ff_dim=256, maxlen=80, dropout_rate=0.1):
        super().__init__()
        self.vocab_size = vocab_size
        self.num_transformer_blocks = num_TransformerBlocks
        self.embed_dim = embed_dim
        self.num_heads = num_heads
        self.num_blocks = num_blocks
        self.ff_dim = ff_dim
        self.max_length = maxlen
        self.dropout_rate = dropout_rate

        # The builders below read the hyper-parameters stored above, so they
        # must run after those assignments. Each sub-model owns its own
        # symbolic Input; nothing is stored under `self.inputs`, which
        # keras.Model manages itself.
        self.embedding_layer = self.EmbeddingLayer()

        # One independent block (separate weights) per requested layer.
        self.transformer_blocks = [self.TransformerBlock() for _ in range(num_TransformerBlocks)]

        # Per-position probability distribution over the vocabulary.
        self.output_layer = layers.Dense(vocab_size, activation='softmax')

    def call(self, inputs, training=None):
        """Run the forward pass.

        Args:
            inputs: Integer token ids, shape ``(batch, sequence_length)``.
            training: Forwarded to the sub-models so Dropout is only active
                while training.

        Returns:
            Tensor of shape ``(batch, sequence_length, vocab_size)`` holding the
            softmax distribution for the token following each position.
        """
        hidden = self.embedding_layer(inputs, training=training)
        for block in self.transformer_blocks:
            hidden = block(hidden, training=training)
        return self.output_layer(hidden)

    def TransformerBlock(self):
        """Build one transformer block as a functional ``keras.Model``.

        Structure: causal multi-head self-attention -> dropout -> residual add
        + LayerNormalization -> Dense(ff_dim, relu) -> dropout ->
        Dense(embed_dim) -> dropout -> residual add + LayerNormalization.

        Returns:
            A ``keras.Model`` mapping ``(batch, seq, embed_dim)`` to a tensor of
            the same shape. Every call creates new layers with fresh weights.
        """
        block_input = keras.Input(shape=(None, self.embed_dim))

        # Causal mask of shape (1, seq, seq): entry [0, i, j] is True when
        # query position i may attend to key position j, i.e. j <= i. Without
        # it each position could look at the very token it is trained to
        # predict. It is built explicitly (rather than via the call argument
        # `use_causal_mask`) so it does not depend on that argument existing.
        steps = tf.range(tf.shape(block_input)[1])
        causal_mask = tf.reshape(steps, (1, -1, 1)) >= tf.reshape(steps, (1, 1, -1))

        # Multi-Head Attention layer
        # Sums the input to the block and the output from the first dropout
        attention = layers.MultiHeadAttention(num_heads=self.num_heads, key_dim=self.embed_dim)(
            block_input, block_input, attention_mask=causal_mask
        )
        attention = layers.Dropout(rate=self.dropout_rate)(attention)
        attention = layers.LayerNormalization(epsilon=1e-6)(layers.Add()([block_input, attention]))

        # Feed-Forward Dense layers
        # Sums the output of the first LayerNormalization and the last dropout
        dense = layers.Dense(units=self.ff_dim, activation='relu')(attention)
        dense = layers.Dropout(rate=self.dropout_rate)(dense)
        dense = layers.Dense(units=self.embed_dim)(dense)
        dense = layers.Dropout(rate=self.dropout_rate)(dense)
        dense = layers.LayerNormalization(epsilon=1e-6)(layers.Add()([attention, dense]))

        # No fixed `name=`: several blocks live in the same parent model and
        # Keras generates a unique name for each one.
        return keras.Model(inputs=block_input, outputs=dense)

    def EmbeddingLayer(self):
        """Build the token + position embedding as a functional ``keras.Model``.

        Returns:
            A ``keras.Model`` mapping integer token ids of shape
            ``(batch, seq)`` to embeddings of shape ``(batch, seq, embed_dim)``:
            the sum of a learned token embedding and a learned position
            embedding, followed by dropout.
        """
        token_ids = keras.Input(shape=(None,), dtype='int32')

        token_embedding = layers.Embedding(input_dim=self.vocab_size, output_dim=self.embed_dim)
        positional_embedding = layers.Embedding(
            input_dim=self.max_length,
            output_dim=self.embed_dim,
            embeddings_initializer=keras.initializers.RandomUniform(),
        )

        # Positions 0..seq-1 are derived from the runtime sequence length, so
        # inputs shorter than `maxlen` are accepted and the position table
        # stays a trainable part of the graph.
        position_ids = tf.range(start=0, limit=tf.shape(token_ids)[-1], delta=1, dtype=tf.int32)

        # (batch, seq, embed_dim) + (seq, embed_dim) broadcasts over the batch.
        embeddings = token_embedding(token_ids) + positional_embedding(position_ids)
        embeddings = layers.Dropout(self.dropout_rate)(embeddings)

        return keras.Model(inputs=token_ids, outputs=embeddings)

    def create_model(self, vocab_size, num_TransformerBlocks, embed_dim, num_layers, num_heads, hidden_dim, dropout):
        """Create and compile a new model of the same class.

        Args:
            vocab_size: Passed through as ``vocab_size``.
            num_TransformerBlocks: Passed through as ``num_TransformerBlocks``.
            embed_dim: Passed through as ``embed_dim``.
            num_layers: Passed through as ``num_blocks``.
            num_heads: Passed through as ``num_heads``.
            hidden_dim: Passed through as ``ff_dim``.
            dropout: Passed through as ``dropout_rate``.

        The new model reuses this instance's ``max_length`` as ``maxlen``.

        Returns:
            The new model, compiled with sparse categorical cross-entropy loss,
            the Adam optimizer and an accuracy metric.
        """
        # Keyword arguments keep every value in its intended slot; passing
        # them positionally would shift them (e.g. `dropout` into `maxlen`).
        model = type(self)(
            vocab_size,
            num_TransformerBlocks,
            embed_dim=embed_dim,
            num_heads=num_heads,
            num_blocks=num_layers,
            ff_dim=hidden_dim,
            maxlen=self.max_length,
            dropout_rate=dropout,
        )

        # The output layer already applies softmax, which matches the default
        # `from_logits=False` of the string-specified loss.
        model.compile(
            loss='sparse_categorical_crossentropy',
            optimizer=keras.optimizers.Adam(),
            metrics=['accuracy']
        )
        return model

In [None]:
class TransformerModel():
    """Assignment skeleton for the transformer model.

    Every method is an unimplemented placeholder that does nothing and returns
    ``None``; the docstrings record what each one is meant to do.

    NOTE: this cell rebinds the name ``TransformerModel``, so running it after
    another cell that defines a class with the same name replaces that class.
    """

    def __init__(self, vocab_size, embed_dim=256, num_heads=2, num_blocks=1, ff_dim=256, maxlen=80, rate=0.1):
        """TODO: initialize variables."""

    def TransformerBlock(self, inputs):
        """TODO: create the transformer block as described in the write-up.

        Use the Keras functional API (https://keras.io/guides/functional_api/):
          * MultiHeadAttention layer, specifying ``use_causal_mask=True``
            (https://keras.io/api/layers/attention_layers/multi_head_attention/)
          * LayerNormalization layer, specifying ``epsilon=1e-6``
            (https://keras.io/api/layers/normalization_layers/layer_normalization/)
          * use the ``rate`` variable for the dropout layers, and remember to
            use two dense layers

        See the assignment and its figures for more details.
        """

    def EmbeddingLayer(self, inputs):
        """TODO: create the embedding layer.

        Create (1) an embedding for the tokens and (2) an embedding for the
        positions, add (1) and (2), and return the layer.

        The Embedding class (https://keras.io/api/layers/core_layers/embedding/)
        can be used for both, and ``tf.range`` can be used to encode positions.
        """

    def create_model(self):
        """TODO: combine the EmbeddingLayer and ``num_blocks`` TransformerBlocks.

        Build the model with the Keras functional API
        (https://keras.io/guides/functional_api/) and use the
        SparseCategoricalCrossentropy loss function
        (https://keras.io/api/losses/probabilistic_losses/#sparsecategoricalcrossentropy-class).
        """
## Task 2

In [35]:
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

class DataSet():
    """Load a text file and turn it into next-word training sequences.

    Args:
        filename: Path of the text file; it is read completely on construction.
        max_length: Number of tokens in every training sequence. Defaults to 80.
        vocab_size: Passed to the Keras ``Tokenizer`` as ``num_words``;
            ``None`` (the default) sets no limit.

    Attributes set later:
        tokenizer: The fitted ``Tokenizer`` (set by ``tokenize_text``).
        vocabulary: Sorted list of the words the tokenizer saw, including the
            ``'<OOV>'`` placeholder (set by ``tokenize_text``).
    """

    def __init__(self, filename, max_length=80, vocab_size=None):
        self.filename = filename
        # Both values are read by tokenize_text()/create_dataset(), so they
        # have to exist before those methods are called.
        self.max_length = max_length
        self.vocab_size = vocab_size
        # Context manager so the file handle is closed right after reading.
        with open(filename, 'r') as corpus:
            self.text = corpus.read()

    def prep_text(self):
        """Lower-case ``self.text`` and strip apostrophes, in place."""
        self.text = self.text.lower()
        self.text = re.sub("'", '', self.text) # getting rid of apostrophes

    def tokenize_text(self):
        """Fit a word-level tokenizer on ``self.text`` and record the vocabulary."""
        self.tokenizer = Tokenizer(num_words=self.vocab_size, oov_token='<OOV>')
        self.tokenizer.fit_on_texts([self.text])
        # Alphabetical word list; list position is NOT the token id. The
        # id <-> word mapping lives in the tokenizer's `word_index`.
        self.vocabulary = sorted(self.tokenizer.word_index)

    def create_dataset(self):
        """Prepare the text, tokenize it and cut it into training windows.

        Every input window holds ``max_length`` consecutive token ids and its
        target is the same window shifted one token ahead.

        Returns:
            ``(x, y, vocabulary)`` where ``x`` and ``y`` are NumPy arrays with
            one window per row and ``vocabulary`` is ``self.vocabulary``. When
            the text has no more than ``max_length`` tokens there are no
            windows and both arrays are empty.
        """
        self.prep_text()
        self.tokenize_text()
        sequences = self.tokenizer.texts_to_sequences([self.text])[0]

        # The last window must leave one token for its shifted target, hence
        # len(sequences) - max_length start positions (none if negative).
        window_starts = range(len(sequences) - self.max_length)
        x = np.array([sequences[i:i + self.max_length] for i in window_starts])
        y = np.array([sequences[i + 1:i + self.max_length + 1] for i in window_starts])
        return x, y, self.vocabulary


In [36]:
# Load the corpus file beatles.txt; DataSet.__init__ reads the whole file into data.text.
data = DataSet('beatles.txt')

In [None]:
class DataSet():
    """Assignment skeleton for the data-set helper.

    Every method is an unimplemented placeholder that does nothing and returns
    ``None``; the docstrings record what each one is meant to do.

    NOTE: this cell rebinds the name ``DataSet``, so running it after another
    cell that defines a class with the same name replaces that class.
    """

    def __init__(self, filename, len):
        """TODO: load the text from the file.

        NOTE(review): the ``len`` parameter shadows the builtin ``len`` inside
        this method.
        """

    def prep_text(self):
        """TODO: clean the text.

        Remove all punctuation, set to lowercase, and remove duplicate spaces
        and other whitespace (keep newlines).
        """

    def tokenize_text(self):
        """TODO: tokenize the text.

        Separate it into words, create a vocab, and convert the text to a list
        of numbers using the vocab such that each unique word is represented by
        its own number.
        """

    def create_dataset(self):
        """TODO: split the tokenized data into sequences of length ``len``.

        Return the sequences and the vocab.
        """

## Task 3

In [None]:
class GenerateText():
    """Assignment skeleton for text generation.

    Both methods are unimplemented placeholders that do nothing and return
    ``None``.
    """

    def __init__(self, model, vocab):
        """TODO: not yet implemented; ``model`` and ``vocab`` are ignored."""

    def generate_text(self, start_string, num_generate=100):
        """TODO: generate text using the model and vocab.

        Start with ``start_string`` and generate ``num_generate`` words.
        """

## Task 4: Model Training and Testing

In [None]:
def train_model(model, vocab, x, y, epochs=50):
    """Placeholder for the training routine.

    Intended to train the model while periodically generating text to show
    progress. No training is implemented yet: ``vocab``, ``x``, ``y`` and
    ``epochs`` are ignored and ``model`` is handed back untouched.
    """
    untrained_model = model
    return untrained_model


# Report

## Introduction

## Results

## Conclusion

## How to Run Code

Please include any special libraries and list your tf version here.