# Project 4
## Students:
 > Austin Houston,
 > Alexander Krneta
 
 

In [9]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.utils import to_categorical

# Show which TensorFlow build the kernel is using (the recorded run printed 2.9.0).
print(tf.__version__)  # you may want to upgrade to 2.10.0

2.9.0


## Task 1

In [7]:
class TransformerModel(keras.Model):
    """Transformer language model.

    Pipeline: token + position embeddings -> a stack of transformer blocks ->
    a Dense softmax over ``vocab_size`` classes at every sequence position.

    The model is called on integer token ids of shape ``(batch, seq_len)`` and
    returns probabilities of shape ``(batch, seq_len, vocab_size)``.
    ``seq_len`` must not exceed ``maxlen`` because the position embedding
    table only has ``maxlen`` rows.

    Args:
        vocab_size: Number of distinct token ids (size of the token embedding
            table and of the softmax output).
        num_TransformerBlocks: How many transformer blocks to stack.
        embed_dim: Width of the embeddings and of each block's output.
        num_heads: Number of attention heads per block.
        num_blocks: Accepted for backward compatibility; stored but not used
            (the stack depth is ``num_TransformerBlocks``).
        ff_dim: Width of the hidden feed-forward layer inside each block.
        maxlen: Longest sequence the position embedding supports.
        dropout_rate: Dropout rate used in the embeddings and blocks.
        use_causal_mask: When True, pass ``use_causal_mask=True`` to every
            attention call so a position cannot attend to later positions.
            Requires a TensorFlow release whose ``MultiHeadAttention.call``
            accepts that argument (2.10 or newer); the default ``False``
            keeps the attention unmasked.
    """

    def __init__(self, vocab_size, num_TransformerBlocks, embed_dim=256, num_heads=2, num_blocks=1, ff_dim=256, maxlen=80, dropout_rate=0.1, use_causal_mask=False):
        super().__init__()
        self.vocab_size = vocab_size
        self.num_TransformerBlocks = num_TransformerBlocks
        self.embed_dim = embed_dim
        self.num_heads = num_heads
        self.num_blocks = num_blocks
        self.ff_dim = ff_dim
        self.max_length = maxlen
        self.dropout_rate = dropout_rate
        self.use_causal_mask = use_causal_mask

        # Initialize embedding layer (creates the sub-layers, returns the
        # callable that applies them).
        self.embedding_layer = self.EmbeddingLayer()

        # Initialize transformer blocks; each one gets its own input
        # placeholder and a unique name.
        self.transformer_blocks = [
            self.TransformerBlock(name=f'transformer_block_{i}')
            for i in range(num_TransformerBlocks)
        ]

        # Initialize output layer
        self.output_layer = layers.Dense(vocab_size, activation='softmax')

    def TransformerBlock(self, embed_dim=None, num_heads=None, ff_dim=None, dropout_rate=None, name='transformer_block'):
        """Build one transformer block as a functional ``keras.Model``.

        The block applies self-attention and a two-layer feed-forward network,
        each followed by dropout, a residual ``Add`` and ``LayerNormalization``.

        Args:
            embed_dim, num_heads, ff_dim, dropout_rate: Optional overrides;
                each falls back to the value stored on the model when None.
            name: Name given to the returned block model.

        Returns:
            A ``keras.Model`` mapping ``(batch, seq_len, embed_dim)`` to a
            tensor of the same shape.
        """
        embed_dim = self.embed_dim if embed_dim is None else embed_dim
        num_heads = self.num_heads if num_heads is None else num_heads
        ff_dim = self.ff_dim if ff_dim is None else ff_dim
        dropout_rate = self.dropout_rate if dropout_rate is None else dropout_rate

        # A fresh placeholder per block: blocks must not share one Input.
        block_input = keras.Input(shape=(None, embed_dim))

        # Only forward the flag when requested so older TensorFlow releases,
        # whose attention call does not know the argument, keep working.
        attention_kwargs = {'use_causal_mask': True} if self.use_causal_mask else {}

        # Multi-Head Attention layer
        # Sums the input to the block and the output from the first dropout
        attention = layers.MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim)(
            block_input, block_input, **attention_kwargs
        )
        attention = layers.Dropout(rate=dropout_rate)(attention)
        attention = layers.LayerNormalization(epsilon=1e-6)(layers.Add()([block_input, attention]))

        # Feed-Forward Dense layer
        # Sums the output of the first LayerNormalization and second dropout
        dense = layers.Dense(units=ff_dim, activation='relu')(attention)
        dense = layers.Dropout(rate=dropout_rate)(dense)
        dense = layers.Dense(units=embed_dim)(dense)
        dense = layers.Dropout(rate=dropout_rate)(dense)
        dense = layers.LayerNormalization(epsilon=1e-6)(layers.Add()([attention, dense]))

        # Build the transformer block
        return keras.Model(inputs=block_input, outputs=dense, name=name)

    def EmbeddingLayer(self, vocab_size=None, embed_dim=None, input_length=None):
        """Create the embedding sub-layers and return the callable applying them.

        Sets ``self.token_embedding``, ``self.positional_embedding`` and
        ``self.dropout`` on the model.

        Args:
            vocab_size: Rows of the token table (defaults to ``self.vocab_size``).
            embed_dim: Embedding width (defaults to ``self.embed_dim``).
            input_length: Rows of the position table, i.e. the longest
                supported sequence (defaults to ``self.max_length``).

        Returns:
            A callable ``f(token_ids, training=None)`` producing the summed
            token and position embeddings after dropout.
        """
        vocab_size = self.vocab_size if vocab_size is None else vocab_size
        embed_dim = self.embed_dim if embed_dim is None else embed_dim
        input_length = self.max_length if input_length is None else input_length

        # `input_length` is deliberately not forwarded to Embedding: it is not
        # needed for variable-length input and newer Keras releases reject it.
        self.token_embedding = layers.Embedding(input_dim=vocab_size, output_dim=embed_dim)
        self.positional_embedding = layers.Embedding(
            input_dim=input_length,
            output_dim=embed_dim,
            embeddings_initializer=keras.initializers.RandomUniform(),
        )
        self.dropout = layers.Dropout(self.dropout_rate)
        return self._embed_tokens

    def _embed_tokens(self, token_ids, training=None):
        """Embed ``token_ids`` (last axis = sequence) and add position embeddings."""
        sequence_length = tf.shape(token_ids)[-1]
        position_ids = tf.range(start=0, limit=sequence_length, delta=1, dtype=tf.int32)
        position_embedding = self.positional_embedding(position_ids)
        token_embedding = self.token_embedding(token_ids)
        # (seq_len, embed_dim) broadcasts over the leading batch axis.
        embeddings = token_embedding + position_embedding
        return self.dropout(embeddings, training=training)

    def call(self, inputs, training=None):
        """Forward pass: token ids -> per-position vocabulary probabilities."""
        hidden = self.embedding_layer(inputs, training=training)
        for block in self.transformer_blocks:
            hidden = block(hidden, training=training)
        return self.output_layer(hidden)

    def create_model(self, vocab_size, num_TransformerBlocks, embed_dim, num_layers, num_heads, hidden_dim, dropout):
        """Build and compile a new ``TransformerModel``.

        Arguments are forwarded by keyword so each lands on the intended
        constructor parameter: ``num_layers`` -> ``num_blocks``,
        ``hidden_dim`` -> ``ff_dim``, ``dropout`` -> ``dropout_rate``.
        ``maxlen`` and ``use_causal_mask`` are taken from this instance
        because this method has no parameters for them.

        Returns:
            The new model, compiled with sparse categorical cross-entropy,
            the Adam optimizer and an accuracy metric.
        """
        # Initialize TransformerModel instance
        model = TransformerModel(
            vocab_size,
            num_TransformerBlocks,
            embed_dim=embed_dim,
            num_heads=num_heads,
            num_blocks=num_layers,
            ff_dim=hidden_dim,
            maxlen=self.max_length,
            dropout_rate=dropout,
            use_causal_mask=self.use_causal_mask,
        )

        # Compile the model with sparse categorical crossentropy loss and Adam optimizer
        model.compile(
            loss='sparse_categorical_crossentropy',
            optimizer=keras.optimizers.Adam(),
            metrics=['accuracy']
        )
        return model

## Task 2

In [1]:
# TODO: improve special-character handling (prep_text currently drops every character that is not alphanumeric or whitespace)

In [2]:
class Dataset:
    """Word-level next-word dataset built from a text file.

    Attributes:
        raw_text: File contents exactly as read; never modified.
        text: Working value. The raw string after construction, the cleaned
            string after ``prep_text`` and the list of integer word ids
            after ``tokenize_text``.
        vocab / reverse_vocab: word -> id and id -> word mappings (ids start
            at 1, so id 0 is never assigned to a word); set by
            ``tokenize_text``.
    """

    def __init__(self, filepath, encoding='utf-8'):
        """Read ``filepath`` into memory.

        Args:
            filepath: Path of the text file to load.
            encoding: Text encoding used to decode the file. Given explicitly
                so the result does not depend on the platform default.
        """
        with open(filepath, 'r', encoding=encoding) as f:
            self.raw_text = f.read()
        self.text = self.raw_text
        # Last string handed to the tokenizer; lets tokenize_text be re-run
        # after self.text has been replaced by the id list.
        self._untokenized_text = self.raw_text

    def prep_text(self):
        """Lower-case the text and drop every non-alphanumeric, non-space char."""
        # After tokenization self.text is a list of ids; restart from the
        # pristine file contents so re-running the cell does not crash.
        source = self.text if isinstance(self.text, str) else self.raw_text
        lowered = source.lower()
        self.text = ''.join(c for c in lowered if c.isalnum() or c.isspace())

    def tokenize_text(self):
        """Build the vocabulary and replace ``self.text`` with word ids.

        Words are the whitespace-separated tokens of the current text. Ids are
        assigned in sorted word order starting at 1.
        """
        if isinstance(self.text, str):
            self._untokenized_text = self.text
        words = self._untokenized_text.split()
        unique_words = sorted(set(words))
        self.vocab = {w: i for i, w in enumerate(unique_words, start=1)}
        self.reverse_vocab = {i: w for w, i in self.vocab.items()}
        self.text = [self.vocab[w] for w in words]

    def create_dataset(self, seq_len=None, one_hot=True):
        """Clean, tokenize and return ``(x, y, vocab)`` for next-word training.

        Args:
            seq_len: None (default) returns flat arrays: ``x`` is every id
                except the last and ``y`` is every id except the first. With
                an integer, the ids are cut into non-overlapping windows of
                that length: ``x`` and ``y`` then have shape
                ``(num_windows, seq_len)`` with ``y`` shifted one word ahead
                of ``x``. Leftover ids that do not fill a window are dropped.
            one_hot: True (default) one-hot encodes ``x`` with
                ``len(vocab) + 1`` classes via ``to_categorical``. Pass False
                to keep integer ids, the form an ``Embedding`` layer takes.

        Returns:
            Tuple ``(x, y, vocab)``.

        Raises:
            ValueError: If ``seq_len`` is given but smaller than 1.
        """
        if seq_len is not None and seq_len < 1:
            raise ValueError('seq_len must be a positive integer or None')

        self.prep_text()
        self.tokenize_text()
        ids = np.array(self.text, dtype=np.int64)

        if seq_len is None:
            x = ids[:-1]
            y = ids[1:]
        else:
            # One extra id is needed so the last window still has a target.
            num_windows = max(len(ids) - 1, 0) // seq_len
            used = num_windows * seq_len
            x = ids[:used].reshape(num_windows, seq_len)
            y = ids[1:used + 1].reshape(num_windows, seq_len)

        if one_hot:
            x = to_categorical(x, num_classes=len(self.vocab) + 1)
        return x, y, self.vocab


In [5]:
# Load beatles.txt and build the training arrays: create_dataset() cleans and
# tokenizes the text, then returns the inputs, the next-word targets and the
# word -> id vocabulary.
data = Dataset('beatles.txt')
x, y, vocab = data.create_dataset()

## Task 3

In [None]:
class GenerateText():
    """Generate word sequences from a trained next-word model.

    Args:
        model: Callable invoked as ``model(ids, training=False)`` with an
            int32 array of shape ``(1, context_len)``. It must return
            probabilities of shape ``(1, context_len, num_classes)`` or
            ``(1, num_classes)``; for the 3-D form the last position is used.
        vocab: Mapping word -> integer id.
        maxlen: Optional cap on how many trailing ids are fed to the model.
            Defaults to ``model.max_length`` when the model has that
            attribute, otherwise the whole sequence is used.
    """

    def __init__(self, model, vocab, maxlen=None):
        self.model = model
        self.vocab = vocab
        self.reverse_vocab = {idx: word for word, idx in vocab.items()}
        self.maxlen = maxlen if maxlen is not None else getattr(model, 'max_length', None)
        # Ids that can be decoded back to a word; anything else the model
        # predicts (e.g. an unused id 0) is excluded from sampling.
        self._known_ids = np.array(sorted(self.reverse_vocab), dtype=np.int64)

    def _encode(self, text):
        """Lower-case and strip ``text``, then map in-vocabulary words to ids."""
        cleaned = ''.join(c for c in text.lower() if c.isalnum() or c.isspace())
        return [self.vocab[w] for w in cleaned.split() if w in self.vocab]

    def _sample_next(self, token_ids):
        """Sample the next id from the model's distribution for ``token_ids``."""
        context = token_ids[-self.maxlen:] if self.maxlen else token_ids
        batch = np.array([context], dtype=np.int32)
        probs = np.asarray(self.model(batch, training=False), dtype=np.float64)[0]
        if probs.ndim > 1:
            # Per-position output: keep the prediction after the last word.
            probs = probs[-1]

        candidates = self._known_ids[self._known_ids < probs.shape[0]]
        if candidates.size == 0:
            raise ValueError('model output has no class that maps to a vocabulary word')
        weights = probs[candidates]
        total = weights.sum()
        if not np.isfinite(total) or total <= 0:
            # Degenerate distribution: fall back to a uniform choice.
            weights = np.ones(candidates.size, dtype=np.float64)
            total = weights.sum()
        return int(np.random.choice(candidates, p=weights / total))

    def generate_text(self, start_string, num_generate=100):
        """Generate text using the model and vocab.

        Starts from ``start_string`` and appends ``num_generate`` sampled
        words.

        Args:
            start_string: Prompt. It is lower-cased, stripped of
                non-alphanumeric characters, and words missing from the
                vocabulary are skipped.
            num_generate: Number of words to generate.

        Returns:
            The cleaned prompt followed by the generated words, joined by
            single spaces.

        Raises:
            ValueError: If the prompt contains no in-vocabulary word.
        """
        token_ids = self._encode(start_string)
        if not token_ids:
            raise ValueError('start_string must contain at least one word from the vocabulary')
        for _ in range(num_generate):
            token_ids.append(self._sample_next(token_ids))
        return ' '.join(self.reverse_vocab[i] for i in token_ids)

## Task 4: Model Training and Testing

In [None]:
#Train the model while periodically generating text to show progress
def train_model(model, vocab, x, y, epochs=50, generate_every=10, start_string=None, num_generate=20):
    """Fit ``model`` on ``(x, y)`` and print a generated sample at intervals.

    Training is done in chunks of ``generate_every`` epochs through
    ``model.fit(x, y, initial_epoch=..., epochs=...)``, so the epoch counter
    continues across chunks. After each chunk a sample is produced with
    ``GenerateText(model, vocab)`` and printed.

    Args:
        model: Compiled model exposing a Keras-style ``fit`` method.
        vocab: Mapping word -> id, forwarded to ``GenerateText``.
        x, y: Training inputs and targets, passed to ``fit`` unchanged.
        epochs: Total number of epochs to train.
        generate_every: Epochs between samples; 0 or None disables sampling
            and trains in a single ``fit`` call.
        start_string: Prompt for the samples. Defaults to the first word in
            ``vocab``; sampling is skipped if there is none.
        num_generate: Number of words generated per sample.

    Returns:
        The same ``model`` object, after training.
    """
    sampling = bool(generate_every) and generate_every > 0
    if start_string is None and vocab:
        start_string = next(iter(vocab))
    chunk = generate_every if sampling else epochs

    finished = 0
    while finished < epochs:
        target = min(finished + chunk, epochs)
        # verbose=2 prints one line per epoch, which keeps notebook output short.
        model.fit(x, y, initial_epoch=finished, epochs=target, verbose=2)
        finished = target

        if sampling and start_string is not None:
            sample = GenerateText(model, vocab).generate_text(start_string, num_generate=num_generate)
            if sample:
                print(f'Epoch {finished}/{epochs}: {sample}')
    return model


# Report

## Introduction

## Results

## Conclusion

## How to Run Code

Please include any special libraries and list your tf version here.