# Project 4
## Students:
 > [Eli Carter]
 > [Gabriel Stowe]
 
 

In [44]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import numpy as np
import re

In [45]:
print(tf.__version__)# you may want to upgrade to 2.10.0 

2.12.0


### Please Use Markdown
> for markdown, see here: https://www.ibm.com/docs/en/watson-studio-local/1.2.3?topic=notebooks-markdown-jupyter-cheatsheet

## Task 1

In [46]:
class TransformerModel():
    def __init__(self, vocab_size, embed_dim=256, num_heads=2, num_blocks=1, ff_dim=256, maxlen=100, rate=0.1):
        #initailize variables
        self.vocab_size = vocab_size
        self.embed_dim = embed_dim
        self.num_heads = num_heads
        self.num_blocks = num_blocks
        self.ff_dim = ff_dim
        self.maxlen = maxlen
        self.rate = rate

    def TransformerBlock(self, inputs):
        #create the transformer block as discribed in the writeup, use the Keras functional API (https://keras.io/guides/functional_api/)
        #MultiHeadAttention layer, specifiy 'use_causal_mask=True' (https://keras.io/api/layers/attention_layers/multi_head_attention/)
        #LayerNormalization layer, specifiy 'epsilon=1e-6' (https://keras.io/api/layers/normalization_layers/layer_normalization/)
        #Use the rate variable for the dropout layers
        mha = tf.keras.layers.MultiHeadAttention(num_heads=self.num_heads, key_dim=self.embed_dim)(inputs, inputs, use_causal_mask=True)
        d1 = layers.Dropout(self.rate)(mha)
        n1 = layers.LayerNormalization(epsilon=1e-6)(d1 + inputs)
        fc1 = layers.Dense(self.ff_dim, activation='relu')(n1)
        fc2 = layers.Dense(self.ff_dim, activation='relu')(fc1)
        d2 = layers.Dropout(self.rate)(fc2)
        n2 = layers.LayerNormalization(epsilon=1e-6)(d2 + n1)
        return n2


    
    def EmbeddingLayer(self, inputs):
        #create the embedding layer
        #create (1) an embedding for the tokens and (2) an embedding for the positions
        #you can use https://keras.io/api/layers/core_layers/embedding/ Embedding class
        #you can use tf.range to enocde positions
        #add (1) and (2) and return the layer
        toke_embedding = layers.Embedding(input_dim=self.vocab_size, output_dim=self.embed_dim)(inputs)
        pos_embedding = layers.Embedding(input_dim=self.maxlen, output_dim=self.embed_dim)(tf.range(self.maxlen))
        return toke_embedding + pos_embedding
        #return layers.TokenAndPositionEmbedding(self.maxlen, self.vocab_size, self.embed_dim)(inputs)
    
    def create_model(self):
        #combine the EmbeddingLayer and num_blocks TransformerBlocks to create the model, use the Keras functional API (https://keras.io/guides/functional_api/)
        #use the SparseCategoricalCrossentropy loss function (https://keras.io/api/losses/probabilistic_losses/#sparsecategoricalcrossentropy-class)

        inputs = layers.Input(shape=(self.maxlen,))
        embedding = self.EmbeddingLayer(inputs)
        tmp = embedding
        for i in range(self.num_blocks):
            tmp = self.TransformerBlock(tmp)

        outputs = layers.Dense(self.vocab_size, activation='softmax')(tmp)
        model = keras.Model(inputs=inputs, outputs=outputs)
        model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
        return model


## Task 2

In [47]:
class DataSet():
    def __init__(self, filename, len):
        #load the text from the file
        self.text = open(filename, 'r').read()
        self.len = len
        

    def prep_text(self):
        #remove all punctuation
        self.text = re.sub(r'[^\w\s]', '', self.text)
        #remove all special characters
        self.text = re.sub(r'[^a-zA-Z0-9\s]', '', self.text)
        #replace all whitespaces except for \n with a space
        self.text = re.sub(r'[^\S\n]+', ' ', self.text)
        #replace all \n with a space, newline, then space
        self.text = re.sub(r'\n', ' \n ', self.text)
        
    def tokenize_text(self):
        #seperate into words, create a vocab and convert the text to a list of numbers using the vocab such that each unique word is represented by its own number
        self.text = self.text.split(' ')
        #remove all the empty strings ??????
        c = self.text.count('')
        for i in range(c):
            self.text.remove('')
        self.vocab = np.unique(self.text)
        self.text = np.array([np.where(self.vocab == word)[0][0] for word in self.text])

    def create_dataset(self):
        #split the tokenized data into sequences of length len, return the sequences and vocab
        self.prep_text()
        self.tokenize_text()
        x = []
        y = []
        for i in range(len(self.text) - self.len - 1):
            x.append(self.text[i:i+self.len])
            y.append(self.text[i+1:i+self.len+1])
        return np.array(x), np.array(y), self.vocab

In [48]:
test = DataSet('beatles.txt', 64)
x, y, vocab = test.create_dataset()
print(x[-1])
print(y[-1])
print(vocab)
print(type(vocab))
print(len(vocab))


[2585 1444 1957 1173   15    0 1986 1095   34    0  481    0 2252 1932
 2435  251   13 1271 1271 2276   37    0 2585 1444 1957 1173 2585 1444
 1957    0 2585 1444 1957 1173 2565    0 2585 1444 1957 1173 2585 1444
 1957    0 2585 1444 1957 1173 2565    0 2585 1444 1957 1173 2585 1444
 1957    0 2585 1444 1957 1173 2565    0]
[1444 1957 1173   15    0 1986 1095   34    0  481    0 2252 1932 2435
  251   13 1271 1271 2276   37    0 2585 1444 1957 1173 2585 1444 1957
    0 2585 1444 1957 1173 2565    0 2585 1444 1957 1173 2585 1444 1957
    0 2585 1444 1957 1173 2565    0 2585 1444 1957 1173 2585 1444 1957
    0 2585 1444 1957 1173 2565    0  133]
['\n' '0' '1' ... 'zapped' 'zoo' 'zu']
<class 'numpy.ndarray'>
2595


## Task 3

In [86]:
class GenerateText():
    def __init__(self, model, vocab, maxlen):
        self.vocab = vocab
        self.model = model
        self.maxlen = maxlen

    
    def generate_text(self, start_string, num_generate=150):
        #generate text using the model and vocab, start with the start_string and generate num_generate words
        #use the model to predict the next word, then add it to the input and predict the next word, repeat until num_generate words have been generated

        #convert the start_string to a numpy list of numbers using the vocab 
        start_tokens = [np.where(self.vocab == word)[0][0] for word in start_string.split(' ')]
        
        for i in range(num_generate):
            pad_len = self.maxlen - len(start_tokens)
            if pad_len < 0:
                x = start_tokens[-self.maxlen:]
            elif pad_len > 0:
                x = start_tokens + [0] * pad_len
            else:
                x = start_tokens
            x = np.array([x])

            #use the model to predict the next word
            prediction = self.model.predict(x,verbose=0)[0][min([i,self.maxlen-1])]
            #add the predicted word to the input
            start_tokens.append(np.argmax(prediction))

        #convert the list of numbers back to a string using the vocab
        return ' '.join([self.vocab[i] for i in start_tokens])
    
    def generate_random_text(self, num_generate=150):
        #generate text using the model and vocab, start with a random word and generate num_generate words

        #choose a random word from the vocab as the start_string
        start_string = np.random.choice(self.vocab)
        return self.generate_text(start_string, num_generate)


## Task 4: Model Traning and Testing

In [92]:
import time
#Do not use fit verbose. This will tell what Epoch currently on. 
def train_model(model, vocab, x, y, epochs=50, verbose=0, maxlen=50, heads=0):
    start_time = time.time()
    for i in range(epochs):
        #train the model
        print("Epoch:",i+1)
        history = model.fit(x, y,verbose=verbose ,use_multiprocessing=True)
        
        if epochs == 1 or ((i+1) % 10 == 0):
            with open(f'{heads}_{epochs}.txt', 'a') as file:
                #generate text using the model
                gen_text = GenerateText(model, vocab, maxlen)
                file.write(f'Epoch {i+1}\n')
                file.write("Generated Text:\n")
                file.write(gen_text.generate_random_text())
                file.write("\n")
                file.write(f"Accuracy: {history.history['accuracy'][-1]} Loss: {history.history['loss'][-1]} Total Time: {time.time() - start_time} seconds\n")
                
                print(f"Accuracy: {history.history['accuracy'][-1]} Loss: {history.history['loss'][-1]} Total Time: {time.time() - start_time} seconds\n")
                
                file.write('\n\n')
    return model

## Running the models

In [93]:
# Create the dataset
dataset = DataSet('beatles.txt', 50)
x, y, vocab = dataset.create_dataset()
# Create the model
# Train the model
for epoch in [1,50,100]:
    for head in [2,4]:
        print(f"Working: {epoch} epochs {head} heads")
        my_model = TransformerModel(len(vocab), num_heads=head, maxlen=50)
        compiled_model = my_model.create_model()

        trained_model = train_model(compiled_model, vocab, x, y, epoch, heads=head)
        # trained_model.save('saved_model/64_50_model')


Working: 1 epochs 2 heads
Epoch: 1
Accuracy: 0.5701628923416138 Loss: 1.9290947914123535 Total Time: 126.69433236122131 seconds

Working: 1 epochs 4 heads
Epoch: 1
Accuracy: 0.5826221704483032 Loss: 1.8848795890808105 Total Time: 176.5903513431549 seconds

Working: 50 epochs 2 heads
Epoch: 1
Epoch: 2
Epoch: 3
Epoch: 4
Epoch: 5
Epoch: 6
Epoch: 7
Epoch: 8
Epoch: 9
Epoch: 10
Accuracy: 0.9351632595062256 Loss: 0.23857295513153076 Total Time: 1236.5649600028992 seconds

Epoch: 11
Epoch: 12
Epoch: 13
Epoch: 14
Epoch: 15
Epoch: 16
Epoch: 17
Epoch: 18
Epoch: 19
Epoch: 20
Accuracy: 0.9435104727745056 Loss: 0.2053060084581375 Total Time: 2454.8429794311523 seconds

Epoch: 21
Epoch: 22
Epoch: 23
Epoch: 24
Epoch: 25
Epoch: 26
Epoch: 27
Epoch: 28
Epoch: 29
Epoch: 30
Accuracy: 0.9469761252403259 Loss: 0.1913803666830063 Total Time: 3666.451151371002 seconds

Epoch: 31
Epoch: 32
Epoch: 33
Epoch: 34
Epoch: 35
Epoch: 36
Epoch: 37
Epoch: 38
Epoch: 39
Epoch: 40
Accuracy: 0.9492838382720947 Loss: 0.181935


# Report

## Introduction

In this project, we will be building a Transformer-based neural network to write Beatles songs. The goal is to frame the problem as a many-to-many task, where we are trying to predict a series of words. We will be using a text file that includes lyrics from 246 Beatles songs, which are concatenated with each other and treated as one long sequence. The dataset is taken from the following website: http://beatlesnumber9.com/lyrics.html.

Our network is built on a Transformer architecture that was laid out in the project write up. This Transformer architecture consisits of a TransformerModel class, which will include an init method, a TransformerBlock method, an EmbeddingLayer method, and a create_model method. In addition to, we created a DataSet class, which will be responsible for loading the text and generating sequences for training. Finally, we created a GenerateText class, which will be responsible for using the model to generate text.

## Results

For this particular project, the training data set is considered pretty small. While the the model is able to predict and form "beatle songs", the model is far from pefect. Down below is the result from running the model with epochs 1, 50 and 100 with attention heads of 2 and 4 for each. 




### 1 Epcoh With 2 Attention Heads:
```
Accuracy: 0.57
Loss: 1.92
Total Time To Complete: 126.69 Seconds

Generated Text:
lagoon 
 didnt anybody tell her 
 sundays on the phone to monday 
 tuesdays on the phone to me 
 she said shed always been a dancer 
 she worked at 15 clubs a day 
 and though she thought i knew the answer 
 and though she knew the answer 
 well i could not have a day 
 well i knew the answer 
 well i could not have a fool on the hill 
 well i never seems to the hill 
 man i tell you man with the foolish grin is keeping perfectly still 
 and never know him 
 and they can see 
 they can see 
 they can see 
 that hes just a fool that hes just a fool 
 and he never gives an answer 
 and the fool on the hill 
 sees the sun going down 
 and the
```

### 50 Epcohs With 2 Attention Heads:
```
Accuracy: 0.95
Loss: 0.18
Total Time To Complete: 6182.82 Seconds

Generated Text:
pornographic priestess 
 boy you been a naughty girl you let your knickers down 
 i am the eggman they are the eggmen 
 i am the walrus goo goo gjoob 
 sitting in an english garden waiting for the sun 
 if the sun dont come you get a tan 
 from standing in the english rain 
 i am the eggman they are the eggmen 
 i am the walrus goo goo gjoob ggoo goo gjoob 
 expert textpert choking smokers 
 dont you think the joker laughs at you 
 see how they smile like pigs in a sty 
 see how they snide 
 im crying 
 semolina pilchard climbing up the eiffel tower 
 elementary penguin singing hari krishna 
 man you should have seen them kicking edgar allan poe 
 i am the eggman they are the eggmen 
 i am the walrus goo goo
```

### 100 Epcohs With 2 Attention Heads:
```
Accuracy: 0.95
Loss: 0.16
Total Time To Complete: 13323.36 Seconds

Generated Text:
ooh 
 i dug a pony 
 well you can syndicate any boat you row 
 yes you can syndicate any boat you row 
 i told you all i want is you 
 evrything has got to bejust like you want it to 
 because 
 dig it 
 like a rolling stone 
 a like a rolling stone 
 like the fbi and the cia 
 and the bbcbb king 
 and doris day 
 matt busby 
 dig it dig it dig it dig it dig it dig it dig it dig it dig it dig it dig it dig it dig it dig it dig it dig it dig it dig it dig it dig it dig it dig it dig it dig it dig it dig it dig it dig it dig it dig it dig it dig it dig it dig it dig it dig
```

### 1 Epcoh With 4 Attention Heads:
```
Accuracy: 0.58
Loss: 1.88
Total Time To Complete: 176.59 Seconds

Generated Text:
hates to see me say 
 i see me cry 
 i see you say 
 i see you say 
 i will hear you say that i will hear you say 
 youll be happy cause you say 
 youll be happy 
 i will never leave me say 
 if you and if you know that i will love me 
 if you dont want to last 
 i will remember you will love you by 
 i will love her 
 i think of things she said 
 when i get her 
 for her 
 shes mine 
 and i think of things she said her 
 she said her things she said her 
 she said her 
 only to be dead 
 and i know that living with me is to be free 
 is out tonight 
 shes thinking of her 
 shes got
```

### 50 Epcohs With 4 Attention Heads:
```
Accuracy: 0.95
Loss: 0.16
Total Time To Complete: 8680.10 Seconds

Generated Text:
institution 
 well you know 
 you better free your mind instead 
 but if you go carrying pictures of chairman mao 
 you aint going to make it with anyone anyhow 
 dont you know know its gonna be alright 
 alright 
 alright 
 alright 
 repeat till fade 
 rock and roll music 
 just let me hear some of that rock and roll music 
 any old way you choose it 
 its got a back beat you cant lose it 
 any old time you use it 
 its gotta be rock roll music 
 if you wanna dance with me 
 if you wanna dance with me 
 way down south they gave a jubilee 
 the jokey folks they had a jamboree 
 theyre drinkin home brew from a water cup 
 the folks dancin got all shook up 
 and started playin that
```

### 100 Epcohs With 4 Attention Heads:
```
Accuracy: 0.95
Loss: 0.15
Total Time To Complete: 21028.84 Seconds

Generated Text:
henry the horse dances the waltz 
 the band begins at ten to six 
 when mr k performs his tricks without a sound 
 and mr h will demonstrate 
 ten summersets hell undertake on solid ground 
 having been some days in preparation 
 a splendid time is guaranteed for all 
 and tonight mr kite is topping the bill 
 birthday 
 you say its your birthday 
 its my birthday tooyeah 
 they say its your birthday 
 were gonna have a good time 
 im glad its your birthday 
 happy birthday to you 
 yes were going to a party party 
 yes were going to a party party 
 yes were going to a party party 
 i would like you to dancebirthday 
 think its your birthday 
 happy birthday to you 
 blackbird 
 blackbird singing in the dead of night 
```

Based on the results, it is evident that the models have some ability to produce coherent and meaningful content. However, there are some differences in the quality of the generated text across different training epochs and attention heads. 

In general, the models with more epochs tend to generate more coherent and accurate lyrics. For example, the 50 and 100 epoch models with 2 and 4 attention heads produced more coherent lyrics with a higher acuracy compared to the 1 epoch models.

Moreover, the 4 attention head models generally produced more coherent lyrics compared to 2 attention head models. This is most likely do to the increased model complexity.

Overall, there is still issues with the generated content. In some cases, the lyrics that the model produced lacks coherence and does not make much sense. In addition, the models will repeat phrases and words multiple time, which can be repetitive and uninteresting. 


## Conclusion

In conclusion, these models are far from being good at producing Beatle songs. This could be do to the small set of data trained on or how the training data was represented or ever the combination of both. That being the case, the models were able to produce lyrics that are somewhat coherent.

## How to Run Code
For this project the following python version and libraries used are as follows:
```
Python=3.11.3
Tensorflow=2.12
re=2.21
time=built-in
```

Once these are installed, simply just run the cells from top to bottom.