In [3]:
data_path='C:\\Users\\a3318\\OneDrive - Axtria\\Documents\\Data\\poems\\poems.txt'

with open(data_path,encoding='utf8') as f:
    lines=f.read()
lines

'text\n"O my Luve\'s like a red, red rose\nThat’s newly sprung in June;\nO my Luve\'s like the melodie\nThat’s sweetly play\'d in tune.\n\nAs fair art thou, my bonnie lass,\nSo deep in luve am I:\nAnd I will luve thee still, my dear,\nTill a’ the seas gang dry:\n\nTill a’ the seas gang dry, my dear,\nAnd the rocks melt wi’ the sun:\nI will luve thee still, my dear,\nWhile the sands o’ life shall run.\n\nAnd fare thee well, my only Luve\nAnd fare thee well, a while!\nAnd I will come again, my Luve,\nTho’ it were ten thousand mile."\n"The rose is red,\nThe violet\'s blue,\nSugar is sweet,\nAnd so are you."\n"How do I love thee? Let me count the ways.\nI love thee to the depth and breadth and height\nMy soul can reach, when feeling out of sight\nFor the ends of being and ideal grace.\nI love thee to the level of every day\'s\nMost quiet need, by sun and candle-light.\nI love thee freely, as men strive for right.\nI love thee purely, as they turn from praise.\nI love thee with the passion 

In [4]:
lines=lines.split('\n\n')

In [5]:
import os
os.environ["KERAS_BACKEND"] = "tensorflow"

import keras
from keras import layers
from keras import ops
from keras.layers import TextVectorization
import numpy as np
import os
import string
import random
import tensorflow
import tensorflow.data as tf_data
import tensorflow.strings as tf_strings

In [6]:
def causal_attention_mask(batch_size, n_dest, n_src, dtype):
    """
    Mask the upper half of the dot product matrix in self attention.
    This prevents flow of information from future tokens to current token.
    1's in the lower triangle, counting from the lower right corner.
    """
    i = ops.arange(n_dest)[:, None]
    j = ops.arange(n_src)
    m = i >= j - n_src + n_dest
    mask = ops.cast(m, dtype)
    mask = ops.reshape(mask, [1, n_dest, n_src])
    mult = ops.concatenate(
        [ops.expand_dims(batch_size, -1), ops.convert_to_tensor([1, 1])], 0
    )
    return ops.tile(mask, mult)


class TransformerBlock(layers.Layer):
    def __init__(self, embed_dim, num_heads, ff_dim, rate=0.1):
        super().__init__()
        self.att = layers.MultiHeadAttention(num_heads, embed_dim)
        self.ffn = keras.Sequential(
            [
                layers.Dense(ff_dim, activation="relu"),
                layers.Dense(embed_dim),
            ]
        )
        self.layernorm1 = layers.LayerNormalization(epsilon=1e-6)
        self.layernorm2 = layers.LayerNormalization(epsilon=1e-6)
        self.dropout1 = layers.Dropout(rate)
        self.dropout2 = layers.Dropout(rate)

    def call(self, inputs):
        input_shape = ops.shape(inputs)
        batch_size = input_shape[0]
        seq_len = input_shape[1]
        causal_mask = causal_attention_mask(batch_size, seq_len, seq_len, "bool")
        attention_output = self.att(inputs, inputs, attention_mask=causal_mask)
        attention_output = self.dropout1(attention_output)
        out1 = self.layernorm1(inputs + attention_output)
        ffn_output = self.ffn(out1)
        ffn_output = self.dropout2(ffn_output)
        return self.layernorm2(out1 + ffn_output)

In [7]:
class TokenAndPositionEmbedding(layers.Layer):
    def __init__(self, maxlen, vocab_size, embed_dim):
        super().__init__()
        self.token_emb = layers.Embedding(input_dim=vocab_size, output_dim=embed_dim)
        self.pos_emb = layers.Embedding(input_dim=maxlen, output_dim=embed_dim)

    def call(self, x):
        maxlen = ops.shape(x)[-1]
        positions = ops.arange(0, maxlen, 1)
        positions = self.pos_emb(positions)
        x = self.token_emb(x)
        return x + positions

In [8]:
vocab_size = 20000  # Only consider the top 20k words
maxlen = 80  # Max sequence size
embed_dim = 256  # Embedding size for each token
num_heads = 2  # Number of attention heads
feed_forward_dim = 256  # Hidden layer size in feed forward network inside transformer


def create_model():
    inputs = layers.Input(shape=(maxlen,), dtype="int32")
    embedding_layer = TokenAndPositionEmbedding(maxlen, vocab_size, embed_dim)
    x = embedding_layer(inputs)
    transformer_block1 = TransformerBlock(embed_dim, num_heads, feed_forward_dim)
    x = transformer_block1(x)
    transformer_block2 = TransformerBlock(embed_dim, num_heads, feed_forward_dim)
    x = transformer_block2(x)
    transformer_block3 = TransformerBlock(embed_dim, num_heads, feed_forward_dim)
    x = transformer_block3(x)
    outputs = layers.Dense(vocab_size)(x)
    model = keras.Model(inputs=inputs, outputs=[outputs, x])
    loss_fn = keras.losses.SparseCategoricalCrossentropy(from_logits=True)
    model.compile(
        "adam",
        loss=[loss_fn, None],
    )  # No loss and optimization based on word embeddings from transformer block
    return model

In [9]:
model=create_model()
model.summary()




In [10]:
def custom_standardization(input_string):
    """Remove html line-break tags and handle punctuation"""
    lowercased = tf_strings.lower(input_string)
    stripped_html = tf_strings.regex_replace(lowercased, "<br />", " ")
    return tf_strings.regex_replace(stripped_html, f"([{string.punctuation}])", r" \1")


# Create a vectorization layer and adapt it to the text
vectorize_layer = TextVectorization(
    standardize=custom_standardization,
    max_tokens=vocab_size - 1,
    output_mode="int",
    output_sequence_length=maxlen + 1,
)
vectorize_layer.adapt(lines)
vocab = vectorize_layer.get_vocabulary()  # To get words back from token indices


In [11]:
def prepare_lm_inputs_labels(text):
    """
    Shift word sequences by 1 position so that the target for position (i) is
    word at position (i+1). The model will use all words up till position (i)
    to predict the next word.
    """
    text = tensorflow.expand_dims(text, -1)
    print(text.shape)
    tokenized_sentences = vectorize_layer(text)
    
    x = tokenized_sentences[:, :-1]
    y = tokenized_sentences[:, 1:]
    return x, y

In [12]:
x,y= prepare_lm_inputs_labels(lines)

(435, 1)


In [2]:
y

NameError: name 'y' is not defined

In [13]:
model = create_model()

model.fit(x,y, verbose=2, epochs=5)

Epoch 1/5
14/14 - 23s - 2s/step - loss: 7.4774
Epoch 2/5
14/14 - 16s - 1s/step - loss: 4.2797
Epoch 3/5
14/14 - 16s - 1s/step - loss: 3.4989
Epoch 4/5
14/14 - 16s - 1s/step - loss: 3.3659
Epoch 5/5
14/14 - 16s - 1s/step - loss: 3.1656


<keras.src.callbacks.history.History at 0x1e919016e50>

In [26]:
outs=model(x)

In [36]:
loss=keras.losses.SparseCategoricalCrossentropy()
loss(y,outs[0])

<tf.Tensor: shape=(), dtype=float32, numpy=16.90903091430664>

In [38]:
model.summary()

In [15]:
import tensorflow as tf

In [18]:
initial='I would rather'
sentence=[]
for i in range(0,80):
    vect_in=vectorize_layer(initial)
    vect_in=np.expand_dims(vect_in,0)
    vect_in=vect_in[:,0:80]
    print(vect_in)
    outs=model(vect_in)[0]
    outs=outs/0.5
    softmax_out=tf.nn.softmax(outs)
    out=tf.argmax(softmax_out,axis=2)
    word=vocab[out[0][i].numpy().item()]
    sentence.append(word)
    initial=initial+" " +word

    




[[   6   85 1419    0    0    0    0    0    0    0    0    0    0    0
     0    0    0    0    0    0    0    0    0    0    0    0    0    0
     0    0    0    0    0    0    0    0    0    0    0    0    0    0
     0    0    0    0    0    0    0    0    0    0    0    0    0    0
     0    0    0    0    0    0    0    0    0    0    0    0    0    0
     0    0    0    0    0    0    0    0    0    0]]
[[   6   85 1419   46    0    0    0    0    0    0    0    0    0    0
     0    0    0    0    0    0    0    0    0    0    0    0    0    0
     0    0    0    0    0    0    0    0    0    0    0    0    0    0
     0    0    0    0    0    0    0    0    0    0    0    0    0    0
     0    0    0    0    0    0    0    0    0    0    0    0    0    0
     0    0    0    0    0    0    0    0    0    0]]
[[   6   85 1419   46    6    0    0    0    0    0    0    0    0    0
     0    0    0    0    0    0    0    0    0    0    0    0    0    0
     0    0    0    0    0  

In [19]:
sentence

[np.str_('am'),
 np.str_('i'),
 np.str_(','),
 np.str_('the'),
 np.str_('am'),
 np.str_('and'),
 np.str_(','),
 np.str_(','),
 np.str_('the'),
 np.str_('and'),
 np.str_('and'),
 np.str_('same'),
 np.str_('the'),
 np.str_('the'),
 np.str_(','),
 np.str_('same'),
 np.str_('same'),
 np.str_('and'),
 np.str_(','),
 np.str_(','),
 np.str_('the'),
 np.str_('and'),
 np.str_('and'),
 np.str_('same'),
 np.str_('the'),
 np.str_('the'),
 np.str_(','),
 np.str_('same'),
 np.str_('same'),
 np.str_('and'),
 np.str_(','),
 np.str_(','),
 np.str_('the'),
 np.str_('and'),
 np.str_('and'),
 np.str_('same'),
 np.str_('the'),
 np.str_('the'),
 np.str_(','),
 np.str_('same'),
 np.str_('same'),
 np.str_('and'),
 np.str_(','),
 np.str_(','),
 np.str_('the'),
 np.str_('and'),
 np.str_('and'),
 np.str_('same'),
 np.str_('the'),
 np.str_('the'),
 np.str_(','),
 np.str_('same'),
 np.str_('same'),
 np.str_('and'),
 np.str_(','),
 np.str_(','),
 np.str_('the'),
 np.str_('and'),
 np.str_('and'),
 np.str_('same'),
 

In [193]:
out=tf.argmax(softmax_out,axis=2)

In [34]:
vocab[120]

np.str_('some')

In [196]:
vocab[69]

'thee'