<a href="https://colab.research.google.com/github/VD0627/CP/blob/main/TransformerArchitecture.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.utils import pad_sequences
from tensorflow.keras.layers import Layer, Embedding, Dense, LayerNormalization, Dropout
import numpy as np

In [3]:
def load_data(file_path):
    """Return the entire contents of the UTF-8 text file at ``file_path``."""
    with open(file_path, mode='r', encoding='utf-8') as source:
        return source.read()

In [15]:
import zipfile

def load_and_save_data(zip_file_path, output_file_name="hp.txt"):
    """
    Extracts the first file stored in a zip archive and saves it as UTF-8 text.

    Directory entries (for example a leading ``data/``) are skipped, so the
    first real *file* member is used even when the archive starts with a
    folder entry. Any further members are ignored.

    Args:
        zip_file_path: Path to the zip file.
        output_file_name: Name of the output text file.

    Returns:
        The decoded text of the extracted member.

    Raises:
        IndexError: If the archive contains no file members.
        UnicodeDecodeError: If the member is not valid UTF-8.
    """
    with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
        # Reading a directory entry would yield empty content, so only
        # consider real files when picking the member to extract.
        file_members = [info for info in zip_ref.infolist() if not info.is_dir()]
        if not file_members:
            raise IndexError(f"no files found in zip archive: {zip_file_path}")
        with zip_ref.open(file_members[0]) as f:
            text = f.read().decode('utf-8')

    # Save the extracted text to a new file
    with open(output_file_name, 'w', encoding='utf-8') as outfile:
        outfile.write(text)

    return text

# Archive that holds the corpus; change this if your zip file has a different name.
zip_file_path = "hp.zip"
# Unpack it (written to the function's default output, "hp.txt") and keep the text.
text = load_and_save_data(zip_file_path=zip_file_path)



In [22]:
# Archive to unpack and the text file it is unpacked to.
zip_file_path, file_path = "hp.zip", "hp.txt"

# (Re)write hp.txt from the archive, then read it back and lower-case the corpus.
load_and_save_data(zip_file_path)
text = load_data(file_path).lower()

In [23]:
# Word-level tokenizer fitted on the whole corpus; '<OOV>' is its out-of-vocabulary token.
tokenizer = Tokenizer(oov_token='<OOV>')
tokenizer.fit_on_texts([text])

# Vocabulary size used for the embedding and the output layer: one more than
# the number of indexed words.
total_words = 1 + len(tokenizer.word_index)

In [24]:
# Number of context tokens per training example.
seq_length = 50

# texts_to_sequences returns one id list per input text; a single text was passed.
(tokens,) = tokenizer.texts_to_sequences([text])

# Filled with sliding windows in the next cell.
input_sequences = []

In [25]:
# Every window of seq_length + 1 consecutive token ids: the first seq_length
# ids are the model input and the final id is the word to predict.
# A fresh list is assigned (instead of appending to an existing one) so that
# re-running this cell cannot duplicate the windows.
input_sequences = [
    tokens[end - seq_length:end + 1]
    for end in range(seq_length, len(tokens))
]

In [26]:
# Turn the list of windows into a 2-D array of width seq_length + 1
# (shorter rows would be left-padded).
input_sequences = np.array(
    pad_sequences(input_sequences, maxlen=seq_length + 1, padding='pre')
)

# Inputs are the first seq_length columns; the target is the final column.
X = input_sequences[:, :seq_length]
y = input_sequences[:, seq_length]

In [27]:
# One-hot encode the next-word ids. The ids are read from the last column of
# input_sequences rather than from `y` itself, so re-running this cell cannot
# one-hot encode an already encoded matrix.
# The result has one row per example and total_words columns.
y = tf.keras.utils.to_categorical(input_sequences[:, -1], num_classes=total_words)

CORE: Transformer building blocks (multi-head attention, transformer block, token + position embedding)

In [62]:
import tensorflow as tf
from tensorflow.keras.layers import Layer, Embedding, Dense, LayerNormalization, Dropout

In [63]:
class MultiHeadAttention(Layer):
    """Projection layers for multi-head attention (constructor only).

    The ``attention``, ``split_heads`` and ``call`` functions in the cells
    that follow are defined at module level and are not attached to this
    class; the complete layer is defined again in the
    "MODEL, COMPILE AND RUN" cell.

    Args:
        embed_dim: Output width of the query/key/value/output projections.
        num_heads: Number of attention heads (example: 8).
    """

    def __init__(self, embed_dim, num_heads):
        super().__init__()
        self.embed_dim = embed_dim
        self.num_heads = num_heads
        # Integer division: the slice of embed_dim handled by each head.
        self.projection_dim = embed_dim // num_heads
        # Four separate Dense(embed_dim) layers: query, key, value, output.
        (self.query_dense,
         self.key_dense,
         self.value_dense,
         self.combine_heads) = (Dense(embed_dim) for _ in range(4))

In [47]:
def attention(self, query, key, value):
    """Scaled dot-product attention.

    Multiplies ``query`` by the transpose of ``key``, divides by
    sqrt(``self.projection_dim``), applies a softmax over the last axis and
    uses the result to weight ``value``.

    Returns:
        A tuple ``(weighted_values, attention_weights)``.
    """
    scale = tf.math.sqrt(tf.cast(self.projection_dim, tf.float32))
    weights = tf.nn.softmax(tf.matmul(query, key, transpose_b=True) / scale, axis=-1)
    return tf.matmul(weights, value), weights


In [48]:
def split_heads(self, x, batch_size):
    """Reshape ``x`` to (batch_size, -1, num_heads, projection_dim) and swap axes 1 and 2.

    The result therefore has the head axis directly after the batch axis.
    """
    per_head_shape = (batch_size, -1, self.num_heads, self.projection_dim)
    return tf.transpose(tf.reshape(x, per_head_shape), perm=[0, 2, 1, 3])

In [50]:
def call(self, inputs):
    """Run multi-head attention over ``inputs = (query, key, value)``.

    Each tensor is projected, split into heads, attended, merged back to
    ``embed_dim`` and passed through the output projection.
    """
    query, key, value = inputs
    # query is (batch_size, seq_len, embed_dim); the batch size is read at run time.
    batch_size = tf.shape(query)[0]

    heads_q = self.split_heads(self.query_dense(query), batch_size)
    heads_k = self.split_heads(self.key_dense(key), batch_size)
    heads_v = self.split_heads(self.value_dense(value), batch_size)

    # The attention weights are not needed here, only the weighted values.
    context, _ = self.attention(heads_q, heads_k, heads_v)
    # Undo the axis swap done by split_heads, then merge the heads.
    context = tf.transpose(context, perm=[0, 2, 1, 3])
    merged = tf.reshape(context, (batch_size, -1, self.embed_dim))
    return self.combine_heads(merged)

In [53]:
class TransformerBlock(Layer):
    """Sub-layers of a transformer block (constructor only).

    The ``call`` function in the following cell is defined at module level
    and is not attached to this class; the complete layer is defined again in
    the "MODEL, COMPILE AND RUN" cell.

    Args:
        embed_dim: Width of the block's input and output.
        num_heads: Number of attention heads.
        ff_dim: Width of the hidden feed-forward layer.
        rate: Dropout rate for both dropout layers.
    """

    def __init__(self, embed_dim, num_heads, ff_dim, rate=0.1):
        super().__init__()
        self.att = MultiHeadAttention(embed_dim, num_heads)
        # Feed-forward network: expand to ff_dim with ReLU, project back to embed_dim.
        feed_forward_layers = [Dense(ff_dim, activation="relu"), Dense(embed_dim)]
        self.ffn = tf.keras.Sequential(feed_forward_layers)
        self.layernorm1, self.layernorm2 = (
            LayerNormalization(epsilon=1e-6) for _ in range(2)
        )
        self.dropout1, self.dropout2 = (Dropout(rate) for _ in range(2))

In [55]:
def call(self, inputs, training):
    """Apply self-attention and the feed-forward network, each followed by
    dropout, a residual connection and layer normalisation.

    Args:
        inputs: Tensor used as query, key and value of the attention layer.
        training: Forwarded to both dropout layers.
    """
    attended = self.dropout1(self.att([inputs, inputs, inputs]), training=training)
    out1 = self.layernorm1(inputs + attended)
    transformed = self.dropout2(self.ffn(out1), training=training)
    return self.layernorm2(out1 + transformed)

In [56]:
class TokenAndPositionEmbedding(Layer):
    """Adds a learned position embedding to a learned token embedding.

    ``call`` is defined inside the class so that the layer can actually be
    applied to a tensor; a ``def call`` placed in a separate cell is only a
    module-level function and is never used by the layer.

    Args:
        maxlen: Number of positions the position embedding table covers.
        vocab_size: Number of rows in the token embedding table.
        embed_dim: Width of both embeddings.
    """

    def __init__(self, maxlen, vocab_size, embed_dim):
        super(TokenAndPositionEmbedding, self).__init__()
        self.token_emb = Embedding(input_dim=vocab_size, output_dim=embed_dim)
        self.pos_emb = Embedding(input_dim=maxlen, output_dim=embed_dim)

    def call(self, x):
        """Embed the token ids in ``x`` and add the embedding of each position.

        Positions are 0 .. (size of the last axis of ``x``) - 1, so the
        position vectors are shared by every row of the batch.
        """
        maxlen = tf.shape(x)[-1]
        positions = tf.range(start=0, limit=maxlen, delta=1)
        positions = self.pos_emb(positions)
        x = self.token_emb(x)
        return x + positions

In [58]:
def call(self, x):
    """Embed the token ids in ``x`` and add the embedding of each position.

    Positions run from 0 up to (but excluding) the size of the last axis of ``x``.
    """
    seq_len = tf.shape(x)[-1]
    position_vectors = self.pos_emb(tf.range(start=0, limit=seq_len, delta=1))
    token_vectors = self.token_emb(x)
    return token_vectors + position_vectors

MODEL, COMPILE AND RUN.

In [72]:
import tensorflow as tf
from tensorflow.keras.layers import Layer, Embedding, Dense, LayerNormalization, Dropout

class MultiHeadAttention(Layer):
    """Multi-head scaled dot-product attention.

    Args:
        embed_dim: Width of the query/key/value/output projections.
        num_heads: Number of attention heads; must divide ``embed_dim``.

    Raises:
        ValueError: If ``embed_dim`` is not a multiple of ``num_heads``.
    """

    def __init__(self, embed_dim, num_heads):
        super(MultiHeadAttention, self).__init__()
        # Without this check the reshape in split_heads either fails with an
        # obscure shape error or silently mixes features across positions.
        if embed_dim % num_heads != 0:
            raise ValueError(
                f"embed_dim ({embed_dim}) must be divisible by num_heads ({num_heads})"
            )
        self.num_heads = num_heads
        self.embed_dim = embed_dim
        # Width of the slice of embed_dim handled by each head.
        self.projection_dim = embed_dim // num_heads
        self.query_dense = Dense(embed_dim)
        self.key_dense = Dense(embed_dim)
        self.value_dense = Dense(embed_dim)
        self.combine_heads = Dense(embed_dim)

    def attention(self, query, key, value):
        """Return ``(weighted_values, attention_weights)`` for the given heads.

        Scores are query·keyᵀ divided by sqrt(projection_dim), normalised with
        a softmax over the last axis.
        """
        scores = tf.matmul(query, key, transpose_b=True)
        scores /= tf.math.sqrt(tf.cast(self.projection_dim, tf.float32))
        attention_probs = tf.nn.softmax(scores, axis=-1)
        return tf.matmul(attention_probs, value), attention_probs

    def split_heads(self, x, batch_size):
        """Reshape ``x`` to (batch_size, -1, num_heads, projection_dim) and
        move the head axis in front of the sequence axis."""
        x = tf.reshape(x, (batch_size, -1, self.num_heads, self.projection_dim))
        return tf.transpose(x, perm=[0, 2, 1, 3])

    def call(self, inputs):
        """Apply attention to ``inputs = (query, key, value)`` and return a
        tensor whose last axis has size ``embed_dim``."""
        query, key, value = inputs
        batch_size = tf.shape(query)[0]

        query = self.split_heads(self.query_dense(query), batch_size)
        key = self.split_heads(self.key_dense(key), batch_size)
        value = self.split_heads(self.value_dense(value), batch_size)

        # Only the weighted values are needed; the weights are discarded.
        attention, _ = self.attention(query, key, value)
        # Undo the axis swap from split_heads, then merge the heads again.
        attention = tf.transpose(attention, perm=[0, 2, 1, 3])
        concat_attention = tf.reshape(attention, (batch_size, -1, self.embed_dim))
        return self.combine_heads(concat_attention)


class TransformerBlock(Layer):
    """Self-attention followed by a feed-forward network, each wrapped in
    dropout, a residual connection and layer normalisation.

    Args:
        embed_dim: Width of the block's input and output.
        num_heads: Number of attention heads.
        ff_dim: Width of the hidden feed-forward layer.
        rate: Dropout rate for both dropout layers.
    """

    def __init__(self, embed_dim, num_heads, ff_dim, rate=0.1):
        super(TransformerBlock, self).__init__()
        self.att = MultiHeadAttention(embed_dim, num_heads) # Use the defined MultiHeadAttention
        self.ffn = tf.keras.Sequential([
            Dense(ff_dim, activation="relu"),
            Dense(embed_dim),
        ])
        self.layernorm1 = LayerNormalization(epsilon=1e-6)
        self.layernorm2 = LayerNormalization(epsilon=1e-6)
        self.dropout1 = Dropout(rate)
        self.dropout2 = Dropout(rate)

    def call(self, inputs, training=None):
        """Run the block on ``inputs``.

        Args:
            inputs: Tensor used as query, key and value of the attention layer.
            training: Forwarded to the dropout layers. Left as ``None`` it is
                resolved by Keras, so dropout is active during ``fit`` and
                disabled during ``predict``/``evaluate``.
        """
        attn_output = self.att([inputs, inputs, inputs])
        attn_output = self.dropout1(attn_output, training=training)
        out1 = self.layernorm1(inputs + attn_output)
        ffn_output = self.ffn(out1)
        ffn_output = self.dropout2(ffn_output, training=training)
        return self.layernorm2(out1 + ffn_output)


# Model hyper-parameters.
embed_dim = 128
num_heads = 4
ff_dim = 51  # NOTE(review): possibly a typo for 512 — confirm before changing
maxlen = seq_length

# Token ids -> embeddings -> transformer block -> last position -> next-word distribution.
inputs = tf.keras.Input(shape=(maxlen,))
embedding_layer = TokenAndPositionEmbedding(maxlen, total_words, embed_dim)
x = embedding_layer(inputs)
print(x.shape)
transformer_block = TransformerBlock(embed_dim, num_heads, ff_dim)
# Do not pin training=True here: a value passed while building the functional
# graph is baked in, which would keep dropout switched on during predict().
x = transformer_block(x)
print(x.shape)
# Keep only the representation of the final position of each sequence.
x = x[:, -1, :]
print(x.shape)
x = Dense(total_words, activation="softmax")(x)
print(x.shape)
model = tf.keras.Model(inputs=inputs, outputs=x)


# Targets are one-hot vectors, hence categorical cross-entropy.
model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])

model.summary()

(None, 50, 128)
(None, 50, 128)
(None, 128)
(None, 6663)


In [None]:
# Train on the (context window, one-hot next word) pairs and keep the Keras History object.
history = model.fit(x=X, y=y, epochs=10, batch_size=32)

Epoch 1/10
[1m2531/2531[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m207s[0m 80ms/step - accuracy: 0.0812 - loss: 6.5369
Epoch 2/10
[1m2531/2531[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m202s[0m 80ms/step - accuracy: 0.1573 - loss: 5.0757
Epoch 3/10
[1m2531/2531[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m259s[0m 79ms/step - accuracy: 0.2113 - loss: 4.2532
Epoch 4/10
[1m2531/2531[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m198s[0m 77ms/step - accuracy: 0.2557 - loss: 3.6398
Epoch 5/10
[1m2531/2531[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m219s[0m 84ms/step - accuracy: 0.3181 - loss: 3.0897
Epoch 6/10
[1m2531/2531[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m208s[0m 82ms/step - accuracy: 0.3996 - loss: 2.5876
Epoch 7/10
[1m2531/2531[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m256s[0m 80ms/step - accuracy: 0.4802 - loss: 2.1647
Epoch 8/10
[1m2531/2531[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m201s[0m 79ms/step - accuracy: 0.5508 - loss: 1.8140


In [74]:
def generate_text(seed_text, next_words, max_sequence_len):
    """Greedily extend ``seed_text`` by ``next_words`` predicted words.

    Uses the notebook-level ``tokenizer`` and ``model``. On every step the
    text generated so far is tokenised, left-padded/truncated to
    ``max_sequence_len - 1`` ids, and the most probable next word is appended.

    Args:
        seed_text: Text to start from.
        next_words: Number of words to append.
        max_sequence_len: Length of a training window including the target
            word; the model input has one id fewer.

    Returns:
        ``seed_text`` followed by the generated words, separated by spaces.
    """
    for _ in range(next_words):
        token_list = tokenizer.texts_to_sequences([seed_text])[0]
        token_list = pad_sequences([token_list], maxlen=max_sequence_len - 1, padding='pre')
        predicted = model.predict(token_list, verbose=0)
        # Class 0 is the padding id and has no entry in tokenizer.index_word,
        # so take the arg-max over the real word ids (1 and up) only; this
        # avoids a KeyError if the padding class ever scores highest.
        predicted_id = int(np.argmax(predicted[0, 1:])) + 1
        predicted_word = tokenizer.index_word[predicted_id]
        seed_text += " " + predicted_word
    return seed_text

# Prompt the model and let it append 50 words; a training window holds
# seq_length context ids plus the target, hence seq_length + 1.
seed_text = "harry looked at"
generated_text = generate_text(seed_text, 50, seq_length + 1)
# Length of the resulting string in characters.
print(len(generated_text))

269


In [75]:
# Show the seed text followed by the words appended by generate_text.
print(generated_text)

harry looked at the great hall for the start training for christmas ” “but in the house when i had been caught years there seemed to be in the house when i should be in the house smelled of cabbage and mrs dursley pretended she didn’t have a sister lately “i swore when
