In [1]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences


In [2]:
# Toy training corpus: ten short sentences about AI and neural networks.
_corpus_sentences = (
    "artificial intelligence is transforming modern society.",
    "it is used in healthcare finance education and transportation.",
    "machine learning allows systems to improve automatically with experience.",
    "data plays a critical role in training intelligent systems.",
    "large datasets help models learn complex patterns.",
    "deep learning uses multi layer neural networks.",
    "neural networks are inspired by biological neurons.",
    "each neuron processes input and produces an output.",
    "training a neural network requires optimization techniques.",
    "gradient descent minimizes the loss function.",
)

# One sentence per line, wrapped in a leading and a trailing newline, so that
# splitting on "\n" yields an empty first and last entry.
text = "\n" + "\n".join(_corpus_sentences) + "\n"


In [3]:
# Learn the word -> integer index vocabulary from the whole corpus at once.
tokenizer = Tokenizer()
tokenizer.fit_on_texts(texts=[text])

# One more than the number of distinct words; this value sizes the embedding
# table and the softmax output of the models below.
total_words = 1 + len(tokenizer.word_index)


In [4]:
# Expand every line of the corpus into its growing prefixes of length >= 2:
# [w1, w2], [w1, w2, w3], ... Lines with fewer than two tokens contribute nothing.
input_sequences = [
    token_ids[:stop]
    for sentence in text.split("\n")
    for token_ids in tokenizer.texts_to_sequences([sentence])
    for stop in range(2, len(token_ids) + 1)
]

# Pad on the left so the last column always holds the final word of the prefix.
max_len = max(map(len, input_sequences))
input_sequences = pad_sequences(input_sequences, maxlen=max_len, padding='pre')

# Features are all columns but the last; the target is the last column.
X, y = input_sequences[:, :-1], input_sequences[:, -1]


In [5]:
# LSTM next-word model: embedding -> LSTM -> softmax over the vocabulary.
# The sequence length is declared with an explicit Input instead of the
# Embedding `input_length` argument (deprecated in Keras 3), so the model is
# built before summary() is called.
model = Sequential([
    tf.keras.Input(shape=(max_len - 1,)),
    Embedding(total_words, 50),
    LSTM(100),
    Dense(total_words, activation='softmax')
])

# Targets are integer word indices, hence the sparse categorical loss.
model.compile(loss='sparse_categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

model.summary()




In [6]:
# Train the LSTM model on the (prefix, next word) pairs; the returned History
# object is the value of this cell.
model.fit(
    x=X,
    y=y,
    epochs=200,
    verbose=1,
)


Epoch 1/200
3/3 ━━━━━━━━━━━━━━━━━━━━ 3s 21ms/step - accuracy: 0.0000e+00 - loss: 4.1917
Epoch 2/200
3/3 ━━━━━━━━━━━━━━━━━━━━ 0s 23ms/step - accuracy: 0.0736 - loss: 4.1845
Epoch 3/200
3/3 ━━━━━━━━━━━━━━━━━━━━ 0s 20ms/step - accuracy: 0.0620 - loss: 4.1800
Epoch 4/200
3/3 ━━━━━━━━━━━━━━━━━━━━ 0s 23ms/step - accuracy: 0.0310 - loss: 4.1756
Epoch 5/200
3/3 ━━━━━━━━━━━━━━━━━━━━ 0s 20ms/step - accuracy: 0.0776 - loss: 4.1704
Epoch 6/200
3/3 ━━━━━━━━━━━━━━━━━━━━ 0s 20ms/step - accuracy: 0.0736 - loss: 4.1664
Epoch 7/200
3/3 ━━━━━━━━━━━━━━━━━━━━ 0s 24ms/step - accuracy: 0.0814 - loss: 4.1605
Epoch 8/200
3/3 ━━━━━━━━━━━━━━━━━━━━ 0s 20ms/step - accuracy: 0.0814 - loss: 4.1559
Epoch 9/200
3/3 ━━━━━━━━━━━━━━━━━━━━ [output truncated]

<keras.src.callbacks.history.History at 0x7a1ed614a240>

In [7]:
def generate_text(seed_text, next_words):
    """Greedily extend ``seed_text`` with words predicted by the global ``model``.

    Each step tokenizes the text produced so far, pre-pads it to
    ``max_len - 1`` ids and appends the word whose index has the highest
    predicted score.

    Args:
        seed_text: Starting phrase.
        next_words: Maximum number of words to append.

    Returns:
        The seed text followed by the generated words, separated by spaces.
        Fewer than ``next_words`` words are appended if the model predicts an
        index that has no vocabulary entry.
    """
    for _ in range(next_words):
        token_ids = tokenizer.texts_to_sequences([seed_text])[0]
        padded = pad_sequences([token_ids], maxlen=max_len-1, padding='pre')
        predicted = int(np.argmax(model.predict(padded, verbose=0)))

        # index_word is the tokenizer's reverse vocabulary, so this is a
        # dictionary lookup rather than a scan over word_index.
        word = tokenizer.index_word.get(predicted)
        if word is None:
            # No word for this index (such as the padding index 0): the text
            # would stay the same and every later step would repeat the same
            # prediction, so stop here.
            break
        seed_text += " " + word
    return seed_text

# Sample a 10-word greedy continuation from the LSTM model.
lstm_sample = generate_text("artificial intelligence", 10)
print(lstm_sample)


artificial intelligence is transforming modern society society society neurons experience experience experience


In [8]:
from tensorflow.keras.layers import Input, Dense, LayerNormalization, MultiHeadAttention
from tensorflow.keras.models import Model


In [9]:
# Re-derive the training pairs from the padded sequences as int32 arrays:
# every column but the last is the input, the last column is the target.
X = input_sequences[:, :-1].astype(np.int32)
y = input_sequences[:, -1].astype(np.int32)


In [10]:
def positional_encoding(seq_len, d_model):
    """Build a sinusoidal position table of shape ``(seq_len, d_model)``.

    Even feature columns hold the sine and odd feature columns the cosine of
    ``position / 10000 ** (2 * (column // 2) / d_model)``.

    Args:
        seq_len: Number of positions (rows of the table).
        d_model: Number of feature columns.

    Returns:
        The table converted with ``tf.cast`` to ``tf.float32``.
    """
    positions = np.arange(seq_len)[:, None]
    columns = np.arange(d_model)[None, :]

    # Columns 2k and 2k + 1 share one frequency, hence the floor division.
    exponents = (2 * (columns // 2)) / np.float32(d_model)
    angles = positions * (1 / np.power(10000, exponents))

    # Sine on even columns, cosine on odd columns.
    table = np.where(columns % 2 == 0, np.sin(angles), np.cos(angles))
    return tf.cast(table, dtype=tf.float32)


In [11]:
def transformer_block(x, head_size, num_heads, ff_dim):
    """Apply one post-norm transformer encoder block to ``x``.

    The block is self-attention followed by a two-layer feed-forward network,
    each wrapped in a residual connection and layer normalization.

    Args:
        x: Input tensor; its last dimension is the model width.
        head_size: Per-head key dimension passed to ``MultiHeadAttention``.
        num_heads: Number of attention heads.
        ff_dim: Hidden width of the feed-forward network.

    Returns:
        A tensor with the same last dimension as ``x``.
    """
    # The feed-forward output is added back to x, so it must have x's width,
    # not the per-head size. Fall back to head_size only if the width is
    # unknown at graph-construction time.
    model_dim = x.shape[-1] or head_size

    attn = MultiHeadAttention(num_heads=num_heads, key_dim=head_size)(x, x)
    x = LayerNormalization()(x + attn)

    ff = Dense(ff_dim, activation='relu')(x)
    ff = Dense(model_dim)(ff)
    return LayerNormalization()(x + ff)


In [12]:
# Width shared by the token embedding, the positional table and the
# transformer block.
EMBED_DIM = 64

# Token ids -> embeddings, plus the fixed sinusoidal position table.
inputs = Input(shape=(max_len - 1,))
embedding = Embedding(total_words, EMBED_DIM)(inputs)
pos_embed = positional_encoding(max_len - 1, EMBED_DIM)
x = embedding + pos_embed

# One encoder block with 2 heads and a 128-unit feed-forward layer, then a
# 64-unit dense layer applied at every position.
x = transformer_block(x, head_size=EMBED_DIM, num_heads=2, ff_dim=128)
x = Dense(64, activation='relu')(x)

# Only the last position is used to predict the next word.
last_position = x[:, -1]
outputs = Dense(total_words, activation='softmax')(last_position)

transformer_model = Model(inputs, outputs)

# Targets are integer word indices, hence the sparse categorical loss.
transformer_model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

transformer_model.summary()


In [13]:
# Train the transformer model on the same (prefix, next word) pairs; the
# returned History object is the value of this cell.
transformer_model.fit(
    x=X,
    y=y,
    epochs=100,
    verbose=1,
)


Epoch 1/100
3/3 ━━━━━━━━━━━━━━━━━━━━ 6s 32ms/step - accuracy: 0.0194 - loss: 4.2378
Epoch 2/100
3/3 ━━━━━━━━━━━━━━━━━━━━ 0s 36ms/step - accuracy: 0.0116 - loss: 4.1737
Epoch 3/100
3/3 ━━━━━━━━━━━━━━━━━━━━ 0s 34ms/step - accuracy: 0.0310 - loss: 4.1719
Epoch 4/100
3/3 ━━━━━━━━━━━━━━━━━━━━ 0s 34ms/step - accuracy: 0.0388 - loss: 4.1853
Epoch 5/100
3/3 ━━━━━━━━━━━━━━━━━━━━ 0s 36ms/step - accuracy: 0.0232 - loss: 4.1795
Epoch 6/100
3/3 ━━━━━━━━━━━━━━━━━━━━ 0s 35ms/step - accuracy: 0.0232 - loss: 4.1868
Epoch 7/100
3/3 ━━━━━━━━━━━━━━━━━━━━ 0s 32ms/step - accuracy: 0.0194 - loss: 4.1704
Epoch 8/100
3/3 ━━━━━━━━━━━━━━━━━━━━ 0s 22ms/step - accuracy: 0.0116 - loss: 4.1618
Epoch 9/100
3/3 ━━━━━━━━━━━ [output truncated]

<keras.src.callbacks.history.History at 0x7a1ecef84950>

In [14]:
def generate_transformer_text(seed_text, next_words):
    """Greedily extend ``seed_text`` with words predicted by ``transformer_model``.

    Each step tokenizes the text produced so far, pre-pads it to
    ``max_len - 1`` ids and appends the word whose index has the highest
    predicted score.

    Args:
        seed_text: Starting phrase.
        next_words: Maximum number of words to append.

    Returns:
        The seed text followed by the generated words, separated by spaces.
        Fewer than ``next_words`` words are appended if the model predicts an
        index that has no vocabulary entry.
    """
    for _ in range(next_words):
        token_ids = tokenizer.texts_to_sequences([seed_text])[0]
        padded = pad_sequences([token_ids], maxlen=max_len-1, padding='pre')
        pred = int(np.argmax(transformer_model.predict(padded, verbose=0)))

        # index_word is the tokenizer's reverse vocabulary, so this is a
        # dictionary lookup rather than a scan over word_index.
        word = tokenizer.index_word.get(pred)
        if word is None:
            # No word for this index (such as the padding index 0): the text
            # would stay the same and every later step would repeat the same
            # prediction, so stop here.
            break
        seed_text += " " + word
    return seed_text

# Sample a 10-word greedy continuation from the transformer model.
transformer_sample = generate_transformer_text("machine learning", 10)
print(transformer_sample)


machine learning allows systems to improve automatically with experience experience experience experience
