In [2]:
!pip install --upgrade bottleneck

Defaulting to user installation because normal site-packages is not writeable
Looking in links: /usr/share/pip-wheels
Collecting bottleneck
  Downloading bottleneck-1.5.0-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (8.1 kB)
Downloading bottleneck-1.5.0-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (361 kB)
Installing collected packages: bottleneck
Successfully installed bottleneck-1.5.0


In [4]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense

# Toy training corpus: four short sentences.
sentences = [
    "I love machine learning",
    "I love deep learning",
    "Machine learning is fun",
    "Deep learning is powerful",
]

# Fit a word-level tokenizer on the corpus to obtain the word -> integer id map.
tokenizer = Tokenizer()
tokenizer.fit_on_texts(sentences)
word_index = tokenizer.word_index

# One extra slot on top of the vocabulary: id 0 is used for padding below,
# so class/embedding indices run from 0 to len(word_index) inclusive.
vocab_size = 1 + len(word_index)

print("Word Index:", word_index, sep="\n")

# Turn each sentence into all of its prefixes of length >= 2
# (e.g. [a, b, c] -> [a, b], [a, b, c]); the last id of every prefix
# becomes the prediction target further down.
sequences = []

for line in sentences:
    encoded = tokenizer.texts_to_sequences([line])[0]
    sequences.extend(encoded[:stop] for stop in range(2, len(encoded) + 1))

# Left-pad every prefix with zeros up to the longest prefix length.
max_len = max(map(len, sequences))
sequences = pad_sequences(sequences, maxlen=max_len, padding='pre')

# Context = all columns but the last; target = last column, one-hot encoded.
X, y = sequences[:, :-1], sequences[:, -1]
y = tf.keras.utils.to_categorical(y, num_classes=vocab_size)

print("\nSample input sequence and label (after padding):")
print(X[0], "->", y[0])

# Seed the Python, NumPy and TensorFlow RNGs so that weight initialisation and
# batch shuffling -- and therefore the training run -- are repeatable on a
# fresh kernel.
tf.keras.utils.set_random_seed(42)

# Embedding -> LSTM -> softmax over the vocabulary.
# The input shape is declared with an explicit Input instead of Embedding's
# `input_length` argument, which newer Keras releases deprecate and ignore;
# declaring it builds the model up front so summary() shows real shapes.
model = Sequential([
    tf.keras.Input(shape=(max_len - 1,), dtype="int32"),
    Embedding(input_dim=vocab_size, output_dim=10),
    LSTM(50),
    Dense(vocab_size, activation='softmax')
])

# Targets are one-hot vectors (see to_categorical above), hence the
# categorical (not sparse) cross-entropy loss.
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()

model.fit(X, y, epochs=200, verbose=1)

def predict_next_word(seed_text):
    """Return the word the trained model scores highest as the continuation of ``seed_text``.

    Relies on the module-level ``tokenizer``, ``model`` and ``max_len``.

    Args:
        seed_text: Text to continue; it is converted to ids with ``tokenizer``
            and left-padded/truncated to ``max_len - 1`` ids.

    Returns:
        The predicted word, or ``None`` when the tokenizer produces no ids for
        ``seed_text`` or when the top-scoring class index has no entry in the
        vocabulary (index 0 is the padding id).
    """
    tokenized = tokenizer.texts_to_sequences([seed_text])[0]
    if not tokenized:
        # No ids at all: the model would only see padding, so any "prediction"
        # would be arbitrary. Report that there is nothing to predict from.
        return None

    padded = pad_sequences([tokenized], maxlen=max_len - 1, padding='pre')
    # verbose=0 keeps each call from printing its own progress bar.
    probabilities = model.predict(padded, verbose=0)
    predicted_index = int(np.argmax(probabilities, axis=-1)[0])

    # index_word is the tokenizer's id -> word mapping; .get() yields None for
    # an id without a word, matching the previous fall-through behaviour.
    return tokenizer.index_word.get(predicted_index)

# Demo: ask the trained model for the word that follows a two-word prompt
# and print the prompt together with the prediction.
input_text = "I love"
predicted_word = predict_next_word(input_text)
print(f"\nGiven input: '{input_text}' → Predicted next word: '{predicted_word}'")

Word Index:
{'learning': 1, 'i': 2, 'love': 3, 'machine': 4, 'deep': 5, 'is': 6, 'fun': 7, 'powerful': 8}

Sample input sequence and label (after padding):
[0 0 2] -> [0. 0. 0. 1. 0. 0. 0. 0. 0.]


2025-05-22 06:42:59.459099: E external/local_xla/xla/stream_executor/cuda/cuda_platform.cc:51] failed call to cuInit: INTERNAL: CUDA error: Failed call to cuInit: UNKNOWN ERROR (303)


Epoch 1/200
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step - accuracy: 0.0833 - loss: 2.1965
Epoch 2/200
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step - accuracy: 0.3333 - loss: 2.1937
Epoch 3/200
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step - accuracy: 0.3333 - loss: 2.1908
Epoch 4/200
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step - accuracy: 0.3333 - loss: 2.1878
Epoch 5/200
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step - accuracy: 0.3333 - loss: 2.1848
Epoch 6/200
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step - accuracy: 0.3333 - loss: 2.1818
Epoch 7/200
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step - accuracy: 0.3333 - loss: 2.1787
Epoch 8/200
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step - accuracy: 0.3333 - loss: 2.1755
Epoch 9/200
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m