In [17]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import SimpleRNN, Dense, Embedding
import random

# Training corpus: short sentences about machine-learning topics
text_data = [
    "i love machine learning",
    "i love deep learning",
    "machine learning is amazing",
    "deep learning is powerful",
    "rnn is great for sequence data",
    "tensorflow helps with deep learning",
    "artificial intelligence is the future",
    "deep learning powers modern AI",
    "neural networks can recognize patterns",
    "data science relies on machine learning",
    "supervised learning needs labeled data",
    "unsupervised learning finds hidden patterns",
    "reinforcement learning improves decision making",
    "python is great for machine learning",
    "tensorflow makes deep learning easier",
    "keras simplifies neural network development",
    "pytorch is popular among researchers",
    "scikit learn is useful for data science",
    "matplotlib helps visualize data",
    "self driving cars use deep learning",
    "natural language processing helps chatbots",
    "speech recognition converts voice to text",
    "computer vision detects objects in images",
    "AI recommends movies based on preferences"
]

# Fit the vocabulary: every distinct word gets an integer id
tokenizer = Tokenizer()
tokenizer.fit_on_texts(text_data)
# One extra slot on top of the vocabulary size; it is used below as the width
# of the one-hot labels and (later) of the embedding / softmax layers.
total_words = 1 + len(tokenizer.word_index)

# Turn each sentence into all of its prefixes of length >= 2, so that the
# last id of every prefix can serve as the "next word" label.
input_sequences = [
    token_ids[:end]
    for token_ids in tokenizer.texts_to_sequences(text_data)
    for end in range(2, len(token_ids) + 1)
]

# Left-pad every prefix to the length of the longest one
max_sequence_length = max(map(len, input_sequences))
input_sequences = pad_sequences(
    input_sequences,
    maxlen=max_sequence_length,
    padding='pre',
)

# All but the last column are the model inputs; the last column is the target
X = input_sequences[:, :-1]
next_word_ids = input_sequences[:, -1]

# One-hot encode the targets for categorical cross-entropy
y = tf.keras.utils.to_categorical(next_word_ids, num_classes=total_words)

# Build the RNN model
# Seed Python, NumPy and TensorFlow in one call so that weight initialisation
# and batch shuffling are repeatable when the notebook is re-run from a
# fresh kernel.
SEED = 42
tf.keras.utils.set_random_seed(SEED)

# Hyper-parameters gathered in one place so they are easy to tune
EMBEDDING_DIM = 10  # size of each learned word vector
RNN_UNITS = 32      # width of the recurrent hidden state
EPOCHS = 300        # number of passes over the training samples

model = Sequential([
    # `input_length` is deliberately not passed: the argument is deprecated in
    # Keras 3 and the layer works from the shape of the data it receives.
    Embedding(total_words, EMBEDDING_DIM),  # word id -> dense vector
    SimpleRNN(RNN_UNITS, activation='relu'),  # reads the padded prefix
    Dense(total_words, activation='softmax'),  # distribution over the next word id
])

# Compile model (labels in `y` are one-hot, hence categorical cross-entropy)
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train the model
model.fit(X, y, epochs=EPOCHS, verbose=1)

# Function to predict next word
def predict_next_word(seed_text):
    """Return the word the trained model ranks as most likely to follow ``seed_text``.

    Relies on the notebook-level ``tokenizer``, ``model`` and
    ``max_sequence_length`` created by the training code.

    Args:
        seed_text: Phrase to continue. It is converted to word ids with
            ``tokenizer`` and left-padded (or truncated) to the model's input
            length of ``max_sequence_length - 1``.

    Returns:
        The predicted next word, or ``"?"`` when ``seed_text`` yields no word
        ids at all or when the predicted index has no entry in the
        tokenizer's vocabulary.
    """
    token_ids = tokenizer.texts_to_sequences([seed_text])[0]
    if not token_ids:
        # Nothing to condition on: the model would only see padding, so any
        # prediction would be meaningless.
        return "?"

    padded = pad_sequences([token_ids], maxlen=max_sequence_length - 1, padding='pre')
    # verbose=0 keeps Keras from printing a progress bar for every single call.
    probabilities = model.predict(padded, verbose=0)[0]
    predicted_index = int(np.argmax(probabilities))
    # `index_word` is the tokenizer's id -> word mapping; a direct lookup
    # replaces the linear scan over `word_index`.
    return tokenizer.index_word.get(predicted_index, "?")

# Ask the trained model to continue a couple of seed phrases
for seed_text in ("machine learning", "deep learning"):
    next_word = predict_next_word(seed_text)
    print(f"'{seed_text}' → '{seed_text} {next_word}'")


Epoch 1/300
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 33ms/step - accuracy: 0.0153 - loss: 4.4036   
Epoch 2/300
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step - accuracy: 0.1013 - loss: 4.3955
Epoch 3/300
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step - accuracy: 0.1261 - loss: 4.3866 
Epoch 4/300
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step - accuracy: 0.1767 - loss: 4.3803
Epoch 5/300
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step - accuracy: 0.1579 - loss: 4.3734
Epoch 6/300
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step - accuracy: 0.1643 - loss: 4.3574
Epoch 7/300
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step - accuracy: 0.1289 - loss: 4.3458 
Epoch 8/300
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step - accuracy: 0.1539 - loss: 4.3286
Epoch 9/300
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[

In [15]:
# Continue one more seed phrase with the trained model.
# This cell depends on `predict_next_word` (and the model/tokenizer it uses)
# from the training cell, so that cell must have run in the current kernel.
# NOTE(review): this cell's recorded execution count (15) is lower than the
# training cell's (17), so the saved output may come from an earlier training
# run — re-run top to bottom to confirm.
seed_text = "learning improves"
next_word = predict_next_word(seed_text)
print(f"'{seed_text}' → '{seed_text} {next_word}'")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 168ms/step
'learning improves' → 'learning improves with'
