In [1]:
pip install tensorflow numpy

Collecting tensorflow
  Using cached tensorflow-2.19.0-cp310-cp310-win_amd64.whl (375.7 MB)
Collecting numpy
  Downloading numpy-2.2.5-cp310-cp310-win_amd64.whl (12.9 MB)
     -------------------------------------- 12.9/12.9 MB 320.7 kB/s eta 0:00:00
Collecting grpcio<2.0,>=1.24.3
  Using cached grpcio-1.71.0-cp310-cp310-win_amd64.whl (4.3 MB)
Collecting h5py>=3.11.0
  Using cached h5py-3.13.0-cp310-cp310-win_amd64.whl (3.0 MB)
Collecting keras>=3.5.0
  Downloading keras-3.9.2-py3-none-any.whl (1.3 MB)
     ---------------------------------------- 1.3/1.3 MB 481.6 kB/s eta 0:00:00
Collecting termcolor>=1.1.0
  Using cached termcolor-3.1.0-py3-none-any.whl (7.7 kB)
Collecting absl-py>=1.0.0
  Using cached absl_py-2.2.2-py3-none-any.whl (135 kB)
Collecting google-pasta>=0.1.1
  Using cached google_pasta-0.2.0-py3-none-any.whl (57 kB)
Collecting numpy
  Downloading numpy-2.1.3-cp310-cp310-win_amd64.whl (12.9 MB)
     -------------------------------------- 12.9/12.9 MB 906.1 kB/s eta 0:00:

In [2]:
import numpy as np
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Embedding
import tensorflow as tf

In [3]:

# In-memory training corpus: five short paragraphs about technology topics.
# Each string is treated as one text by the tokenizer and by the n-gram
# sequence builder in the following cells.
texts = [
    "Artificial intelligence is revolutionizing industries worldwide. Machine learning algorithms enable computers to analyze complex patterns and make intelligent decisions. These systems continuously improve through experience and data analysis.",
    
    "The future of technology lies in neural networks and deep learning. These sophisticated systems can process vast amounts of information and identify patterns that humans might miss. Advanced AI models are now capable of generating human-like text and creative content.",
    
    "Cloud computing has transformed modern business operations. Companies can now scale their infrastructure instantly and access powerful computing resources on demand. This technology enables remote work and global collaboration while reducing operational costs.",
    
    "Cybersecurity is becoming increasingly critical in our digital world. As systems become more connected, protecting sensitive data from unauthorized access is paramount. Modern security solutions use AI and machine learning to detect and prevent cyber threats.",
    
    "The Internet of Things is creating a more connected world. Smart devices communicate with each other to automate daily tasks and improve efficiency. This network of connected devices generates valuable data that can be analyzed for better decision-making."
]


In [4]:

# Learn the word -> integer-id vocabulary from the corpus.
tokenizer = Tokenizer()
tokenizer.fit_on_texts(texts)

# Vocabulary size plus one extra slot (presumably for index 0, which the
# padding step fills in and which maps to no word).
total_words = 1 + len(tokenizer.word_index)

In [5]:
# Build the training samples: for every text, emit each prefix of its token
# ids that is at least two tokens long (context + the word that follows it).
sequences = [
    token_ids[:end]
    for token_ids in tokenizer.texts_to_sequences(texts)
    for end in range(2, len(token_ids) + 1)
]

In [6]:

# Left-pad every prefix to the length of the longest one so that all samples
# stack into a single 2-D array.
max_sequence_len = max(map(len, sequences))
sequences = np.array(
    pad_sequences(sequences, maxlen=max_sequence_len, padding='pre')
)

In [7]:
# Create training data: every token but the last is the input context,
# the last token of each padded prefix is the word to predict.
X = sequences[:, :-1]
y = sequences[:, -1]
# One-hot encode the target ids so they line up with the softmax output.
y = tf.keras.utils.to_categorical(y, num_classes=total_words)

# Seed Python, NumPy and TensorFlow so weight initialisation (and therefore
# the trained model) is reproducible across Restart & Run All.
SEED = 42
tf.keras.utils.set_random_seed(SEED)

# Embedding -> two stacked LSTMs -> dense head over the vocabulary.
# The input shape is declared with an explicit Input layer; passing
# `input_length` to Embedding is deprecated in Keras 3 and only emits a warning.
model = Sequential([
    tf.keras.Input(shape=(max_sequence_len - 1,)),
    Embedding(total_words, 128),
    LSTM(256, return_sequences=True),  # pass the full sequence to the next LSTM
    LSTM(128),
    Dense(128, activation='relu'),
    Dense(total_words, activation='softmax')  # one probability per vocabulary id
])




In [8]:
# Compile for multi-class next-word prediction over one-hot targets.
model.compile(
    loss='categorical_crossentropy',
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.005),
    metrics=['accuracy']
)

# Train and keep the returned History object: assigning it preserves the
# per-epoch loss/accuracy for later inspection and stops the cell from
# echoing a bare `<History ...>` repr as its output.
history = model.fit(X, y, epochs=150, batch_size=32, verbose=1)

Epoch 1/150
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 243ms/step - accuracy: 0.0218 - loss: 4.9030
Epoch 2/150
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 236ms/step - accuracy: 0.0653 - loss: 4.8603
Epoch 3/150
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 242ms/step - accuracy: 0.0805 - loss: 4.6596
Epoch 4/150
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 233ms/step - accuracy: 0.0581 - loss: 4.5822
Epoch 5/150
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 225ms/step - accuracy: 0.0570 - loss: 4.2975
Epoch 6/150
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 236ms/step - accuracy: 0.0571 - loss: 4.1663
Epoch 7/150
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 234ms/step - accuracy: 0.1042 - loss: 3.7894
Epoch 8/150
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 227ms/step - accuracy: 0.0992 - loss: 3.5734
Epoch 9/150
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━

<keras.src.callbacks.history.History at 0x1d5ded9d690>

In [9]:
# Text generation with a simple repetition guard
def generate_text(seed_text, next_words, no_repeat_window=3):
    """Extend ``seed_text`` by up to ``next_words`` model-predicted words.

    At every step the current text is tokenized, left-padded to the model's
    input length and fed to the model. Candidate words are then tried from
    most to least probable, and the first one that is not among the last
    ``no_repeat_window`` words of the text is appended.

    Relies on the notebook-level ``tokenizer``, ``model``, ``pad_sequences``
    and ``max_sequence_len``.

    Args:
        seed_text: Starting text; it is split on whitespace.
        next_words: Number of words to generate. Zero or a negative value
            returns ``seed_text`` unchanged.
        no_repeat_window: How many trailing words a candidate must differ
            from. ``0`` (or less) disables the repetition guard.

    Returns:
        The seed followed by the generated words, joined by single spaces.
        Generation stops early if no candidate word qualifies (only possible
        with a very small vocabulary), so fewer than ``next_words`` words may
        be added.
    """
    generated_words = seed_text.split()

    # Invert the vocabulary once instead of scanning it for every candidate.
    index_to_word = {index: word for word, index in tokenizer.word_index.items()}

    for _ in range(next_words):
        token_list = tokenizer.texts_to_sequences([seed_text])[0]
        token_list = pad_sequences([token_list], maxlen=max_sequence_len-1, padding='pre')
        predicted = model.predict(token_list, verbose=0)

        # `lst[-0:]` would be the whole list, so handle a disabled window explicitly.
        recent_words = generated_words[-no_repeat_window:] if no_repeat_window > 0 else []

        # Walk the candidates from most to least probable.
        output_word = ""
        for pred_idx in np.argsort(predicted[0])[::-1]:
            # Ids without a vocabulary entry (e.g. the padding id 0) are skipped.
            word = index_to_word.get(int(pred_idx))
            if word is not None and word not in recent_words:
                output_word = word
                break

        # Nothing usable: stop rather than append an empty string.
        if not output_word:
            break

        generated_words.append(output_word)
        seed_text = " ".join(generated_words)

    return seed_text


In [10]:
# Interactive demo: keep prompting for a seed and a word count until the user
# types 'quit'.
while True:
    seed = input("\nEnter a seed text (or 'quit' to exit): ")
    # Ignore surrounding whitespace and case when checking for the exit word.
    if seed.strip().lower() == 'quit':
        break
    try:
        num_words = int(input("How many words to generate? "))
    except ValueError:
        # A non-numeric answer should re-prompt, not abort the whole loop.
        print("Please enter a whole number.")
        continue
    print("\nGenerated text:")
    print(generate_text(seed, num_words))


Enter a seed text (or 'quit' to exit):  artificial intelligence
How many words to generate?  30



Generated text:
artificial intelligence is revolutionizing industries worldwide machine learning algorithms enable computers to analyze complex patterns and make intelligent decisions these systems continuously improve through experience and data analysis humans miss advanced ai



Enter a seed text (or 'quit' to exit):  quit
