In [1]:
import re
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split

In [2]:
# Training corpus: ten short article titles about machine learning
texts = [
    "How to be a good machine learning engineer",
    "Top 10 skills for a successful machine learning career",
    "The importance of continuous learning in machine learning",
    "How to improve your machine learning model performance",
    "The role of data visualization in machine learning",
    "How to choose the right machine learning algorithm for your problem",
    "The basics of deep learning and neural networks",
    "How to prepare your data for machine learning",
    "The importance of feature engineering in machine learning",
    "Common pitfalls to avoid in machine learning",
]

# Model hyperparameters, kept together so they can be tuned in one place
batch_size = 128
seq_length = 10  # generate_text keeps at most this many prompt tokens as context
rnn_units = 256

In [14]:
# Preprocess the texts: lowercase, then drop every character that is not a
# letter, digit or whitespace. Both steps are idempotent, so re-running this
# cell leaves `texts` unchanged.
texts = [re.sub(r'[^a-zA-Z0-9\s]', '', text.lower()) for text in texts]

# Create the vocabulary (token -> integer index).
# The unique tokens are sorted before numbering: iterating a raw set of strings
# yields a different order in every interpreter session (str hashing is
# randomised), which would silently change every token's index after a kernel
# restart and invalidate previously encoded sequences or trained weights.
vocab = {
    token: index
    for index, token in enumerate(
        sorted({token for text in texts for token in text.split()})
    )
}
vocab_size = len(vocab)

# Convert the texts to sequences of word indices (one list of ints per text)
sequences = [[vocab[token] for token in text.split()] for text in texts]

# Split the sequences into training and validation sets.
# random_state is fixed so the same texts land in each split on every run.
x_train, x_val = train_test_split(sequences, test_size=0.1, random_state=42)


In [3]:
# Generate new texts
def generate_text(model, prompt, num_words):
    """Sample `num_words` new tokens from `model`, seeded with `prompt`.

    Relies on the notebook-level `vocab` (token -> index mapping) and
    `seq_length` (maximum number of context tokens fed to the model).

    Args:
        model: Object exposing `predict(indices)`. Its result is indexed as
            `[0, -1, :]`, i.e. it is assumed to return per-timestep
            distributions over the vocabulary with shape
            (batch, time, len(vocab)) -- TODO confirm against the model cell.
        prompt: Seed text. It is lowercased and stripped of punctuation with
            the same rules the corpus preprocessing uses, so raw text such as
            'How to ...' maps onto the lowercase vocabulary.
        num_words: Number of tokens to generate; values <= 0 yield ''.

    Returns:
        The generated tokens joined by single spaces (the prompt itself is not
        included).

    Raises:
        ValueError: If the prompt contains no tokens after normalisation.
        KeyError: If the prompt contains tokens that are not in `vocab`.
    """
    # Normalise exactly like the training texts, otherwise capitalised or
    # punctuated prompt words would never be found in `vocab`.
    normalized = re.sub(r'[^a-zA-Z0-9\s]', '', prompt.lower())
    prompt_tokens = normalized.split()[-seq_length:]
    if not prompt_tokens:
        raise ValueError('prompt must contain at least one token')
    unknown = [token for token in prompt_tokens if token not in vocab]
    if unknown:
        raise KeyError(f'prompt contains out-of-vocabulary tokens: {unknown}')

    # `vocab` maps token -> index; sampling yields indices, so decoding needs
    # the inverse mapping.
    index_to_token = {index: token for token, index in vocab.items()}

    # Shape (1, n_tokens): a batch containing the single prompt sequence.
    input_indices = np.array([[vocab[token] for token in prompt_tokens]])
    generated_tokens = []
    for _ in range(num_words):
        # Distribution for the token following the last position in the window.
        probs = np.asarray(model.predict(input_indices)[0, -1, :], dtype=np.float64)
        # float32 softmax output often misses np.random.choice's sum-to-1
        # tolerance; renormalise in float64 to avoid a spurious ValueError.
        probs = probs / probs.sum()
        index = int(np.random.choice(len(vocab), p=probs))
        generated_tokens.append(index_to_token[index])
        # Append the sampled index as a (1, 1) column and keep only the last
        # `seq_length` positions -- the same window the prompt was cut to.
        input_indices = np.concatenate([input_indices, [[index]]], axis=1)[:, -seq_length:]
    return ' '.join(generated_tokens)

In [None]:
# Seed the generator with one of the corpus titles and sample ten more words.
prompt = 'How to be a good machine learning engineer'
generated_text = generate_text(model=model, prompt=prompt, num_words=10)
print(generated_text)