In [8]:
import re
import numpy as np

# Create character-to-index (char2idx) and index-to-character (idx2char) mappings
vocab = sorted(set(text))
seq_length = 40
char2idx = {char: idx for idx, char in enumerate(vocab)}
char2idx['<UNK>'] = len(char2idx)  # Handle unknown characters
idx2char = np.array(vocab + ['<UNK>'])

text = "This phone has an amazing display and great battery life! Overall, it's a great phone for everyday use, with top-notch performance. I would highly recommend it to anyone looking for a new phone."
text = re.sub(r'[^a-zA-Z0-9 ]+', '', text.lower())

from tensorflow.keras.preprocessing.text import Tokenizer
tokenizer = Tokenizer()
tokenizer.fit_on_texts([text])
sequences = tokenizer.texts_to_sequences([text])[0]

In [12]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout

model = Sequential([
    Embedding(input_dim=len(vocab), output_dim=64, input_length=seq_length),
    LSTM(128, return_sequences=True, recurrent_dropout=0.2),
    LSTM(128, return_sequences=False, recurrent_dropout=0.2),
    Dropout(0.2),
    Dense(len(vocab), activation='softmax')
])

model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])


In [13]:
def sample_with_temperature(predictions, temperature=1.0):
    predictions = np.asarray(predictions).astype('float64')
    predictions = np.log(predictions + 1e-8) / temperature
    exp_preds = np.exp(predictions)
    predictions = exp_preds / np.sum(exp_preds)
    return np.random.choice(len(predictions), p=predictions)

def generate_text(model, start_string, gen_length=100, temperature=1.0):
    input_eval = [char2idx.get(char, char2idx['<UNK>']) for char in start_string]
    input_eval = np.array(input_eval).reshape(1, -1)
    generated_text = start_string
    for _ in range(gen_length):
        predictions = model.predict(input_eval, verbose=0)
        predicted_idx = sample_with_temperature(predictions[0], temperature)
        next_char = idx2char[predicted_idx] if predicted_idx < len(idx2char) else '<UNK>'
        generated_text += next_char
        input_eval = np.append(input_eval[:, 1:], [[predicted_idx]], axis=1)
    return generated_text

In [14]:
start_string = u"phone is"
print(generate_text(model, start_string, gen_length=200))

phone isT!.st TtTzOeu-sgiwpd'k .Ibf!TnmvIllnszch.ka-oTzIT'I!gno'i!!t ,hdvl' I-gt'-mu'elduzweapm!OfIlhTTmvh'ot'e.-da.c kbudfyfilckOevbanssunvOmb'pb-uTbitywsma,dgm-naivnncaayIlTmhgbsktu-zk!vdklhTfkdTpgp-pk,zkom
