In [1]:
import numpy as np

from keras.models import Sequential

from keras.layers import LSTM, Dense, Dropout

from keras.callbacks import ModelCheckpoint

import requests

ModuleNotFoundError: No module named 'keras'

In [None]:
# ============================================
# 📘 CHAR-LEVEL LSTM TEXT GENERATOR
# Trains a model to generate text similar to Shakespeare
# ============================================

# Import required libraries
import numpy as np
from keras.models import Sequential
from keras.layers import LSTM, Dense, Dropout
from keras.callbacks import ModelCheckpoint
import requests
import random
import sys

# ----------------------------------------------------------
# 1. LOAD DATA
# ----------------------------------------------------------

# Tiny Shakespeare corpus, fetched from the char-rnn repository on GitHub
url = "https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt"

# Use a timeout so a stalled connection cannot hang the notebook forever, and
# raise on HTTP errors so an error page is never silently used as training text.
response = requests.get(url, timeout=30)
response.raise_for_status()

# Take only the first 100,000 characters for faster training
text = response.text[:100000]

# Sorted list of the distinct characters that occur in the text
chars = sorted(set(text))

# Lookup tables: character -> integer code, and integer code -> character
char_to_int = {c: i for i, c in enumerate(chars)}
int_to_char = {i: c for i, c in enumerate(chars)}

# Show some info about the dataset
print(f"Unique characters: {len(chars)}")
print(f"First 10 characters: {chars[:10]}")

# ----------------------------------------------------------
# 2. PREPARE TRAINING DATA
# ----------------------------------------------------------

# Window size: how many preceding characters the model sees when predicting
# the next one.
seq_length = 100

# Encode the whole text once; every training window is then a slice of it.
encoded_text = [char_to_int[ch] for ch in text]

# One window per starting offset that still has a following character.
# Example: with seq_length=5, codes [0..4] are the input and code [5] the target.
window_starts = range(len(text) - seq_length)
dataX = [encoded_text[s:s + seq_length] for s in window_starts]   # input windows
dataY = [encoded_text[s + seq_length] for s in window_starts]     # next-char targets

# Vocabulary size (number of distinct characters)
n_vocab = len(chars)
# Number of (input window, target) pairs
n_patterns = len(dataX)
print("Total Patterns:", n_patterns)

# Shape the inputs as [samples, time steps, features] (one feature per step)
# and scale the integer codes by the vocabulary size so they fall in [0, 1).
X = np.reshape(dataX, (n_patterns, seq_length, 1)) / float(n_vocab)

# One-hot encode the targets: row k of the identity matrix is the one-hot
# vector for class k, so indexing with dataY yields one row per pattern.
Y = np.eye(n_vocab)[dataY]

# ----------------------------------------------------------
# 3. BUILD THE MODEL
# ----------------------------------------------------------

# Layers are applied in the order listed.
model = Sequential([
    # First recurrent layer: 256 units, emits the full sequence so the next
    # LSTM receives one vector per time step. Input is (time steps, features).
    LSTM(256, input_shape=X.shape[1:], return_sequences=True),
    # Dropout with rate 0.2 between the recurrent layers
    Dropout(0.2),
    # Second (final) recurrent layer: only its last output is passed on
    LSTM(256),
    Dropout(0.2),
    # Output layer: one softmax probability per character in the vocabulary
    Dense(n_vocab, activation='softmax'),
])

# Multi-class classification over characters: categorical cross-entropy loss,
# optimized with Adam.
model.compile(optimizer='adam', loss='categorical_crossentropy')

# ----------------------------------------------------------
# 4. SAVE BEST MODEL DURING TRAINING
# ----------------------------------------------------------

# Write a checkpoint only on epochs where the training loss improves.
# save_weights_only is not set here, so the Keras default applies
# (see the ModelCheckpoint documentation for what is written to the file).
checkpoint = ModelCheckpoint(
    filepath="text_gen_model.h5",   # checkpoint file
    monitor='loss',                 # quantity to watch: training loss
    mode='min',                     # lower loss = better
    save_best_only=True,            # skip epochs that do not improve
    verbose=1,                      # report each save
)

# ----------------------------------------------------------
# 5. TRAIN THE MODEL
# ----------------------------------------------------------

# 20 epochs with mini-batches of 128 patterns; more epochs generally give
# better text at the cost of training time.
model.fit(X, Y, batch_size=128, epochs=20, callbacks=[checkpoint])

# ----------------------------------------------------------
# 6. GENERATE TEXT
# ----------------------------------------------------------

# Pick a random seed sequence. randint's upper bound is exclusive, so passing
# len(dataX) makes every pattern (including the last one) eligible.
start = np.random.randint(0, len(dataX))

# Work on a copy: the sliding window below must never alias dataX[start],
# otherwise generation would modify the training data.
pattern = list(dataX[start])

# Print the seed text (so we can see where generation starts)
print("Seed:")
print("\"", ''.join([int_to_char[value] for value in pattern]), "\"")

# ----------------------------------------------------------
# 7. TEXT GENERATION LOOP
# ----------------------------------------------------------

generated_chars = []  # collected output characters, joined once at the end

# Generate 1000 characters, one at a time
for i in range(1000):
    # Shape the window as (1 sample, time steps, 1 feature) for the LSTM
    x = np.reshape(pattern, (1, len(pattern), 1))
    # Scale the integer codes exactly as during training
    x = x / float(n_vocab)

    # Predict the probability of each possible next character
    prediction = model.predict(x, verbose=0)
    # Greedy decoding: take the most probable character
    index = int(np.argmax(prediction))
    # Convert the predicted index back to a character
    result = int_to_char[index]
    generated_chars.append(result)

    # Slide the window: drop the oldest code, append the new one.
    # Building a new list keeps the previous window untouched.
    pattern = pattern[1:] + [index]

generated_text = "".join(generated_chars)

# ----------------------------------------------------------
# 8. PRINT THE RESULT
# ----------------------------------------------------------

print("\nGenerated text:\n")
print(generated_text)


In [None]:
# After the model has been trained, re-run only the generation part.
# `pattern` is not reset here, so generation continues from whatever window
# the previous generation run left behind.

generated_chars = []  # collected output characters, joined once at the end
for i in range(1000):
    # Shape the window as (1 sample, time steps, 1 feature) and scale the
    # integer codes by the vocabulary size, as during training
    x = np.reshape(pattern, (1, len(pattern), 1)) / float(n_vocab)
    prediction = model.predict(x, verbose=0)
    # Greedy decoding: take the most probable next character
    index = int(np.argmax(prediction))
    result = int_to_char[index]
    generated_chars.append(result)
    # Slide the window without mutating the current list in place, so a
    # `pattern` that still aliases an entry of dataX is never modified.
    pattern = pattern[1:] + [index]

generated_text = "".join(generated_chars)

print("\nYaratilgan matn:\n")
print(generated_text)


In [None]:
# Rebuild the dataset from a smaller slice (first 10,000 characters) of the
# Tiny Shakespeare corpus.
url = "https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt"

# Use a timeout so a stalled connection cannot hang the notebook forever, and
# raise on HTTP errors so an error page is never silently used as training text.
response = requests.get(url, timeout=30)
response.raise_for_status()
text = response.text[:10000]

# Sorted list of the distinct characters that occur in this slice
chars = sorted(set(text))

# Lookup tables: character -> integer code, and integer code -> character
char_to_int = {c: i for i, c in enumerate(chars)}
int_to_char = {i: c for i, c in enumerate(chars)}

print(f"Unique characters: {len(chars)}")
print(f"First 10 characters: {chars[:10]}")

# Window size: number of preceding characters used to predict the next one
seq_length = 100
dataX, dataY = [], []

# Slide a seq_length-wide window over the text; the character right after
# each window is its prediction target.
for i in range(0, len(text) - seq_length):
    seq_in = text[i:i + seq_length]
    seq_out = text[i + seq_length]
    dataX.append([char_to_int[char] for char in seq_in])
    dataY.append(char_to_int[seq_out])

n_patterns = len(dataX)
n_vocab = len(chars)
print("Total Patterns:", n_patterns)

# Inputs as [samples, time steps, features], scaled by the vocabulary size
X = np.reshape(dataX, (n_patterns, seq_length, 1))
X = X / float(n_vocab)

# Rebuild the one-hot targets as well: any Y left over from an earlier cell
# was built from a different text slice and would not match this X in row
# count (or, if the vocabulary differs, in width).
Y = np.eye(n_vocab)[dataY]

In [None]:
# Fresh two-layer LSTM classifier over the character vocabulary.
model = Sequential()
# Input shape comes from the training array X (time steps, features).
# The lower-case `x` is only a temporary of the generation loop and does not
# exist on a fresh kernel, so it must not be used here.
model.add(LSTM(256, input_shape=(X.shape[1], X.shape[2]), return_sequences=True))
model.add(Dropout(0.2))
# Final recurrent layer: only its last output feeds the classifier
model.add(LSTM(256))
model.add(Dropout(0.2))

# One softmax probability per character in the vocabulary
model.add(Dense(n_vocab, activation='softmax'))


# Categorical cross-entropy for multi-class prediction, optimized with Adam
model.compile(loss='categorical_crossentropy', optimizer='adam')