Import Libraries

In [1]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout
from tensorflow.keras.optimizers import Adam

Load and Preprocess Text

In [3]:
with open('pg2265.txt', 'r', encoding='utf-8') as f:
    text = f.read()[15858:]

chars = sorted(set(text))
char2int = {ch: i for i, ch in enumerate(chars)}
int2char = dict(enumerate(chars))
encoded = np.array([char2int[ch] for ch in text])
vocab_size = len(chars)

Prepare Sequences

In [4]:
seq_length = 50  # increased from 10 to 50
sequences, targets = [], []

for i in range(len(encoded) - seq_length):
    sequences.append(encoded[i:i + seq_length])
    targets.append(encoded[i + seq_length])

sequences = np.array(sequences)
targets = np.array(targets)

Define CharRNN Model

In [5]:
class CharRNN(Model):
    def __init__(self, vocab_size, embedding_dim=64, lstm_units=128):
        super(CharRNN, self).__init__()
        self.embedding = Embedding(vocab_size, embedding_dim)
        self.lstm = LSTM(lstm_units, return_sequences=False)
        self.dropout = Dropout(0.2)
        self.dense = Dense(vocab_size, activation='softmax')

    def call(self, x):
        x = self.embedding(x)
        x = self.lstm(x)
        x = self.dropout(x)
        return self.dense(x)

Train the Model & Save Trained Weights

In [6]:
model = CharRNN(vocab_size)
model.compile(loss='sparse_categorical_crossentropy', optimizer=Adam(), metrics=['accuracy'])

model.fit(sequences, targets, epochs=5, batch_size=16)

model.save_weights('char_rnn_tf2.weights.h5')

Epoch 1/5
[1m10175/10175[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m382s[0m 37ms/step - accuracy: 0.3323 - loss: 2.4126
Epoch 2/5
[1m10175/10175[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m383s[0m 37ms/step - accuracy: 0.4749 - loss: 1.8080
Epoch 3/5
[1m10175/10175[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m391s[0m 38ms/step - accuracy: 0.5089 - loss: 1.6737
Epoch 4/5
[1m10175/10175[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m390s[0m 38ms/step - accuracy: 0.5263 - loss: 1.6048
Epoch 5/5
[1m10175/10175[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m437s[0m 38ms/step - accuracy: 0.5379 - loss: 1.5574


Text Generation Function

In [7]:
def sample_text(model, start_string, length=500, temperature=0.5):
    input_eval = [char2int[s] for s in start_string]
    input_eval = tf.expand_dims(input_eval, 0)  # shape: (1, seq_length)
    result = list(start_string)

    for _ in range(length):
        predictions = model(input_eval)  # shape: (1, vocab_size)
        predictions = predictions / temperature
        predicted_id = tf.random.categorical(tf.math.log(predictions), num_samples=1)[0, 0].numpy()

        result.append(int2char[predicted_id])
        input_eval = tf.expand_dims([predicted_id], 0)  # shape: (1, 1)

    return ''.join(result)


Reload Model and Generate

In [8]:
model = CharRNN(vocab_size)
_ = model(tf.zeros((1, seq_length), dtype=tf.int32))
model.load_weights('char_rnn_tf2.weights.h5')


final Output

In [9]:
start_seed = "The moonlight shone through the window, and "
print(sample_text(model, start_string=start_seed, length=500, temperature=0.6))

The moonlight shone through the window, and rCfAjmsDumx
SEd1kjY&
Qus& ;1k(
DikchEKevusK CyKe(Kon.1?1] pQu HEKnjcie(O l-d1NWw1k(M]G[NE(T?1] K]Kik(WaqunjsCclzKpn&,1g1& CchE](STevnjk
WrEKsPll[L&DEKnVWZ&djRfDCgZ& NgjsWe
1pwQun]?1& B&.1Lja:1y&!1y&
WKh(HEK(Tw1y& fGuifr] mOiS]HEar;1crchEKY1sTevEGu HE1HEQuY&vY&,1knRounj(Fay&-HEKie-
Rk(BZ& I fm-L&EB&vbZQunjDa!1:1HENMEY& H?1kponjsN&
;1Wh(AoFrpQulIvsM!1Klgj;1L&'Nk
Tw1.1Fo-HEqb&O SxEPsfsB(DuanlapK'HEB(NoD&;1jTnsE(CrESDeY& Pa t&-ECkz&;1kpOclgjsO GhjbE(NjnwQunjWjb&Vpqbj(Guied1z&FCw1Aqb(TMEGus(BY1.1pOWA


Spell Correction

In [10]:
from difflib import get_close_matches
from nltk.corpus import words
import nltk
nltk.download('words')

word_vocab = set(words.words())

def correct_spelling(word, vocab):
    candidates = get_close_matches(word, vocab, n=1, cutoff=0.8)
    return candidates[0] if candidates else word


input_words = ["fo", "moonligt", "wndow", "languae", "texxt", "neaural"]
corrected = [correct_spelling(word, word_vocab) for word in input_words]
print("Corrected words:", corrected)

[nltk_data] Downloading package words to /root/nltk_data...
[nltk_data]   Unzipping corpora/words.zip.


Corrected words: ['fro', 'moonlight', 'window', 'language', 'text', 'neural']


Character-level Sentiment Proxy

In [11]:
from textblob import TextBlob

def real_sentiment(text):
    blob = TextBlob(text)
    polarity = blob.sentiment.polarity
    if polarity > 0:
        return "Positive"
    elif polarity < 0:
        return "Negative"
    else:
        return "Neutral"


generated_text = sample_text(model, " we and i wandered alone, the light glimmered from afar ", 100)
print("Text:", generated_text)
print("Sentiment estimate:", real_sentiment(generated_text))



Text:  we and i wandered alone, the light glimmered from afar Vevnjy&DonjgjxQ[Z&:1[&fI?&Y1kjvB(SCkB& njcnk!1;1kxEDa pz& WevW&
Ad1HEvb(NEKikjntrEKnjd:1(SSKnjKa t[L
Sentiment estimate: Positive


Text Autocompletion

In [12]:
def clean_input(text, valid_chars):
    return ''.join(ch for ch in text if ch in valid_chars)

def autocomplete(prefix, model, max_completion=50, temperature=0.5, fallback="The"):
    cleaned_prefix = clean_input(prefix, char2int.keys())
    if not cleaned_prefix:
        cleaned_prefix = fallback
    return sample_text(model, start_string=cleaned_prefix, length=max_completion, temperature=temperature)

# --- Example Usage ---
inputs = ["The sur"]
for inp in inputs:
    print(f"\nInput: '{inp}'")
    print("→ Autocompletion:", autocomplete(inp, model))


Input: 'The sur'
→ Autocompletion: The surnjlWhY& ML&!1i[I,1ulg1L&WjnjsO [Kevnjrry&M?1kz(MWj
