In [7]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import time
from sklearn.model_selection import train_test_split

In [8]:
# Check for GPU availability and set the device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f'Using device: {device}')

Using device: cuda


In [9]:
# Text sequence
text = """Next character prediction is a fundamental task in the field of natural language processing (NLP) that involves predicting the next character in a sequence of text based on the characters that precede it. This task is essential for various applications, including text auto-completion, spell checking, and even in the development of sophisticated AI models capable of generating human-like text.

At its core, next character prediction relies on statistical models or deep learning algorithms to analyze a given sequence of text and predict which character is most likely to follow. These predictions are based on patterns and relationships learned from large datasets of text during the training phase of the model.

One of the most popular approaches to next character prediction involves the use of Recurrent Neural Networks (RNNs), and more specifically, a variant called Long Short-Term Memory (LSTM) networks. RNNs are particularly well-suited for sequential data like text, as they can maintain information in 'memory' about previous characters to inform the prediction of the next character. LSTM networks enhance this capability by being able to remember long-term dependencies, making them even more effective for next character prediction tasks.

Training a model for next character prediction involves feeding it large amounts of text data, allowing it to learn the probability of each character's appearance following a sequence of characters. During this training process, the model adjusts its parameters to minimize the difference between its predictions and the actual outcomes, thus improving its predictive accuracy over time.

Once trained, the model can be used to predict the next character in a given piece of text by considering the sequence of characters that precede it. This can enhance user experience in text editing software, improve efficiency in coding environments with auto-completion features, and enable more natural interactions with AI-based chatbots and virtual assistants.

In summary, next character prediction plays a crucial role in enhancing the capabilities of various NLP applications, making text-based interactions more efficient, accurate, and human-like. Through the use of advanced machine learning models like RNNs and LSTMs, next character prediction continues to evolve, opening new possibilities for the future of text-based technology."""

# Preprocess the text
chars = sorted(set(text))
char_to_idx = {ch: idx for idx, ch in enumerate(chars)}
idx_to_char = {idx: ch for idx, ch in enumerate(chars)}

def encode_text(text):
    return [char_to_idx[ch] for ch in text]

def one_hot_encode(indices, dict_size):
    features = np.zeros((len(indices), dict_size), dtype=np.float32)
    for i, idx in enumerate(indices):
        features[i, idx] = 1.0
    return features

# Create input-output pairs for training
def create_training_data(text, seq_length):
    input_data = []
    target_data = []
    for i in range(len(text) - seq_length):
        input_seq = text[i:i+seq_length]
        target_char = text[i+seq_length]
        input_data.append(one_hot_encode(encode_text(input_seq), len(chars)))
        target_data.append(char_to_idx[target_char])
    return torch.tensor(input_data), torch.tensor(target_data)

# Define the model
class CharRNN(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, model_type="rnn"):
        super(CharRNN, self).__init__()
        self.hidden_size = hidden_size
        self.model_type = model_type

        if model_type == "rnn":
            self.rnn = nn.RNN(input_size, hidden_size, batch_first=True)
        elif model_type == "lstm":
            self.rnn = nn.LSTM(input_size, hidden_size, batch_first=True)
        elif model_type == "gru":
            self.rnn = nn.GRU(input_size, hidden_size, batch_first=True)

        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x, hidden):
        out, hidden = self.rnn(x, hidden)
        out = self.fc(out[:, -1, :])
        return out, hidden

    def init_hidden(self, batch_size):
        if self.model_type == "lstm":
            return (torch.zeros(1, batch_size, self.hidden_size).to(device),
                    torch.zeros(1, batch_size, self.hidden_size).to(device))
        else:
            return torch.zeros(1, batch_size, self.hidden_size).to(device)

# Training function
def train(model, input_data, target_data, val_input, val_target, epochs, batch_size, lr=0.01):
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)
    start_time = time.time()
    best_acc = 0

    for epoch in range(epochs):
        model.train()
        total_loss = 0
        for i in range(0, input_data.size(0), batch_size):
            batch_input = input_data[i:i+batch_size].to(device)
            batch_target = target_data[i:i+batch_size].to(device)
            hidden = model.init_hidden(batch_input.size(0))
            optimizer.zero_grad()
            output, hidden = model(batch_input, hidden)
            loss = criterion(output, batch_target)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()

        avg_loss = total_loss / (input_data.size(0) // batch_size)
        val_acc = evaluate(model, val_input.to(device), val_target.to(device), batch_size)

        if val_acc > best_acc:
            best_acc = val_acc
            torch.save(model.state_dict(), f'{model.rnn.__class__.__name__}_best.pth')

        if epoch % 100 == 0:
            print(f'Epoch: {epoch} Loss: {avg_loss:.4f} Val Acc: {val_acc:.4f}')

    print(f'Training Time: {time.time() - start_time} seconds')

# Evaluation function
def evaluate(model, input_data, target_data, batch_size):
    model.eval()
    correct = 0
    with torch.no_grad():
        for i in range(0, input_data.size(0), batch_size):
            batch_input = input_data[i:i+batch_size]
            batch_target = target_data[i:i+batch_size]
            hidden = model.init_hidden(batch_input.size(0))
            output, hidden = model(batch_input, hidden)
            _, predicted = torch.max(output, 1)
            correct += (predicted == batch_target).sum().item()
    return correct / input_data.size(0)

# Main function to run the training
def main():
    seq_lengths = [10, 20, 30]
    hidden_size = 128
    epochs = 1000
    batch_size = 64
    models = ["rnn", "lstm", "gru"]

    for seq_length in seq_lengths:
        input_data, target_data = create_training_data(text, seq_length)
        train_input, val_input, train_target, val_target = train_test_split(input_data, target_data, test_size=0.2, random_state=42)
        for model_type in models:
            print(f'\nTraining {model_type.upper()} with sequence length {seq_length}')
            model = CharRNN(len(chars), hidden_size, len(chars), model_type=model_type).to(device)
            train(model, train_input, train_target, val_input, val_target, epochs, batch_size)

if __name__ == "__main__":
    main()


Training RNN with sequence length 10
Epoch: 0 Loss: 3.3659 Val Acc: 0.1111
Epoch: 100 Loss: 1.6689 Val Acc: 0.3816
Epoch: 200 Loss: 1.4321 Val Acc: 0.3962
Epoch: 300 Loss: 1.9719 Val Acc: 0.3543
Epoch: 400 Loss: 1.1042 Val Acc: 0.4130
Epoch: 500 Loss: 1.4490 Val Acc: 0.4340
Epoch: 600 Loss: 1.2345 Val Acc: 0.4465
Epoch: 700 Loss: 1.2049 Val Acc: 0.4319
Epoch: 800 Loss: 1.0078 Val Acc: 0.4549
Epoch: 900 Loss: 1.3177 Val Acc: 0.4423
Training Time: 41.446280002593994 seconds

Training LSTM with sequence length 10
Epoch: 0 Loss: 3.2722 Val Acc: 0.1426
Epoch: 100 Loss: 0.0383 Val Acc: 0.4130
Epoch: 200 Loss: 0.0917 Val Acc: 0.4675
Epoch: 300 Loss: 0.0359 Val Acc: 0.4340
Epoch: 400 Loss: 0.0367 Val Acc: 0.4696
Epoch: 500 Loss: 0.0340 Val Acc: 0.4298
Epoch: 600 Loss: 0.0365 Val Acc: 0.4528
Epoch: 700 Loss: 0.0336 Val Acc: 0.4696
Epoch: 800 Loss: 0.0364 Val Acc: 0.4780
Epoch: 900 Loss: 0.0368 Val Acc: 0.4864
Training Time: 42.56746196746826 seconds

Training GRU with sequence length 10
Epoch:

In [10]:
def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

# Main function to run the training
def main():
    seq_lengths = [10, 20, 30]
    hidden_size = 128
    epochs = 1000
    batch_size = 64
    models = ["rnn", "lstm", "gru"]

    for seq_length in seq_lengths:
        input_data, target_data = create_training_data(text, seq_length)
        train_input, val_input, train_target, val_target = train_test_split(input_data, target_data, test_size=0.2, random_state=42)
        for model_type in models:
            print(f'\nTraining {model_type.upper()} with sequence length {seq_length}')
            model = CharRNN(len(chars), hidden_size, len(chars), model_type=model_type).to(device)
            print(f'Model Size (Number of Parameters): {count_parameters(model)}')
            train(model, train_input, train_target, val_input, val_target, epochs, batch_size)

if __name__ == "__main__":
    main()


Training RNN with sequence length 10
Model Size (Number of Parameters): 28205
Epoch: 0 Loss: 3.4678 Val Acc: 0.1174
Epoch: 100 Loss: 1.3404 Val Acc: 0.3669
Epoch: 200 Loss: 1.5257 Val Acc: 0.4046
Epoch: 300 Loss: 1.3716 Val Acc: 0.3983
Epoch: 400 Loss: 1.0262 Val Acc: 0.4423
Epoch: 500 Loss: 2.1423 Val Acc: 0.3522
Epoch: 600 Loss: 1.7440 Val Acc: 0.3816
Epoch: 700 Loss: 1.7582 Val Acc: 0.4151
Epoch: 800 Loss: 1.5405 Val Acc: 0.3983
Epoch: 900 Loss: 2.1467 Val Acc: 0.3396
Training Time: 42.70583724975586 seconds

Training LSTM with sequence length 10
Model Size (Number of Parameters): 95405
Epoch: 0 Loss: 3.2891 Val Acc: 0.1426
Epoch: 100 Loss: 0.0381 Val Acc: 0.4423
Epoch: 200 Loss: 0.0377 Val Acc: 0.4298
Epoch: 300 Loss: 0.0446 Val Acc: 0.4675
Epoch: 400 Loss: 0.0341 Val Acc: 0.4486
Epoch: 500 Loss: 0.0346 Val Acc: 0.4864
Epoch: 600 Loss: 0.0399 Val Acc: 0.4885
Epoch: 700 Loss: 0.0332 Val Acc: 0.4864
Epoch: 800 Loss: 0.0322 Val Acc: 0.4822
Epoch: 900 Loss: 0.0336 Val Acc: 0.4864
Trai