In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

# Simple preprocessing function
def prepare_data(text):
    """Convert text to a character-level index sequence.

    The vocabulary is the *sorted* set of distinct characters in ``text``, so
    the same text always produces the same character/index mapping.

    Args:
        text: The string to encode.

    Returns:
        A tuple ``(data, char_to_idx, idx_to_char)`` where ``data`` is the list
        of vocabulary indices for every character of ``text`` (in order),
        ``char_to_idx`` maps each character to its index, and ``idx_to_char``
        is the inverse mapping.
    """
    # Sort the vocabulary: the iteration order of a set of strings can change
    # from one interpreter run to the next (string hash randomization), which
    # would silently re-number the characters on every kernel restart.
    chars = sorted(set(text))
    char_to_idx = {ch: i for i, ch in enumerate(chars)}
    idx_to_char = dict(enumerate(chars))

    # Convert text to indices
    data = [char_to_idx[ch] for ch in text]
    return data, char_to_idx, idx_to_char


In [None]:
# Simple dataset class
class TextDataset(Dataset):
    """Sliding-window dataset for next-token prediction.

    Item ``i`` is the pair ``(x, y)`` where ``x`` is
    ``data[i : i + sequence_length]`` and ``y`` is the same window shifted one
    position to the right, i.e. the target for every position of ``x``.

    Args:
        data: Sequence of integer token indices.
        sequence_length: Number of tokens in each input/target window.
    """

    def __init__(self, data, sequence_length):
        self.data = data
        self.sequence_length = sequence_length

    def __len__(self):
        # Each item needs sequence_length + 1 tokens. Clamp at zero because
        # len() raises ValueError when __len__ returns a negative number,
        # which happens when the data is shorter than one window.
        return max(0, len(self.data) - self.sequence_length)

    def __getitem__(self, index):
        """Return the ``(input, target)`` long tensors for window ``index``.

        Raises:
            IndexError: If ``index`` is outside ``[-len(self), len(self))``.
        """
        size = len(self)
        if index < 0:
            index += size
        # Without this check an out-of-range index returns truncated windows
        # and plain iteration over the dataset never stops, because the
        # sequence protocol relies on IndexError to end the loop.
        if not 0 <= index < size:
            raise IndexError("TextDataset index out of range")

        # Get input sequence and target
        x = self.data[index:index + self.sequence_length]
        y = self.data[index + 1:index + self.sequence_length + 1]
        # Explicit long dtype so the windows are valid embedding indices.
        return torch.tensor(x, dtype=torch.long), torch.tensor(y, dtype=torch.long)

# Simple language model
class SimpleLanguageModel(nn.Module):
    """Token-level language model: embedding -> LSTM -> linear projection.

    Args:
        vocab_size: Number of distinct tokens; also the size of the output
            logit vector at every position.
        embedding_dim: Width of the token embeddings.
        hidden_dim: Width of the LSTM hidden state.
    """

    def __init__(self, vocab_size, embedding_dim=32, hidden_dim=64):
        super().__init__()
        # Layers are created in data-flow order.
        self.embedding = nn.Embedding(
            num_embeddings=vocab_size,
            embedding_dim=embedding_dim,
        )
        self.lstm = nn.LSTM(
            input_size=embedding_dim,
            hidden_size=hidden_dim,
            batch_first=True,  # inputs are laid out (batch, seq, feature)
        )
        self.fc = nn.Linear(in_features=hidden_dim, out_features=vocab_size)

    def forward(self, x):
        """Map a batch of index sequences to per-position vocabulary logits."""
        # The LSTM's final (h, c) state is not needed; only the per-step
        # outputs are projected onto the vocabulary.
        hidden_states, _final_state = self.lstm(self.embedding(x))
        return self.fc(hidden_states)


In [None]:
# Training function
def train_model(model, train_loader, epochs=10, learning_rate=0.01):
    """Train ``model`` with Adam and cross-entropy loss.

    After every epoch the mean per-batch loss is printed.

    Args:
        model: Module mapping a batch of index sequences to logits whose last
            dimension is the number of classes.
        train_loader: Iterable yielding ``(batch_x, batch_y)`` pairs. It is
            iterated once per epoch and does not need to support ``len()``.
        epochs: Number of passes over ``train_loader``.
        learning_rate: Learning rate passed to Adam.

    Raises:
        ValueError: If ``train_loader`` yields no batches in an epoch.
    """
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    for epoch in range(epochs):
        model.train()
        total_loss = 0.0
        # Count batches while iterating instead of calling len(train_loader),
        # so loaders without __len__ work and an empty loader is reported
        # clearly rather than as a ZeroDivisionError.
        num_batches = 0

        for batch_x, batch_y in train_loader:
            optimizer.zero_grad()
            output = model(batch_x)
            # Flatten every position into one row of logits / one target.
            # reshape (unlike view) also accepts non-contiguous tensors.
            loss = criterion(
                output.reshape(-1, output.shape[-1]),
                batch_y.reshape(-1),
            )

            loss.backward()
            optimizer.step()

            total_loss += loss.item()
            num_batches += 1

        if num_batches == 0:
            raise ValueError("train_loader yielded no batches; nothing to train on")

        avg_loss = total_loss / num_batches
        print(f'Epoch {epoch+1}/{epochs}, Loss: {avg_loss:.4f}')



In [None]:
# Text generation function
def generate_text(model, seed_text, char_to_idx, idx_to_char, length=100,
                  context_length=None):
    """Greedily extend ``seed_text`` by ``length`` characters.

    At every step the model is run on the current context window and the
    highest-scoring next character is appended.

    Args:
        model: Module mapping a ``(1, T)`` tensor of indices to logits of
            shape ``(1, T, vocab)``.
        seed_text: Non-empty prompt; every character must be a key of
            ``char_to_idx``.
        char_to_idx: Mapping from character to vocabulary index.
        idx_to_char: Mapping from vocabulary index to character.
        length: Number of characters to generate.
        context_length: Maximum number of most-recent characters fed to the
            model. ``None`` (default) keeps the window at ``len(seed_text)``,
            so a short seed gives the model correspondingly little context;
            pass e.g. the training sequence length to let the context grow
            as text is generated.

    Returns:
        ``seed_text`` followed by the generated characters.

    Raises:
        ValueError: If ``seed_text`` is empty or ``context_length`` is < 1.
        KeyError: If ``seed_text`` contains a character not in ``char_to_idx``.
    """
    # An empty seed would become a float tensor of shape (1, 0) and fail
    # inside the model with an unrelated-looking error; reject it up front.
    if not seed_text:
        raise ValueError("seed_text must contain at least one character")
    if context_length is not None and context_length < 1:
        raise ValueError("context_length must be at least 1")

    model.eval()
    current_chars = [char_to_idx[ch] for ch in seed_text]
    window = len(current_chars) if context_length is None else context_length
    current_chars = current_chars[-window:]
    # Collect pieces and join once instead of repeated string concatenation.
    pieces = [seed_text]

    with torch.no_grad():
        for _ in range(length):
            # Prepare input (batch of one sequence)
            x = torch.tensor([current_chars], dtype=torch.long)

            # Get prediction: logits at the last position of the only batch row
            output = model(x)
            next_char_idx = torch.argmax(output[0, -1]).item()

            # Add predicted character and slide the context window
            pieces.append(idx_to_char[next_char_idx])
            current_chars = (current_chars + [next_char_idx])[-window:]

    return "".join(pieces)


In [None]:
# Sample text
text = """
The quick brown fox jumps over the lazy dog.
The lazy dog sleeps while the quick brown fox runs.
The fox and dog play together in the garden.
"""

# Seed PyTorch's global RNG so that weight initialization and the DataLoader's
# shuffling are repeatable on a fresh kernel (Restart & Run All).
torch.manual_seed(42)

# Prepare data
sequence_length = 20  # characters per training window
data, char_to_idx, idx_to_char = prepare_data(text)

# The dataset needs at least sequence_length + 1 characters to form one
# (input, target) window; fail here with a clear message otherwise.
assert len(data) > sequence_length, "text is too short for the chosen sequence_length"

# Create dataset and dataloader
dataset = TextDataset(data, sequence_length)
train_loader = DataLoader(dataset, batch_size=32, shuffle=True)

# Initialize model (vocabulary size = number of distinct characters)
model = SimpleLanguageModel(len(char_to_idx))

# Train model
train_model(model, train_loader, epochs=50)

Epoch 1/50, Loss: 3.2514
Epoch 2/50, Loss: 2.6725
Epoch 3/50, Loss: 2.0588
Epoch 4/50, Loss: 1.4824
Epoch 5/50, Loss: 1.0144
Epoch 6/50, Loss: 0.6787
Epoch 7/50, Loss: 0.4695
Epoch 8/50, Loss: 0.3365
Epoch 9/50, Loss: 0.2577
Epoch 10/50, Loss: 0.2081
Epoch 11/50, Loss: 0.1769
Epoch 12/50, Loss: 0.1569
Epoch 13/50, Loss: 0.1391
Epoch 14/50, Loss: 0.1299
Epoch 15/50, Loss: 0.1217
Epoch 16/50, Loss: 0.1183
Epoch 17/50, Loss: 0.1148
Epoch 18/50, Loss: 0.1112
Epoch 19/50, Loss: 0.1113
Epoch 20/50, Loss: 0.1054
Epoch 21/50, Loss: 0.1061
Epoch 22/50, Loss: 0.1041
Epoch 23/50, Loss: 0.1041
Epoch 24/50, Loss: 0.1040
Epoch 25/50, Loss: 0.1018
Epoch 26/50, Loss: 0.1002
Epoch 27/50, Loss: 0.1007
Epoch 28/50, Loss: 0.0974
Epoch 29/50, Loss: 0.0983
Epoch 30/50, Loss: 0.0974
Epoch 31/50, Loss: 0.0963
Epoch 32/50, Loss: 0.0969
Epoch 33/50, Loss: 0.0988
Epoch 34/50, Loss: 0.0981
Epoch 35/50, Loss: 0.0984
Epoch 36/50, Loss: 0.0953
Epoch 37/50, Loss: 0.0957
Epoch 38/50, Loss: 0.0951
Epoch 39/50, Loss: 0.

In [None]:
# Greedy continuation of the prompt "The quick" (100 new characters)
seed = "The quick"
generated = generate_text(
    model,
    seed,
    char_to_idx,
    idx_to_char,
    length=100,
)
# Header and result in one call; sep="\n" puts them on separate lines.
print("\nGenerated text:", generated, sep="\n")


Generated text:
The quick brown fox runs.
The fox and dog play together in the lazy dog sleeps while the quick brown fox runs


In [None]:
# Greedy continuation of a shorter prompt, "garden" (100 new characters)
seed = "garden"
generated = generate_text(
    model,
    seed,
    char_to_idx,
    idx_to_char,
    length=100,
)
# Header and result in one call; sep="\n" puts them on separate lines.
print("\nGenerated text:", generated, sep="\n")


Generated text:
garden.
The fox and dog play together in together in together in together in together in together in toget
