# Train with more data

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import random

## Configuration

In [2]:
# Hyperparameters a reader might tune
SEQ_LENGTH = 100     # characters per training window (default of get_batch)
HIDDEN_SIZE = 128    # width of the RNN hidden state
NUM_LAYERS = 1       # number of stacked RNN layers
NUM_EPOCHS = 300     # iterations of the training loop
LR = 0.005           # learning rate handed to Adam

# Seed every RNG the notebook draws from (window sampling uses `random`,
# weight initialisation uses torch) so that Restart & Run All reproduces the
# same losses and the same generated text.
SEED = 42
torch.manual_seed(SEED)
random.seed(SEED)

## Load and preprocess article

In [3]:
# Read the article and normalise it for character-level training: periods are
# dropped, everything is lower-cased, and every run of whitespace (line breaks
# included) collapses to a single space. Deleting newlines outright would fuse
# the last word of one line with the first word of the next.
with open("article.txt", "r", encoding="utf-8") as f:
    text = " ".join(f.read().replace('.', '').lower().split())
text

"nasa's mars rover has discovered new evidence of ancient water on the red planet rock samples taken from the surface show signs of mineral deposits that typically form in the presence of water this discovery strengthens the theory that mars may have once supported microbial life scientists are now planning future missions to explore these areas further"

In [4]:
# Build the vocabulary: each distinct character of `text` gets an integer id,
# assigned in sorted order, with lookup tables for both directions.
chars = sorted(set(text))
idx2char = dict(enumerate(chars))
char2idx = {ch: i for i, ch in idx2char.items()}
vocab_size = len(chars)
idx2char

{0: ' ',
 1: "'",
 2: 'a',
 3: 'b',
 4: 'c',
 5: 'd',
 6: 'e',
 7: 'f',
 8: 'g',
 9: 'h',
 10: 'i',
 11: 'k',
 12: 'l',
 13: 'm',
 14: 'n',
 15: 'o',
 16: 'p',
 17: 'r',
 18: 's',
 19: 't',
 20: 'u',
 21: 'v',
 22: 'w',
 23: 'x',
 24: 'y'}

## Prepare sequences

In [5]:
def char_tensor(string):
    """Encode ``string`` as a 1-D ``torch.long`` tensor of vocabulary ids.

    Every character is looked up in the module-level ``char2idx`` table, so a
    character that is missing from that table raises ``KeyError``.
    """
    return torch.tensor([char2idx[ch] for ch in string], dtype=torch.long)

def get_batch(seq_length=SEQ_LENGTH):
    """Sample one random training window from the module-level ``text``.

    A slice of ``seq_length + 1`` characters is taken at a random offset. The
    input is that slice without its last character and the target is the same
    slice without its first, so ``target[i]`` is the character that follows
    ``input[i]`` in the text.

    Returns:
        ``(input_seq, target_seq)``, both encoded with ``char_tensor``.
    """
    window = seq_length + 1
    # randint is inclusive on both ends, so the last window ends at len(text).
    offset = random.randint(0, len(text) - window)
    chunk = text[offset:offset + window]
    return char_tensor(chunk[:-1]), char_tensor(chunk[1:])

## RNN Model

In [6]:
class CharRNN(nn.Module):
    """Character-level language model: embedding -> ``nn.RNN`` -> linear head.

    Args:
        input_size: Vocabulary size; also used as the embedding width.
        hidden_size: Width of the RNN hidden state.
        output_size: Number of classes scored by the linear head.
        num_layers: Number of stacked RNN layers. When omitted, the
            notebook-level ``NUM_LAYERS`` is read once, at construction time.
    """

    def __init__(self, input_size, hidden_size, output_size, num_layers=None):
        super().__init__()
        # Remember the layer count on the instance: init_hidden() must agree
        # with the RNN that was actually built, even if the global constant is
        # edited and the config cell re-run afterwards.
        self.num_layers = NUM_LAYERS if num_layers is None else num_layers
        self.rnn = nn.RNN(input_size, hidden_size, self.num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)
        self.hidden_size = hidden_size
        self.embedding = nn.Embedding(input_size, input_size)

    def forward(self, x, hidden=None):
        """Score the next character at every position of ``x``.

        Args:
            x: Integer character ids, batch first (the notebook passes a
                ``(1, seq)`` tensor).
            hidden: Initial hidden state; ``None`` lets ``nn.RNN`` start from
                zeros.

        Returns:
            ``(logits, hidden)`` where ``logits`` has one row per input
            position, flattened over batch and sequence, and ``hidden`` is the
            final RNN state.
        """
        x = self.embedding(x)
        out, hidden = self.rnn(x, hidden)
        # Flatten (batch, seq, hidden) -> (batch * seq, hidden) so the rows
        # line up with a flat target vector.
        out = self.fc(out.reshape(-1, out.size(2)))
        return out, hidden

    def init_hidden(self, batch_size=1):
        """Return a zero hidden state shaped ``(num_layers, batch_size, hidden_size)``.

        The tensor is created from a model parameter so it shares the model's
        dtype and device.
        """
        return self.fc.weight.new_zeros(self.num_layers, batch_size, self.hidden_size)

# Instantiate the network together with its optimiser and loss.
# Input and output sizes are both the vocabulary size: characters in,
# per-character scores out.
model = CharRNN(input_size=vocab_size, hidden_size=HIDDEN_SIZE, output_size=vocab_size)
optimizer = optim.Adam(model.parameters(), lr=LR)
loss_fn = nn.CrossEntropyLoss()

## Training

In [7]:
print("Training...")
# The generation code calls model.eval(); make sure training mode is active
# whenever this cell is (re-)run, including after the generation cells.
model.train()
for epoch in range(NUM_EPOCHS):
    # Each "epoch" is a single optimisation step on one random window.
    inputs, targets = get_batch()
    inputs = inputs.unsqueeze(0)    # (seq,) -> (1, seq): the RNN is batch_first
    targets = targets.view(-1)      # flat targets, one per row of `output`

    hidden = model.init_hidden()    # fresh zero state for every window
    optimizer.zero_grad()
    output, hidden = model(inputs, hidden)
    loss = loss_fn(output, targets)
    loss.backward()
    optimizer.step()

    if (epoch+1) % 10 == 0:
        print(f"Epoch {epoch+1}/{NUM_EPOCHS}, Loss: {loss.item():.4f}")


Training...
Epoch 10/300, Loss: 2.0565
Epoch 20/300, Loss: 1.6593
Epoch 30/300, Loss: 1.4354
Epoch 40/300, Loss: 1.3993
Epoch 50/300, Loss: 0.5734
Epoch 60/300, Loss: 0.3261
Epoch 70/300, Loss: 0.3978
Epoch 80/300, Loss: 0.1777
Epoch 90/300, Loss: 0.1346
Epoch 100/300, Loss: 0.1430
Epoch 110/300, Loss: 0.2036
Epoch 120/300, Loss: 0.1271
Epoch 130/300, Loss: 0.1130
Epoch 140/300, Loss: 0.2805
Epoch 150/300, Loss: 0.1502
Epoch 160/300, Loss: 0.1120
Epoch 170/300, Loss: 0.0662
Epoch 180/300, Loss: 0.0927
Epoch 190/300, Loss: 0.0892
Epoch 200/300, Loss: 0.1328
Epoch 210/300, Loss: 0.0313
Epoch 220/300, Loss: 0.1361
Epoch 230/300, Loss: 0.1864
Epoch 240/300, Loss: 0.0641
Epoch 250/300, Loss: 0.0749
Epoch 260/300, Loss: 0.0221
Epoch 270/300, Loss: 0.0800
Epoch 280/300, Loss: 0.0443
Epoch 290/300, Loss: 0.0912
Epoch 300/300, Loss: 0.0951


## Text generation

In [11]:
def generate_answer(prompt, max_len=72):
    """Greedily continue ``prompt`` with ``max_len`` generated characters.

    The lower-cased prompt primes the module-level ``model``; after that the
    highest-scoring next character is appended and fed back in, one step at a
    time, carrying the hidden state forward.

    Args:
        prompt: Text to continue. Characters that are not in ``char2idx``
            (for example ``?``) are kept in the returned string but are not
            fed to the model.
        max_len: Number of characters to generate.

    Returns:
        ``prompt`` exactly as given, followed by the generated characters.

    Raises:
        ValueError: If characters must be generated but no character of the
            prompt is in the vocabulary, so there is nothing to prime with.
    """
    primer = ''.join(ch for ch in prompt.lower() if ch in char2idx)
    if max_len > 0 and not primer:
        raise ValueError("prompt contains no characters from the training vocabulary")

    # Put the model back in whatever mode it was in, so calling this function
    # does not silently leave a model that is being trained in eval mode.
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            input_seq = char_tensor(primer).unsqueeze(0)
            hidden = model.init_hidden()
            output_chars = list(prompt)

            for _ in range(max_len):
                output, hidden = model(input_seq, hidden)
                # The last row holds the scores for the character that follows
                # the most recent input position.
                next_idx = int(torch.argmax(output[-1]))
                predicted_char = idx2char[next_idx]
                output_chars.append(predicted_char)
                input_seq = char_tensor(predicted_char).unsqueeze(0)

            return ''.join(output_chars)
    finally:
        model.train(was_training)

## Ask questions

In [12]:
# Prime the model with the start of a question and print its continuation
question = 'What did the Mars rover find'
answer = generate_answer(prompt=question)
print(answer)

What did the Mars rover find the theory that mars may have once supported microbial life scientists 


In [10]:
# Second prompt, continued the same way
question = 'What are scientists planning'
answer = generate_answer(prompt=question)
print(answer)

What are scientists planning future missions to explore these areas furm mnce of water this discovery streng
