# Building_LLM_From_Scratch__Experiment

In [1]:
# Colab-only step: mount Google Drive so that files under /content/drive are
# reachable from this runtime.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
%cd /content/drive/MyDrive/

/content/drive/MyDrive


#### Imported Libraries and Packages

In [3]:
import torch
import torch.nn as nn
from torch.nn import functional as F
import mmap
import string
import random
import pickle

#### Runtime Type

In [4]:
# Prefer the GPU when the runtime exposes one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print(device)

cuda


#### Read the Text Corpus Demo

In [None]:
# Read the whole demo corpus into memory (path is relative to the current working directory).
with open('wizard_of_oz.txt', 'r', encoding='utf-8') as f:
  text = f.read()
# Sorted list of the distinct characters in the corpus: the character-level vocabulary of the demo.
chars = sorted(set(text))
print(chars)

['\n', ' ', '!', '"', '&', "'", '(', ')', '*', ',', '-', '.', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ':', ';', '?', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', '[', ']', '_', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']


#### Encoder and Decoder Function Demo

In [None]:
# Character <-> integer-id lookup tables built from the sorted `chars` list of the demo corpus.
int_to_string = dict(enumerate(chars))
string_to_int = {ch: i for i, ch in int_to_string.items()}


def encode(s):
    """Return the list of integer ids for the characters of `s`."""
    return [string_to_int[c] for c in s]


def decode(l):
    """Return the string spelled by the integer ids in `l`."""
    return ''.join(int_to_string[i] for i in l)


# Example: encode('hello') -> [61, 58, 65, 65, 68]
#          decode([61, 58, 65, 65, 68]) -> 'hello'

# The whole corpus as one 1-D tensor of character ids.
data = torch.tensor(encode(text), dtype=torch.long)

#### Batching Demo

In [None]:
# 80/20 split of the encoded demo corpus into training and validation tensors.
n = int(0.8*len(data))
train_data = data[:n]
val_data = data[n:]

def get_batch(split, block_size=8, batch_size=4):
    """Sample a random batch of (input, target) windows from the in-memory demo corpus.

    The sizes are keyword defaults so that this demo cell runs on a fresh kernel:
    the notebook-level `block_size` / `batch_size` hyperparameters are only assigned
    in a later cell. The defaults reproduce the 4 x 8 tensors shown in this cell's output.

    Args:
        split: 'train' samples from train_data; any other value samples from val_data.
        block_size: number of consecutive characters in each window.
        batch_size: number of windows in the batch.

    Returns:
        (x, y): two (batch_size, block_size) tensors on `device`; y holds the same
        windows as x shifted one character to the right (the next-character targets).
    """
    source = train_data if split == 'train' else val_data
    # randint's upper bound is exclusive, so every start i satisfies
    # i + block_size + 1 <= len(source) and the target slice never runs off the end.
    ix = torch.randint(len(source) - block_size, (batch_size,))
    x = torch.stack([source[i:i+block_size] for i in ix])
    y = torch.stack([source[i+1:i+block_size+1] for i in ix])
    x, y = x.to(device), y.to(device)
    return x, y

x, y = get_batch('train')
print('inputs:')
print(x)
print('targets:')
print(y)

inputs:
tensor([[73, 61, 62, 67, 64,  1, 68, 59],
        [71, 54, 56, 73, 62, 56, 54, 65],
        [ 1, 78, 68, 74,  1, 73, 68,  1],
        [58, 65, 65,  1, 78, 68, 74,  1]])
targets:
tensor([[61, 62, 67, 64,  1, 68, 59,  1],
        [54, 56, 73, 62, 56, 54, 65, 65],
        [78, 68, 74,  1, 73, 68,  1, 78],
        [65, 65,  1, 78, 68, 74,  1, 57]])


## Building and Training the GPT Model

#### Opening the Train or Validation Split

In [5]:
# File names of the pre-made training and validation splits, resolved relative to the
# current working directory when they are opened.
train_split_file_name = "wizard_of_oz_train_split.txt"
validation_split_file_name = "wizard_of_oz_val_split.txt"

##### Training Split Vocab Size [232 KB]

In [8]:
# Distinct characters of the training split, in sorted order.
# NOTE: when the notebook is run top to bottom, the "Encoder and Decoder Setup" cell
# later reassigns vocab_size to the size of a fixed character set, so the value
# computed here is informational.
train_chars = ""
with open(train_split_file_name, mode='r', encoding='utf-8') as f:
    text = f.read()
    train_chars = sorted(set(text))

vocab_size = len(train_chars)
vocab_size

80

##### Validation Split Vocab Size [81 KB]

In [9]:
# Distinct characters of the validation split, in sorted order. The resulting count is
# only displayed; it is not read by any other cell visible in this notebook.
val_chars = ""
with open(validation_split_file_name, mode='r', encoding='utf-8') as f:
    text = f.read()
    val_chars = sorted(set(text))

vocab_size_val = len(val_chars)
vocab_size_val

65

#### Hyperparameter Setup

In [18]:
# Global hyperparameters; the model classes, batching helpers and training loop read
# these names directly from the notebook namespace.
batch_size = 32  # number of sequences stacked into one batch by get_batch
block_size = 128  # context length; also sizes the position-embedding table and the causal mask
max_iters = 10000  # number of optimisation steps in the training loop
learning_rate = 1e-4  # learning rate passed to the AdamW optimizer
eval_iters = 100  # batches averaged per split by estimate_loss; the training loop also evaluates every eval_iters steps
n_embd = 384  # embedding width used throughout the model
n_head = 4  # attention heads per Block (each head has size n_embd // n_head)
n_layer = 4  # number of Block modules stacked in the model
dropout = 0.2  # dropout probability used in the attention, projection and feed-forward layers

#### Encoder and Decoder Setup

In [7]:
# Fixed character vocabulary: ASCII letters, digits and punctuation, then ' ' and ' \n'.
# NOTE(review): the space is appended twice (once on its own and once inside ' \n'), so
# all_chars has 97 entries but only 96 distinct characters: encode() maps ' ' to the
# later index (95) and id 94 is never produced. Left unchanged because deduplicating
# would shift the ids used by any model already trained with this table - confirm
# before changing.
all_chars = ''.join((string.ascii_letters, string.digits, string.punctuation, ' ', ' \n'))

# The model's vocabulary size is the length of this fixed table, not of the corpus vocabulary.
vocab_size = len(all_chars)

# Lookup tables: id -> character, and character -> id (last occurrence wins for duplicates).
int_to_string = dict(enumerate(all_chars))
string_to_int = {ch: idx for idx, ch in int_to_string.items()}


def encode(s):
    """Return the list of integer ids for the characters of `s` (KeyError if a character is not in all_chars)."""
    return [string_to_int[c] for c in s]


def decode(l):
    """Return the string spelled by the integer ids in `l`."""
    return ''.join(int_to_string[i] for i in l)


# Round-trip check on a sample containing letters, digits, punctuation, spaces and a newline.
text = "Hello, Brook age 27!\nNew line."
encoded_text = encode(text)
decoded_text = decode(encoded_text)

print(f"Vocab size: {vocab_size}")
print(f"Original text: {text}")
print(f"Encoded text: {encoded_text}")
print(f"Decoded text: {decoded_text}")

Vocab size: 97
Original text: Hello, Brook age 27!
New line.
Encoded text: [33, 4, 11, 11, 14, 73, 95, 27, 17, 14, 14, 10, 95, 0, 6, 4, 95, 54, 59, 62, 96, 39, 4, 22, 95, 11, 8, 13, 4, 75]
Decoded text: Hello, Brook age 27!
New line.


#### Batching Setup with Memory Map Seeking

In [8]:
def get_random_chunk(split):
    """Read one random window of a split file through a memory map and encode it.

    Memory-mapping means only the selected window is read, whatever the file size.

    Args:
        split: 'train' selects the training split file; any other value selects the
            validation split file.

    Returns:
        1-D torch.long tensor with the encoded characters of the window. It can hold
        fewer elements than the number of bytes read, because undecodable bytes and
        carriage returns are dropped.
    """
    if split == 'train':
        filename = train_split_file_name
    else:
        filename = validation_split_file_name

    with open(filename, 'rb') as f, mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ) as mm:
        # Latest start offset that still leaves block_size * batch_size bytes before EOF.
        last_start = len(mm) - block_size * batch_size
        start_pos = random.randint(0, last_start)
        raw = mm[start_pos:start_pos + block_size * (batch_size - 1)]

    # The window starts at an arbitrary byte offset, so invalid byte sequences are
    # ignored instead of raising; carriage returns are stripped.
    decoded_block = raw.decode('utf-8', errors='ignore').replace('\r', '')
    return torch.tensor(encode(decoded_block), dtype=torch.long)


def get_batch(split):
    """Draw a random batch of input/target windows from a freshly sampled chunk.

    Args:
        split: forwarded to get_random_chunk to choose the split file.

    Returns:
        (x, y): two (batch_size, block_size) tensors moved to `device`; y holds the
        same windows as x shifted one position to the right (next-token targets).
    """
    chunk = get_random_chunk(split)
    # torch.randint's upper bound is exclusive, so every start satisfies
    # start + block_size + 1 <= len(chunk) and the target slice stays inside the chunk.
    max_ix = len(chunk) - block_size - 1
    starts = torch.randint(max_ix, (batch_size,))

    inputs, targets = [], []
    for start in starts:
        inputs.append(chunk[start:start + block_size])
        targets.append(chunk[start + 1:start + block_size + 1])

    x = torch.stack(inputs).to(device)
    y = torch.stack(targets).to(device)
    return x, y

#### Loss Estimation for Training and Evaluation

In [9]:
class EstimateLoss:
    """Averages a model's loss over several random batches of each data split.

    Attributes:
        model: object called as model(X, Y) -> (logits, loss) that also provides
            eval() and train().
        eval_iters: number of batches averaged per split.
        get_batch: callable get_batch(split) -> (X, Y).
    """

    def __init__(self, model, eval_iters, get_batch):
        self.model, self.eval_iters, self.get_batch = model, eval_iters, get_batch

    @torch.no_grad()
    def estimate_loss(self):
        """Return {'train': mean_loss, 'val': mean_loss} as 0-dim tensors.

        The model is switched to eval mode for the measurement and back to train
        mode before returning; gradients are not tracked.
        """
        self.model.eval()

        out = {}
        for split in ('train', 'val'):
            batch_losses = []
            for _ in range(self.eval_iters):
                inputs, targets = self.get_batch(split)
                _, batch_loss = self.model(inputs, targets)
                batch_losses.append(batch_loss.item())
            out[split] = torch.tensor(batch_losses).mean()

        self.model.train()
        return out

#### GPT Architecture

#### Head, MultiHeadAttention, FeedForward and Block Classes

In [10]:
class Head(nn.Module):
    """A single causal self-attention head.

    Projects the input to key, query and value vectors of width `head_size` and lets
    every position attend only to itself and to earlier positions.
    """

    def __init__(self, head_size):
        super().__init__()
        # Bias-free projections from the embedding width (global n_embd) to head_size.
        self.key = nn.Linear(n_embd, head_size, bias=False)
        self.query = nn.Linear(n_embd, head_size, bias=False)
        self.value = nn.Linear(n_embd, head_size, bias=False)
        # Lower-triangular matrix of ones; stored as a buffer (part of the module
        # state, not a trainable parameter) and sliced to the sequence length in forward().
        lower_triangle = torch.tril(torch.ones(block_size, block_size))
        self.register_buffer('tril', lower_triangle)

        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        """Map x of shape (B, T, n_embd) to attention output of shape (B, T, head_size)."""
        _, T, _ = x.shape
        keys = self.key(x)        # (B, T, hs)
        queries = self.query(x)   # (B, T, hs)
        values = self.value(x)    # (B, T, hs)

        # Scaled dot-product affinities between every pair of positions: (B, T, T).
        scale = keys.shape[-1] ** -0.5
        scores = (queries @ keys.transpose(-2, -1)) * scale

        # Positions above the diagonal are "future" tokens: give them -inf so that
        # softmax assigns them zero weight.
        is_future = self.tril[:T, :T] == 0
        scores = scores.masked_fill(is_future, float('-inf'))

        weights = self.dropout(F.softmax(scores, dim=-1))  # (B, T, T)
        # Weighted sum of the value vectors: (B, T, T) @ (B, T, hs) -> (B, T, hs).
        return weights @ values

# [1, 0, 0]
# [1, 0.6, 0]
# [1, 0.6, 0.4]
class MultiHeadAttention(nn.Module):
    """Runs several attention heads side by side and mixes their concatenated outputs."""

    def __init__(self, num_heads, head_size):
        super().__init__()
        head_modules = []
        for _ in range(num_heads):
            head_modules.append(Head(head_size))
        self.heads = nn.ModuleList(head_modules)
        # Linear mix of the concatenated head outputs back to the embedding width.
        self.proj = nn.Linear(head_size * num_heads, n_embd)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        """Apply every head to x and project the concatenation: (B, T, n_embd) out."""
        per_head = [head(x) for head in self.heads]   # num_heads tensors of (B, T, head_size)
        # Concatenate along the feature axis: [h1 features | h2 features | ...].
        merged = torch.cat(per_head, dim=-1)          # (B, T, num_heads * head_size)
        return self.dropout(self.proj(merged))


class FeedFoward(nn.Module):
    """Position-wise MLP: expand to 4 * n_embd, apply ReLU, project back, then dropout."""

    def __init__(self, n_embd):
        super().__init__()
        hidden_width = 4 * n_embd
        layers = [
            nn.Linear(n_embd, hidden_width),
            nn.ReLU(),
            nn.Linear(hidden_width, n_embd),
            nn.Dropout(dropout),
        ]
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        """Apply the MLP to the last dimension of x; the output has the same shape as x."""
        return self.net(x)

class Block(nn.Module):
    """Transformer block: self-attention, then a feed-forward network.

    Each sub-layer is added to its input (residual connection) and the sum is
    layer-normalised afterwards.
    """

    def __init__(self, n_embd, n_head):
        # n_embd: embedding width; n_head: number of attention heads sharing that width.
        super().__init__()
        self.sa = MultiHeadAttention(n_head, n_embd // n_head)
        self.ffwd = FeedFoward(n_embd)
        self.ln1 = nn.LayerNorm(n_embd)
        self.ln2 = nn.LayerNorm(n_embd)

    def forward(self, x):
        """Return the block output; same shape as x."""
        attended = self.ln1(x + self.sa(x))
        return self.ln2(attended + self.ffwd(attended))

#### GPT Language Model Class

In [11]:
class GPTLanguageModel(nn.Module):
    """GPT-style language model over integer token ids.

    Token embeddings plus learned position embeddings are passed through a stack of
    `n_layer` Block modules, a final LayerNorm and a linear head that produces one
    logit per vocabulary entry. Sizes other than `vocab_size` are read from the
    notebook-level hyperparameters (n_embd, block_size, n_head, n_layer).
    """

    def __init__(self, vocab_size):
        super().__init__()
        self.token_embedding_table = nn.Embedding(vocab_size, n_embd)
        self.position_embedding_table = nn.Embedding(block_size, n_embd)
        self.blocks = nn.Sequential(*[Block(n_embd, n_head=n_head) for _ in range(n_layer)])
        self.ln_f = nn.LayerNorm(n_embd) # final layer norm
        self.lm_head = nn.Linear(n_embd, vocab_size)

        # Re-initialise every Linear / Embedding sub-module (see _init_weights).
        self.apply(self._init_weights)

    def _init_weights(self, module):
        """Draw Linear and Embedding weights from N(0, 0.02^2); zero Linear biases."""
        if isinstance(module, nn.Linear):
            torch.nn.init.normal_(module.weight, mean=0.0, std=0.02)
            if module.bias is not None:
                torch.nn.init.zeros_(module.bias)
        elif isinstance(module, nn.Embedding):
            torch.nn.init.normal_(module.weight, mean=0.0, std=0.02)

    def forward(self, index, targets=None):
        """Compute logits and, when targets are given, the cross-entropy loss.

        Args:
            index: (B, T) integer tensor of token ids, with T <= block_size.
            targets: optional (B, T) integer tensor of next-token ids.

        Returns:
            (logits, loss). Without targets: logits is (B, T, vocab_size) and loss is
            None. With targets: logits is flattened to (B*T, vocab_size) and loss is
            the scalar cross-entropy against the flattened targets.
        """
        _, T = index.shape

        tok_emb = self.token_embedding_table(index) # (B,T,C)
        # Build the position ids on the device of the input rather than on the
        # notebook-global `device`, so the model also works when it (and its inputs)
        # live somewhere else, e.g. a checkpoint evaluated on CPU.
        positions = torch.arange(T, device=index.device)
        pos_emb = self.position_embedding_table(positions) # (T,C)
        x = tok_emb + pos_emb # (B,T,C), pos_emb broadcasts over the batch
        x = self.blocks(x) # (B,T,C)
        x = self.ln_f(x) # (B,T,C)
        logits = self.lm_head(x) # (B,T,vocab_size)

        if targets is None:
            loss = None
        else:
            # F.cross_entropy expects (N, C) logits and (N,) targets.
            B, T, C = logits.shape
            logits = logits.view(B*T, C)
            targets = targets.view(B*T)
            loss = F.cross_entropy(logits, targets)

        return logits, loss

    @torch.no_grad()  # sampling needs no gradients; avoids building an autograd graph per step
    def generate(self, index, max_new_tokens):
        """Autoregressively sample `max_new_tokens` tokens after the given context.

        Args:
            index: (B, T) integer tensor holding the current context.
            max_new_tokens: number of tokens to append.

        Returns:
            (B, T + max_new_tokens) integer tensor: the context followed by the samples.

        Dropout stays active if the module is in training mode; call eval() first
        for plain sampling.
        """
        for _ in range(max_new_tokens):
            # The position table only covers block_size positions: keep the newest ones.
            index_cond = index[:, -block_size:]
            # Call the module itself (not .forward) so registered hooks run.
            logits, _ = self(index_cond)
            # Only the prediction for the last time step is needed.
            logits = logits[:, -1, :] # (B, C)
            probs = F.softmax(logits, dim=-1) # (B, C)
            # Sample one token id per sequence from the predicted distribution.
            index_next = torch.multinomial(probs, num_samples=1) # (B, 1)
            index = torch.cat((index, index_next), dim=1) # (B, T+1)
        return index

# Build a freshly initialised model sized for the fixed character vocabulary.
model = GPTLanguageModel(vocab_size)

# Optional: resume from a previously saved checkpoint instead of starting from scratch.
# Only unpickle files you created yourself - pickle.load can execute arbitrary code.
# print('loading model parameters...')
# with open('model-01.pkl', 'rb') as f:
#     model = pickle.load(f)
# print('loaded successfully!')

# Module.to() moves the parameters and returns the module itself, so `m` and `model`
# refer to the same object.
model = model.to(device)
m = model
model

GPTLanguageModel(
  (token_embedding_table): Embedding(97, 384)
  (position_embedding_table): Embedding(128, 384)
  (blocks): Sequential(
    (0): Block(
      (sa): MultiHeadAttention(
        (heads): ModuleList(
          (0-3): 4 x Head(
            (key): Linear(in_features=384, out_features=96, bias=False)
            (query): Linear(in_features=384, out_features=96, bias=False)
            (value): Linear(in_features=384, out_features=96, bias=False)
            (dropout): Dropout(p=0.2, inplace=False)
          )
        )
        (proj): Linear(in_features=384, out_features=384, bias=True)
        (dropout): Dropout(p=0.2, inplace=False)
      )
      (ffwd): FeedFoward(
        (net): Sequential(
          (0): Linear(in_features=384, out_features=1536, bias=True)
          (1): ReLU()
          (2): Linear(in_features=1536, out_features=384, bias=True)
          (3): Dropout(p=0.2, inplace=False)
        )
      )
      (ln1): LayerNorm((384,), eps=1e-05, elementwise_affine=

#### Training and Saving the Model

In [19]:
from tqdm import tqdm

# Loss estimator that averages eval_iters batches per split.
estimator = EstimateLoss(model, eval_iters, get_batch)

# AdamW optimizer (Adam with decoupled weight decay) at the configured learning rate.
optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate)

# The loop variable is `step` rather than `iter`, so the builtin iter() is not
# shadowed for the rest of the kernel session.
for step in tqdm(range(max_iters)):
    # eval_iters doubles as the evaluation interval: report every eval_iters steps.
    if step % eval_iters == 0:
        # Decode and print one training sample as a sanity check of the data pipeline.
        xb, yb = get_batch('train')
        print(f"Input sequence: {decode(xb[0].tolist())}")

        losses = estimator.estimate_loss()
        print(f"step: {step}, train loss: {losses['train']:.3f}, val loss: {losses['val']:.3f}")

    # sample a batch of data
    xb, yb = get_batch('train')

    # Forward pass. Call the module itself rather than .forward() so that any
    # registered hooks run.
    logits, loss = model(xb, yb)
    # Clear the previous step's gradients, backpropagate, and update the parameters.
    optimizer.zero_grad(set_to_none=True)
    loss.backward()
    optimizer.step()

# Persist the whole module object with pickle. Only unpickle this file from a
# trusted source: pickle.load can execute arbitrary code.
with open('model-01.pkl', 'wb') as f:
    pickle.dump(model, f)

print('model saved')

  0%|          | 0/10000 [00:00<?, ?it/s]

Input sequence: ck signals made with
their wooden fingers or lips. Neither was there any sound to be heard
anywhere throughout the wooden countr


  0%|          | 3/10000 [00:04<3:26:06,  1.24s/it] 

step: 0, train loss: 2.320, val loss: 2.378


  1%|          | 99/10000 [00:10<10:29, 15.73it/s]

Input sequence: ne Rain of Stones. This second one was a Rain
of People-and-Horse-and-Buggy. And some stones came with them."

"Will there be an


  1%|          | 103/10000 [00:15<1:28:40,  1.86it/s]

step: 100, train loss: 2.191, val loss: 2.244


  2%|▏         | 199/10000 [00:21<10:33, 15.48it/s]

Input sequence: The
fruit was so daintily colored and so fragrant, and looked so appetizing
and delicious that Dorothy stopped and exclaimed:

"


  2%|▏         | 203/10000 [00:26<1:29:17,  1.83it/s]

step: 200, train loss: 2.102, val loss: 2.158


  3%|▎         | 299/10000 [00:32<10:31, 15.35it/s]

Input sequence:  the little
man did not watch him long. Instead, he drew a leathern case from his
pocket and took from it several sharp knives, 


  3%|▎         | 303/10000 [00:37<1:29:32,  1.81it/s]

step: 300, train loss: 2.020, val loss: 2.103


  4%|▍         | 399/10000 [00:44<10:27, 15.30it/s]

Input sequence:                                                      13

  2  THE GLASS CITY                                                    


  4%|▍         | 403/10000 [00:49<1:28:42,  1.80it/s]

step: 400, train loss: 1.967, val loss: 2.041


  5%|▍         | 499/10000 [00:55<10:37, 14.90it/s]

Input sequence:  every direction as he tumbled through space. Also, turning
her head, she found that she could see the boy beside her, who had u


  5%|▌         | 503/10000 [01:00<1:28:08,  1.80it/s]

step: 500, train loss: 1.897, val loss: 1.994


  6%|▌         | 599/10000 [01:06<10:32, 14.85it/s]

Input sequence: ng passed through it."

"Then we're all right," said the girl, "for if the dragon went the other
way she can't poss'bly get to u


  6%|▌         | 603/10000 [01:11<1:28:13,  1.78it/s]

step: 600, train loss: 1.860, val loss: 1.965


  7%|▋         | 699/10000 [01:18<10:24, 14.90it/s]

Input sequence: sleeve and nothing concealed about my person. Also, my hat is quite
empty." He took off his hat and held it upside down, shaking


  7%|▋         | 703/10000 [01:23<1:28:15,  1.76it/s]

step: 700, train loss: 1.801, val loss: 1.919


  8%|▊         | 799/10000 [01:29<10:23, 14.75it/s]

Input sequence: ," said the other. "I am a great inventor, you must know, and
I manufacture my products in this lonely spot."

"What are your pr


  8%|▊         | 803/10000 [01:34<1:27:47,  1.75it/s]

step: 800, train loss: 1.770, val loss: 1.898


  9%|▉         | 899/10000 [01:41<10:14, 14.82it/s]

Input sequence: e replied; "and in my satchel are other useful things to fight
with."

"What the Gargoyles most dread is a noise," said the man'


  9%|▉         | 903/10000 [01:46<1:26:44,  1.75it/s]

step: 900, train loss: 1.739, val loss: 1.847


 10%|▉         | 999/10000 [01:53<10:16, 14.59it/s]

Input sequence: wo near his head
and two near his tail. They were a bit wiggley, but secure enough if
only the harness held together.

The other


 10%|█         | 1003/10000 [01:58<1:25:49,  1.75it/s]

step: 1000, train loss: 1.696, val loss: 1.834


 11%|█         | 1099/10000 [02:04<10:08, 14.62it/s]

Input sequence: al
there to enable him to prepare several new tricks which he had learned
from some of the jugglers in the circus, and he had pa


 11%|█         | 1103/10000 [02:09<1:25:21,  1.74it/s]

step: 1100, train loss: 1.661, val loss: 1.813


 12%|█▏        | 1199/10000 [02:16<10:11, 14.40it/s]

Input sequence: es. They began to wonder if there
were no people to inhabit this magnificent city of the inner world.

Suddenly a man appeared t


 12%|█▏        | 1203/10000 [02:21<1:25:54,  1.71it/s]

step: 1200, train loss: 1.638, val loss: 1.777


 13%|█▎        | 1299/10000 [02:28<10:08, 14.30it/s]

Input sequence:  so confident.

"Those wooden things are impossible to hurt," he said, "and all the
damage Jim has done to them is to knock a fe


 13%|█▎        | 1303/10000 [02:33<1:24:47,  1.71it/s]

step: 1300, train loss: 1.619, val loss: 1.763


 14%|█▍        | 1399/10000 [02:40<09:54, 14.48it/s]

Input sequence: ld ornaments decorating his breast; but his bald head and
wrinkled features made him appear more amusing than impressive.

Ozma 


 14%|█▍        | 1403/10000 [02:45<1:23:40,  1.71it/s]

step: 1400, train loss: 1.582, val loss: 1.758


 15%|█▍        | 1499/10000 [02:52<09:49, 14.41it/s]

Input sequence:  thing is true,
because since the time of that interview there is no piglet to be found
anywhere."

[Illustration: EUREKA IN COU


 15%|█▌        | 1503/10000 [02:57<1:22:58,  1.71it/s]

step: 1500, train loss: 1.575, val loss: 1.744


 16%|█▌        | 1599/10000 [03:03<09:49, 14.24it/s]

Input sequence: , Billina and Eureka, make up and be
friends, I'll take my Magic Belt and wish you both home again,
_immejitly_. So, there!"

Th


 16%|█▌        | 1603/10000 [03:09<1:21:46,  1.71it/s]

step: 1600, train loss: 1.546, val loss: 1.708


 17%|█▋        | 1699/10000 [03:15<09:42, 14.25it/s]

Input sequence: n was a grand success, and when it had
returned to the palace the citizens crowded into the great Throne Room
to see the Wizard 


 17%|█▋        | 1703/10000 [03:20<1:21:17,  1.70it/s]

step: 1700, train loss: 1.535, val loss: 1.707


 18%|█▊        | 1799/10000 [03:27<09:32, 14.32it/s]

Input sequence: at them than it would be for Jim to eat you."

"And that's just what I shall do if you don't let those little balls of
pork alon


 18%|█▊        | 1803/10000 [03:32<1:20:47,  1.69it/s]

step: 1800, train loss: 1.518, val loss: 1.661


 19%|█▉        | 1899/10000 [03:39<09:26, 14.31it/s]

Input sequence: to the platform again. Had there
been any doors or windows in the lower rooms, or had not the boards of
the house been so thick 


 19%|█▉        | 1903/10000 [03:44<1:19:57,  1.69it/s]

step: 1900, train loss: 1.495, val loss: 1.677


 20%|█▉        | 1999/10000 [03:51<09:24, 14.17it/s]

Input sequence: by those saucy dragonettes. No one knows what the mother
might do."

They now moved on again, creeping slowly up another steep i


 20%|██        | 2003/10000 [03:56<1:18:39,  1.69it/s]

step: 2000, train loss: 1.481, val loss: 1.653


 21%|██        | 2099/10000 [04:03<09:21, 14.08it/s]

Input sequence: e beauties of nature,
the dainty flowers and trees, the green fields and the clear blue of the
sky."

"How about the birds and b


 21%|██        | 2103/10000 [04:08<1:17:43,  1.69it/s]

step: 2100, train loss: 1.463, val loss: 1.653


 22%|██▏       | 2199/10000 [04:15<09:19, 13.93it/s]

Input sequence:  they are made of wood, and as there is no night here
they select a certain time of the day in which to sleep or doze."

"I feel


 22%|██▏       | 2203/10000 [04:21<1:16:56,  1.69it/s]

step: 2200, train loss: 1.445, val loss: 1.633


 23%|██▎       | 2299/10000 [04:27<09:10, 13.99it/s]

Input sequence: it Dorothy's
white kitten crept out and ran up the stairs."

Hearing this, Dorothy and the Wizard exchanged startled glances, fo


 23%|██▎       | 2303/10000 [04:33<1:16:24,  1.68it/s]

step: 2300, train loss: 1.439, val loss: 1.645


 24%|██▍       | 2399/10000 [04:39<09:07, 13.89it/s]

Input sequence: ESSON.]

I am sorry to record the fact that Jim was not only ashamed of his
defeat but for a moment lost control of his temper. 


 24%|██▍       | 2403/10000 [04:45<1:18:00,  1.62it/s]

step: 2400, train loss: 1.429, val loss: 1.620


 25%|██▍       | 2499/10000 [04:52<08:53, 14.06it/s]

Input sequence: o come down again. So what
could I do but tell "what happened to the Wizard afterward"? You will
find him in these pages, just t


 25%|██▌       | 2503/10000 [04:57<1:14:08,  1.69it/s]

step: 2500, train loss: 1.420, val loss: 1.607


 26%|██▌       | 2599/10000 [05:04<08:47, 14.04it/s]

Input sequence: ," exclaimed the girl, much distressed.
"The Gurgles will get her, sure!"

"Ha, ha!" chuckled the old cab-horse; "they're not 'G


 26%|██▌       | 2603/10000 [05:09<1:12:59,  1.69it/s]

step: 2600, train loss: 1.402, val loss: 1.606


 27%|██▋       | 2699/10000 [05:16<08:38, 14.07it/s]

Input sequence: refully.

"To be sure," said the other. "I am a great inventor, you must know, and
I manufacture my products in this lonely spot


 27%|██▋       | 2703/10000 [05:21<1:12:16,  1.68it/s]

step: 2700, train loss: 1.385, val loss: 1.614


 28%|██▊       | 2799/10000 [05:28<08:27, 14.18it/s]

Input sequence: ka quickly followed him,
and soon they were all standing together upon the platform, with eight
of the much prized wooden wings 


 28%|██▊       | 2803/10000 [05:33<1:11:29,  1.68it/s]

step: 2800, train loss: 1.378, val loss: 1.599


 29%|██▉       | 2899/10000 [05:40<08:22, 14.13it/s]

Input sequence: n a grave?" asked Dorothy.

"Don't interrupt, little girl," said the Woggle-Bug. "When I get my
thoughts arranged in good order 


 29%|██▉       | 2903/10000 [05:46<1:11:03,  1.66it/s]

step: 2900, train loss: 1.367, val loss: 1.583


 30%|██▉       | 2999/10000 [05:52<08:17, 14.07it/s]

Input sequence: othy. "Are these bears here?"

"That is the one evil of our country," answered the invisible man. "Many
large and fierce bears r


 30%|███       | 3003/10000 [05:58<1:09:47,  1.67it/s]

step: 3000, train loss: 1.367, val loss: 1.620


 31%|███       | 3099/10000 [06:05<08:16, 13.91it/s]

Input sequence: ept your kind offer with gratitude, gracious Princess," the little
man said, in a soft voice, and they could all see that tear-d


 31%|███       | 3103/10000 [06:10<1:08:47,  1.67it/s]

step: 3100, train loss: 1.347, val loss: 1.587


 32%|███▏      | 3199/10000 [06:17<08:02, 14.09it/s]

Input sequence: t, and I see her pounce upon the innocent
creature and eat it up----"

"Are you still seeing with your mind's eye?" enquired the


 32%|███▏      | 3203/10000 [06:22<1:07:35,  1.68it/s]

step: 3200, train loss: 1.330, val loss: 1.573


 33%|███▎      | 3299/10000 [06:29<07:52, 14.18it/s]

Input sequence: any of the Gargoyles act badly, and have to be put in jail, they are
brought here and their wings unhooked and taken away from t


 33%|███▎      | 3303/10000 [06:34<1:06:25,  1.68it/s]

step: 3300, train loss: 1.327, val loss: 1.572


 34%|███▍      | 3399/10000 [06:41<07:51, 14.01it/s]

Input sequence: gether again and hitched Jim to the buggy. Then, with the
Wizard's help, he tried to fasten some of the wings to the old
cab-hor


 34%|███▍      | 3403/10000 [06:46<1:05:41,  1.67it/s]

step: 3400, train loss: 1.321, val loss: 1.565


 35%|███▍      | 3499/10000 [06:53<07:52, 13.75it/s]

Input sequence:  Swiss Cheese, and I will acknowledge that I
supplied a superior article, which was in great demand. Also I made
pores for porou


 35%|███▌      | 3503/10000 [06:58<1:04:54,  1.67it/s]

step: 3500, train loss: 1.323, val loss: 1.560


 36%|███▌      | 3599/10000 [07:05<07:46, 13.73it/s]

Input sequence: f the cave, brought the piglets
out one by one, and allowed them to run around as much as they pleased.

"My dears," he said to 


 36%|███▌      | 3603/10000 [07:11<1:05:11,  1.64it/s]

step: 3600, train loss: 1.302, val loss: 1.545


 37%|███▋      | 3699/10000 [07:18<07:28, 14.04it/s]

Input sequence: Gradually the balloon grew bigger, which was proof that it was settling
down upon the Land of the Mangaboos. Dorothy was surpris


 37%|███▋      | 3703/10000 [07:23<1:02:42,  1.67it/s]

step: 3700, train loss: 1.299, val loss: 1.551


 38%|███▊      | 3799/10000 [07:30<07:22, 14.02it/s]

Input sequence: ow?" asked the Wizard.

"I will show you," was the reply. "Step this way, please."

He led them within another but smaller circl


 38%|███▊      | 3803/10000 [07:35<1:01:38,  1.68it/s]

step: 3800, train loss: 1.309, val loss: 1.538


 39%|███▉      | 3899/10000 [07:42<07:10, 14.17it/s]

Input sequence: t on the
dressing-table. I want to play with it."

Jellia at once departed on the errand, and she was gone so long that
they had


 39%|███▉      | 3903/10000 [07:47<1:00:56,  1.67it/s]

step: 3900, train loss: 1.279, val loss: 1.531


 40%|███▉      | 3999/10000 [07:54<07:05, 14.10it/s]

Input sequence: f nothing."

He placed the hat upon the glass floor, made a pass with his hand, and
then removed the hat, displaying a little wh


 40%|████      | 4003/10000 [07:59<1:00:05,  1.66it/s]

step: 4000, train loss: 1.268, val loss: 1.532


 41%|████      | 4099/10000 [08:06<07:02, 13.95it/s]

Input sequence:  in his cold, calm voice:

"You are indeed a wonderful Wizard, and your powers are greater than
those of my Sorcerer."

"He will


 41%|████      | 4103/10000 [08:12<59:11,  1.66it/s]  

step: 4100, train loss: 1.265, val loss: 1.518


 42%|████▏     | 4199/10000 [08:19<06:54, 14.00it/s]

Input sequence: d together again with cords and bits of wire. The
buggy seemed almost new, for it had a shiny top and side curtains.
Getting aro


 42%|████▏     | 4203/10000 [08:24<58:04,  1.66it/s]  

step: 4200, train loss: 1.260, val loss: 1.532


 43%|████▎     | 4299/10000 [08:31<06:49, 13.91it/s]

Input sequence: here being no night to divide the hours into days--our friends
were not disturbed in any way. They were even permitted to occupy


 43%|████▎     | 4303/10000 [08:36<56:46,  1.67it/s]  

step: 4300, train loss: 1.250, val loss: 1.544


 44%|████▍     | 4399/10000 [08:43<06:42, 13.92it/s]

Input sequence: place at all," answered the man with the braids; "that is, not
recently. Once I lived on top the earth, but for many years I hav


 44%|████▍     | 4403/10000 [08:48<55:56,  1.67it/s]  

step: 4400, train loss: 1.243, val loss: 1.527


 45%|████▍     | 4499/10000 [08:55<06:36, 13.86it/s]

Input sequence: ss looked at the strange
piglets as if she were as truly astonished as any vegetable person could
be. But afterward she said:

"


 45%|████▌     | 4503/10000 [09:00<54:39,  1.68it/s]  

step: 4500, train loss: 1.256, val loss: 1.528


 46%|████▌     | 4599/10000 [09:07<06:26, 13.96it/s]

Input sequence: alace.

[Illustration]



CHAPTER 15.

OLD FRIENDS ARE REUNITED


Many servants dressed in handsome uniforms stood ready to welc


 46%|████▌     | 4603/10000 [09:13<53:51,  1.67it/s]  

step: 4600, train loss: 1.241, val loss: 1.512


 47%|████▋     | 4699/10000 [09:20<06:19, 13.97it/s]

Input sequence: grass watching Jim, who was still busily eating,
Eureka said:

"I don't believe you are a Wizard at all!"

"No," answered the li


 47%|████▋     | 4703/10000 [09:25<53:06,  1.66it/s]  

step: 4700, train loss: 1.231, val loss: 1.530


 48%|████▊     | 4799/10000 [09:32<06:12, 13.95it/s]

Input sequence: the piglets will be perfectly safe, hereafter, as far as I am
concerned."

"That is right, Eureka," remarked the Wizard, earnest


 48%|████▊     | 4803/10000 [09:37<52:08,  1.66it/s]  

step: 4800, train loss: 1.225, val loss: 1.525


 49%|████▉     | 4899/10000 [09:44<06:04, 14.00it/s]

Input sequence: as any person
in sight; but after a while the child discovered a horse and buggy
standing near a group of trees a short distance


 49%|████▉     | 4903/10000 [09:49<50:51,  1.67it/s]  

step: 4900, train loss: 1.224, val loss: 1.490


 50%|████▉     | 4999/10000 [09:56<06:01, 13.82it/s]

Input sequence: e.

"A nice country this is," he grumbled, "where a respectable horse has to
eat pink grass!"

"It's violet," said the Wizard, w


 50%|█████     | 5003/10000 [10:01<49:48,  1.67it/s]  

step: 5000, train loss: 1.208, val loss: 1.519


 51%|█████     | 5099/10000 [10:08<05:51, 13.93it/s]

Input sequence:  EARTHQUAKE                                                      13

  2  THE GLASS CITY                                        


 51%|█████     | 5103/10000 [10:14<48:47,  1.67it/s]  

step: 5100, train loss: 1.207, val loss: 1.519


 52%|█████▏    | 5199/10000 [10:20<05:49, 13.75it/s]

Input sequence: ught up another piglet and pushed it
into the first, where it disappeared. And so, one by one, the nine tiny
piglets were pushed


 52%|█████▏    | 5203/10000 [10:26<47:51,  1.67it/s]  

step: 5200, train loss: 1.190, val loss: 1.502


 53%|█████▎    | 5299/10000 [10:33<05:42, 13.71it/s]

Input sequence:  we keep in our houses today is descended from the
wild cat of the jungle--a very ferocious creature, indeed. The Wizard
knew th


 53%|█████▎    | 5303/10000 [10:38<47:01,  1.66it/s]  

step: 5300, train loss: 1.197, val loss: 1.498


 54%|█████▍    | 5399/10000 [10:45<05:29, 13.98it/s]

Input sequence: hy, who noticed that the beautiful man did not
look where he was going; "be careful, or you'll fall off!"

But he paid no attent


 54%|█████▍    | 5403/10000 [10:50<46:03,  1.66it/s]  

step: 5400, train loss: 1.189, val loss: 1.497


 55%|█████▍    | 5499/10000 [10:57<05:22, 13.95it/s]

Input sequence: uch stronger and fiercer. The
beast was quite dead from the sword thrusts, and after a glance at its
terrible claws and sharp te


 55%|█████▌    | 5503/10000 [11:02<44:45,  1.67it/s]  

step: 5500, train loss: 1.179, val loss: 1.504


 56%|█████▌    | 5599/10000 [11:09<05:14, 14.01it/s]

Input sequence:  I weigh about half a ton."

"You don't weigh as much as you ought to, Jim," remarked the girl,
shaking her head as she looked a


 56%|█████▌    | 5603/10000 [11:14<43:51,  1.67it/s]  

step: 5600, train loss: 1.169, val loss: 1.498


 57%|█████▋    | 5699/10000 [11:21<05:06, 14.05it/s]

Input sequence:  the time he had
returned Dorothy was awake. Then the three held a counsel to decide what
they should do next, but could think o


 57%|█████▋    | 5703/10000 [11:27<42:38,  1.68it/s]

step: 5700, train loss: 1.170, val loss: 1.492


 58%|█████▊    | 5799/10000 [11:33<05:00, 14.00it/s]

Input sequence: ward the glass city to escort their new ruler to her palace
and to perform those ceremonies proper to the occasion. But while th


 58%|█████▊    | 5803/10000 [11:39<41:46,  1.67it/s]

step: 5800, train loss: 1.155, val loss: 1.493


 59%|█████▉    | 5899/10000 [11:46<04:54, 13.91it/s]

Input sequence: y were tower-like in shape and the best of
them seemed old and weather-worn; yet all were strong and substantial.

To one of the


 59%|█████▉    | 5903/10000 [11:51<40:57,  1.67it/s]

step: 5900, train loss: 1.164, val loss: 1.473


 60%|█████▉    | 5999/10000 [11:58<04:44, 14.05it/s]

Input sequence: et into a lot of mischief. Mother usually knows what she
is about, but she made a mistake this time; for you are sure to escape



 60%|██████    | 6003/10000 [12:03<40:03,  1.66it/s]

step: 6000, train loss: 1.162, val loss: 1.478


 61%|██████    | 6099/10000 [12:10<04:36, 14.11it/s]

Input sequence: n next the
door was opened you ran out and hid yourself--and the piglet was gone."

"That's none of my business," growled the ki


 61%|██████    | 6103/10000 [12:15<38:54,  1.67it/s]

step: 6100, train loss: 1.136, val loss: 1.488


 62%|██████▏   | 6199/10000 [12:22<04:32, 13.96it/s]

Input sequence: ss in the buggy when he had
taken it off from Jim to let the horse lie down and rest. So there was
nothing for the girl to carry


 62%|██████▏   | 6203/10000 [12:28<37:44,  1.68it/s]

step: 6200, train loss: 1.147, val loss: 1.488


 63%|██████▎   | 6299/10000 [12:34<04:24, 14.00it/s]

Input sequence: ab-horse made several curious sounds
that led the little girl to suspect he was laughing at them all.

[Illustration]




CHAPTE


 63%|██████▎   | 6303/10000 [12:40<36:43,  1.68it/s]

step: 6300, train loss: 1.143, val loss: 1.492


 64%|██████▍   | 6399/10000 [12:47<04:15, 14.12it/s]

Input sequence:  it," he answered. "I only bossed the job, as we say
in Omaha."

"But you ruled it wisely and well for many years," said she, "a


 64%|██████▍   | 6403/10000 [12:52<35:52,  1.67it/s]

step: 6400, train loss: 1.122, val loss: 1.500


 65%|██████▍   | 6499/10000 [12:59<04:08, 14.08it/s]

Input sequence: efully."

He began making queer signs and passes toward the Wizard; but the little
man did not watch him long. Instead, he drew 


 65%|██████▌   | 6503/10000 [13:04<35:00,  1.67it/s]

step: 6500, train loss: 1.109, val loss: 1.491


 66%|██████▌   | 6599/10000 [13:11<04:02, 14.04it/s]

Input sequence: right," returned the man's voice, more pleasantly than
before. "You are welcome to what we have."

As he spoke the voice came so


 66%|██████▌   | 6603/10000 [13:16<34:03,  1.66it/s]

step: 6600, train loss: 1.115, val loss: 1.467


 67%|██████▋   | 6699/10000 [13:23<03:56, 13.98it/s]

Input sequence: e the four countries cornered together, and when it was
completed I announced myself the Ruler of the Land of Oz, which included


 67%|██████▋   | 6703/10000 [13:28<33:02,  1.66it/s]

step: 6700, train loss: 1.111, val loss: 1.509


 68%|██████▊   | 6799/10000 [13:35<03:47, 14.06it/s]

Input sequence:                               L. FRANK BAUM
CORONADO, 1908.




LIST OF CHAPTERS


CHAPTER                                      


 68%|██████▊   | 6803/10000 [13:41<31:50,  1.67it/s]

step: 6800, train loss: 1.116, val loss: 1.479


 69%|██████▉   | 6899/10000 [13:47<03:41, 14.02it/s]

Input sequence: mountain, for my subjects cannot bear to have them
around."

The Wizard was so pleased to have saved the two children and himsel


 69%|██████▉   | 6903/10000 [13:53<30:46,  1.68it/s]

step: 6900, train loss: 1.120, val loss: 1.483


 70%|██████▉   | 6999/10000 [14:00<03:33, 14.06it/s]

Input sequence: sh," said Jim. "Fetch it on, but don't cook it,
as you value your life."

You see, the respect shown the worn-out old cab-horse 


 70%|███████   | 7003/10000 [14:05<29:39,  1.68it/s]

step: 7000, train loss: 1.111, val loss: 1.476


 71%|███████   | 7099/10000 [14:12<03:26, 14.03it/s]

Input sequence: e four nations that inhabit the Land of Oz," was the reply. "I wonder
if they would treat me nicely if I went there again."

"Of


 71%|███████   | 7103/10000 [14:17<28:55,  1.67it/s]

step: 7100, train loss: 1.106, val loss: 1.485


 72%|███████▏  | 7199/10000 [14:24<03:22, 13.84it/s]

Input sequence: holes, and having no room in which to store them I set them all end
to end and put the top one in the ground. That made an extra


 72%|███████▏  | 7203/10000 [14:29<28:00,  1.66it/s]

step: 7200, train loss: 1.100, val loss: 1.491


 73%|███████▎  | 7299/10000 [14:36<03:11, 14.07it/s]

Input sequence: at the animal. "You're dreadfully
skinny."

"Oh, well; I'm old," said the horse, hanging his head despondently, "and
I've had lo


 73%|███████▎  | 7303/10000 [14:41<27:26,  1.64it/s]

step: 7300, train loss: 1.082, val loss: 1.480


 74%|███████▍  | 7399/10000 [14:48<03:04, 14.08it/s]

Input sequence:  before yesterday."

"But that isn't young!" cried Dorothy, in amazement.

"No?" drawled the dragonette; "it seems to me very ba


 74%|███████▍  | 7403/10000 [14:54<25:48,  1.68it/s]

step: 7400, train loss: 1.091, val loss: 1.477


 75%|███████▍  | 7499/10000 [15:00<02:56, 14.15it/s]

Input sequence: had gone
both Jim and Eureka protested they did not want to go to the Black Pit,
and Dorothy promised she would do all that she 


 75%|███████▌  | 7503/10000 [15:06<24:49,  1.68it/s]

step: 7500, train loss: 1.090, val loss: 1.469


 76%|███████▌  | 7599/10000 [15:13<02:51, 14.01it/s]

Input sequence: ed to the ground, and seeing his success Jim kicked
again and again, charging into the vegetable crowd, knocking them in
all dir


 76%|███████▌  | 7603/10000 [15:18<23:54,  1.67it/s]

step: 7600, train loss: 1.075, val loss: 1.469


 77%|███████▋  | 7699/10000 [15:25<02:44, 14.02it/s]

Input sequence: ur parts of the kingdom themselves; so when the Ruler, my
grandfather, was hunting one day, one Wicked Witch named Mombi stole h


 77%|███████▋  | 7703/10000 [15:30<23:02,  1.66it/s]

step: 7700, train loss: 1.082, val loss: 1.497


 78%|███████▊  | 7799/10000 [15:37<02:36, 14.02it/s]

Input sequence: to follow them he felt a hot breath against
his cheek and heard a low, fierce growl. At once he began stabbing at
the air with h


 78%|███████▊  | 7803/10000 [15:42<22:02,  1.66it/s]

step: 7800, train loss: 1.054, val loss: 1.464


 79%|███████▉  | 7899/10000 [15:49<02:29, 14.03it/s]

Input sequence:  back."

So together they leaned over the great bush and each of them seized one
hand of the lovely Princess.

"Pull!" cried Dor


 79%|███████▉  | 7903/10000 [15:54<20:53,  1.67it/s]

step: 7900, train loss: 1.063, val loss: 1.468


 80%|███████▉  | 7999/10000 [16:01<02:22, 14.05it/s]

Input sequence: o say: 'I wonder what
So-and-so is doing,' and at once the picture shows where her friend is
and what the friend is doing. That'


 80%|████████  | 8003/10000 [16:06<19:49,  1.68it/s]

step: 8000, train loss: 1.058, val loss: 1.487


 81%|████████  | 8099/10000 [16:13<02:15, 14.00it/s]

Input sequence: n; but perhaps she wasn't brought up properly.
Dorothy found her, you see, and who her parents were nobody knows.

I believe, my


 81%|████████  | 8103/10000 [16:19<18:50,  1.68it/s]

step: 8100, train loss: 1.051, val loss: 1.473


 82%|████████▏ | 8199/10000 [16:25<02:09, 13.96it/s]

Input sequence: ould be pretty dead, wouldn't it?" asked Dorothy.

"Yes, my dear. But we have no need to worry about that just now. Let us
exami


 82%|████████▏ | 8203/10000 [16:31<17:55,  1.67it/s]

step: 8200, train loss: 1.070, val loss: 1.479


 83%|████████▎ | 8299/10000 [16:38<02:00, 14.17it/s]

Input sequence: m to make her a certain sign and
she will put on the Nome King's Magic Belt and wish me to be with her in
Oz."

"Do you mean tha


 83%|████████▎ | 8303/10000 [16:43<16:59,  1.67it/s]

step: 8300, train loss: 1.052, val loss: 1.450


 84%|████████▍ | 8399/10000 [16:50<01:53, 14.09it/s]

Input sequence: e," said Zeb, who by now had unhitched the horse.

"If we had known we were coming we might have brought along several
other use


 84%|████████▍ | 8403/10000 [16:55<15:59,  1.66it/s]

step: 8400, train loss: 1.045, val loss: 1.456


 85%|████████▍ | 8499/10000 [17:02<01:47, 13.97it/s]

Input sequence: ple. Today came another Rain of
Stones, and soon after it you appeared among us."

"By the way," said the man with the star, loo


 85%|████████▌ | 8503/10000 [17:07<15:06,  1.65it/s]

step: 8500, train loss: 1.039, val loss: 1.499


 86%|████████▌ | 8599/10000 [17:14<01:39, 14.07it/s]

Input sequence: to you."

"Ozma can do it, easily," replied Dorothy. "Tomorrow morning I'll go to
Kansas and you can go to Californy."

[Illustr


 86%|████████▌ | 8603/10000 [17:20<13:53,  1.68it/s]

step: 8600, train loss: 1.038, val loss: 1.479


 87%|████████▋ | 8699/10000 [17:26<01:33, 13.93it/s]

Input sequence: d years, to the time of the famous Green Dragon of
Atlantis, who lived in a time when humans had not yet been created. Can
you m


 87%|████████▋ | 8703/10000 [17:32<12:52,  1.68it/s]

step: 8700, train loss: 1.025, val loss: 1.474


 88%|████████▊ | 8799/10000 [17:39<01:25, 14.10it/s]

Input sequence: h some difficulty and danger Jim drew the buggy over the
loose rocks until he reached the green lawns below, where the paths and


 88%|████████▊ | 8803/10000 [17:44<11:55,  1.67it/s]

step: 8800, train loss: 1.019, val loss: 1.484


 89%|████████▉ | 8899/10000 [17:51<01:18, 14.00it/s]

Input sequence: cape being pricked, and Zeb and
the Wizard, after enduring a few stabs from the thorns, were glad to
follow her. At once the Man


 89%|████████▉ | 8903/10000 [17:56<10:59,  1.66it/s]

step: 8900, train loss: 1.014, val loss: 1.485


 90%|████████▉ | 8999/10000 [18:03<01:11, 13.96it/s]

Input sequence: tchel from the buggy and, opening it, took out two deadly
looking revolvers that made the children shrink back in alarm just to



 90%|█████████ | 9003/10000 [18:08<09:59,  1.66it/s]

step: 9000, train loss: 1.024, val loss: 1.500


 91%|█████████ | 9099/10000 [18:15<01:04, 13.95it/s]

Input sequence: s I would not need a conscience, for nothing
could then tempt me to devour babies and lambs."

Just then Dorothy, who had risen 


 91%|█████████ | 9103/10000 [18:20<08:56,  1.67it/s]

step: 9100, train loss: 1.011, val loss: 1.466


 92%|█████████▏| 9199/10000 [18:27<00:57, 14.05it/s]

Input sequence: ing a murder.
Eureka is the sweet pet of a lovely little girl whom we all admire, and
gentleness and innocence are her chief vir


 92%|█████████▏| 9203/10000 [18:33<07:55,  1.67it/s]

step: 9200, train loss: 1.010, val loss: 1.473


 93%|█████████▎| 9299/10000 [18:39<00:49, 14.15it/s]

Input sequence: THE DEN OF THE DRAGONETTES


Our friends had a good start and were able to maintain it, for with
their eight wings they could go


 93%|█████████▎| 9303/10000 [18:45<06:55,  1.68it/s]

step: 9300, train loss: 1.008, val loss: 1.492


 94%|█████████▍| 9399/10000 [18:52<00:43, 13.92it/s]

Input sequence: aught up their
thorns and gave chase, mounting through the air after her. Eureka,
however, was lighter than the Mangaboos, and w


 94%|█████████▍| 9403/10000 [18:57<05:57,  1.67it/s]

step: 9400, train loss: 1.002, val loss: 1.474


 95%|█████████▍| 9499/10000 [19:04<00:35, 13.98it/s]

Input sequence: attraction of gravitation is very slight.
But I've noticed that many queer things happen in fairy countries."

"Is this a fairy 


 95%|█████████▌| 9503/10000 [19:09<04:58,  1.67it/s]

step: 9500, train loss: 1.009, val loss: 1.489


 96%|█████████▌| 9599/10000 [19:16<00:28, 14.03it/s]

Input sequence: ked the boy.

"Eat! No, indeed. We are quite solid inside our bodies, and have no need
to eat, any more than does a potato."

"B


 96%|█████████▌| 9603/10000 [19:21<03:58,  1.66it/s]

step: 9600, train loss: 1.001, val loss: 1.481


 97%|█████████▋| 9699/10000 [19:28<00:21, 14.04it/s]

Input sequence:  themselves. There is no reason, that I can see, why they
may not exist in the waters of this strange country."

Then the Wizard


 97%|█████████▋| 9703/10000 [19:33<02:58,  1.67it/s]

step: 9700, train loss: 0.978, val loss: 1.479


 98%|█████████▊| 9799/10000 [19:40<00:14, 14.01it/s]

Input sequence:  race."

There was more applause at this, and then Ozma had the jewelled saddle
replaced upon the Sawhorse and herself rode the 


 98%|█████████▊| 9803/10000 [19:46<01:57,  1.68it/s]

step: 9800, train loss: 0.996, val loss: 1.492


 99%|█████████▉| 9899/10000 [19:52<00:07, 13.97it/s]

Input sequence: uired the Scarecrow.

"As many times as is necessary," was the reply. "I will ask the Tin
Woodman to defend the prisoner, becaus


 99%|█████████▉| 9903/10000 [19:58<00:57,  1.68it/s]

step: 9900, train loss: 0.993, val loss: 1.488


100%|██████████| 10000/10000 [20:05<00:00,  8.30it/s]


model saved


#### Chat with the New GPT

#### The following snippet is from the Validation split

You can see that the generated output is different from the one used in training.

**Original Text :**  
"Thank you, dear Wiz," said the grateful Scarecrow. "Now we must get the oil-can and rescue the Tin Woodman."

In [49]:
# Prompt: the opening of the validation-split sentence quoted in the markdown above.
prompt = '"Thank you, dear Wiz,"'
# Encode the prompt into a 1-D tensor of token ids placed on `device`.
context = torch.tensor(encode(prompt), dtype=torch.long, device=device)
# Generation is inference only, so disable autograd tracking while the model
# produces the new tokens; this avoids recording graph state on every step.
# NOTE(review): if `m` contains dropout layers, it should probably be put in
# eval mode (m.eval()) before generating — confirm against the model definition.
with torch.no_grad():
    # unsqueeze(0) adds a leading batch dimension; [0] selects the single
    # returned sequence, which is converted to a plain list for decode().
    generated_chars = decode(
        m.generate(context.unsqueeze(0), max_new_tokens=100)[0].tolist()
    )
print(generated_chars)

"Thank you, dear Wiz," remarked Dorothy, so he best held did to wander
with her people glossly were quite must be from the


#### A prompt the model has never seen before leads to hallucinated output.

Even so, the model still attempts to form a sentence.

In [47]:
# Prompt the model with text that does not come from the corpus.
prompt = 'Hello I am Brook '
# Encode the prompt into a 1-D tensor of token ids placed on `device`.
context = torch.tensor(encode(prompt), dtype=torch.long, device=device)
# Generation is inference only, so disable autograd tracking while the model
# produces the new tokens; this avoids recording graph state on every step.
# NOTE(review): if `m` contains dropout layers, it should probably be put in
# eval mode (m.eval()) before generating — confirm against the model definition.
with torch.no_grad():
    # unsqueeze(0) adds a leading batch dimension; [0] selects the single
    # returned sequence, which is converted to a plain list for decode().
    generated_chars = decode(
        m.generate(context.unsqueeze(0), max_new_tokens=100)[0].tolist()
    )
print(generated_chars)

Hello I am Brook as earth, as while
you says I have here to our way willy try to eat them, he answer in a
General chi


In [57]:
# A very short prompt: a single word followed by a space.
prompt = 'Brook '
# Encode the prompt into a 1-D tensor of token ids placed on `device`.
context = torch.tensor(encode(prompt), dtype=torch.long, device=device)
# Generation is inference only, so disable autograd tracking while the model
# produces the new tokens; this avoids recording graph state on every step.
# NOTE(review): if `m` contains dropout layers, it should probably be put in
# eval mode (m.eval()) before generating — confirm against the model definition.
with torch.no_grad():
    # unsqueeze(0) adds a leading batch dimension; [0] selects the single
    # returned sequence, which is converted to a plain list for decode().
    generated_chars = decode(
        m.generate(context.unsqueeze(0), max_new_tokens=100)[0].tolist()
    )
print(generated_chars)

Brook and at
With this treat, was flumberly caught she sawho Jim
halves. Some Eureka, or course friends of
