In [1]:
from LLM_Build.GPT2_small import GPT_CONFIG_124M, GPTModel, generate_text
from LLM_Build.Preprocess import create_dataloader
from LLM_Build.Evaluation import calculate_loss_of_Loader, calculate_loss_of_Batch, text_to_token_ids, token_ids_to_text, train_validation_split
import torch
import torch.nn as nn

In [2]:
def model_evaluation(train_loader, validation_loader, model, device, evaluation_itr):
    model.eval()
    
    with torch.no_grad():
        training_loss = calculate_loss_of_Loader(
            train_loader,model,device,num_of_batch=evaluation_itr
        )
        validation_loss = calculate_loss_of_Loader(
            validation_loader,model,device,num_of_batch=evaluation_itr
        )
    
    model.train()
    return training_loss, validation_loss

In [3]:
def generate_and_print_text(model, input_text, tokenizer, device, max_token_generate):
    model.eval()
    context_size = model.positional_embedding.weight.shape[0]
    encoded_text = text_to_token_ids(input_text, tokenizer)
    
    with torch.no_grad():
        token_ids = generate_text(
            model=model,
            inputs=encoded_text,
            max_new_tokens=max_token_generate,
            context_size=context_size
        )
    decoded_text = token_ids_to_text(token_ids,tokenizer)
    print(decoded_text.replace("\n"," "))
    model.train()

In [4]:
def train_model(train_dataloader, validation_dataloader, model, optimizer, device, epochs, evaluation_frequency, evaluation_itr, input_text, tokenizer,max_token_generate):
    
    training_losses, validation_losses, track_token_seen = [], [], []
    
    token_seen, global_step = 0, -1
    
    for epoch in range(epochs):
        model.train()
        for input_batch, target_batch in train_dataloader:
            optimizer.zero_grad()
            loss = calculate_loss_of_Batch(
                input_batch,target_batch,model,device
            )
            loss.backward()
            optimizer.step()
            
            global_step += 1
            token_seen += input_batch.numel()
            
            if global_step % evaluation_frequency == 0:
                training_loss, validation_loss = model_evaluation(
                    train_dataloader, validation_dataloader,model,device,evaluation_itr
                )
                
                training_losses.append(training_loss)
                validation_losses.append(validation_loss)
                track_token_seen.append(token_seen)
                print(f"{epoch+1}: (Step: {global_step:06d})")
                print(f"Training Loss: {training_loss:.3f}")
                print(f"Validation Loss: {validation_loss:.3f}")
                
        generate_and_print_text(
            model, input_text, tokenizer, device, max_token_generate
        )
                
    return training_losses, validation_losses, track_token_seen
            

In [5]:
device = torch.device("cuda" if torch.cuda.is_available() else 'cpu')
device

device(type='cpu')

In [6]:
model = GPTModel(GPT_CONFIG_124M)

In [7]:
model.positional_embedding.weight.shape

torch.Size([1024, 768])

In [8]:
model.to(device)

GPTModel(
  (token_embedding): Embedding(50257, 768)
  (positional_embedding): Embedding(1024, 768)
  (dropout_embedding): Dropout(p=0.1, inplace=False)
  (transformer_blocks): Sequential(
    (0): TransformerBlock(
      (layer_norm1): LayerNormalization()
      (attention): MultiHeadAttention(
        (dropout): Dropout(p=0.1, inplace=False)
        (out_proj): Linear(in_features=768, out_features=768, bias=True)
        (W_query): Linear(in_features=768, out_features=768, bias=False)
        (W_key): Linear(in_features=768, out_features=768, bias=False)
        (W_value): Linear(in_features=768, out_features=768, bias=False)
      )
      (dropout_shortcut): Dropout(p=0.1, inplace=False)
      (layer_norm2): LayerNormalization()
      (ff): FeedForward(
        (layers): Sequential(
          (0): Linear(in_features=768, out_features=3072, bias=True)
          (1): GELU()
          (2): Linear(in_features=3072, out_features=768, bias=True)
        )
      )
    )
    (1): Transforme

In [9]:
with open("../../../../Data/the-verdict.txt", "r", encoding='utf-8') as f:
    text_data = f.read()
text_data

'I HAD always thought Jack Gisburn rather a cheap genius--though a good fellow enough--so it was no great surprise to me to hear that, in the height of his glory, he had dropped his painting, married a rich widow, and established himself in a villa on the Riviera. (Though I rather thought it would have been Rome or Florence.)\n\n"The height of his glory"--that was what the women called it. I can hear Mrs. Gideon Thwing--his last Chicago sitter--deploring his unaccountable abdication. "Of course it\'s going to send the value of my picture \'way up; but I don\'t think of that, Mr. Rickham--the loss to Arrt is all I think of." The word, on Mrs. Thwing\'s lips, multiplied its _rs_ as though they were reflected in an endless vista of mirrors. And it was not only the Mrs. Thwings who mourned. Had not the exquisite Hermia Croft, at the last Grafton Gallery show, stopped me before Gisburn\'s "Moon-dancers" to say, with tears in her eyes: "We shall not look upon its like again"?\n\nWell!--even 

In [10]:
train_data, validation_data = train_validation_split(text_data,0.9)

In [11]:
train_dataloader = create_dataloader(
    text = train_data,
    batch_size = 2,
    max_length = GPT_CONFIG_124M['context_length'],
    stride = GPT_CONFIG_124M['context_length'],
    shuffle = True,
    drop_last = True,
    num_workers = 0
)

validation_dataloader = create_dataloader(
    text = validation_data,
    batch_size = 2,
    max_length = GPT_CONFIG_124M['context_length'],
    stride = GPT_CONFIG_124M['context_length'],
    shuffle = False,
    drop_last = False,
    num_workers = 0
)

In [12]:
optimizer = torch.optim.AdamW(model.parameters(), lr=0.0004, weight_decay=0.1)

In [13]:
import tiktoken
tokenizer = tiktoken.get_encoding("gpt2")

In [14]:
epochs = 4

In [15]:
training_losses, validation_losses, track_token_seen = train_model(
    train_dataloader=train_dataloader,
    validation_dataloader=validation_dataloader,
    model=model,
    optimizer=optimizer,
    device=device,
    epochs=epochs,
    evaluation_frequency=5,
    evaluation_itr=5,
    input_text="Every effort moves you",
    tokenizer=tokenizer,
    max_token_generate=10
)

1: (Step: 000000)
Training Loss: 9.583
Validation Loss: 0.000
Every effort moves you,,,.,,,,,,
Every effort moves you,,,,,,,,,,
3: (Step: 000005)
Training Loss: 7.351
Validation Loss: 0.000
Every effort moves you, the, the, the, the, the
Every effort moves you, and, and, and, and, and


In [16]:
training_losses

[9.582722164052736, 7.351448674892117]

In [17]:
validation_losses

[0.0, 0.0]

In [18]:
track_token_seen

[2048, 12288]