In [3]:
import torch
import torch.nn as nn
from LLM_Build.GPT2_small import GPTModel, GPT_CONFIG_124M
from LLM_Build.Preprocess import create_dataloader

In [4]:
GPT_CONFIG_124M['context_length'] = 256

In [5]:
with open("../../../../Data/the-verdict.txt", "r", encoding='utf-8') as f:
    text_data = f.read()

In [6]:
text_data

'I HAD always thought Jack Gisburn rather a cheap genius--though a good fellow enough--so it was no great surprise to me to hear that, in the height of his glory, he had dropped his painting, married a rich widow, and established himself in a villa on the Riviera. (Though I rather thought it would have been Rome or Florence.)\n\n"The height of his glory"--that was what the women called it. I can hear Mrs. Gideon Thwing--his last Chicago sitter--deploring his unaccountable abdication. "Of course it\'s going to send the value of my picture \'way up; but I don\'t think of that, Mr. Rickham--the loss to Arrt is all I think of." The word, on Mrs. Thwing\'s lips, multiplied its _rs_ as though they were reflected in an endless vista of mirrors. And it was not only the Mrs. Thwings who mourned. Had not the exquisite Hermia Croft, at the last Grafton Gallery show, stopped me before Gisburn\'s "Moon-dancers" to say, with tears in her eyes: "We shall not look upon its like again"?\n\nWell!--even 

In [7]:
def train_validation_split(text, split_ratio):
    split_at_index = int(len(text) * split_ratio)
    return text[:split_at_index], text[split_at_index:]

In [8]:
train_data, validation_data = train_validation_split(text_data,split_ratio=0.9)
train_data

'I HAD always thought Jack Gisburn rather a cheap genius--though a good fellow enough--so it was no great surprise to me to hear that, in the height of his glory, he had dropped his painting, married a rich widow, and established himself in a villa on the Riviera. (Though I rather thought it would have been Rome or Florence.)\n\n"The height of his glory"--that was what the women called it. I can hear Mrs. Gideon Thwing--his last Chicago sitter--deploring his unaccountable abdication. "Of course it\'s going to send the value of my picture \'way up; but I don\'t think of that, Mr. Rickham--the loss to Arrt is all I think of." The word, on Mrs. Thwing\'s lips, multiplied its _rs_ as though they were reflected in an endless vista of mirrors. And it was not only the Mrs. Thwings who mourned. Had not the exquisite Hermia Croft, at the last Grafton Gallery show, stopped me before Gisburn\'s "Moon-dancers" to say, with tears in her eyes: "We shall not look upon its like again"?\n\nWell!--even 

In [9]:
train_dataloader = create_dataloader(
    text = train_data,
    batch_size = 2,
    max_length = GPT_CONFIG_124M['context_length'],
    stride = GPT_CONFIG_124M['context_length'],
    shuffle = True,
    drop_last = True,
    num_workers = 0
)

validation_dataloader = create_dataloader(
    text = validation_data,
    batch_size = 2,
    max_length = GPT_CONFIG_124M['context_length'],
    stride = GPT_CONFIG_124M['context_length'],
    shuffle = False,
    drop_last = False,
    num_workers = 0
)

In [10]:
for x,y in train_dataloader:
    print(x.shape,y.shape)

torch.Size([2, 256]) torch.Size([2, 256])
torch.Size([2, 256]) torch.Size([2, 256])
torch.Size([2, 256]) torch.Size([2, 256])
torch.Size([2, 256]) torch.Size([2, 256])
torch.Size([2, 256]) torch.Size([2, 256])
torch.Size([2, 256]) torch.Size([2, 256])
torch.Size([2, 256]) torch.Size([2, 256])
torch.Size([2, 256]) torch.Size([2, 256])
torch.Size([2, 256]) torch.Size([2, 256])


In [11]:
for x,y in validation_dataloader:
    print(x.shape,y.shape)

torch.Size([2, 256]) torch.Size([2, 256])


In [12]:
def calculate_loss_of_Batch(input_batch, target_batch,model, device='cpu'):
    input_batch = input_batch.to(device)
    target_batch = target_batch.to(device)
    
    logits = model(input_batch)
    loss = torch.nn.functional.cross_entropy(
        logits.flatten(0,1), target_batch.flatten()
    ) 
    return loss

In [13]:
def calculate_loss_of_Loader(dataloader,model, device='cpu',num_of_batch=None):
    if num_of_batch == 0:
        return float('nan')
    elif num_of_batch is None:
        num_of_batch = len(dataloader)
    else:
        num_of_batch = min(len(dataloader), num_of_batch)
        
    total_dataloader_loss = 0
    for index, (input_batch, target_batch) in enumerate(dataloader):
        if index < num_of_batch:
            loss = calculate_loss_of_Batch(input_batch,target_batch,model=model,device=device)
            total_dataloader_loss += loss.item()
        else:
            break
            
    return total_dataloader_loss/num_of_batch

In [16]:
device = torch.device("cuda" if torch.cuda.is_available() else 'cpu')
device

device(type='cpu')

In [18]:
model = GPTModel(GPT_CONFIG_124M)

In [19]:
model.to(device)

GPTModel(
  (token_embedding): Embedding(50257, 768)
  (positional_embedding): Embedding(256, 768)
  (dropout_embedding): Dropout(p=0.1, inplace=False)
  (transformer_blocks): Sequential(
    (0): TransformerBlock(
      (layer_norm1): LayerNormalization()
      (attention): MultiHeadAttention(
        (dropout): Dropout(p=0.1, inplace=False)
        (out_proj): Linear(in_features=768, out_features=768, bias=True)
        (W_query): Linear(in_features=768, out_features=768, bias=False)
        (W_key): Linear(in_features=768, out_features=768, bias=False)
        (W_value): Linear(in_features=768, out_features=768, bias=False)
      )
      (dropout_shortcut): Dropout(p=0.1, inplace=False)
      (layer_norm2): LayerNormalization()
      (ff): FeedForward(
        (layers): Sequential(
          (0): Linear(in_features=768, out_features=3072, bias=True)
          (1): GELU()
          (2): Linear(in_features=3072, out_features=768, bias=True)
        )
      )
    )
    (1): Transformer

In [20]:
with torch.no_grad():
    training_loss = calculate_loss_of_Loader(train_dataloader,model,device=device)
    validation_loss = calculate_loss_of_Loader(validation_dataloader,model,device=device)
    
print(training_loss,validation_loss)

10.99815559387207 10.994824409484863
