In [None]:
import torch
from torch.utils.data import DataLoader, Dataset
from datasets import load_dataset
from transformers import AutoModelForCausalLM, AutoTokenizer

# Use GPT-2 for tokenization so the token ids match the model that is
# actually fine-tuned further down (GPT2LMHeadModel "gpt2") and the tokenizer
# used at generation time. The recorded batch output pads with id 50256,
# which is GPT-2's end-of-text id, so this is the tokenizer the notebook was
# really run with; a LLaMA tokenizer would produce ids from a different
# vocabulary and a multi-GB download that the rest of the notebook never uses.
model_name = "gpt2"
model = AutoModelForCausalLM.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# GPT-2 ships without a padding token; reuse end-of-text so that
# `padding=True` works when batching the chunks.
tokenizer.pad_token = tokenizer.eos_token

# Load the dataset (each split is a single row holding one long string)
dataset = load_dataset("tiny_shakespeare")
'''
DatasetDict({
    train: Dataset({
        features: ['text'],
        num_rows: 1
    })
    validation: Dataset({
        features: ['text'],
        num_rows: 1
    })
    test: Dataset({
        features: ['text'],
        num_rows: 1
    })
})
'''

# Split the continuous text into smaller chunks
def split_text(text, max_length=100):
    """Cut `text` into consecutive pieces of at most `max_length` characters.

    Args:
        text: The string to cut up.
        max_length: Size of each piece, in characters. The final piece may be
            shorter.

    Returns:
        A list of substrings that, concatenated in order, reproduce `text`.
        An empty `text` yields an empty list.
    """
    pieces = []
    for start in range(0, len(text), max_length):
        stop = start + max_length
        pieces.append(text[start:stop])
    return pieces

# Apply the split_text function to the dataset


# The train split has a single row, so row 0 is the whole training text.
# It is cut into chunks using split_text's default max_length (100 characters,
# not 100 tokens).
split_texts = split_text(dataset["train"]["text"][0])

# Tokenize the split_texts
# return_tensors="pt" asks for PyTorch tensors; padding=True pads the chunks to
# a common length so they can be stacked (the pad token was set to EOS above).
tokenized_texts = tokenizer(split_texts, return_tensors="pt", padding=True, truncation=True)

class ShiftedDataset(Dataset):
    """Map-style dataset yielding causal-LM training examples.

    Despite the (kept for compatibility) name, the labels are NOT shifted
    here: Hugging Face causal-LM heads shift the labels internally when
    computing the loss, so shifting them again in the dataset would train the
    model to predict the token two positions ahead. Labels are therefore a
    copy of `input_ids`, with padded positions set to -100 so the loss
    ignores them.

    Args:
        encodings: Mapping with "input_ids" and "attention_mask" entries, each
            indexable by example (e.g. the tokenizer output created with
            `return_tensors="pt"` and `padding=True`).
    """

    # Label value that the cross-entropy loss skips.
    IGNORE_INDEX = -100

    def __init__(self, encodings):
        self.encodings = encodings

    def __getitem__(self, idx):
        """Return the `input_ids`, `attention_mask` and `labels` tensors for example `idx`."""
        input_ids = torch.as_tensor(self.encodings["input_ids"][idx])
        attention_mask = torch.as_tensor(self.encodings["attention_mask"][idx])

        # clone() so that masking the labels never writes into the stored input_ids.
        labels = input_ids.clone()
        # Padding positions (attention_mask == 0) must not contribute to the loss.
        labels[attention_mask == 0] = self.IGNORE_INDEX

        return {"input_ids": input_ids, "attention_mask": attention_mask, "labels": labels}

    def __len__(self):
        """Number of examples in the dataset."""
        return len(self.encodings["input_ids"])

# Create a DataLoader
# Wrap the tokenized chunks in the dataset class defined above, then batch
# them four at a time, reshuffling the order on every pass over the data.
train_dataset = ShiftedDataset(tokenized_texts)
train_dataloader = DataLoader(train_dataset, shuffle=True, batch_size=4)


Found cached dataset tiny_shakespeare (/Users/premtimsina/.cache/huggingface/datasets/tiny_shakespeare/default/1.0.0/b5b13969f09fe8707337f6cb296314fbe06960bd9a868dca39e713e163d27b5e)


  0%|          | 0/3 [00:00<?, ?it/s]

In [None]:
# Sanity check: pull one batch and print each tensor it contains.
item = next(iter(train_dataloader))
for field in ("input_ids", "attention_mask", "labels"):
    print(item[field])

tensor([[  268,   262,   198, 22680,   318,  6157,    11,   290,  3520,    11,
           355,   339,  1139,    11,   534,   198,    79,  3832, 10597,   340,
           307,  3181,   345,    13,   198,   198, 39371,  3535, 44816,  2937,
            25,   198, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256,
         50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256,
         50256, 50256, 50256],
        [  198,  3792,   477,   262,  2450,    11,  4202,   290,  9366,    11,
           198,  2504, 10598,   460,   787,  1028,   606,    13,   198,   198,
         49275,  1677,    40,  2937,    25,   198,  4342,  1282,   262,   220,
         50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256,
         50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256,
         50256, 50256, 50256],
        [  351,  1393,   198,  5189,  3478,  1661,  4274,  4461,   286, 12157,
            13,   198,  5247,    11,   788,   616,  2802,    11,   28

In [None]:
from accelerate import Accelerator
from transformers import GPT2LMHeadModel

# Initialize the Accelerator
accelerator = Accelerator()

# Configure the training arguments
# NOTE(review): num_epochs is reassigned to 40 by a later cell before the
# training loop runs, so this value of 20 is not the one actually used.
num_epochs = 20
learning_rate = 5e-5

# Initialize the GPT-2 model and optimizer
# This rebinds `model`, replacing the model object loaded in the first cell.
model = GPT2LMHeadModel.from_pretrained("gpt2")
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Prepare the model and optimizer for training with Accelerator
# prepare() returns the objects to use from here on; the names are rebound so
# the training loop picks up the prepared versions.
model, optimizer, train_dataloader = accelerator.prepare(model, optimizer, train_dataloader)


In [None]:
# NOTE(review): hidden-state cell. This overrides the num_epochs = 20 set in
# the Accelerator setup cell, so the training loop below runs 40 epochs.
num_epochs=40
# `epoch` is immediately rebound by the training loop's `for epoch in ...`,
# so this assignment has no effect on training.
epoch=20

In [None]:
from transformers import AdamW
from tqdm import tqdm

# Fine-tuning loop
# from_pretrained() hands the model back in evaluation mode (dropout off);
# switch to training mode explicitly before optimizing.
model.train()

for epoch in range(num_epochs):
    epoch_iterator = tqdm(train_dataloader, desc=f"Epoch {epoch + 1}")
    for step, batch in enumerate(epoch_iterator):
        optimizer.zero_grad()
        input_ids = batch["input_ids"]
        attention_mask = batch["attention_mask"]
        labels = batch["labels"]

        # Passing `labels` makes the model compute the language-modeling loss itself.
        outputs = model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)
        loss = outputs.loss

        # Let Accelerate run the backward pass so it can handle device placement / scaling.
        accelerator.backward(loss)
        optimizer.step()

        # Refresh the progress-bar loss only occasionally to keep the bar cheap.
        if step % 500 == 0:
            epoch_iterator.set_postfix({"Loss": loss.item()}, refresh=True)

    # Save the model every 5 epochs
    if (epoch + 1) % 5 == 0:
        model_save_path = f"/Users/premtimsina/Documents/bpbbook/chapter6/model/tiny_shakespeare/model_checkpoint_epoch_{epoch + 1}"
        # Make sure every process has finished the epoch, then save the
        # underlying model rather than whatever wrapper prepare() may have
        # put around it (same pattern as the final save cell).
        accelerator.wait_for_everyone()
        if accelerator.is_main_process:
            accelerator.unwrap_model(model).save_pretrained(model_save_path)
            print(f"Model saved at epoch {epoch + 1}")



Epoch 1: 100%|██████████████████| 2510/2510 [06:11<00:00,  6.75it/s, Loss=0.201]
Epoch 2: 100%|███████████████████| 2510/2510 [06:08<00:00,  6.80it/s, Loss=0.18]
Epoch 3: 100%|██████████████████| 2510/2510 [06:10<00:00,  6.78it/s, Loss=0.103]
Epoch 4: 100%|██████████████████| 2510/2510 [06:13<00:00,  6.73it/s, Loss=0.148]
Epoch 5: 100%|██████████████████| 2510/2510 [06:31<00:00,  6.41it/s, Loss=0.167]


Model saved at epoch 5


Epoch 6: 100%|██████████████████| 2510/2510 [06:36<00:00,  6.33it/s, Loss=0.123]
Epoch 7: 100%|██████████████████| 2510/2510 [06:34<00:00,  6.37it/s, Loss=0.122]
Epoch 8: 100%|█████████████████| 2510/2510 [06:26<00:00,  6.49it/s, Loss=0.0685]
Epoch 9: 100%|█████████████████| 2510/2510 [06:37<00:00,  6.32it/s, Loss=0.0491]
Epoch 10: 100%|█████████████████| 2510/2510 [06:31<00:00,  6.41it/s, Loss=0.144]


Model saved at epoch 10


Epoch 11: 100%|█████████████████| 2510/2510 [06:35<00:00,  6.34it/s, Loss=0.108]
Epoch 12: 100%|████████████████| 2510/2510 [06:30<00:00,  6.43it/s, Loss=0.0763]
Epoch 13: 100%|█████████████████| 2510/2510 [06:31<00:00,  6.40it/s, Loss=0.145]
Epoch 14: 100%|█████████████████| 2510/2510 [06:33<00:00,  6.38it/s, Loss=0.137]
Epoch 15: 100%|████████████████| 2510/2510 [06:31<00:00,  6.42it/s, Loss=0.0757]


Model saved at epoch 15


Epoch 16: 100%|██████████████████| 2510/2510 [06:30<00:00,  6.42it/s, Loss=0.12]
Epoch 17: 100%|█████████████████| 2510/2510 [06:30<00:00,  6.43it/s, Loss=0.113]
Epoch 18: 100%|█████████████████| 2510/2510 [06:29<00:00,  6.44it/s, Loss=0.141]
Epoch 19: 100%|████████████████| 2510/2510 [06:30<00:00,  6.42it/s, Loss=0.0813]
Epoch 20: 100%|████████████████| 2510/2510 [06:38<00:00,  6.29it/s, Loss=0.0753]


Model saved at epoch 20


Epoch 21: 100%|████████████████| 2510/2510 [06:30<00:00,  6.42it/s, Loss=0.0855]
Epoch 22: 100%|██████████████████| 2510/2510 [06:33<00:00,  6.39it/s, Loss=0.12]
Epoch 23: 100%|█████████████████| 2510/2510 [06:29<00:00,  6.44it/s, Loss=0.121]
Epoch 24: 100%|█████████████████| 2510/2510 [06:36<00:00,  6.32it/s, Loss=0.107]
Epoch 25: 100%|█████████████████| 2510/2510 [06:31<00:00,  6.41it/s, Loss=0.105]


Model saved at epoch 25


Epoch 26: 100%|█████████████████| 2510/2510 [06:32<00:00,  6.40it/s, Loss=0.124]
Epoch 27: 100%|████████████████| 2510/2510 [06:33<00:00,  6.37it/s, Loss=0.0576]
Epoch 28: 100%|████████████████| 2510/2510 [06:36<00:00,  6.34it/s, Loss=0.0918]
Epoch 29: 100%|█████████████████| 2510/2510 [06:33<00:00,  6.37it/s, Loss=0.116]
Epoch 30: 100%|████████████████| 2510/2510 [06:35<00:00,  6.35it/s, Loss=0.0753]


Model saved at epoch 30


Epoch 31: 100%|█████████████████| 2510/2510 [06:35<00:00,  6.34it/s, Loss=0.117]
Epoch 32: 100%|█████████████████| 2510/2510 [06:35<00:00,  6.35it/s, Loss=0.173]
Epoch 33: 100%|█████████████████| 2510/2510 [06:32<00:00,  6.39it/s, Loss=0.062]
Epoch 34: 100%|████████████████| 2510/2510 [06:32<00:00,  6.39it/s, Loss=0.0628]
Epoch 35: 100%|█████████████████| 2510/2510 [06:27<00:00,  6.48it/s, Loss=0.119]


Model saved at epoch 35


Epoch 36: 100%|████████████████| 2510/2510 [06:31<00:00,  6.41it/s, Loss=0.0706]
Epoch 37: 100%|█████████████████| 2510/2510 [06:32<00:00,  6.40it/s, Loss=0.119]
Epoch 38: 100%|████████████████| 2510/2510 [06:38<00:00,  6.29it/s, Loss=0.0789]
Epoch 39: 100%|██████████████████| 2510/2510 [06:31<00:00,  6.41it/s, Loss=0.11]
Epoch 40: 100%|████████████████| 2510/2510 [06:34<00:00,  6.36it/s, Loss=0.0707]


Model saved at epoch 40


In [None]:
# NOTE(review): duplicate of the scratch cell that precedes the training loop.
# Nothing after this point in the notebook reads num_epochs or epoch, so this
# cell looks like leftover interactive state and is a candidate for removal.
num_epochs=40
epoch=20

20

In [None]:
# Destination directories for the final artifacts. They are defined here
# because nothing earlier in the notebook assigns them, so on a fresh
# "Restart & Run All" this cell would otherwise fail with a NameError.
# The tokenizer directory is the one shown in this cell's recorded output.
# NOTE(review): the model directory is a sibling chosen by analogy - confirm
# or adjust to the location you actually want.
model_path = '/Users/premtimsina/Documents/bpbbook/chapter6/model/tiny_shakespeare/tiny_shakespeare_gpt2_model'
tokenizer_path = '/Users/premtimsina/Documents/bpbbook/chapter6/model/tiny_shakespeare/tiny_shakespeare_gpt2_tokenizer'

# Wait for all processes, then strip any wrapper added by accelerator.prepare()
# so that save_pretrained() is called on the underlying model.
accelerator.wait_for_everyone()
unwrapped_model = accelerator.unwrap_model(model)

unwrapped_model.save_pretrained(model_path)
tokenizer.save_pretrained(tokenizer_path)


('/Users/premtimsina/Documents/bpbbook/chapter6/model/tiny_shakespeare/tiny_shakespeare_gpt2_tokenizer/tokenizer_config.json',
 '/Users/premtimsina/Documents/bpbbook/chapter6/model/tiny_shakespeare/tiny_shakespeare_gpt2_tokenizer/special_tokens_map.json',
 '/Users/premtimsina/Documents/bpbbook/chapter6/model/tiny_shakespeare/tiny_shakespeare_gpt2_tokenizer/vocab.json',
 '/Users/premtimsina/Documents/bpbbook/chapter6/model/tiny_shakespeare/tiny_shakespeare_gpt2_tokenizer/merges.txt',
 '/Users/premtimsina/Documents/bpbbook/chapter6/model/tiny_shakespeare/tiny_shakespeare_gpt2_tokenizer/added_tokens.json')

In [None]:
import torch

def generate_poem(prompt, model_path, tokenizer_path, max_words=50, max_seq_len=100, temperature=1.0):
    """Generate text that continues `prompt` with a fine-tuned GPT-2 model.

    Generation proceeds in rounds. Each round feeds the most recent
    `max_seq_len` tokens back into the model and appends only the newly
    generated continuation to the result, until roughly `max_words` words
    have been produced, the model emits end-of-text, or a round adds nothing.

    Args:
        prompt: Text the poem starts with.
        model_path: Directory or model id passed to
            `GPT2LMHeadModel.from_pretrained`.
        tokenizer_path: Directory or model id passed to
            `GPT2Tokenizer.from_pretrained`.
        max_words: Approximate word budget, counting the prompt's words. The
            last round may overshoot it.
        max_seq_len: Maximum number of context tokens fed to the model per
            round, and the cap on new tokens generated per round.
        temperature: Sampling temperature. Values > 0 enable sampling with
            that temperature; values <= 0 fall back to greedy decoding.

    Returns:
        The prompt followed by the generated continuation, or an empty string
        when `max_words` is not positive.
    """
    # Imported locally because GPT2Tokenizer is not imported anywhere else in
    # the notebook; this keeps the function usable on a fresh kernel.
    from transformers import GPT2LMHeadModel, GPT2Tokenizer

    if max_words <= 0:
        return ""

    # Load the fine-tuned model and tokenizer
    model = GPT2LMHeadModel.from_pretrained(model_path)
    tokenizer = GPT2Tokenizer.from_pretrained(tokenizer_path)
    model.eval()

    # Set the padding token and padding side
    tokenizer.pad_token = tokenizer.eos_token
    tokenizer.padding_side = 'left'

    # `temperature` only has an effect when sampling is switched on.
    if temperature > 0:
        sampling_kwargs = {"do_sample": True, "temperature": temperature}
    else:
        sampling_kwargs = {"do_sample": False}

    poem = prompt
    remaining_words = max_words - len(prompt.split())

    # Keep the running context as token ids (most recent max_seq_len tokens)
    # instead of re-tokenizing word lists between rounds.
    input_ids = tokenizer.encode(prompt, return_tensors="pt")[:, -max_seq_len:]

    while remaining_words > 0:
        prompt_length = input_ids.shape[1]
        # Rough budget of 5 tokens per remaining word, capped per round.
        max_new_tokens = min(remaining_words * 5, max_seq_len)

        with torch.no_grad():
            output_ids = model.generate(
                input_ids,
                attention_mask=torch.ones_like(input_ids),
                max_new_tokens=max_new_tokens,  # counts new tokens only, not the prompt
                num_return_sequences=1,
                no_repeat_ngram_size=2,
                pad_token_id=tokenizer.pad_token_id,
                **sampling_kwargs,
            )

        # Keep only the continuation; the output also echoes the input tokens.
        new_ids = output_ids[0, prompt_length:]
        new_text = tokenizer.decode(new_ids, skip_special_tokens=True)
        new_words = new_text.split()
        if not new_words:
            # Nothing usable was produced; stop rather than loop forever.
            break

        poem += new_text
        remaining_words -= len(new_words)

        # The model signalled end-of-text: do not continue past it.
        if tokenizer.eos_token_id in new_ids.tolist():
            break

        # Slide the context window forward for the next round.
        input_ids = output_ids[:, -max_seq_len:]

    return poem

import re

def post_process_poem(poem):
    """Tidy raw generated text into a more readable, line-broken poem.

    Steps, in order:
      1. Collapse every run of whitespace (including newlines) into a single
         space and trim the ends.
      2. Upper-case the first character of each sentence, where a sentence
         ends at '.', '?' or '!' followed by whitespace. The rest of the
         sentence is left untouched.
      3. Replace the whitespace after ',', ';', ':', '?' or '!' with a newline.

    Args:
        poem: The raw text to format.

    Returns:
        The formatted text. An empty input yields an empty string.
    """
    # Remove any extra spaces
    poem = re.sub(r'\s+', ' ', poem).strip()

    # Capitalize the first letter of each sentence.
    # str.capitalize() is deliberately avoided: it also lower-cases the rest
    # of the sentence, which would mangle names and the pronoun "I".
    sentences = re.split(r'(?<=[\.\?!])\s', poem)
    formatted_sentences = [sentence[:1].upper() + sentence[1:] for sentence in sentences]
    formatted_poem = ' '.join(formatted_sentences)

    # Add line breaks for readability
    line_breaks = re.compile(r'(?<=[,;:?!])\s')
    formatted_poem = line_breaks.sub('\n', formatted_poem)

    return formatted_poem





In [None]:
# Example usage
# NOTE(review): absolute, machine-specific path; it points at the checkpoint
# directory the training loop writes after epoch 40.
model_path = '/Users/premtimsina/Documents/bpbbook/chapter6/model/tiny_shakespeare/model_checkpoint_epoch_40'
# 'gpt2' is passed to GPT2Tokenizer.from_pretrained inside generate_poem.
tokenizer_path = 'gpt2'
prompt = "love"
max_words = 50
temperature = 0.9  # You can adjust this value for more or less randomness
# Generate the raw text, then tidy capitalization and line breaks before printing.
generated_poem = generate_poem(prompt, model_path, tokenizer_path, max_words=max_words, temperature=temperature)
formatted_poem = post_process_poem(generated_poem)
print(formatted_poem)


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Love ill,,
have let go the;,
as remy top flowers by. Hast since have a parting himsh think longelovehaveletgoastopsincehaveathinka of more when was man again look than,ofmorewhenwasmanagainlooka upon err;
more at. Ly,,
you made work v,
virtue moreour.uponmoreyoumadeworkmeni:p you,
lord ,
am not mile with nor from worship colder i
