In [3]:
from transformers import GPT2Tokenizer, GPT2LMHeadModel, Trainer, TrainingArguments, DataCollatorForLanguageModeling
from datasets import Dataset
import torch
import os

# ✅ Disable Weights & Biases logging
os.environ["WANDB_DISABLED"] = "true"

# 🔹 Step 1: Install required packages
!pip install transformers datasets accelerate

# 🔹 Step 2: Import libraries
from transformers import GPT2Tokenizer, GPT2LMHeadModel, Trainer, TrainingArguments, DataCollatorForLanguageModeling
from datasets import Dataset
import torch

# 🔹 Step 3: Load GPT-2 tokenizer and model
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
model = GPT2LMHeadModel.from_pretrained("gpt2")
tokenizer.pad_token = tokenizer.eos_token
model.resize_token_embeddings(len(tokenizer))

# 🔹 Step 4: Embed dataset directly in code
lines = [
    "Once upon a time, in a quiet village, there lived a young boy named Arin.",
    "Every morning, Arin would climb the hill to see the sunrise.",
    "He loved the colors of the sky and the silence of the early hours.",
    "One day, he found a mysterious key buried under an old oak tree.",
    "He asked his grandmother, who told him stories about a hidden door in the forest.",
    "That night, he packed his bag and left for the adventure of his life.",
    "In the forest, the trees whispered secrets and the moon guided his path.",
    "After walking for hours, he found a door carved into a stone wall.",
    "The key fit perfectly, and the door opened to a world full of floating islands and glowing creatures.",
    "Arin had never seen such beauty. He met a wise owl who offered to teach him ancient magic.",
    "Days turned into weeks as he learned to fly, speak to animals, and read the stars.",
    "But a shadow loomed in the sky — the darkness was returning.",
    "Arin had to choose: return home or protect this magical world from fading forever.",
    "He stood tall and said, 'I will stay and fight.'",
    "From that day, Arin became the Guardian of the Forest Realm.",
    "And in his village, on clear nights, people still say they can see him flying across the moon."
]
dataset = Dataset.from_dict({"text": lines})

# 🔹 Step 5: Tokenize the dataset
def tokenize_function(examples):
    return tokenizer(examples["text"], padding="max_length", truncation=True, max_length=128)

tokenized_dataset = dataset.map(tokenize_function, batched=True, remove_columns=["text"])

# 🔹 Step 6: Data collator
data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

# 🔹 Step 7: Training arguments
training_args = TrainingArguments(
    output_dir="./gpt2-finetuned",
    overwrite_output_dir=True,
    num_train_epochs=3,
    per_device_train_batch_size=2,
    save_steps=200,
    save_total_limit=1,
    prediction_loss_only=True,
    logging_dir="./logs",
    logging_steps=50,
)

# 🔹 Step 8: Train the model
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
    data_collator=data_collator,
)
trainer.train()

# 🔹 Step 9 (Optional): Save model
model.save_pretrained("gpt2-finetuned")
tokenizer.save_pretrained("gpt2-finetuned")

# ✅ Step 10: Fixed Text Generation Block (with device and warnings handled)

import torch

# 🔹 Set device (GPU if available)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# 🔹 Input prompt
input_text = "Once upon a time"
input_ids = tokenizer.encode(input_text, return_tensors="pt").to(device)  # move to same device

# 🔹 Generate text
output = model.generate(
    input_ids=input_ids,
    max_length=100,
    do_sample=True,
    top_k=50,
    top_p=0.95,
    temperature=0.9,
    pad_token_id=tokenizer.eos_token_id  # to suppress warning
)

# 🔹 Decode and print result
print("\nGenerated Text:\n")
print(tokenizer.decode(output[0], skip_special_tokens=True))


print("\nGenerated Text:\n")
print(tokenizer.decode(output[0], skip_special_tokens=True))




Map:   0%|          | 0/16 [00:00<?, ? examples/s]

Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


Step,Training Loss



Generated Text:

Once upon a time, when they were sealed in the world, an ancient power kept the world safe. The world was sealed. A mysterious force that kept the night still stood. It is said that the secrets were hidden. As they were sealed the night. The dark was never dawning. The world fell into the night.

A new world was coming to pass. It is called the Time to Come.

A time in the distant future, a dream came true. The world

Generated Text:

Once upon a time, when they were sealed in the world, an ancient power kept the world safe. The world was sealed. A mysterious force that kept the night still stood. It is said that the secrets were hidden. As they were sealed the night. The dark was never dawning. The world fell into the night.

A new world was coming to pass. It is called the Time to Come.

A time in the distant future, a dream came true. The world
