In [None]:
# Run in a Colab cell
!pip install -q transformers datasets accelerate safetensors gradio

In [None]:
# Remove the currently installed transformers package (-y skips the confirmation
# prompt); the following cell installs it again.
!pip uninstall -y transformers


Found existing installation: transformers 4.56.0
Uninstalling transformers-4.56.0:
  Successfully uninstalled transformers-4.56.0


In [None]:
!pip install transformers --upgrade


Collecting transformers
  Using cached transformers-4.56.0-py3-none-any.whl.metadata (40 kB)
Using cached transformers-4.56.0-py3-none-any.whl (11.6 MB)
Installing collected packages: transformers
Successfully installed transformers-4.56.0


In [None]:
import torch

# Report the runtime up front so the saved output records which torch build
# and accelerator this notebook was executed on.
has_cuda = torch.cuda.is_available()
print("torch version:", torch.__version__)
print("CUDA available:", has_cuda)
if has_cuda:
    print("Device name:", torch.cuda.get_device_name(0))

torch version: 2.8.0+cu126
CUDA available: True
Device name: Tesla T4


In [None]:
from pathlib import Path

from google.colab import files

# The dataset cell reads this exact filename.
CORPUS_PATH = Path("shakespeare.txt")

# files.upload() returns {filename: file contents as bytes}. When a file with
# the same name already exists, Colab keeps the old one and saves the new upload
# under a suffixed name (e.g. "shakespeare (3).txt"), so reading a fixed filename
# would silently keep using a stale copy. Writing the bytes ourselves avoids that.
uploaded = files.upload()
if uploaded:
    # Only one corpus file is expected; if several are selected the first is used.
    corpus_bytes = next(iter(uploaded.values()))
    CORPUS_PATH.write_bytes(corpus_bytes)
    print(f"Wrote {len(corpus_bytes):,} bytes to {CORPUS_PATH}")
else:
    # Upload cancelled: fall back to whatever copy is already on disk.
    print(f"No file uploaded; keeping existing {CORPUS_PATH} (exists: {CORPUS_PATH.exists()})")

Saving shakespeare.txt to shakespeare (3).txt


In [None]:
from datasets import load_dataset

# Load the corpus with the "text" builder into a DatasetDict with a single
# "train" split and a single "text" column.
dataset = load_dataset("text", data_files={"train": "shakespeare.txt"})
print(dataset)

# Row 0 alone can be an empty line (as it is for this corpus), which makes a
# one-row preview useless; join the first several rows back together instead.
preview_rows = dataset["train"][:20]["text"]
print("Sample text snippet:\n", "\n".join(preview_rows)[:800])

DatasetDict({
    train: Dataset({
        features: ['text'],
        num_rows: 248426
    })
})
Sample text snippet:
 


In [None]:
from transformers import GPT2TokenizerFast

# Load the tokenizer that ships with the pretrained "gpt2" checkpoint.
tokenizer = GPT2TokenizerFast.from_pretrained("gpt2")
# GPT-2 has no pad token by default; reuse the existing EOS token as the pad
# token so batching works. No new token is added to the vocabulary here.
tokenizer.pad_token = tokenizer.eos_token

def tokenize_function(examples):
    """Run the module-level ``tokenizer`` over the "text" entry of a batch."""
    texts = examples["text"]
    return tokenizer(texts)

# Replace the raw "text" column with the tokenizer's output, batch by batch.
tokenized = dataset.map(
    tokenize_function,
    batched=True,
    num_proc=1,
    remove_columns=["text"],
)

# Tokens per training block for LM;
# change to 256 or 512 if you have enough memory/GPU
block_size = 128

def group_texts(examples, chunk_size=None):
    """Concatenate a batch of token-id lists and cut it into fixed-size blocks.

    Args:
        examples: Mapping with an "input_ids" entry holding a list of
            token-id lists (one per row of the batch).
        chunk_size: Tokens per block. When omitted, the module-level
            ``block_size`` is used, so the function can still be passed
            directly to ``Dataset.map``.

    Returns:
        dict with "input_ids" (blocks of exactly ``chunk_size`` ids) and
        "labels" (an independent copy of the same blocks; for causal LM the
        labels are the inputs). Trailing tokens that do not fill a whole
        block are dropped.

    Raises:
        ValueError: if the block size is not positive.
    """
    from itertools import chain

    size = block_size if chunk_size is None else chunk_size
    if size <= 0:
        raise ValueError(f"block size must be positive, got {size}")

    flat_ids = list(chain.from_iterable(examples["input_ids"]))
    # Largest multiple of `size` that fits; the remainder is discarded.
    usable = (len(flat_ids) // size) * size
    blocks = [flat_ids[start : start + size] for start in range(0, usable, size)]
    # Copy every block so the labels never alias the input lists.
    return {"input_ids": blocks, "labels": [list(block) for block in blocks]}

# Pack the tokenized rows into fixed-length blocks, 1000 rows per batch, dropping
# every original column so only what group_texts returns remains.
lm_dataset = tokenized.map(
    group_texts,
    batched=True,
    batch_size=1000,
    remove_columns=tokenized.column_names["train"],
)
print(lm_dataset)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


DatasetDict({
    train: Dataset({
        features: ['input_ids', 'labels'],
        num_rows: 13390
    })
})


In [None]:
from transformers import GPT2LMHeadModel, DataCollatorForLanguageModeling, TrainingArguments, Trainer
import torch

# Start from the pretrained GPT-2 checkpoint.
model = GPT2LMHeadModel.from_pretrained("gpt2")
# Keep the embedding matrix in step with the tokenizer's vocabulary size.
# NOTE(review): the pad token reuses EOS, so this presumably changes nothing — confirm.
model.resize_token_embeddings(len(tokenizer))

# mlm=False: collate for causal language modelling rather than masked LM.
data_collator = DataCollatorForLanguageModeling(mlm=False, tokenizer=tokenizer)

# Training args - tweak to your GPU/memory availability
# NOTE(review): a later cell rebinds `training_args` before the Trainer is built.
use_mixed_precision = torch.cuda.is_available()  # use mixed precision if GPU supports it
training_args = TrainingArguments(
    output_dir="./gpt2-shakespeare",
    overwrite_output_dir=True,
    # optimisation schedule
    num_train_epochs=3,
    learning_rate=5e-5,
    weight_decay=0.01,
    # batching
    per_device_train_batch_size=2,   # lower if you get OOM
    gradient_accumulation_steps=8,   # simulates larger batch
    fp16=use_mixed_precision,
    # bookkeeping
    logging_steps=100,
    save_strategy="epoch",
)

In [None]:
import torch
from transformers import Trainer, TrainingArguments

# Single, complete run configuration. Building TrainingArguments from only the
# reporting options would silently reset epochs, batch size, gradient
# accumulation, logging/saving cadence and fp16 to the library defaults.
training_args = TrainingArguments(
    output_dir="./gpt2-shakespeare-final",
    overwrite_output_dir=True,
    num_train_epochs=3,
    per_device_train_batch_size=2,        # lower if you get OOM
    gradient_accumulation_steps=8,        # effective batch = 2 * 8 per device
    learning_rate=5e-5,
    weight_decay=0.01,
    fp16=torch.cuda.is_available(),       # mixed precision only when a GPU is present
    logging_steps=100,
    logging_dir="./logs",
    save_strategy="epoch",
    report_to="none",   # 👈 no wandb, no tensorboard
    push_to_hub=False,
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=lm_dataset["train"],
    data_collator=data_collator,
)

trainer.train()

# Save final model + tokenizer
trainer.save_model("./gpt2-shakespeare-final")
tokenizer.save_pretrained("./gpt2-shakespeare-final")

`loss_type=None` was set in the config but it is unrecognized. Using the default loss: `ForCausalLMLoss`.


Step,Training Loss
500,3.3505
1000,3.1429
1500,3.0466
2000,2.9634
2500,2.9173
3000,2.9021
3500,2.8561
4000,2.8239
4500,2.8207
5000,2.8119


('./gpt2-shakespeare-final/tokenizer_config.json',
 './gpt2-shakespeare-final/special_tokens_map.json',
 './gpt2-shakespeare-final/vocab.json',
 './gpt2-shakespeare-final/merges.txt',
 './gpt2-shakespeare-final/added_tokens.json',
 './gpt2-shakespeare-final/tokenizer.json')

In [None]:
from transformers import pipeline

# Load the fine-tuned model and tokenizer saved by the training cell.
generator = pipeline(
    "text-generation",
    model="./gpt2-shakespeare-final",
    tokenizer="./gpt2-shakespeare-final",
    device=0 if torch.cuda.is_available() else -1
)

prompt = "O for a muse of fire, that would ascend the brightest heaven of invention,"

# Total length budget (prompt + continuation), in tokens.
MAX_TOTAL_TOKENS = 180
prompt_token_count = len(generator.tokenizer(prompt)["input_ids"])
# The pipeline's generation defaults already carry a `max_new_tokens` value that
# takes precedence over `max_length`, so a bare `max_length=180` is ignored.
# Express the same budget as `max_new_tokens` so it is actually honoured.
new_token_budget = max(1, MAX_TOTAL_TOKENS - prompt_token_count)

out = generator(
    prompt,
    max_new_tokens=new_token_budget,
    do_sample=True,
    temperature=0.9,
    top_k=50,
    top_p=0.92,
    num_return_sequences=1,
)
print(out[0]["generated_text"])

Device set to use cuda:0
Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.
Both `max_new_tokens` (=256) and `max_length`(=180) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


O for a muse of fire, that would ascend the brightest heaven of invention,                                                                  [Falls in.]  FIRST SOLDIER. The Tower is up.  MALCOLM. This noise, this heavy noise, this heavy noise,    This heavy noise, this heavy noise, this heavy noise,    Hath made the walls crack, and I must go to't.  FIRST SOLDIER. What a noise! What a noise!                                                           [Exit.]                                                      


In [None]:
import gradio as gr

def gen_shakespeare(prompt, max_len=150, temp=0.8, top_p=0.95):
    """Generate a continuation of ``prompt`` with the module-level ``generator``.

    Args:
        prompt: Text to continue.
        max_len: Total length budget in tokens (prompt + continuation). UI
            sliders may deliver this as a float, so it is coerced to ``int``.
        temp: Sampling temperature.
        top_p: Nucleus-sampling probability mass.

    Returns:
        The "generated_text" of the single sequence returned by the pipeline.
    """
    prompt_token_count = len(generator.tokenizer(prompt)["input_ids"])
    # `max_length` loses to the pipeline's default `max_new_tokens`, so the
    # budget is passed as `max_new_tokens`; always allow at least one new token.
    new_token_budget = max(1, int(max_len) - prompt_token_count)
    out = generator(
        prompt,
        max_new_tokens=new_token_budget,
        do_sample=True,
        temperature=float(temp),
        top_p=float(top_p),
        top_k=50,
        num_return_sequences=1,
    )
    return out[0]["generated_text"]

# Input widgets, listed in the same order as gen_shakespeare's parameters.
ui_inputs = [
    gr.Textbox(lines=3, label="Prompt"),
    gr.Slider(50, 500, value=150, label="Max Length"),
    gr.Slider(0.1, 1.2, value=0.8, label="Temperature"),
    gr.Slider(0.5, 1.0, value=0.95, label="Top-p"),
]
iface = gr.Interface(
    fn=gen_shakespeare,
    inputs=ui_inputs,
    outputs="text",
    title="Shakespearean Generator",
)
# share=True additionally serves the app through a temporary public URL.
iface.launch(share=True)

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://8de9fdde8653d5483a.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


