In [1]:
!pip install transformers torch



In [2]:
from transformers import GPT2LMHeadModel, GPT2Tokenizer
import torch

In [3]:
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
model = GPT2LMHeadModel.from_pretrained("gpt2")

tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/665 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/548M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

In [4]:
device = "cuda" if torch.cuda.is_available() else "cpu"
model = model.to(device)
print(f"\n Using device: {device}")


 Using device: cuda


In [9]:
text = "Hello,I am Reading"
inputs = tokenizer(text, return_tensors="pt").to(device)
model.eval()  # Set model to evaluation mode
with torch.no_grad():  # Don't calculate gradients (saves memory)
    outputs = model.generate(
        inputs["input_ids"],
        max_length=50,  # Maximum length of generated text
        num_return_sequences=1,  # How many versions to generate
        temperature=0.6,  # Creativity (higher = more creative)
        do_sample=True,  # Enable sampling for better quality
        top_k=50,  # Consider top 50 words
        top_p=0.95,  # Nucleus sampling
        repetition_penalty=1.2,  # Penalize repetition (1.0 = no penalty)
    )

# Convert numbers back to text
generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)

print(f"\n📝 Input: {text}")
print(f"🤖 Generated: {generated_text}")

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.



📝 Input: Hello,I am Reading
🤖 Generated: Hello,I am Reading this! And I know that it's not for everybody. But if you do like reading my book or the articles on other sites about me and how to deal with people who aren't interested in becoming feminists then please read these


# **FINE TUNING**



In [10]:
!pip install transformers datasets peft accelerate



In [11]:
from transformers import GPT2LMHeadModel, GPT2Tokenizer, Trainer, TrainingArguments, DataCollatorForLanguageModeling
from datasets import load_dataset
from peft import LoraConfig, get_peft_model, TaskType
import torch

In [12]:
model_name = "gpt2"
tokenizer = GPT2Tokenizer.from_pretrained(model_name)
model = GPT2LMHeadModel.from_pretrained(model_name)

# Set padding token (GPT-2 doesn't have one by default)
tokenizer.pad_token = tokenizer.eos_token
model.config.pad_token_id = tokenizer.eos_token_id

# DATASET

In [19]:
dataset = load_dataset(
    "text",
    data_files="https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt"
)

print(dataset)

Downloading data:   0%|          | 0.00/1.12M [00:00<?, ?B/s]

Generating train split: 0 examples [00:00, ? examples/s]

DatasetDict({
    train: Dataset({
        features: ['text'],
        num_rows: 40000
    })
})


In [20]:
dataset = dataset["train"].train_test_split(test_size=0.1, seed=42)
print(dataset)


DatasetDict({
    train: Dataset({
        features: ['text'],
        num_rows: 36000
    })
    test: Dataset({
        features: ['text'],
        num_rows: 4000
    })
})


In [21]:
def tokenize_function(examples):
    """Convert text to tokens that the model can understand"""
    return tokenizer(
        examples["text"],
        truncation=True,
        max_length=256,
        padding="max_length",
    )

# Apply tokenization to both train and validation splits
tokenized_dataset = dataset.map(
    tokenize_function,
    batched=True,
    remove_columns=dataset["train"].column_names,
)


Map:   0%|          | 0/36000 [00:00<?, ? examples/s]

Map:   0%|          | 0/4000 [00:00<?, ? examples/s]

# LORA CONFIGURATION

In [23]:
lora_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,  # Causal Language Modeling (text generation)
    r=8,  # LoRA rank (lower = fewer parameters, faster training)
    lora_alpha=32,  # LoRA scaling factor
    lora_dropout=0.1,  # Dropout for regularization
    target_modules=["c_attn"],  # Which layers to apply LoRA to
    bias="none",
)

print("✅ LoRA configuration created!")
print(f"   Rank (r): {lora_config.r}")
print(f"   Alpha: {lora_config.lora_alpha}")

✅ LoRA configuration created!
   Rank (r): 8
   Alpha: 32
