In [2]:
!pip install trl==0.9.4

import torch
from datasets import load_dataset
from peft import LoraConfig, get_peft_model, TaskType
from transformers import (
    AutoTokenizer, AutoModelForCausalLM, TrainingArguments, Trainer,
    DataCollatorForLanguageModeling
)
from trl import SFTTrainer

Collecting trl==0.9.4
  Downloading trl-0.9.4-py3-none-any.whl.metadata (11 kB)
Collecting tyro>=0.5.11 (from trl==0.9.4)
  Downloading tyro-0.9.31-py3-none-any.whl.metadata (11 kB)
Collecting shtab>=1.5.6 (from tyro>=0.5.11->trl==0.9.4)
  Downloading shtab-1.7.2-py3-none-any.whl.metadata (7.4 kB)
Downloading trl-0.9.4-py3-none-any.whl (226 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m226.7/226.7 kB[0m [31m9.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading tyro-0.9.31-py3-none-any.whl (131 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m131.7/131.7 kB[0m [31m15.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading shtab-1.7.2-py3-none-any.whl (14 kB)
Installing collected packages: shtab, tyro, trl
Successfully installed shtab-1.7.2 trl-0.9.4 tyro-0.9.31


In [4]:
import datasets
import transformers
print(f"datasets version: {datasets.__version__}")
print(f"transformers version: {transformers.__version__}")

datasets version: 4.0.0
transformers version: 4.56.1


In [6]:
try:
    # Force redownload and specify revision to avoid script issues
    dataset = load_dataset("amazon_reviews_multi", "en", split="train",
                          revision="main", download_mode="force_redownload")
except Exception as e:
    print(f"Error loading amazon_reviews_multi: {e}")
    # Fallback to a different dataset (e.g., IMDb) if needed
    print("Falling back to IMDb dataset...")
    dataset = load_dataset("imdb", split="train")

dataset = dataset.shuffle(seed=42).select(range(1000))

Error loading amazon_reviews_multi: Dataset scripts are no longer supported, but found amazon_reviews_multi.py
Falling back to IMDb dataset...


README.md: 0.00B [00:00, ?B/s]

plain_text/train-00000-of-00001.parquet:   0%|          | 0.00/21.0M [00:00<?, ?B/s]

plain_text/test-00000-of-00001.parquet:   0%|          | 0.00/20.5M [00:00<?, ?B/s]

plain_text/unsupervised-00000-of-00001.p(…):   0%|          | 0.00/42.0M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/25000 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/25000 [00:00<?, ? examples/s]

Generating unsupervised split:   0%|          | 0/50000 [00:00<?, ? examples/s]

In [7]:
def preprocess(examples):
    if "stars" in examples:  # amazon_reviews_multi
        labels = ["positive" if score > 3 else "negative" for score in examples["stars"]]
        texts = [f"Review: {text}\nSentiment: {label}" for text, label in zip(examples["review_body"], labels)]
    else:  # imdb fallback
        labels = ["positive" if label == 1 else "negative" for label in examples["label"]]
        texts = [f"Review: {text}\nSentiment: {label}" for text, label in zip(examples["text"], labels)]
    return {"text": texts}

In [8]:
dataset = dataset.map(preprocess, batched=True, remove_columns=dataset.column_names)

Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

In [9]:
model_name = "gpt2"
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,
    device_map="auto"
)

tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/665 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

`torch_dtype` is deprecated! Use `dtype` instead!


model.safetensors:   0%|          | 0.00/548M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

In [10]:
lora_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    inference_mode=False,
    r=8,
    lora_alpha=32,
    lora_dropout=0.1,
    target_modules=["c_attn", "c_proj"]
)
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()

trainable params: 811,008 || all params: 125,250,816 || trainable%: 0.6475




In [11]:
def tokenize(examples):
    return tokenizer(examples["text"], truncation=True, padding=True, max_length=128)

tokenized_dataset = dataset.map(tokenize, batched=True)
tokenized_dataset = tokenized_dataset.train_test_split(test_size=0.1)

Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

In [14]:
data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

training_args = TrainingArguments(
    output_dir="./lora-sentiment-model",
    num_train_epochs=3,
    per_device_train_batch_size=4,
    gradient_accumulation_steps=4,
    learning_rate=2e-4,
    fp16=True,
    save_steps=500,
    logging_steps=100,
    eval_strategy="steps",  # Changed from evaluation_strategy
    eval_steps=500,
    load_best_model_at_end=True,
    report_to="none"
)

trainer = SFTTrainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset["train"],
    eval_dataset=tokenized_dataset["test"],
    dataset_text_field="text",
    tokenizer=tokenizer,
    data_collator=data_collator,
    max_seq_length=128
)


Deprecated positional argument(s) used in SFTTrainer, please use the SFTConfig to set these arguments instead.
  super().__init__(


In [15]:
trainer.train()

The tokenizer has new PAD/BOS/EOS tokens that differ from the model config and generation config. The model config and generation config were aligned accordingly, being updated with the tokenizer's values. Updated tokens: {'pad_token_id': 50256}.
`loss_type=None` was set in the config but it is unrecognized. Using the default loss: `ForCausalLMLoss`.


Step,Training Loss,Validation Loss


TrainOutput(global_step=171, training_loss=3.7505568119517543, metrics={'train_runtime': 52.7793, 'train_samples_per_second': 51.156, 'train_steps_per_second': 3.24, 'total_flos': 178053827788800.0, 'train_loss': 3.7505568119517543, 'epoch': 3.0})

In [18]:
def predict_sentiment(review):
    input_text = f"Review: {review}\nSentiment:"
    inputs = tokenizer(input_text, return_tensors="pt").to(model.device)
    model.eval()  # Ensure model is in evaluation mode
    with torch.no_grad():
        # Remove temperature, use default sampling or top_k for stability
        outputs = model.generate(
            **inputs,
            max_new_tokens=10,
            do_sample=True,  # Enable sampling explicitly
            top_k=50  # Use top-k sampling to avoid temperature warning
        )
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return response.split("Sentiment:")[-1].strip()

# Test with provided input
print(predict_sentiment("I love this product"))

Positive
This product is a great buy. The
