<a href="https://colab.research.google.com/github/chalescharli/LargeLanguageModelsProjects/blob/main/LLM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install transformers datasets trl accelerate torch wandb peft




In [None]:
from datasets import load_dataset

# Fetch the TinyStories dataset (all available splits)
dataset = load_dataset("roneneldan/TinyStories")

# Keep only the leading rows of each split so the demo stays small:
# the first 500 training rows and the first 100 validation rows.
train_subset = dataset["train"].select(range(500))
val_subset = dataset["validation"].select(range(100))

# Show the features and row count of both subsets
for subset in (train_subset, val_subset):
    print(subset)


Dataset({
    features: ['text'],
    num_rows: 500
})
Dataset({
    features: ['text'],
    num_rows: 100
})


In [None]:
from transformers import AutoTokenizer

# Tokenizer belonging to the distilgpt2 checkpoint
tokenizer = AutoTokenizer.from_pretrained("distilgpt2")

# Reuse the end-of-sequence token as the padding token
tokenizer.pad_token = tokenizer.eos_token


def tokenize_function(example):
    """Encode the ``text`` field to a fixed length of 128 tokens and attach labels.

    Args:
        example: Mapping with a ``"text"`` entry. It is called below through
            ``Dataset.map(..., batched=True)``.

    Returns:
        The tokenizer output with an extra ``"labels"`` entry that is a copy
        of ``"input_ids"``.
    """
    encoded = tokenizer(
        example["text"],
        max_length=128,
        padding="max_length",
        truncation=True,
    )
    encoded["labels"] = encoded["input_ids"].copy()
    return encoded


# Encode both subsets in batches
train_tokenized = train_subset.map(tokenize_function, batched=True)
val_tokenized = val_subset.map(tokenize_function, batched=True)

# Expose only the model inputs, formatted as torch tensors
for split in (train_tokenized, val_tokenized):
    split.set_format(type="torch", columns=["input_ids", "attention_mask", "labels"])

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

In [None]:
import torch
from transformers import TrainingArguments

# Settings for the supervised fine-tuning run of the causal language model.
training_args = TrainingArguments(
    output_dir="./results",
    per_device_train_batch_size=8,
    num_train_epochs=1,
    logging_steps=50,
    # `evaluation_strategy` is the deprecated spelling of this argument.
    eval_strategy="epoch",
    save_strategy="epoch",
    # Mixed precision needs a CUDA device; enabling it unconditionally makes
    # this cell fail on a CPU-only runtime.
    fp16=torch.cuda.is_available(),
)




In [None]:
from transformers import AutoModelForCausalLM, Trainer, DataCollatorForLanguageModeling

# Batch collator configured for causal (non-masked) language modelling
data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

# Pretrained distilgpt2 weights that will be fine-tuned
model = AutoModelForCausalLM.from_pretrained("distilgpt2")

# Tie together the model, training arguments, both tokenized subsets and the collator
trainer = Trainer(
    data_collator=data_collator,
    eval_dataset=val_tokenized,
    train_dataset=train_tokenized,
    args=training_args,
    model=model,
)

# Run fine-tuning; the returned TrainOutput is shown as the cell's result
trainer.train()


[34m[1mwandb[0m: Currently logged in as: [33mrr7499042[0m ([33mrr7499042-parul-university[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


`loss_type=None` was set in the config but it is unrecognised.Using the default loss: `ForCausalLMLoss`.


Epoch,Training Loss,Validation Loss
1,2.6562,2.234162


TrainOutput(global_step=63, training_loss=2.62821294390966, metrics={'train_runtime': 1039.0136, 'train_samples_per_second': 0.481, 'train_steps_per_second': 0.061, 'total_flos': 16331046912000.0, 'train_loss': 2.62821294390966, 'epoch': 1.0})

In [None]:
from transformers import AutoModelForSequenceClassification

# DistilBERT with a single-output classification head, used as the reward model.
# The head is newly initialised on load, so it must be trained before its scores mean anything.
reward_model = AutoModelForSequenceClassification.from_pretrained(
    "distilbert-base-uncased",
    num_labels=1,
)


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [None]:
# Training settings intended for the reward model.
# NOTE(review): no Trainer visible in this notebook consumes these arguments, and
# per-epoch evaluation will need an eval dataset once one does -- confirm.
reward_training_args = TrainingArguments(
    output_dir="./reward_model",
    per_device_train_batch_size=8,
    num_train_epochs=2,
    # `evaluation_strategy` is the deprecated spelling of this argument.
    eval_strategy="epoch",
    save_strategy="epoch",
)




In [None]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments

# Load the tokenizer that matches the reward model (DistilBERT)
reward_tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")

# Fall back to the EOS token only when the tokenizer defines no padding token
if reward_tokenizer.pad_token is None:
    reward_tokenizer.pad_token = reward_tokenizer.eos_token


def tokenize_function_reward(example):
    """Encode the ``text`` field with the reward model's tokenizer.

    Args:
        example: Mapping with a ``"text"`` entry (a list of strings when
            called through ``Dataset.map(..., batched=True)``).

    Returns:
        The tokenizer output, truncated/padded to 128 tokens. No ``"labels"``
        entry is added: sequence classification labels are not the input ids.
    """
    return reward_tokenizer(
        example["text"],
        truncation=True,
        padding="max_length",
        max_length=128,
    )


# Re-tokenize the training subset for the reward model (batched, like the LM tokenization)
train_tokenized_reward = train_subset.map(tokenize_function_reward, batched=True)

In [None]:
!pip install trl transformers --upgrade



In [None]:
# Upgrades trl and transformers; this repeats the upgrade command of the preceding cell.
!pip install --upgrade trl transformers



In [None]:
# PPOConfig comes from trl; without this import the cell raises NameError on a fresh kernel.
from trl import PPOConfig

# Configuration for the PPO stage
ppo_config = PPOConfig(
    output_dir="./ppo_results",  # Specify the output directory
    learning_rate=1e-5,
    batch_size=4,
    mini_batch_size=1,
    gradient_accumulation_steps=1
)

In [None]:
import torch

# Generate one response per training story with the fine-tuned causal LM.
# Prompts are encoded with `tokenizer` (the model's own tokenizer): ids produced by
# the reward model's DistilBERT tokenizer belong to a different vocabulary.
model.eval()
for example in train_subset:
    # Un-padded encoding with a batch dimension, plus the matching attention mask
    input_data = tokenizer(
        example["text"],
        return_tensors="pt",
        truncation=True,
        max_length=128,
    )

    # Move input data to the same device as the model
    input_data = {k: v.to(model.device) for k, v in input_data.items()}

    # Generate response; no gradients are needed for sampling
    with torch.no_grad():
        response_tensors = model.generate(
            **input_data,
            max_new_tokens=32,  # bound only the continuation, not prompt + continuation
            pad_token_id=tokenizer.pad_token_id,
        )

    # ... rest of your code ...
    # NOTE(review): the responses are not scored or used for a PPO update here.

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generati

In [None]:
# Write the model weights/config and the tokenizer files to the same directory so
# they can be reloaded together from that path.
# NOTE(review): no PPO update step is visible before this cell, so the saved weights
# appear to be the supervised fine-tuned model rather than an RLHF-tuned one -- confirm.
model.save_pretrained("./final_rlhf_model")
# Last expression of the cell: the tuple of written tokenizer files is displayed.
tokenizer.save_pretrained("./final_rlhf_model")


('./final_rlhf_model/tokenizer_config.json',
 './final_rlhf_model/special_tokens_map.json',
 './final_rlhf_model/vocab.json',
 './final_rlhf_model/merges.txt',
 './final_rlhf_model/added_tokens.json',
 './final_rlhf_model/tokenizer.json')

In [None]:
from transformers import pipeline

# Load the saved model and tokenizer from disk into a text-generation pipeline
generation_pipeline = pipeline("text-generation", model="./final_rlhf_model")

# Generate text. `max_new_tokens` limits only the generated continuation; passing
# `max_length` instead triggers the tokenizer's "Truncation was not explicitly
# activated" warning.
print(generation_pipeline("Once upon a time, a little robot", max_new_tokens=50))


Device set to use cpu
Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.


[{'generated_text': "Once upon a time, a little robot was playing outside in the park. He asked everyone to see if there was a new kind of toy. It didn't look like a big toy, but instead he found a nice toy.\n\nSuddenly,"}]
