# Lesson 2: Tune an LLM with RLHF

In [None]:
 from google_cloud_pipeline_components.preview.llm import rlhf_pipeline
from kfp import compiler

In [None]:
# Prints a fixed confirmation message; it does not itself verify any import.
print("Module imported successfully")

In [None]:
# Path of the compiled pipeline definition. Passed to the compiler as
# `package_path` and to the PipelineJob snippet as `template_path`.
RLHF_PIPELINE_PKG_PATH = "rlhf_pipeline.yaml"

In [None]:
# Compile the RLHF pipeline function and write the resulting definition
# to RLHF_PIPELINE_PKG_PATH.
compiler_instance = compiler.Compiler()
compiler_instance.compile(
    pipeline_func=rlhf_pipeline, package_path=RLHF_PIPELINE_PKG_PATH,
)


In [None]:
!head rlhf_pipeline.yaml

# Define the Vertex AI pipeline job

## Define the location of the training and evaluation data

In [None]:
# Size of the preference dataset; divided by BATCH_SIZE further down to get
# the reward-model steps per epoch.
# NOTE(review): presumably the example count of the small preference set - confirm.
PREF_DATASET_SIZE = 3000

In [None]:
# Batch size used as the divisor when turning a dataset size into steps per epoch.
BATCH_SIZE = 64

In [None]:
import math

# Steps per epoch for reward-model training: dataset size over batch size,
# rounded up so a final partial batch still counts as a step
# (3000 / 64 -> 47 with the values defined in the cells above).
REWARD_STEPS_PER_EPOCH = math.ceil(PREF_DATASET_SIZE / BATCH_SIZE)
print(REWARD_STEPS_PER_EPOCH)

In [None]:
# Number of epochs used in the reward-model step calculation.
REWARD_NUM_EPOCHS = 30
# Total reward-model training steps = steps per epoch * number of epochs.
# NOTE(review): the following cell repeats this exact assignment; one of the
# two is redundant.
reward_model_train_steps = REWARD_STEPS_PER_EPOCH * REWARD_NUM_EPOCHS

In [None]:
# Calculate number of steps in the reward model training:
# steps per epoch multiplied by the number of epochs.
reward_model_train_steps = REWARD_STEPS_PER_EPOCH * REWARD_NUM_EPOCHS

In [None]:
# Display the computed total number of reward-model training steps.
print(reward_model_train_steps)

# Calculate the number of reinforcement learning training steps

In [None]:
# Inputs to the reinforcement-learning step calculation.
PROMPT_DATASET_SIZE = 2000
BATCH_SIZE = 64
RL_NUM_EPOCHS = 10

# Steps per epoch are rounded up so a final partial batch still counts;
# the total is steps per epoch times the number of epochs.
RL_STEPS_PER_EPOCH = math.ceil(PROMPT_DATASET_SIZE / BATCH_SIZE)
reinforcement_learning_train_steps = RL_NUM_EPOCHS * RL_STEPS_PER_EPOCH

print(RL_STEPS_PER_EPOCH)

In [None]:
# Display the computed total number of reinforcement-learning training steps.
print(reinforcement_learning_train_steps)

# Define the instruction

In [None]:
# Pipeline parameters for a run on the small ("text_small") datasets.
parameter_values = {
    # Dataset locations (Cloud Storage glob patterns over JSONL files).
    "preference_dataset":
        "gs://vertex-ai/generative-ai/rlhf/text_small/summarize_from_feedback_tfds/comparisons/train/*.jsonl",
    "prompt_dataset":
        "gs://vertex-ai/generative-ai/rlhf/text_small/reddit_tfds/train/*.jsonl",
    "eval_dataset":
        "gs://vertex-ai/generative-ai/rlhf/text_small/reddit_tfds/val/*.jsonl",
    "large_model_reference": "llama-2-7b",
    # Use the step counts computed in the earlier cells rather than repeating
    # them as literals (1410 and 320), so they cannot drift from their inputs.
    "reward_model_train_steps": reward_model_train_steps,
    "reinforcement_learning_train_steps": reinforcement_learning_train_steps,
    "reward_model_learning_rate_multiplier": 1.0,
    "reinforcement_learning_rate_multiplier": 1.0,
    "kl_coeff": 0.1,  # increased to reduce reward hacking
    "instruction": "Summarize in less than 50 words",
}

## Train with full dataset: dictionary 'parameter_values'

In [None]:
# Pipeline parameters for a run on the full ("text") datasets.
# NOTE: this rebinds `parameter_values`, replacing the small-dataset
# dictionary assigned earlier in the notebook.
parameter_values = {
    # Dataset locations (Cloud Storage glob patterns over JSONL files).
    "preference_dataset":
        "gs://vertex-ai/generative-ai/rlhf/text/summarize_from_feedback_tfds/comparisons/train/*.jsonl",
    "prompt_dataset":
        "gs://vertex-ai/generative-ai/rlhf/text/reddit_tfds/train/*.jsonl",
    "eval_dataset":
        "gs://vertex-ai/generative-ai/rlhf/text/reddit_tfds/val/*.jsonl",
    "large_model_reference": "llama-2-7b",
    # Fixed step counts for this configuration (not derived from the
    # steps-per-epoch calculations earlier in the notebook).
    "reward_model_train_steps": 10000,
    "reinforcement_learning_train_steps": 10000,
    "reward_model_learning_rate_multiplier": 1.0,
    "reinforcement_learning_rate_multiplier": 0.2,
    "kl_coeff": 0.1,
    "instruction": "Summarize in less than 50 words",
}

## Set up Google Cloud to run the Vertex AI pipeline

In [None]:
# `authenticate` is imported from a `utils` module that is not shown here;
# its result is unpacked into credentials, a project ID, and a staging bucket.
from utils import authenticate
credentials, PROJECT_ID, STAGING_BUCKET = authenticate()

# RLHF pipeline is available in this region
REGION = "europe-west4"

In [None]:
import google.cloud.aiplatform as aiplatform

In [None]:
# Initialise the Vertex AI SDK with the project, region, and credentials
# set up in the previous cells.
aiplatform.init(
    project=PROJECT_ID,
    location=REGION,
    credentials=credentials,
)

In [None]:
# Bare expression: as the last line of a notebook cell, this displays the
# compiled pipeline path.
RLHF_PIPELINE_PKG_PATH

**Create and run the pipeline job**

```python
job = aiplatform.PipelineJob(
    display_name="tutorial-rlhf-tuning",
    pipeline_root=STAGING_BUCKET,
    template_path=RLHF_PIPELINE_PKG_PATH,
    parameter_values=parameter_values)

job.run()
```

