# Lesson 3: Evaluate the Tuned Model

## Explore results with TensorBoard

In [None]:
%load_ext tensorboard

In [None]:
# Open TensorBoard on the `reward-logs` directory (presumably the reward model trainer's logs — confirm).
%tensorboard --logdir reward-logs  

In [None]:
# Open TensorBoard on the `reinforcer-logs` directory (presumably the reinforcer component's logs — confirm).
%tensorboard --logdir reinforcer-logs 

In [None]:
# Open TensorBoard on `reinforcer-fulldata-logs` (presumably reinforcer logs from a full-dataset run — confirm).
%tensorboard --logdir reinforcer-fulldata-logs

# Note:
- The log files above were generated in the previous lab and uploaded here.
- To access the TensorBoard logs for tuning jobs in your own project:
  - Go to https://console.cloud.google.com/ and select your project.
  - Open the navigation (hamburger) menu and select Vertex AI.
  - In the Vertex AI section, select Pipelines; it lists all the pipelines that have been created.
  - Under Runs, select the pipeline to open its visualization.
  - Click the reward model trainer component to see its artifact called tensorboard_metrics; clicking that artifact opens a panel on the right-hand side showing its Google Cloud Storage URI.
  - Click that path to open the TensorBoard logs.
  - Similarly, you can find the log files for the reinforcer component.

In [None]:
# Parameter values pointing at the small (`text_small`) datasets.
parameter_values = {
    # Input datasets: gs:// glob patterns over .jsonl files.
    "preference_dataset": "gs://vertex-ai/generative-ai/rlhf/text_small/summarize_from_feedback_tfds/comparisons/train/*.jsonl",
    "prompt_dataset": "gs://vertex-ai/generative-ai/rlhf/text_small/reddit_tfds/train/*.jsonl",
    "eval_dataset": "gs://vertex-ai/generative-ai/rlhf/text_small/reddit_tfds/val/*.jsonl",
    # Reference name of the large model.
    "large_model_reference": "llama-2-7b",
    # Step counts and learning-rate multipliers for the reward-model and
    # reinforcement-learning stages.
    "reward_model_train_steps": 1410,
    "reinforcement_learning_train_steps": 320,
    "reward_model_learning_rate_multiplier": 1.0,
    "reinforcement_learning_rate_multiplier": 1.0,
    "kl_coeff": 0.1,
    # Instruction text associated with the summarization task.
    "instruction": "Summarize in less than 50 words",
}

In [ ]:
# Parameter values pointing at the full (`text`) datasets.
# Note: this rebinds `parameter_values`, replacing any earlier value of that name.
parameter_values = {
    # Input datasets: gs:// glob patterns over .jsonl files.
    "preference_dataset": "gs://vertex-ai/generative-ai/rlhf/text/summarize_from_feedback_tfds/comparisons/train/*.jsonl",
    "prompt_dataset": "gs://vertex-ai/generative-ai/rlhf/text/reddit_tfds/train/*.jsonl",
    "eval_dataset": "gs://vertex-ai/generative-ai/rlhf/text/reddit_tfds/val/*.jsonl",
    # Reference name of the large model.
    "large_model_reference": "llama-2-7b",
    # Step counts and learning-rate multipliers for the reward-model and
    # reinforcement-learning stages.
    "reward_model_train_steps": 10000,
    "reinforcement_learning_train_steps": 10000,
    "reward_model_learning_rate_multiplier": 1.0,
    "reinforcement_learning_rate_multiplier": 0.2,
    "kl_coeff": 0.1,
    # Instruction text associated with the summarization task.
    "instruction": "Summarize in less than 50 words",
}

In [None]:
import json
from utils import print_d
import pandas as pd

# Path of the uploaded evaluation results for the tuned model (JSON Lines).
# To get this file for your own run: Pipelines -> perform inference component
# -> bulk inferrer -> output parameters -> output_prediction_gcs_path ->
# URI link -> download the JSONL file.
eval_tuned_path = 'eval_results_tuned.jsonl'
eval_data_tuned = []

# Read with an explicit UTF-8 encoding (the platform default may differ) and
# skip blank lines, which would otherwise make json.loads raise.
with open(eval_tuned_path, encoding='utf-8') as f:
    eval_data_tuned.extend(json.loads(line) for line in f if line.strip())

In [None]:
# Pass the first tuned record to the `print_d` helper (presumably it
# pretty-prints the record — confirm against utils).
print_d(eval_data_tuned[0])

# Path of the evaluation results for the untuned model (JSON Lines).
eval_untuned_path = 'eval_results_untuned.jsonl'
eval_data_untuned = []

# Read with an explicit UTF-8 encoding (the platform default may differ) and
# skip blank lines, which would otherwise make json.loads raise.
with open(eval_untuned_path, encoding='utf-8') as f:
    eval_data_untuned.extend(json.loads(line) for line in f if line.strip())

# Same inspection for the first untuned record.
print_d(eval_data_untuned[0])

## Explore the results side by side in a dataframe

In [None]:
# Pull the pre-tokenized input text out of every tuned-results record.
prompts = [
    record['inputs']['inputs_pretokenized'] for record in eval_data_tuned
]

In [None]:
# Predictions recorded in the untuned-results file, one per record.
untuned_completions = [
    record['prediction'] for record in eval_data_untuned
]

In [None]:
# Predictions recorded in the tuned-results file, one per record.
tuned_completions = [
    record['prediction'] for record in eval_data_tuned
]

In [None]:
# Put each prompt next to the untuned ('base_model') and tuned completions.
# NOTE(review): prompts are taken from the tuned results only, so this assumes
# both result files list the same examples in the same order — confirm.
results = pd.DataFrame(
    {
        'prompt': prompts,
        'base_model': untuned_completions,
        'tuned_model': tuned_completions,
    }
)