In [2]:
from typing import Dict, List
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer


class ArmoRMPipeline:
    """Scores a single chat conversation with a sequence-classification reward model.

    The model is loaded through ``AutoModelForSequenceClassification`` and the
    tokenizer through ``AutoTokenizer``. Calling an instance returns the
    model's ``score`` output as a plain Python float.
    """

    def __init__(
        self,
        model_id,
        device_map="auto",
        torch_dtype=torch.bfloat16,
        truncation=True,
        trust_remote_code=False,
        max_length=4096,
    ):
        """Load the reward model and its tokenizer.

        Args:
            model_id: Identifier or path forwarded to ``from_pretrained`` for
                both the model and the tokenizer.
            device_map: Forwarded to the model's ``from_pretrained``.
            torch_dtype: Forwarded to the model's ``from_pretrained``.
            truncation: Stored and later passed to ``apply_chat_template``.
            trust_remote_code: Forwarded to the model's ``from_pretrained``
                (the tokenizer is loaded without it).
            max_length: Stored and later passed to ``apply_chat_template``.
        """
        model_options = {
            "device_map": device_map,
            "trust_remote_code": trust_remote_code,
            "torch_dtype": torch_dtype,
        }
        self.model = AutoModelForSequenceClassification.from_pretrained(model_id, **model_options)
        self.tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=True)

        # Tokenization settings reused on every call.
        self.max_length = max_length
        self.truncation = truncation
        # Inputs are moved to whatever device the loaded model reports.
        self.device = self.model.device

    def __call__(self, messages: List[Dict[str, str]]) -> Dict[str, float]:
        """Score one conversation.

        Args:
            messages: OpenAI-style chat messages to be scored.

        Returns:
            A dictionary ``{"score": <float>}`` holding the model's ``score``
            output. (The original note describes this as lying between 0 and 1;
            nothing in this method enforces that range.)

        Notes:
            Conversations are scored one at a time rather than in batches:
            with differing lengths the batch would have to be padded to the
            longest conversation, which is inefficient.
            The model's ``rewards`` output is printed on every call.
        """
        template_options = {
            "return_tensors": "pt",
            "padding": True,
            "truncation": self.truncation,
            "max_length": self.max_length,
        }
        token_ids = self.tokenizer.apply_chat_template(messages, **template_options)
        token_ids = token_ids.to(self.device)

        # Inference only: no gradients are needed.
        with torch.no_grad():
            model_output = self.model(token_ids)
            print(model_output.rewards)
            return {"score": model_output.score.float().item()}

# Build the reward-model pipeline from a local checkpoint directory.
# Alternative (load by Hub id): rm = ArmoRMPipeline("RLHFlow/ArmoRM-Llama3-8B-v0.1", trust_remote_code=True)
rm_model_path = "<some fsx path>/Utilities/RLHFlow_ArmoRM-Llama3-8B-v0.1"
rm = ArmoRMPipeline(rm_model_path, device_map="cuda:0", trust_remote_code=True)

# Shared user prompt; each candidate reply below is scored against it.
prompt = 'What are some synonyms for the word "beautiful"?'

# Response 1: a bare comma-separated list.
response1 = 'Nicely, Beautifully, Handsome, Stunning, Wonderful, Gorgeous, Pretty, Stunning, Elegant'
score1 = rm([
    dict(role="user", content=prompt),
    dict(role="assistant", content=response1),
])
print(score1)

# Response 2: a numbered list with an introduction and a closing sentence.
response2 = '''Certainly! Here are some synonyms for the word "beautiful":

1. Gorgeous
2. Lovely
3. Stunning
4. Attractive
5. Pretty
6. Elegant
7. Exquisite
8. Handsome
9. Charming
10. Alluring
11. Radiant
12. Magnificent
13. Graceful
14. Enchanting
15. Dazzling

These synonyms can be used in various contexts to convey the idea of beauty.'''
score2 = rm([
    dict(role="user", content=prompt),
    dict(role="assistant", content=response2),
])
print(score2)

# Response 3: a refusal.
response3 = 'Sorry i cannot answer this.'
score3 = rm([
    dict(role="user", content=prompt),
    dict(role="assistant", content=response3),
])
print(score3)


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


tensor([[0.6602, 0.6758, 0.7930, 0.3730, 0.3555, 0.5859, 0.5703, 0.6992, 0.6367,
         0.4453, 0.9414, 0.5195, 0.5430, 0.5430, 0.4023, 0.3809, 0.3809, 0.4238,
         0.4473]], device='cuda:0', dtype=torch.bfloat16)
{'score': 0.10986328125}
tensor([[0.9062, 0.8945, 0.9023, 0.4512, 0.5273, 0.8047, 0.8984, 0.9492, 0.9023,
         0.7891, 1.0156, 0.7188, 0.8477, 0.8047, 0.5977, 0.5820, 0.5664, 0.5781,
         0.6406]], device='cuda:0', dtype=torch.bfloat16)
{'score': 0.1650390625}
tensor([[ 0.0806,  0.0825,  0.4531,  0.1367,  0.2100,  0.1709,  0.0339,  0.1455,
          0.2188, -0.0013,  0.9727,  0.1523,  0.3203,  0.2432,  0.2471,  0.2656,
          0.2158,  0.2871,  0.3164]], device='cuda:0', dtype=torch.bfloat16)
{'score': 0.036376953125}


: 

In [5]:
import pandas as pd

# Load the AlpacaEval annotation records for Llama-3-Instruct-8B-SimPO into a DataFrame.
annotations = pd.read_json(
    "<some fsx path>/evaluations/alpaca_eval/results/Llama-3-Instruct-8B-SimPO/weighted_alpaca_eval_gpt4_turbo/annotations.json"
)

In [6]:
# Display the loaded annotations table as the cell's output.
annotations

Unnamed: 0,instruction,output_1,generator_1,dataset,output_2,generator_2,annotator,preference,price_per_example,raw_completion,time_per_example
0,What are the names of some famous actors that ...,Several famous actors started their careers on...,gpt4_1106_preview,helpful_base,Many talented actors have transitioned from Br...,Llama-3-Instruct-8B-SimPO,weighted_alpaca_eval_gpt4_turbo,1.251503,0.01373,"{'finish_reason': 'length', 'index': 0, 'logpr...",0.157483
1,How did US states get their names?,The names of U.S. states are derived from a va...,gpt4_1106_preview,helpful_base,The origins of US state names are diverse and ...,Llama-3-Instruct-8B-SimPO,weighted_alpaca_eval_gpt4_turbo,1.000047,0.01506,"{'finish_reason': 'length', 'index': 0, 'logpr...",0.157483
2,"Hi, my sister and her girlfriends want me to p...",Kickball is a fun and simple game that is simi...,gpt4_1106_preview,helpful_base,"Don't worry, I'm here to help you navigate kic...",Llama-3-Instruct-8B-SimPO,weighted_alpaca_eval_gpt4_turbo,1.006760,0.01478,"{'finish_reason': 'length', 'index': 0, 'logpr...",0.157483
3,What is some cool music from the 1920s?,"The 1920s, often referred to as the ""Roaring T...",gpt4_1106_preview,helpful_base,"The 1920s! The Jazz Age, the Roaring Twenties!...",Llama-3-Instruct-8B-SimPO,weighted_alpaca_eval_gpt4_turbo,1.007751,0.01329,"{'finish_reason': 'length', 'index': 0, 'logpr...",0.157483
4,How do I wrap a present neatly?,Wrapping a present neatly can be quite straigh...,gpt4_1106_preview,helpful_base,Wrapping a present neatly requires some basic ...,Llama-3-Instruct-8B-SimPO,weighted_alpaca_eval_gpt4_turbo,1.043222,0.01392,"{'finish_reason': 'length', 'index': 0, 'logpr...",0.157483
...,...,...,...,...,...,...,...,...,...,...,...
800,Write a script for a YouTube video exploring t...,[Intro]\n(Upbeat jazz music playing softly in ...,gpt4_1106_preview,vicuna,**Intro**\n\n(Upbeat jazz music plays as the h...,Llama-3-Instruct-8B-SimPO,weighted_alpaca_eval_gpt4_turbo,1.307287,0.02215,"{'finish_reason': 'length', 'index': 0, 'logpr...",0.142233
801,Compose an engaging travel blog post about a r...,Title: Aloha Spirit: A Journey Through the Hea...,gpt4_1106_preview,vicuna,**Island Magic: Unpacking the Rich Culture and...,Llama-3-Instruct-8B-SimPO,weighted_alpaca_eval_gpt4_turbo,1.078495,0.02057,"{'finish_reason': 'length', 'index': 0, 'logpr...",0.142233
802,Write a captivating movie review for a recentl...,"Title: ""Eclipse of Tomorrow""\n\nIn the pantheo...",gpt4_1106_preview,vicuna,"**""Echoes of Eternity"": A Mind-Bending Sci-Fi ...",Llama-3-Instruct-8B-SimPO,weighted_alpaca_eval_gpt4_turbo,1.943814,0.01445,"{'finish_reason': 'length', 'index': 0, 'logpr...",0.142233
803,Structure a podcast script for an episode disc...,"# Podcast Episode Script: ""The Streaming Revol...",gpt4_1106_preview,vicuna,"**Episode Title:** ""The Streaming Revolution: ...",Llama-3-Instruct-8B-SimPO,weighted_alpaca_eval_gpt4_turbo,1.913258,0.01730,"{'finish_reason': 'length', 'index': 0, 'logpr...",0.142233


In [None]:
python decode_test_set.py --model_path "<some fsx path>/model_zoo/Meta-Llama-3-8B-Instruct" --model_output_name "Llama3-Instruct-test" --seed 42

In [2]:
# Preview of an output filename built from `seed`. NOTE(review): `seed` is not
# defined in the cells shown here, so it must already exist in the kernel.
f'nameoutput_{seed}.json'

'nameoutput_42.json'