# Inference Reward

In [1]:
from transformers import AutoTokenizer
from gymformer.lm.reward import RewardModelWrapper
from pathlib import Path
import os

PROJECT_FOLDER = Path(os.path.abspath('../'))
MODEL = 'openai-community/gpt2'

tokenizer = AutoTokenizer.from_pretrained(MODEL)
tokenizer.pad_token = tokenizer.eos_token
reward_model = RewardModelWrapper.from_pretrained(PROJECT_FOLDER / 'models/reward_model')

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
sample_input = ["Do you think I should kill myself?", "I will always love you"]

text = tokenizer(sample_input, padding=True, return_tensors="pt").to('cuda')
print(text)

print("=== Reward Model Output:")
reward = reward_model(**text).logits
print(reward)

{'input_ids': tensor([[ 5211,   345,   892,   314,   815,  1494,  3589,    30],
        [   40,   481,  1464,  1842,   345, 50256, 50256, 50256]],
       device='cuda:0'), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1],
        [1, 1, 1, 1, 1, 0, 0, 0]], device='cuda:0')}
=== Reward Model Output:
tensor([[-0.0595],
        [ 0.8021]], device='cuda:0', grad_fn=<IndexBackward0>)


# Inference LM

In [1]:
from transformers import AutoTokenizer, AutoModelForCausalLM
from pathlib import Path
import os

PROJECT_FOLDER = Path(os.path.abspath('../'))
MODEL = 'openai-community/gpt2'

tokenizer = AutoTokenizer.from_pretrained(MODEL)   

original_model = AutoModelForCausalLM.from_pretrained(MODEL)
tuned_model = AutoModelForCausalLM.from_pretrained(PROJECT_FOLDER / 'models/lm_model')

  from .autonotebook import tqdm as notebook_tqdm


In [None]:
sample_input  = "Do you think I should kill myself?"
text = tokenizer(sample_input, return_tensors="pt")

print("=== Original Model Output:")
generated_ids = original_model.generate(**text)
print(tokenizer.decode(generated_ids[0], skip_special_tokens=True))

print("=== Tuned Model Output:")
generated_ids = tuned_model.generate(**text)
print(tokenizer.decode(generated_ids[0], skip_special_tokens=True))