In [None]:
class MealPlanner:
    """A single meal option that can be offered to the chain as an action.

    Attributes:
        name: Title of the meal.
        desc: Free-text description of the meal.
        diff: Difficulty label (taken from the ``difficulty`` argument).
        tags: Tags as one string (comma-separated in the examples); may be empty.
    """

    def __init__(self, name: str, desc: str, difficulty: str, tags: str):
        """Store the meal's fields unchanged on the instance."""
        self.name = name
        self.desc = desc
        self.diff = difficulty
        self.tags = tags

In [None]:
## Actions
## Example meals; the text was copied from the HelloFresh website.
actions = [
    MealPlanner(
        name="One-Pan Beef Enchiladas Verdes with Mexican Cheese Blend & Hot Sauce Crema",
        difficulty="Easy",
        tags="Spicy, Easy Cleanup, Easy Prep",
        desc="When it comes to Mexican-style cuisine, burritos typically get all the glory. In our humble opinion, enchiladas are an unsung dinner hero. They’re technically easier-to-assemble burritos that get smothered in a delicious sauce, but they’re really so much more than that! Ours start with spiced beef and charred green pepper that get rolled up in warm tortillas. This winning combo gets topped with tangy salsa verde and cheese, then baked until bubbly and melty. Hear that? That’s the sound of the dinner bell!",
    ),
    MealPlanner(
        name="Chicken & Mushroom Flatbreads with Gouda Cream Sauce & Parmesan",
        difficulty="Easy",
        tags="",
        desc="Yes we love our simple cheese pizza with red sauce but tonight, move over, marinara—there’s a new sauce in town. In this recipe, crispy flatbreads are slathered with a rich, creamy gouda-mustard sauce we just can’t get enough of. We top that off with a pile of caramelized onion and earthy cremini mushrooms. Shower with Parmesan, and that’s it. Simple, satisfying, and all in 30 minutes–a dinner idea you can’t pass up!",
    ),
    MealPlanner(
        name="Sweet Potato & Pepper Quesadillas with Southwest Crema & Tomato Salsa",
        difficulty="Easy",
        tags="Veggie",
        desc="This quesadilla is jam-packed with flavorful roasted sweet potato and green pepper, plus two types of gooey, melty cheese (how could we choose just one?!). Of course, we’d never forget the toppings—there’s a fresh tomato salsa and dollops of spiced lime crema. Now for the fun part: piling on a little bit of everything to construct the perfect bite!",
    ),
    MealPlanner(
        name="One-Pan Trattoria Tortelloni Bake with a Crispy Parmesan Panko Topping",
        difficulty="Easy",
        tags="Veggie, Easy Cleanup, Easy Prep",
        desc="Think a cheesy stuffed pasta can’t get any better? What about baking it in a creamy sauce with a crispy topping? In this recipe, we toss cheese-stuffed tortelloni in an herby tomato cream sauce, then top with Parmesan and panko breadcrumbs. Once broiled, it turns into a showstopping topping that’ll earn you plenty of oohs and aahs from your lucky fellow diners.",
    ),
]

# Flatten every meal into one "title=... description=... tags=..." string.
meals = []
for meal_option in actions:
    flattened = "".join(
        (
            "title=", meal_option.name,
            " description=", meal_option.desc,
            " tags=", meal_option.tags,
        )
    )
    # ":" and "|" are stripped from the text; presumably they are reserved
    # characters in the downstream learner's input format -- TODO confirm.
    meals.append(flattened.replace(":", "").replace("|", ""))


In [None]:
from langchain.chat_models import AzureChatOpenAI
import langchain
# Leave LangChain's global debug flag disabled.
langchain.debug = False

# The LLM API keys are assumed to be set in the environment already.
# AzureChatOpenAI is only an example; any other LLM can be used here instead.
llm = AzureChatOpenAI(
    client=None,
    deployment_name="gpt-35-turbo",
    max_retries=1,
    request_timeout=10,
    temperature=0,
)

# Send one trivial prompt; the reply is shown as the cell output.
llm.predict("Are you ready?")

##### Default chain with the default reward (the LLM is used to judge and rank the response)

In [None]:
import rl_chain

from langchain.prompts.prompt import PromptTemplate

# Prompt text with two placeholders: {meal} is the option to select from and
# {text_to_personalize} is the text the meal should be embedded into.
_PROMPT_TEMPLATE = """Here is the description of a meal: {meal}.

You have to embed this into the given text where it makes sense. Here is the given text: {text_to_personalize}.

"""

PROMPT = PromptTemplate(
    template=_PROMPT_TEMPLATE,
    input_variables=["meal", "text_to_personalize"],
)

# PickBest chain whose responses are scored by AutoValidatePickBest, which is
# given the same LLM as the chain itself.
chain = rl_chain.pick_best_chain.PickBest.from_llm(
    prompt=PROMPT,
    llm=llm,
    response_validator=rl_chain.pick_best_chain.AutoValidatePickBest(llm=llm),
)


In [None]:
# Embedding both the action features and the context features is recommended.

for _ in range(3):
    response = chain.run(
        User=rl_chain.BasedOn(rl_chain.Embed("Tom")),
        preference=rl_chain.BasedOn(rl_chain.Embed("Vegetarian, regular dairy is ok")),
        text_to_personalize="This is the weeks specialty dish, our master chefs believe you will love it!",
        meal=rl_chain.Embed(rl_chain.ToSelectFrom(meals)),
    )
    rr = response["response_result"]

    print(response["response"])
    # NOTE(review): this cell reads the cost via `rr.label.cost`, while the other
    # cells in this notebook read `rr.cost` -- confirm which path is current.
    print(f"cost: {rr.label.cost}, action: {rr.chosen_action}, probability: {rr.chosen_action_probability}, ")

In [None]:
from langchain.prompts.prompt import PromptTemplate

# Generic prompt: {action} is the selected action, while {some_text} and
# {some_other_text} are ordinary variables supplied when the chain is run.
_OTHER_PROMPT_TEMPLATE = """You can use the actions that were chosen by VW like so: {action}.

And use whatever other vars you want to pass into the chain at run: {some_text}. And {some_other_text}

"""

OTHER_PROMPT = PromptTemplate(
    template=_OTHER_PROMPT_TEMPLATE,
    input_variables=["action", "some_text", "some_other_text"],
)

In [None]:
import rl_chain.pick_best_chain

# Same kind of chain as before, now using OTHER_PROMPT and a directory for
# the model checkpoints.
chain = rl_chain.pick_best_chain.PickBest.from_llm(
    prompt=OTHER_PROMPT,
    llm=llm,
    response_validator=rl_chain.pick_best_chain.AutoValidatePickBest(llm=llm),
    model_save_dir="./models",  # checkpoints are written here
)

In [None]:
# Plain strings (no Embed wrapper) are passed for both the actions and the context.
response = chain.run(
    some_text="This is some text",
    some_other_text="This is some other text",
    action=rl_chain.ToSelectFrom(["an action", "another action", "a third action"]),
    User=rl_chain.BasedOn("Tom"),
    preference=rl_chain.BasedOn("Vegetarian"),
)
rr = response["response_result"]

print(response["response"])
print(f"cost: {rr.cost}, action: {rr.chosen_action}, probability: {rr.chosen_action_probability}, ")

#### actions and context with multiple namespaces

In [None]:
# Every action is a dict that maps a namespace to an action string.
# Embedding is recommended for all features but it is optional and can be
# chosen per feature: only namespace "B" of the first action is embedded here.
action_strs_w_ns = [
    {"A": "an action feature", "B": rl_chain.Embed("antoher action feature")},
    {"B": "another action"},
    {"C": "a third action"},
]

# All chain inputs are collected in one dict that is passed positionally to run().
inputs = {
    "some_text": "This is some text",
    "some_other_text": "This is some other text",
    "action": rl_chain.ToSelectFrom(action_strs_w_ns),
    "User": rl_chain.BasedOn("Tom"),
    "preference": rl_chain.BasedOn(rl_chain.Embed("Vegetarian")),
}

response = chain.run(inputs)
rr = response["response_result"]
print(response["response"])
print(f"cost: {rr.cost}, action: {rr.chosen_action}, probability: {rr.chosen_action_probability}, ")


In [None]:
# Store a checkpoint to file. Each call overrides the existing checkpoint
# until the chain is restarted.
# NOTE(review): the target is presumably the `model_save_dir` passed when the
# chain was built -- confirm.
chain.save_progress()

##### chain with default prompt and custom reward prompt (the LLM is used to judge and rank the response)

In [None]:
from langchain.prompts import (
    ChatPromptTemplate,
    HumanMessagePromptTemplate,
)

# Human message for the reward prompt; {preference} and {action} are its placeholders.
human_template = "Given {preference} rank how good or bad this selection is {action}"
human_message_prompt = HumanMessagePromptTemplate.from_template(human_template)

# The reward prompt is the validator's default system prompt followed by the
# custom human message.
REWARD_PROMPT = ChatPromptTemplate.from_messages(
    [
        rl_chain.pick_best_chain.AutoValidatePickBest.default_system_prompt,
        human_message_prompt,
    ]
)

In [None]:
import rl_chain

# Chain whose validator is given the custom REWARD_PROMPT.
chain = rl_chain.pick_best_chain.PickBest.from_llm(
    model_save_dir="./models",  # checkpoints are written here
    prompt=OTHER_PROMPT,
    llm=llm,
    response_validator=rl_chain.pick_best_chain.AutoValidatePickBest(
        llm=llm,
        prompt=REWARD_PROMPT,
    ),
)

In [None]:
# NOTE: this rebinds `actions`, which earlier in the notebook holds the list of
# MealPlanner objects, to a list of plain strings.
actions = ["an action", "another action", "a third action"]

response = chain.run(
    some_text="Some text",
    some_other_text="Some other text",
    action=rl_chain.ToSelectFrom(actions),
    User=rl_chain.BasedOn("Tom"),
    preference=rl_chain.BasedOn("Vegetarian"),
)
rr = response["response_result"]
print(response["response"])
print(f"cost: {rr.cost}, action: {rr.chosen_action}, probability: {rr.chosen_action_probability}, ")

##### other reward options

custom reward class

In [None]:
# A custom reward is just another class that inherits from ResponseValidator and implements the grade_response method
import rl_chain

class CustomResponseValidator(rl_chain.ResponseValidator):
    """Example validator that grades every response with the same reward."""

    def grade_response(self, inputs, llm_response: str) -> float:
        """Return the reward for one response.

        Args:
            inputs: The inputs that were supplied to the chain.
            llm_response: The text produced by the LLM.

        Returns:
            The reward as a float; this example always returns 1.0.
        """
        # Placeholder logic: compute the reward from `inputs` and
        # `llm_response` in whatever way suits the application.
        return 1.0
    
# set this in the chain during construction (response_validator=CustomResponseValidator()) and it will be auto-called

Asynchronous user-defined reward

In [None]:
import rl_chain

# This chain is built without a response_validator; the reward is sent back
# to the chain explicitly once it is known.
chain = rl_chain.pick_best_chain.PickBest.from_llm(prompt=PROMPT, llm=llm)

response = chain.run(
    text_to_personalize="This is the weeks specialty dish, our master chefs believe you will love it!",
    meal=rl_chain.ToSelectFrom(meals),
    User=rl_chain.BasedOn(rl_chain.Embed("Tom")),
    preference=rl_chain.BasedOn("Vegetarian"),
)
rr = response["response_result"]
print(response["response"])
# The cost is expected to be None here because nothing has scored the response
# yet (presumably a consequence of omitting the validator -- confirm).
print(f"cost: {rr.cost}, action: {rr.chosen_action}, probability: {rr.chosen_action_probability}, ")

# Whenever the reward for this call becomes available, report it so the chain
# can learn from it.
chain.learn_delayed_reward(reward=1.0, response_result=rr)