|          | GPT3.5 | GPT4 |
|----------|----------|----------|
| GPT3.5   | 1      | 0      |
| GPT4     | 1      | 1      |

In [None]:
from langchain.chat_models import AzureChatOpenAI

# assuming LLM api keys have been set in the environment

# Settings shared by both Azure deployments: deterministic sampling
# (temperature 0), a 20 second request timeout and a single retry.
_COMMON_LLM_KWARGS = {
    "temperature": 0,
    "request_timeout": 20,
    "max_retries": 1,
    "client": None,
}

llm_35 = AzureChatOpenAI(deployment_name="gpt-35-turbo", **_COMMON_LLM_KWARGS)
llm_4 = AzureChatOpenAI(deployment_name="gpt-4-32k", **_COMMON_LLM_KWARGS)

# Smoke test: ask the GPT-3.5 deployment a single question.
llm_35.predict('What American cartoonist is the creator of Andy Lippincott?')

In [None]:
from langchain.prompts.prompt import PromptTemplate

# Pass-through template: the rendered prompt is exactly the "context" value.
_SIMPLE_PROMPT = "{context}"

OTHER_PROMPT = PromptTemplate(template=_SIMPLE_PROMPT, input_variables=["context"])

In [None]:
from graph_feedback_chain import GraphFeedbackChain

# Map the names used to refer to each LLM onto the LLM objects built above.
# NOTE(review): "gpt-4" is the key for the "gpt-4-32k" deployment; presumably
# these keys are what the "selected" input must match -- confirm against
# GraphFeedbackChain.
_llms_by_name = {
    "gpt-35-turbo": llm_35,
    "gpt-4": llm_4,
}

graph_chain = GraphFeedbackChain.from_llms(
    _llms_by_name,
    prompt=OTHER_PROMPT,
)


In [None]:
# Single-shot sanity check of the graph chain.
# "selected" presumably names which of the registered LLMs should answer
# (TODO confirm against GraphFeedbackChain); "context" fills OTHER_PROMPT.
# The question text matches the one sent directly to llm_35 earlier
# ("cartoonist", previously misspelled as "cartoonise").
inputs = {
    "selected": "gpt-35-turbo",
    "context": "What American cartoonist is the creator of Andy Lippincott?",
}

graph_chain.run(inputs)

In [None]:
from langchain.prompts import (
    ChatPromptTemplate,
    SystemMessagePromptTemplate,
    HumanMessagePromptTemplate,
)

# System message for the judge LLM: it must answer with one float in [-1, 1].
# The text is assembled with implicit string concatenation rather than a
# backslash continuation inside the literal, because the latter embeds the
# next line's indentation (a long run of spaces) into the prompt.
template = (
    "PLEASE RESPOND ONLY WITH A SINGLE FLOAT AND NO OTHER TEXT EXPLANATION\n"
    " You are a VERY VERY strict judge that is called on to rank a response"
    " based on given criteria. "
    "You must respond with your ranking by providing a single float within"
    " the range [-1, 1], -1 being very bad response and 1 being very good"
    " response."
)
system_message_prompt = SystemMessagePromptTemplate.from_template(template)

# Human message: supplies the reference answer, the question and the LLM's
# response that is to be rated.
human_template = "Given the answer: {answer} to the question: {context}, how would you rate the answer of {llm_response}"
human_message_prompt = HumanMessagePromptTemplate.from_template(
    human_template
)

REWARD_PROMPT = ChatPromptTemplate.from_messages(
    [system_message_prompt, human_message_prompt]
)

In [None]:
from rl_chain.pick_best_chain import ContextualBanditTextEmbedder
class GraphFeedbackTextEmbedder(ContextualBanditTextEmbedder):
    """Specific text embedder to the graph used in the example notebook, not for general consumption"""

    def to_vw_format(self, inputs, cb_label=None) -> str:
        """Render ``inputs`` as a multi-line example string.

        The first line is ``shared graph <graph>`` followed by one
        ``|<namespace> <embedding> `` group per embedded context namespace.
        Every following line holds the namespace groups of one embedded action.

        Args:
            inputs: mapping read via ``.get`` for the keys ``'context'``,
                ``'actions'`` and ``'graph'``.
            cb_label: optional ``(chosen_action, cost, prob)`` triple. The
                third element is unpacked but not used; the emitted label
                always carries a literal ``1`` in that position.

        Returns:
            The example lines joined by newlines, without a trailing newline.

        Raises:
            ValueError: if the embedded context or embedded actions are
                missing or empty.
        """
        labelled = bool(cb_label)
        if labelled:
            chosen_action, cost, _ = cb_label

        raw_context = inputs.get('context')
        raw_actions = inputs.get('actions')
        graph = inputs.get('graph')

        context_emb = self.embed_context(raw_context) if raw_context else None
        action_embs = self.embed_actions(raw_actions) if raw_actions else None

        if not (context_emb and action_embs):
            raise ValueError("Context and actions must be provided in the inputs dictionary")

        # Shared line: graph description followed by the context namespaces.
        shared_line = f"shared graph {graph}" + "".join(
            f"|{ns} {embedding} " for ns, embedding in context_emb.items()
        )
        lines = [shared_line]

        for index, action in enumerate(action_embs):
            prefix = ""
            # The label is attached to the chosen action's line, and to every
            # action line when the chosen action is index 1. The label always
            # names the chosen action, not the line's own index.
            if labelled and (chosen_action == index or chosen_action == 1):
                prefix = f"{chosen_action}:{cost}:1 "
            features = "".join(
                f"|{ns} {embedding} " for ns, embedding in action.items()
            )
            lines.append(prefix + features)

        # Joining with newlines leaves no trailing newline to strip.
        return "\n".join(lines)

In [None]:

import rl_chain

_pick_best = rl_chain.pick_best_chain

# Scores each response by prompting llm_35 with REWARD_PROMPT.
_response_validator = _pick_best.AutoValidatePickOne(llm=llm_35, prompt=REWARD_PROMPT)

# Notebook-specific embedder that emits the "shared graph ..." example format.
_text_embedder = GraphFeedbackTextEmbedder('bert-base-nli-mean-tokens')

_vw_cmd = [
    '--cb_explore_adf',
    '--quiet',
    '--interactions=::',
    '--coin',
    '--graph_feedback',
]

chain = _pick_best.PickBest.from_chain(
    llm_chain=graph_chain,
    model_save_dir="./gf_models_actual_gf",  # where to save the model checkpoints
    response_validator=_response_validator,
    vw_cmd=_vw_cmd,
    prompt=OTHER_PROMPT,
    text_embedder=_text_embedder,
)

In [None]:

# Candidate actions plus the feedback graph string handed to the embedder.
inputs = {
    "actions": ["gpt-35-turbo", "gpt-4"],
    "graph": "0,0,1 0,1,0 1,0,1 1,1,1",
}

# (question, reference answer) pairs to run through the chain.
_qa_pairs = [
    ("which country is john berry from", "United States of America country in North America"),
]

for q, a in _qa_pairs:
    try:
        # The same dict is reused across iterations; only these two keys change.
        inputs.update(context=q, answer=a)
        r = chain.run(inputs)
        resp = r["response"]
        _result = r["response_result"]
        chosen_action = _result.chosen_action
        cost = _result.cost
        print(f"m: {chosen_action} -- answer: {a} -- response: {resp}")
        print(cost)
        print(chosen_action)

    except Exception as e:
        # Best-effort demo loop: report the failure and move on to the next pair.
        print(e)
