# Prepare core LLM chain

In [1]:
# Third-party imports used by this cell.
import dotenv
import langchain
from langchain.chat_models import AzureChatOpenAI

# Flip to True if you want to see what the LLM is doing.
langchain.debug = False

# Load variables from a .env file before the client is constructed
# (presumably the Azure OpenAI endpoint/credentials live there -- confirm).
dotenv.load_dotenv()

# Chat model instance reused by every chain built later in this notebook.
llm = AzureChatOpenAI(
    client=None,
    deployment_name="gpt-35-turbo",
    max_retries=1,
    request_timeout=20,
    temperature=0,
)

# Smoke test: the reply is rendered as the cell output.
llm.predict('Are you ready?')

'As an AI language model, I am always ready to assist you. How may I help you today?'

# Vanilla LLMChain

In [2]:
from langchain.chains.llm import LLMChain
from langchain.prompts.prompt import PromptTemplate

# Plain LLMChain: the caller supplies a value for each of the three template
# variables (adjective, content, topic).
llm_chain = LLMChain(
    prompt=PromptTemplate(
        template="Hi, please create {adjective} {content} about {topic}.",
        input_variables=["adjective", "content", "topic"],
    ),
    llm=llm,
)

In [3]:
# Fill all three template variables with fixed values; the reply is the cell output.
llm_chain.run(adjective="funny", content="poem", topic="machine learning")

"In the world of tech, there's a new king\nAnd his name is Machine Learning\nHe's smart, he's quick, he's always learning\nAnd he's got us all yearning\n\nFor the day when he'll take over\nAnd we'll all be his loyal rover\nHe'll make decisions with precision\nAnd we'll all be in submission\n\nBut until that day arrives\nWe'll keep feeding him data and files\nWe'll teach him how to think and reason\nAnd he'll keep getting smarter each season\n\nSo let's raise a glass to Machine Learning\nThe future is bright, it's not concerning\nWe'll all be living in a world of ease\nThanks to our friend, Machine Learning, please!"

# Variable selection

In [6]:
import rl_chain
from langchain.prompts.prompt import PromptTemplate

# Rebind llm_chain to a slates personalizer chain built around the same llm
# and a prompt with the same three variables as the vanilla chain.
llm_chain = rl_chain.slates_chain.SlatesPersonalizerChain.from_llm(
    prompt=PromptTemplate(
        template="Hi, please create {adjective} {content} about {topic}",
        input_variables=["adjective", "content", "topic"],
    ),
    llm=llm,
)

2023-08-15 11:10:53,019 - rl_chain.rl_chain_base - INFO - learning from scratch


In [5]:
# Every variable is wrapped in ToSelectFrom, here with a single candidate each.
r = llm_chain.run(
    adjective=rl_chain.ToSelectFrom(["funny"]),
    content=rl_chain.ToSelectFrom(["poem"]),
    topic=rl_chain.ToSelectFrom(["machine learning"]),
)

# The result is indexed by key: the response itself, plus a result object
# exposing the actions and the context.
print(r["response"])
print(r["response_result"].actions)
print(r["response_result"].context)

It is OK to be unsure about some variable values: list several candidates in `ToSelectFrom` and let the chain choose among them.

In [None]:
# Same call as before, but adjective and topic now offer two candidates each.
llm_chain.run(
    adjective=rl_chain.ToSelectFrom(["funny", "scary"]),
    content=rl_chain.ToSelectFrom(["poem"]),
    topic=rl_chain.ToSelectFrom(["machine learning", "cats"]),
)

# Full loop

In [None]:
import rl_chain
from langchain.prompts.prompt import PromptTemplate
from langchain.prompts import (
    ChatPromptTemplate,
    HumanMessagePromptTemplate,
)

# Prompt skeleton with five slots: style, prefix, goal, context and suffix.
template = """
using style {style}

{prefix}
{goal}: {context}.
{suffix}
"""
prompt = PromptTemplate(
    template=template,
    input_variables=["prefix", "goal", "context", "suffix", "style"],
)

# Slates chain that writes its log to logs/stories.txt and keeps model
# checkpoints under ./models. The response validator is given the same llm and
# a chat prompt asking it to rank the response for the task
# (presumably the ranking is what the chain learns from -- confirm in rl_chain).
chain = rl_chain.slates_chain.SlatesPersonalizerChain.from_llm(
    llm=llm,
    prompt=prompt,
    vw_logs='logs/stories.txt',
    model_save_dir="./models",  # where to save the model checkpoints
    response_validator=rl_chain.slates_chain.LLMResponseValidatorForSlates(
        llm,
        prompt=ChatPromptTemplate.from_messages([
            rl_chain.slates_chain.LLMResponseValidatorForSlates.default_system_prompt,
            HumanMessagePromptTemplate.from_template(
                '''Given the task:
                           {goal}: {context}
                           rank how good or bad this response is:
                           {llm_response}.'''
            ),
        ]),
    ),
)

# prefix and suffix each offer two candidates, goal and context one each;
# `style` is passed as a plain string rather than wrapped in ToSelectFrom.
# (The first prefix used to be an f-string with no placeholders; it is now a
# regular string literal with the same value.)
chain.run(
    prefix = rl_chain.ToSelectFrom(['ALWAYS DO EXACTLY WHAT I ASK YOU!', 'Please do your best to help me.']),
    goal = rl_chain.ToSelectFrom(['Write a funny story about']),
    context = rl_chain.ToSelectFrom(['Friends series']),
    suffix = rl_chain.ToSelectFrom(['Please try to be as funny as possible.', '']),
    style = "Shakespeare")


In [None]:
import rl_chain
from langchain.prompts.prompt import PromptTemplate
# Imported here as well so this cell does not depend on an earlier cell having
# brought the chat prompt classes into the kernel namespace.
from langchain.prompts import (
    ChatPromptTemplate,
    HumanMessagePromptTemplate,
)

# Prompt skeleton with four slots: prefix, goal, context and suffix.
template = """
{prefix}
{goal}: {context}.
{suffix}
"""
prompt = PromptTemplate(
    input_variables=["prefix", "goal", "context", "suffix"],
    template=template,
)

# Slates chain that writes its log to logs/stories.txt and keeps model
# checkpoints under ./models. The response validator is given the same llm and
# a chat prompt asking it to rank the response for the task.
chain = rl_chain.slates_chain.SlatesPersonalizerChain.from_llm(
    llm=llm,
    vw_logs = 'logs/stories.txt',
    model_save_dir="./models",  # where to save the model checkpoints
    prompt = prompt,
    response_validator = rl_chain.slates_chain.LLMResponseValidatorForSlates(
        llm,
        prompt = ChatPromptTemplate.from_messages(
                [
                    rl_chain.slates_chain.LLMResponseValidatorForSlates.default_system_prompt,
                    HumanMessagePromptTemplate.from_template(
                        '''Given the task:
                           {goal}: {context}
                           rank how good or bad this response is:
                           {llm_response}.'''
                    )]
    ))
)
# Same story task, with the prefix candidates and the goal wrapped in
# rl_chain.Embed; context and suffix are passed as plain strings.
# (The first prefix used to be an f-string with no placeholders; it is now a
# regular string literal with the same value.)
chain.run(
    prefix = rl_chain.ToSelectFrom(rl_chain.Embed(['ALWAYS DO EXACTLY WHAT I ASK YOU!', 'Please do your best to help me.'])),
    goal = rl_chain.ToSelectFrom([rl_chain.Embed('Write a funny story about')]),
    context = rl_chain.ToSelectFrom(['Friends series']),
    suffix = rl_chain.ToSelectFrom(['Please try to be as funny as possible.', '']))

# Experiment with mock LLM

In [None]:
from typing import List
from tests.test_utils import MockValidator

class MockLLMChain:
    """Stand-in for an LLM chain that answers from a fixed lookup table.

    ``run`` converts ``prefix`` and ``suffix`` to ints, uses them as row and
    column indices into ``outcomes`` and returns the selected entry as a string.
    """

    # Lookup table; None on the class until __init__ assigns the instance value.
    outcomes: List[List[float]] = None

    def __init__(self, outcomes, prompt):
        """Keep the lookup table and the prompt on the instance."""
        self.prompt = prompt
        self.outcomes = outcomes

    def run(self, prefix, suffix, **kwargs):
        """Return the table entry at [int(prefix)][int(suffix)] as a string.

        Any additional keyword arguments are accepted and ignored.
        """
        row = self.outcomes[int(prefix)]
        return str(row[int(suffix)])

import rl_chain
from langchain.prompts.prompt import PromptTemplate

# Three-slot prompt: prefix, context and suffix, one per line.
template = """
{prefix}
{context}
{suffix}
"""
prompt = PromptTemplate(
    template=template,
    input_variables=["prefix", "context", "suffix"],
)

# Slates chain logging to logs/mock.txt, validated by the test MockValidator.
chain = rl_chain.slates_chain.SlatesPersonalizerChain.from_llm(
    llm=llm,
    prompt=prompt,
    response_validator=MockValidator(),
    vw_logs='logs/mock.txt',
    model_save_dir="./models",  # where to save the model checkpoints
)

# Replace the chain's llm_chain with the table-driven mock: its reply is
# outcomes[prefix][suffix], so no real LLM call is made for the response.
chain.llm_chain = MockLLMChain(
    outcomes=[[0, 0.3], [0.6, 0.9]],
    prompt=prompt,
)

# prefix and suffix candidates are the strings '0' and '1', which the mock
# converts to table indices.
chain.run(
    prefix=rl_chain.ToSelectFrom(['0', '1']),
    context=rl_chain.ToSelectFrom(['bla']),
    suffix=rl_chain.ToSelectFrom(['0', '1']),
)

In [None]:
import rl_chain
from matplotlib import pyplot as plt

# Two chains that differ only in the policy they are given: VwPolicy versus
# RandomPolicy. Both get the same prompt, validator and mocked llm_chain.
# NOTE(review): both chains also share one log file and one model directory --
# confirm that this is intended.
vw_chain = rl_chain.slates_chain.SlatesPersonalizerChain.from_llm(
    llm=llm,
    prompt=prompt,
    policy=rl_chain.VwPolicy,
    response_validator=MockValidator(),
    vw_logs='logs/mock.txt',
    model_save_dir="./models",  # where to save the model checkpoints
)
vw_chain.llm_chain = MockLLMChain(
    outcomes=[[0, 0.3], [0.6, 0.9]],
    prompt=prompt,
)

rnd_chain = rl_chain.slates_chain.SlatesPersonalizerChain.from_llm(
    llm=llm,
    prompt=prompt,
    policy=rl_chain.slates_chain.RandomPolicy,
    response_validator=MockValidator(),
    vw_logs='logs/mock.txt',
    model_save_dir="./models",  # where to save the model checkpoints
)
rnd_chain.llm_chain = MockLLMChain(
    outcomes=[[0, 0.3], [0.6, 0.9]],
    prompt=prompt,
)

# Run both chains 1000 times on identical inputs. Only `prefix` has more than
# one candidate; with suffix fixed to '0' the mock replies 0 or 0.6.
for i in range(1000):
    vw_chain.run(
        prefix=rl_chain.ToSelectFrom(['0', '1']),
        context=rl_chain.ToSelectFrom(['bla']),
        suffix=rl_chain.ToSelectFrom(['0']),
    )
    rnd_chain.run(
        prefix=rl_chain.ToSelectFrom(['0', '1']),
        context=rl_chain.ToSelectFrom(['bla']),
        suffix=rl_chain.ToSelectFrom(['0']),
    )

# Plot the mean of reward['r'] over a rolling window of 100 for each chain.
vw_chain.reward['r'].rolling(window=100).mean().plot(label="vw")
# rnd_chain was built with RandomPolicy; both chains are slates chains, so the
# curve is labelled by policy rather than "slates".
rnd_chain.reward['r'].rolling(window=100).mean().plot(label="random")
plt.legend()