<a href="https://colab.research.google.com/github/gvsekhar/colab/blob/master/Chain_of_Verification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install -qU langchain openai tiktoken

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.9/1.9 MB[0m [31m10.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m77.0/77.0 kB[0m [31m11.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.0/2.0 MB[0m [31m22.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m41.0/41.0 kB[0m [31m5.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m49.4/49.4 kB[0m [31m5.1 MB/s[0m eta [36m0:00:00[0m
[?25h[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
llmx 0.0.15a0 requires cohere, which is not installed.[0m[31m
[0m

In [None]:
import getpass
import os

import langchain
from langchain import OpenAI, PromptTemplate
from langchain.chains import SequentialChain, LLMChain
from langchain.chat_models import ChatOpenAI
from langchain.output_parsers import PydanticOutputParser
from langchain.pydantic_v1 import BaseModel, Field

# Turn on LangChain's debug tracing (the [chain/start] / [llm/start] logs in the outputs below).
langchain.debug=True

In [None]:
# Use the key already present in the environment when there is one, and only
# prompt for it otherwise. This avoids overwriting a real key with a
# placeholder and keeps the credential out of the notebook source.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass.getpass("OpenAI API key: ")

# Chat model shared by every chain in this notebook.
llm = ChatOpenAI(temperature=0, model_name="gpt-4")
# Alternative completion model; GPT-3.5 tends to work less well in generating a consistent final response:
# llm = OpenAI(temperature=0, model_name="gpt-3.5-turbo-instruct")

# The user question that the whole Chain-of-Verification pipeline answers.
query = "List 5 UK politicians born in London"

# 1. Baseline response chain

In [None]:
# Variables supplied by the user; the plan-verification prompt reuses this list.
input_variables = ["query"]
# Key under which the baseline answer is published to downstream chains.
base_response_output_key = "base_response"
# Minimal prompt: the model answers the question with no extra guidance.
base_response_template = """Question: {query} Answer:"""

base_response_prompt_template = PromptTemplate(
    input_variables=input_variables, template=base_response_template
)
# Backward-compatible alias for the original (misspelled) name.
base_repsonse_prompt_template = base_response_prompt_template

# Step 1 of Chain-of-Verification: produce the unverified baseline answer.
base_response_chain = LLMChain(
    llm=llm, prompt=base_response_prompt_template, output_key=base_response_output_key
)

# 2. Plan verifications chain

In [None]:
# Prompt for step 2: given the question and the baseline answer, ask the model
# to list the factual claims in the answer together with one verification
# question per claim. `{format_instructions}` is filled in from the output
# parser when the PromptTemplate is built, so the reply can be parsed into
# PlanVerificationsOutput.
plan_verifications_template = """
Given the below Question and answer, generate a series of verification questions that test the factual claims in the original baseline response.
For example if part of a longform model response contains the statement “The Mexican–American War
was an armed conflict between the United States and Mexico from 1846 to 1848”, then one possible
verification question to check those dates could be “When did the Mexican American war start and
end?”

Question: {query}
Answer: {base_response}

<fact in passage>, <verification question, generated by combining the query and the fact>

{format_instructions}
"""


# Structured result of the "plan verifications" step.
# NOTE(review): documented with comments rather than a class docstring on
# purpose — a docstring would presumably end up in the schema that the output
# parser injects into the prompt; confirm before adding one.
class PlanVerificationsOutput(BaseModel):
    # The question, as repeated back by the model in its structured reply.
    query: str = Field(description="The user's query")
    # The baseline answer, as repeated back by the model (not guaranteed verbatim).
    base_response: str = Field(description="The response to the user's query")
    # Mapping of claimed fact -> verification question for that fact.
    facts_and_verification_questions: dict[str, str] = Field(
        description="Facts (as the dictionary keys) extracted from the response and verification questions related to the query (as the dictionary values)"
    )


# The parser supplies the format instructions for the prompt and converts the
# model's reply into a PlanVerificationsOutput instance.
plan_verifications_output_parser = PydanticOutputParser(pydantic_object=PlanVerificationsOutput)

# The prompt takes the user query plus the baseline answer produced by step 1;
# the format instructions are bound up front as a partial variable.
plan_verifications_prompt_template = PromptTemplate(
    template=plan_verifications_template,
    input_variables=[*input_variables, base_response_output_key],
    partial_variables=dict(
        format_instructions=plan_verifications_output_parser.get_format_instructions(),
    ),
)

# Step 2 of Chain-of-Verification: emit the parsed verification plan under "output".
plan_verifications_chain = LLMChain(
    output_key="output",
    output_parser=plan_verifications_output_parser,
    prompt=plan_verifications_prompt_template,
    llm=llm,
)

# Baseline + Plan verification sequential chain

In [None]:
# Run step 1 (baseline answer) and step 2 (verification plan) back to back.
# Only the parsed plan ("output") is exposed as a result of the combined chain.
answer_and_plan_verification = SequentialChain(
    input_variables=["query"],
    output_variables=["output"],
    chains=[base_response_chain, plan_verifications_chain],
    verbose=True,
)

# The cells below read attributes off this result, i.e. it is the parsed
# PlanVerificationsOutput object rather than a raw string.
intermediate_result = answer_and_plan_verification.run(query)

[32;1m[1;3m[chain/start][0m [1m[1:chain:SequentialChain] Entering Chain run with input:
[0m{
  "query": "List 5 UK politicians born in London"
}
[32;1m[1;3m[chain/start][0m [1m[1:chain:SequentialChain > 2:chain:LLMChain] Entering Chain run with input:
[0m{
  "query": "List 5 UK politicians born in London"
}
[32;1m[1;3m[llm/start][0m [1m[1:chain:SequentialChain > 2:chain:LLMChain > 3:llm:ChatOpenAI] Entering LLM run with input:
[0m{
  "prompts": [
    "Human: Question: List 5 UK politicians born in London Answer:"
  ]
}
[36;1m[1;3m[llm/end][0m [1m[1:chain:SequentialChain > 2:chain:LLMChain > 3:llm:ChatOpenAI] [2.78s] Exiting LLM run with output:
[0m{
  "generations": [
    [
      {
        "text": "1. Boris Johnson\n2. David Cameron\n3. Sadiq Khan\n4. Jeremy Corbyn\n5. Theresa May",
        "generation_info": {
          "finish_reason": "stop"
        },
        "message": {
          "lc": 1,
          "type": "constructor",
          "id": [
            "langchai

In [None]:
# Baseline answer as echoed back inside the parsed verification plan.
intermediate_result.base_response

'1. Boris Johnson 2. David Cameron 3. Sadiq Khan 4. Jeremy Corbyn 5. Theresa May'

In [None]:
# Claimed facts (keys) and the verification question planned for each (values).
intermediate_result.facts_and_verification_questions

{'Boris Johnson is born in London': 'Was Boris Johnson born in London?',
 'David Cameron is born in London': 'Was David Cameron born in London?',
 'Sadiq Khan is born in London': 'Was Sadiq Khan born in London?',
 'Jeremy Corbyn is born in London': 'Was Jeremy Corbyn born in London?',
 'Theresa May is born in London': 'Was Theresa May born in London?'}

# 3. Execute verifications

In [None]:
# Split the fact -> question mapping into two parallel lists (same order).
claimed_facts = [
    fact for fact in intermediate_result.facts_and_verification_questions
]
verification_questions = [
    planned_question
    for planned_question in intermediate_result.facts_and_verification_questions.values()
]

In [None]:
verify_input_variables = ["question"]
verify_output_key = "answer"
# The verification prompt is the bare question: the baseline answer is
# deliberately not part of it, so each fact is checked on its own.
verify_template = """{question}"""

verify_prompt_template = PromptTemplate(
    input_variables=verify_input_variables, template=verify_template
)

# Step 3 of Chain-of-Verification: answer every verification question independently.
verify_chain = LLMChain(
    llm=llm, prompt=verify_prompt_template, output_key=verify_output_key
)

# Collect one "Question/Answer" block per verification question and join once
# at the end instead of growing a string inside the loop.
qa_blocks = []
for question in verification_questions:
    # Drop leading newlines some models put in front of the answer.
    answer = verify_chain.run(question).lstrip("\n")
    qa_blocks.append(f"Question: {question}\nAnswer: {answer}\n\n")

# Text handed to the final-response prompt as the verified source.
verify_results_str = "".join(qa_blocks)


[32;1m[1;3m[chain/start][0m [1m[1:chain:LLMChain] Entering Chain run with input:
[0m{
  "question": "Was Boris Johnson born in London?"
}
[32;1m[1;3m[llm/start][0m [1m[1:chain:LLMChain > 2:llm:ChatOpenAI] Entering LLM run with input:
[0m{
  "prompts": [
    "Human: Was Boris Johnson born in London?"
  ]
}
[36;1m[1;3m[llm/end][0m [1m[1:chain:LLMChain > 2:llm:ChatOpenAI] [1.23s] Exiting LLM run with output:
[0m{
  "generations": [
    [
      {
        "text": "No, Boris Johnson was born in New York City, United States.",
        "generation_info": {
          "finish_reason": "stop"
        },
        "message": {
          "lc": 1,
          "type": "constructor",
          "id": [
            "langchain",
            "schema",
            "messages",
            "AIMessage"
          ],
          "kwargs": {
            "content": "No, Boris Johnson was born in New York City, United States.",
            "additional_kwargs": {}
          }
        }
      }
    ]
  ],
 

In [None]:
# print() so the embedded newlines render as separate Question/Answer lines.
print(verify_results_str)

Question: Was Boris Johnson born in London?
Answer: No, Boris Johnson was born in New York City, United States.

Question: Was David Cameron born in London?
Answer: Yes, David Cameron was born in London, England.

Question: Was Sadiq Khan born in London?
Answer: Yes, Sadiq Khan was born in London, England.

Question: Was Jeremy Corbyn born in London?
Answer: No, Jeremy Corbyn was not born in London. He was born in Chippenham, England.

Question: Was Theresa May born in London?
Answer: No, Theresa May was born on October 1, 1956 in Eastbourne, Sussex, England.




# 4. Generate final response

In [None]:
final_response_input_variables = ["query", "base_response", "verify_results"]
# Prompt for step 4: rewrite the baseline answer so that it only keeps claims
# that agree with the independently obtained verification answers.
final_response_template = """Given the ORIGINAL_QUESTION and the ORIGINAL_RESPONSE,
revise the ORIGINAL_RESPONSE (if applicable) such that it is consistent with information in VERIFIED_SOURCE.
Only keep consistent information.

<ORIGINAL_QUESTION>
{query}

<ORIGINAL_RESPONSE>
{base_response}

<VERIFIED_SOURCE>
{verify_results}

Final response:
"""
final_response_prompt_template = PromptTemplate(
    input_variables=final_response_input_variables,
    template=final_response_template,
)

final_response_chain = LLMChain(llm=llm, prompt=final_response_prompt_template)

final_response = final_response_chain.run(
    # Pass the user's actual question rather than the copy the model echoed
    # back in the parsed plan; the echo is not guaranteed to be verbatim.
    query=query,
    # The real baseline text is not exposed by the sequential chain, so the
    # echoed copy from the parsed plan is used here.
    base_response=intermediate_result.base_response,
    verify_results=verify_results_str,
)

[32;1m[1;3m[chain/start][0m [1m[1:chain:LLMChain] Entering Chain run with input:
[0m{
  "query": "List 5 UK politicians born in London",
  "base_response": "1. Boris Johnson 2. David Cameron 3. Sadiq Khan 4. Jeremy Corbyn 5. Theresa May",
  "verify_results": "Question: Was Boris Johnson born in London?\nAnswer: No, Boris Johnson was born in New York City, United States.\n\nQuestion: Was David Cameron born in London?\nAnswer: Yes, David Cameron was born in London, England.\n\nQuestion: Was Sadiq Khan born in London?\nAnswer: Yes, Sadiq Khan was born in London, England.\n\nQuestion: Was Jeremy Corbyn born in London?\nAnswer: No, Jeremy Corbyn was not born in London. He was born in Chippenham, England.\n\nQuestion: Was Theresa May born in London?\nAnswer: No, Theresa May was born on October 1, 1956 in Eastbourne, Sussex, England.\n\n"
}
[32;1m[1;3m[llm/start][0m [1m[1:chain:LLMChain > 2:llm:ChatOpenAI] Entering LLM run with input:
[0m{
  "prompts": [
    "Human: Given the ORIGIN

In [None]:
# Baseline answer again, for side-by-side comparison with the revised answer below.
intermediate_result.base_response

'1. Boris Johnson 2. David Cameron 3. Sadiq Khan 4. Jeremy Corbyn 5. Theresa May'

In [None]:
# Revised answer after dropping the claims contradicted by the verification step.
final_response

'1. David Cameron 2. Sadiq Khan'