In [5]:
import os

import dotenv
from langchain_openai import AzureChatOpenAI
from langchain_community.retrievers import WikipediaRetriever
from langchain_core.prompts import ChatPromptTemplate

# Load variables from a local .env file into the process environment so the
# deployment name can be read from it below.
dotenv.load_dotenv()
DEPLOYMENT_NAME = os.environ.get("DEPLOYMENT_NAME")

# Chat model shared by the chains in this notebook.
# NOTE(review): DEPLOYMENT_NAME is None when the variable is unset — confirm the
# environment (or .env file) defines it.
llm = AzureChatOpenAI(azure_deployment=DEPLOYMENT_NAME, model="gpt-4", temperature=0.3)


# Retriever that supplies the Wikipedia documents for each question; the two
# settings bound how many results come back and how much text each one keeps.
wiki = WikipediaRetriever(doc_content_chars_max=2000, top_k_results=6)

# Two-message prompt: a system message holding the instructions plus the
# retrieved articles ({context}), followed by the user's {question}.
prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You're a helpful AI assistant. "
            "Given a user question and some Wikipedia article snippets, answer the user question. "
            "If none of the articles answer the question, just say you don't know."
            "\n\nHere are the Wikipedia articles:{context}",
        ),
        ("human", "{question}"),
    ]
)
# Print the template so the message layout can be checked by eye.
prompt.pretty_print()


You're a helpful AI assistant. Given a user question and some Wikipedia article snippets, answer the user question. If none of the articles answer the question, just say you don't know.

Here are the Wikipedia articles:[33;1m[1;3m{context}[0m


[33;1m[1;3m{question}[0m


In [6]:
from operator import itemgetter
from typing import List

from langchain_core.documents import Document
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import (
    RunnableLambda,
    RunnableParallel,
    RunnablePassthrough,
)


def format_docs(docs: List[Document]) -> str:
    """Render documents as a single string.

    Each document contributes an "Article Title / Article Snippet" entry built
    from ``doc.metadata['title']`` and ``doc.page_content``.

    Args:
        docs: Documents to render, in the order they should appear.

    Returns:
        The entries joined by blank lines, with a leading blank-line separator
        (so an empty ``docs`` yields just that separator).

    Raises:
        KeyError: If a document's metadata has no ``'title'`` key.
    """
    separator = "\n\n"
    entries = []
    for article in docs:
        heading = article.metadata["title"]
        entries.append(
            f"Article Title: {heading}\nArticle Snippet: {article.page_content}"
        )
    return separator + separator.join(entries)


# Take the retrieved documents out of the chain state and render them as text.
# NOTE(review): `format` shadows the builtin of the same name; the name is kept
# because other cells may refer to it.
format = itemgetter("docs") | RunnableLambda(format_docs)

# Answer subchain, used once retrieval is done: fill the prompt, call the model,
# and keep only the text of the reply.
answer = prompt | llm | StrOutputParser()

# Full pipeline: pass the question through while calling wiki, add the formatted
# context, run the answer subchain, then return just the answer and the docs.
chain = (
    RunnableParallel({"question": RunnablePassthrough(), "docs": wiki})
    .assign(context=format)
    .assign(answer=answer)
    .pick(["answer", "docs"])
)

In [8]:
# Run the full chain on a sample question and display only the "answer" entry
# of the result (the retrieved docs are returned too, under "docs").
chain.invoke("How fast are cheetahs?")["answer"]

'Cheetahs are capable of running at speeds of 93 to 104 km/h (58 to 65 mph), making them the fastest land animal.'

In [15]:
# Import from langchain_community (already a dependency of this notebook); the
# `langchain.callbacks` re-export of this helper is deprecated.
from langchain_community.callbacks import get_openai_callback

# Record the usage reported for a single chain call. The handler accumulates
# the totals for every model call made inside the `with` block.
with get_openai_callback() as cb:
    chain.invoke("Which club is the champion of 2024 UEFA Champion League")
    print(cb.total_cost)  # estimated cost of the call(s)
    print(cb.total_tokens)  # prompt + completion tokens
    print(cb.prompt_tokens)  # tokens sent to the model
    print(cb.completion_tokens)  # tokens generated by the model

0.16662
2753
2729
24


In [16]:
from langchain_core.pydantic_v1 import BaseModel, Field


# Tool schema for structured answers. The class is handed to llm.bind_tools in a
# later cell with tool_choice="cited_answer", so the class name must stay as is.
# NOTE(review): the docstring and Field descriptions are presumably forwarded to
# the model as the tool description — treat them as prompt text and confirm
# before rewording them.
class cited_answer(BaseModel):
    """Answer the user question based only on the given sources, and cite the sources used."""

    # Free-text answer. Required: Field(...) declares no default.
    answer: str = Field(
        ...,
        description="The answer to the user question, which is based only on the given sources.",
    )
    # Integer IDs of the sources backing the answer. Required, same as above.
    citations: List[int] = Field(
        ...,
        description="The integer IDs of the SPECIFIC sources which justify the answer.",
    )

In [20]:
# Bind the cited_answer schema as a tool and force the model to call it.
llm_with_tool = llm.bind_tools([cited_answer], tool_choice="cited_answer")

# Small hand-written question with three numbered sources, used to check that
# the model returns both an answer and the IDs of the sources it relied on.
example_q = (
    "What Brian's height?\n"
    "\n"
    "Source: 1\n"
    "Information: Suzy is 6'2\"\n"
    "\n"
    "Source: 2\n"
    "Information: Jeremiah is blonde\n"
    "\n"
    "Source: 3\n"
    "Information: Brian is 3 inches shorted than Suzy"
)
llm_with_tool.invoke(example_q)

AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_5anG6K4PwyBDh4eQXVFDN0F4', 'function': {'arguments': '{\n  "answer": "Brian is 5\'11\\"",\n  "citations": [1, 3]\n}', 'name': 'cited_answer'}, 'type': 'function'}]}, response_metadata={'token_usage': {'completion_tokens': 25, 'prompt_tokens': 147, 'total_tokens': 172}, 'model_name': 'gpt-4-32k', 'system_fingerprint': None, 'prompt_filter_results': [{'prompt_index': 0, 'content_filter_results': {'hate': {'filtered': False, 'severity': 'safe'}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': False, 'severity': 'safe'}}}], 'finish_reason': 'stop', 'logprobs': None, 'content_filter_results': {}}, id='run-6feb96cb-2207-4ec6-8524-8d67086cfccb-0', tool_calls=[{'name': 'cited_answer', 'args': {'answer': 'Brian is 5\'11"', 'citations': [1, 3]}, 'id': 'call_5anG6K4PwyBDh4eQXVFDN0F4'}], usage_metadata={'input_tokens': 147, 'output_tokens': 25, 'tot

In [21]:
# Import the parser from langchain_core, where it is defined and where this
# notebook's other parsers come from, rather than via the legacy
# `langchain.output_parsers` re-export.
from langchain_core.output_parsers.openai_tools import JsonOutputKeyToolsParser

# Extract the arguments of the "cited_answer" tool call as a plain dict;
# first_tool_only=True returns the first matching call instead of a list.
output_parser = JsonOutputKeyToolsParser(key_name="cited_answer", first_tool_only=True)
(llm_with_tool | output_parser).invoke(example_q)

{'answer': 'Brian is 5\'11"', 'citations': [1, 3]}

In [22]:
def format_docs_with_id(docs: List[Document]) -> str:
    """Render documents as a single string, tagging each with a Source ID.

    The Source ID is the document's zero-based position in ``docs``; the title
    comes from ``doc.metadata['title']`` and the snippet from
    ``doc.page_content``.

    Args:
        docs: Documents to render, in the order they should be numbered.

    Returns:
        The entries joined by blank lines, with a leading blank-line separator
        (so an empty ``docs`` yields just that separator).

    Raises:
        KeyError: If a document's metadata has no ``'title'`` key.
    """

    def _render(source_id: int, article: Document) -> str:
        # One entry: ID line, title line, snippet line.
        title = article.metadata["title"]
        return (
            f"Source ID: {source_id}\n"
            f"Article Title: {title}\n"
            f"Article Snippet: {article.page_content}"
        )

    separator = "\n\n"
    body = separator.join(_render(idx, article) for idx, article in enumerate(docs))
    return separator + body


# Citation variant of the retrieval pipeline: the context lists each document
# with a Source ID, and the model answers through the cited_answer tool.
format_1 = itemgetter("docs") | RunnableLambda(format_docs_with_id)

# Fill the prompt, call the tool-bound model, and parse the tool call's arguments.
answer_1 = prompt | llm_with_tool | output_parser

chain_1 = (
    RunnableParallel(question=RunnablePassthrough(), docs=wiki)
    .assign(context=format_1)
    # Run the answer subchain and return only the structured answer together
    # with the retrieved docs. Without these two steps answer_1 is never
    # executed and the chain yields just question/docs/context.
    .assign(cited_answer=answer_1)
    .pick(["cited_answer", "docs"])
)

In [23]:
# Run the citation chain on a sample question and display the full result dict.
chain_1.invoke("Which club is the champion of 2024 UEFA Champion League?")

{'cited_answer': {'answer': 'Real Madrid is the champion of the 2024 UEFA Champions League.',
  'citations': [3]},
 'docs': [Document(page_content="The 2024–25 UEFA Champions League will be the 70th season of Europe's premier club football tournament organised by UEFA, and the 33rd season since it was rebranded from the European Champion Clubs' Cup to the UEFA Champions League. This will be the first season under a new format, where in the league phase each team plays eight games against different opponents, but all 36 teams are ranked in a joint group.\nThe final will be played on 31 May 2025 at Allianz Arena in Munich, Germany.  The winners of the 2024–25 UEFA Champions League will automatically qualify for the 2025–26 UEFA Champions League league phase, the 2025 FIFA Intercontinental Cup, the 2029 FIFA Club World Cup, and earn the right to play against the winners of the 2024–25 UEFA Europa League in the 2025 UEFA Super Cup.\nReal Madrid are the defending champions, having won their