## Retrieval Augmented Generation for a Wiki Page using LangChain, OpenAI, and ChromaDB

In [67]:
## Load data from Wikipedia

from langchain.document_loaders import WikipediaLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [68]:
import wikipedia
# Title of the Wikipedia article to use as the knowledge source.
search_term = "2023 Wimbledon Championships"

# Build the loader first, then fetch; load_max_docs=1 asks for a single document.
wiki_loader = WikipediaLoader(query=search_term, load_max_docs=1)
docs = wiki_loader.load()

In [69]:
# Cut the loaded article into chunks of at most 100 characters, with a
# 20-character overlap between chunks, using the recursive character splitter.
# Length is measured with the built-in len(), i.e. in characters.
# NOTE(review): chunks this small mean bare section headings such as
# '== Tournament ==' become documents of their own - consider a larger size.
splitter_options = {
    "chunk_size": 100,
    "chunk_overlap": 20,
    "length_function": len,
    "is_separator_regex": False,
}
text_splitter = RecursiveCharacterTextSplitter(**splitter_options)

data = text_splitter.split_documents(docs)

# Preview the first three chunks.
data[:3]

[Document(page_content='The 2023 Wimbledon Championships was a Grand Slam tennis tournament that took place at the All', metadata={'title': '2023 Wimbledon Championships', 'summary': 'The 2023 Wimbledon Championships was a Grand Slam tennis tournament that took place at the All England Lawn Tennis and Croquet Club in Wimbledon, London, United Kingdom.', 'source': 'https://en.wikipedia.org/wiki/2023_Wimbledon_Championships'}),
 Document(page_content='place at the All England Lawn Tennis and Croquet Club in Wimbledon, London, United Kingdom.', metadata={'title': '2023 Wimbledon Championships', 'summary': 'The 2023 Wimbledon Championships was a Grand Slam tennis tournament that took place at the All England Lawn Tennis and Croquet Club in Wimbledon, London, United Kingdom.', 'source': 'https://en.wikipedia.org/wiki/2023_Wimbledon_Championships'}),
 Document(page_content='== Tournament ==', metadata={'title': '2023 Wimbledon Championships', 'summary': 'The 2023 Wimbledon Championships was 

In [70]:
#Store embeddings in ChromaDB

from langchain.vectorstores import Chroma
from langchain.embeddings import OpenAIEmbeddings

In [71]:
# Embedding model used to vectorise the chunks; created with library defaults.
# NOTE(review): presumably picks up the OpenAI API key from the environment - confirm.
embeddings = OpenAIEmbeddings()

In [72]:
# One id per chunk: the chunk's 'source' metadata value followed by its
# position in `data`, so every id is distinct even though all chunks share
# the same source.
chunk_ids = [
    f"{chunk.metadata['source']}-{position}"
    for position, chunk in enumerate(data)
]

# Embed the chunks and store them in the "Wimbledon-Embeddings" collection,
# using the local 'db' directory as the persistence location.
store = Chroma.from_documents(
    data,
    embeddings,
    ids=chunk_ids,
    collection_name="Wimbledon-Embeddings",
    persist_directory='db',
)

# Explicitly persist the collection.
store.persist()

In [73]:
# Asking Questions about Wimbledon 2023

from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from langchain.chat_models import ChatOpenAI
import pprint

In [74]:
# Instruction text sent to the chat model. It has two placeholders:
#   {context}  - filled with the retrieved chunks
#   {question} - filled with the user's question
prompt_template = """You are a Assistant that answers questions about Wimbledon 2023, using only the context provided.
If you don't know the answer, simply state that you don't know it


{context}

Question: {question}
"""

# Wrap the raw text in a PromptTemplate, declaring both placeholders.
PROMPT = PromptTemplate(
    input_variables=["context", "question"],
    template=prompt_template,
)

In [75]:
# Chat model that writes the final answer; temperature=0 requests the least
# random sampling so repeated runs stay as stable as possible.
llm_model = ChatOpenAI(
    model="gpt-4",
    temperature=0,
)

In [76]:
# Retriever view over the Chroma store (default search settings).
wimbledon_retriever = store.as_retriever()

# Question-answering chain: retrieve chunks, hand them to the model through
# the custom prompt, and also return the chunks that were used
# (the 'source_documents' key of the result).
# NOTE(review): the "stuff" chain type is expected to place all retrieved
# chunks into the prompt's {context} slot - confirm against the LangChain docs.
qa_with_source = RetrievalQA.from_chain_type(
    llm=llm_model,
    retriever=wimbledon_retriever,
    chain_type="stuff",
    return_source_documents=True,
    chain_type_kwargs=dict(prompt=PROMPT),
)

In [77]:
# Ask about the dates and location, then pretty-print the full result
# (query, answer and the source chunks).
when_where_answer = qa_with_source("When and where was the Wimbledon 2023 held?")
pprint.pprint(when_where_answer)

{'query': 'When and where was the Wimbledon 2023 held?',
 'result': 'The Wimbledon 2023 was held at the All England Lawn Tennis and '
           'Croquet Club in Wimbledon, London, United Kingdom from 3 to 16 '
           'July 2023.',
 'source_documents': [Document(page_content='at the All England Lawn Tennis and Croquet Club, Wimbledon, from 3 to 16 July 2023. Qualifying', metadata={'source': 'https://en.wikipedia.org/wiki/2023_Wimbledon_Championships', 'summary': 'The 2023 Wimbledon Championships was a Grand Slam tennis tournament that took place at the All England Lawn Tennis and Croquet Club in Wimbledon, London, United Kingdom.', 'title': '2023 Wimbledon Championships'}),
                      Document(page_content='2023. Qualifying matches were played from 26 to 29 June 2023 at the Bank of England Sports Ground', metadata={'source': 'https://en.wikipedia.org/wiki/2023_Wimbledon_Championships', 'summary': 'The 2023 Wimbledon Championships was a Grand Slam tennis tournament that t

In [78]:
# Ask about the venue, then pretty-print the full result
# (query, answer and the source chunks).
venue_answer = qa_with_source("What was the venue of the tournaments?")
pprint.pprint(venue_answer)

{'query': 'What was the venue of the tournaments?',
 'result': 'The venue of the tournaments was the All England Lawn Tennis and '
           'Croquet Club in Wimbledon, London, United Kingdom.',
 'source_documents': [Document(page_content='The tournament was played on grass courts, with all main draw matches played at the All England', metadata={'source': 'https://en.wikipedia.org/wiki/2023_Wimbledon_Championships', 'summary': 'The 2023 Wimbledon Championships was a Grand Slam tennis tournament that took place at the All England Lawn Tennis and Croquet Club in Wimbledon, London, United Kingdom.', 'title': '2023 Wimbledon Championships'}),
                      Document(page_content='== Tournament ==', metadata={'source': 'https://en.wikipedia.org/wiki/2023_Wimbledon_Championships', 'summary': 'The 2023 Wimbledon Championships was a Grand Slam tennis tournament that took place at the All England Lawn Tennis and Croquet Club in Wimbledon, London, United Kingdom.', 'title': '2023 Wimbled