# Importing the required modules

In [63]:
import wikipedia
from langchain_openai import ChatOpenAI
from langchain_core.output_parsers import StrOutputParser
from langchain.prompts import PromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_core.documents import Document
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import FAISS

# Initializing the LLM

In [26]:
llm = ChatOpenAI(model="gpt-3.5-turbo-0125")

# Observing the output before any context is provided to the LLM

In [29]:
llm.invoke("How are you today?")

AIMessage(content="I'm just a computer program, so I don't have feelings, but I'm here and ready to assist you. How can I help you today?", additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 31, 'prompt_tokens': 12, 'total_tokens': 43}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-60e0ef38-dfc3-4c22-867a-33205dd62b56-0', usage_metadata={'input_tokens': 12, 'output_tokens': 31, 'total_tokens': 43})

In [31]:
llm.invoke("How many medals did India win at Paralympics 2024?")

AIMessage(content="I am unable to provide real-time information as I am an AI assistant. Please refer to the official Paralympics website or news sources for the most up-to-date information on India's medal count at the 2024 Paralympic Games.", additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 51, 'prompt_tokens': 22, 'total_tokens': 73}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-2be8be9b-f594-45d6-9150-05a5d8b9b677-0', usage_metadata={'input_tokens': 22, 'output_tokens': 51, 'total_tokens': 73})

In [52]:
llm.invoke("Did Indian cricket team win world cup recently?")

AIMessage(content='No, the Indian cricket team has not won the ICC Cricket World Cup recently. Their last World Cup victory was in 2011 when they won the tournament held in India, Sri Lanka, and Bangladesh. Since then, they have not been able to win the World Cup.', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 55, 'prompt_tokens': 16, 'total_tokens': 71}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-e0e536dd-7f7f-430f-857f-d4a47692bb28-0', usage_metadata={'input_tokens': 16, 'output_tokens': 55, 'total_tokens': 71})

# Getting context about an article from Wikipedia

In [32]:
data = wikipedia.page("India at the Paralympics")

In [36]:
print(data.content[:200])
print(len(data.content))

India first participated in the 1968 Summer Paralympics. The nation has appeared in every edition of the Summer Paralympics since 1984. The Paris 2024 Games marked India's 13th appearance at the Paral
7011


# Providing context to model and creating a chain

Providing context to the model

In [41]:
user_prompt = PromptTemplate.from_template(
    """
    You are provided information about the topic. Answer the question asked by the user.
    Context: {data}
    Question: {question}
    Answer:
    """
)

In [42]:
chain = user_prompt | llm | StrOutputParser()

In [44]:
chain.invoke({
    "data": data.content,
    "question": "How many medals did India win at Paralympics 2024?"
})

'India won a total of 29 medals at the Paralympics 2024, including seven gold, nine silver, and thirteen bronze medals.'

# Part 2: Taking multiple documents and using FAISS to query

Here we get the information about relevant topics from Wikipedia, split the large docs into chunks, convert the text to vectors and store them in FAISS Vector database for further use

In [51]:
doc = Document(page_content=data.content)

In [55]:
topics = [
    "India at the Paralympics",
    "India national cricket team"
]

In [56]:
docs = []
for topic in topics:
    data = wikipedia.page(topic)
    doc = Document(page_content=data.content)
    docs.append(doc)

In [59]:
text_splitter = RecursiveCharacterTextSplitter(chunk_size = 1000, chunk_overlap = 200)

In [60]:
splits = text_splitter.split_documents(docs)

In [62]:
len(splits)

80

In [64]:
vector_store = FAISS.from_documents(documents=splits, embedding=OpenAIEmbeddings())

In [69]:
query = "Medals won by India at Paralympics"
result = vector_store.similarity_search(query)

In [70]:
retriever = vector_store.as_retriever()

In [74]:
retriever.invoke("India medals at Paralympics")

[Document(page_content="India sent its largest contingent for the 2024 Games consisting of 84 people competing across 12 sports. India has won 29 medals with seven gold, nine silver, and thirteen bronze medals. Avani Lekhara won a gold medal in shooting for the second consecutive Games and became the first Indian woman paralympic athlete to win back-to-back gold and multiple medals. Kumar Nitesh won a gold medal in badminton at the men's singles SL3 event. Sumit Antil became the first Indian male athelete to defend a title at the Paralympics after he won the gold medal in the men's javelin throw F64 event, while setting two Paralympic records in the process."),
 Document(page_content='In badminton, Indian shuttlers won four medals including two gold medals by Pramod Bhagat and Krishna Nagar, a silver medal by Suhas Yathiraj and a bronze medal by Manoj Sarkar. In athletics, India won eight medals including one gold, five silver and two bronze medals. Sumit Antil won a gold medal in Jave

Creating a chain that fetched the documents from vector database as context and generates the output

In [76]:
user_prompt = PromptTemplate.from_template(
    """
    You are an AI assistant who can answer the question based on the context provided
    Context: {context}
    Question: {question}
    Answer:
    """
)

In [77]:
user_prompt

PromptTemplate(input_variables=['context', 'question'], template='\n    You are an AI assistant who can answer the question based on the context provided\n    Context: {context}\n    Question: {question}\n    Answer:\n    ')

In [82]:
chain = (
    {"context": retriever, "question": RunnablePassthrough()}
    | user_prompt
    | llm
    | StrOutputParser()
)

In [83]:
chain.invoke("Medals by India at Paralympics")

'India has won a total of 29 medals at the Paralympic Games, including seven gold, nine silver, and thirteen bronze medals. Some notable achievements include Avani Lekhara winning back-to-back gold medals in shooting, Kumar Nitesh winning a gold in badminton, Sumit Antil defending his title in javelin throw, and Indian shuttlers winning four medals in badminton.'