In [28]:
# 1. Load and split documents

from langchain_community.document_loaders import PyPDFLoader

# Read the PDF into a list of Document objects via PyPDFLoader.
loader = PyPDFLoader("IPL_Teams.pdf")
docs = loader.load()

# Preview only the first Document: echoing the whole list would print every
# loaded page (the metadata reports total_pages = 137) into the cell output.
docs[:1]

[Document(metadata={'producer': 'PyFPDF 1.7.2 http://pyfpdf.googlecode.com/', 'creator': 'PyPDF', 'creationdate': 'D:20250318110902', 'source': 'IPL_Teams.pdf', 'total_pages': 137, 'page': 0, 'page_label': '1'}, page_content="Chennai Super Kings\nT20 kit\nChennai  Super  Kings  (CSK)  is  an  Indian  professional  T20  cricket  franchise  based  in\nChennai, Tamil Nadu. The team competes in the Indian Premier League (IPL) and was\none of the eight franchises incorporated when the league was established in 2008. The\nteam  plays  its  home  matches  at  the  M.  A.  Chidambaram  Stadium  and  is  owned  by\nChennai Super Kings Cricket.\nThe  Super  Kings  is  the  joint-most  successful  IPL  franchise,  having  won  five  IPL  titles\n(along with Mumbai Indians). In the IPL, it has appeared in a 10 finals and qualified for\nthe playoff stages 12 times, the most amongst the IPL teams. The franchise has also\nwon the Champions League Twenty20 twice in 2010 and 2014. The team is currently

In [29]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Splitter configuration: target chunk size and the overlap kept between
# neighbouring chunks.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=20,
)

# Break the loaded documents into chunks; `documents` is what gets embedded
# and stored in the vector store.
documents = text_splitter.split_documents(docs)

In [30]:
# 2. Generate embeddings

from langchain_openai import OpenAIEmbeddings
# Embedding client passed to the vector store below. No arguments are given,
# so the model and credentials come from library defaults
# (presumably the OPENAI_API_KEY environment variable -- confirm).
embedding = OpenAIEmbeddings()

In [31]:
# 3. Store in Milvus

from langchain_community.vectorstores import Milvus

# Embed every chunk and insert it into the "ipl_docs" collection on the Milvus
# server described by connection_args.
milvus_vectorstore = Milvus.from_documents(
    documents,
    embedding,
    connection_args={
        "host": "localhost",  # or your IP
        "port": "19530",
    },
    collection_name="ipl_docs",
    # Rebuild the collection on each run. Without this, re-executing the cell
    # (or the whole notebook) inserts the same chunks again, so the retriever
    # starts returning duplicate passages.
    # WARNING: this deletes any existing "ipl_docs" collection first.
    drop_old=True,
)

In [32]:
# 4. Prepare the LLM

from langchain_openai import ChatOpenAI

# Chat model that produces the final answer in the chain.
llm = ChatOpenAI(
    model="gpt-4",
    temperature=0.7,  # non-zero: answers may vary between runs
)

In [33]:
# 5. Prepare the prompt template

from langchain_core.prompts import ChatPromptTemplate

# Template text for the question-answering prompt. It exposes two
# placeholders: {context} (the retrieved passages) and {input} (the question).
ipl_qa_template = """
You are an expert on Indian Premier League (IPL) cricket teams and players.

Answer the user's question using only the provided context. 
Include team names, player names, statistics, and relevant match details where applicable.
Think step-by-step before responding. If the context does not include the answer, respond with "The context does not contain this information."

<context>
{context}
</context>

Question: {input}

Answer:
"""

prompt = ChatPromptTemplate.from_template(ipl_qa_template)


In [34]:
# 6. Create retriever
# Retriever interface over the Milvus store. as_retriever() is called with no
# arguments, so the search type and number of results are the library defaults.
retriever = milvus_vectorstore.as_retriever()

In [39]:
# 7. Define a step to fetch context from retriever

def retrieve_context(inputs):
    """Fetch passages for a question and package them for the prompt.

    Args:
        inputs: Mapping with an "input" key holding the user's question.

    Returns:
        dict: {"context": the page_content of each retrieved document joined
        by blank lines, "input": the original question}.
    """
    question = inputs["input"]
    matches = retriever.invoke(question)
    passages = [match.page_content for match in matches]
    return {"context": "\n\n".join(passages), "input": question}

In [40]:
# 8. Create output parser

from langchain_core.output_parsers import StrOutputParser 

# Output parser used as the last step of the chain, so that the result of
# chain.invoke() can be printed directly as text.
parser = StrOutputParser()

In [41]:
# 9. Chain: input -> retrieve -> format prompt -> LLM -> parse

from langchain_core.runnables import Runnable

# Wrap retriever in a custom Runnable that returns the expected input for the prompt
class CustomRetriever(Runnable):
    """Runnable chain step that turns {"input": question} into the prompt's variables.

    Retrieval and formatting are delegated to retrieve_context() so the
    logic lives in one place instead of being duplicated here.
    """

    def invoke(self, input, config=None, **kwargs):
        """Return {"context": ..., "input": ...} for the given question mapping.

        Args:
            input: Mapping with an "input" key holding the user's question.
            config: Accepted for compatibility with Runnable.invoke; not used.
            **kwargs: Accepted for compatibility with Runnable.invoke, so that
                keyword arguments forwarded to this step do not raise
                TypeError; not used.

        Returns:
            dict: The value produced by retrieve_context(input).
        """
        return retrieve_context(input)

# Compose the pipeline left to right: retrieval step -> prompt -> LLM -> parser
chain = CustomRetriever() | prompt | llm | parser


In [46]:
# 10. Run the chain
# The chain expects a mapping whose "input" key carries the question.
user_query = {"input": "tell me about csk?"}
response = chain.invoke(user_query)
print(response)

Chennai Super Kings (CSK) is a professional T20 cricket franchise based in Chennai, Tamil Nadu. They compete in the Indian Premier League (IPL) and were one of the eight franchises incorporated when the league was established in 2008. The team plays its home matches at the M. A. Chidambaram Stadium and is owned by Chennai Super Kings Cricket. 

CSK is one of the most successful IPL franchises, having won five IPL titles along with Mumbai Indians. It has appeared in 10 finals and qualified for the playoff stages 12 times, the most amongst the IPL teams. The franchise has also won the Champions League Twenty20 twice in 2010 and 2014. The team is currently captained by Ruturaj Gaikwad and coached by Stephen Fleming. 

However, the Super Kings faced a two-year suspension from the IPL starting from July 2015. In a recent season, they were at the bottom of the table for most of the time, with their play-offs hopes crushed after a defeat against Mumbai Indians. They finished that season 7th i