In [52]:
### importing libraries
import os
from dotenv import load_dotenv
from langchain_community.llms import Ollama
from langchain_chroma import Chroma
from langchain_community.document_loaders import WebBaseLoader
from langchain_core.prompts import ChatPromptTemplate
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
import bs4
from langchain_huggingface import HuggingFaceEmbeddings

In [53]:
### loading environment variables
# Read key/value pairs from a local .env file (if any) into the process environment.
load_dotenv()

# os.environ only accepts str values, so assigning the result of os.getenv()
# directly raises TypeError when HF_TOKEN is not defined. Re-export the token
# only when it is actually present.
hf_token=os.getenv("HF_TOKEN")
if hf_token:
    os.environ['HF_TOKEN']=hf_token

In [54]:
### initializing llm model
# Model tag handed to the Ollama wrapper; change it here to try another model.
OLLAMA_MODEL="gemma3:1b"
llm=Ollama(model=OLLAMA_MODEL)

In [55]:
### initializing Embedding model
# Name of the embedding model passed to HuggingFaceEmbeddings.
EMBEDDING_MODEL_NAME="all-MiniLM-L6-v2"
embeddings=HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME)

In [56]:
### load the blog page that the retriever will later be built from
# Article to index; WebBaseLoader expects a sequence of URLs, hence the 1-tuple.
BLOG_URL="https://www.cricbuzz.com/cricket-news/135464/bangladesh-cricket-board-news-bcb-elections-set-for-early-october-amid-conflict-of-interest-concerns"
loader=WebBaseLoader(web_paths=(BLOG_URL,))

# Fetch the page and show the resulting Document list.
docs=loader.load()
docs

[Document(metadata={'source': 'https://www.cricbuzz.com/cricket-news/135464/bangladesh-cricket-board-news-bcb-elections-set-for-early-october-amid-conflict-of-interest-concerns', 'title': 'BCB elections set for early October amid conflict of interest concerns | Cricbuzz.com', 'description': 'Though the date is yet to be released, it is understood that the election might take place on October 4', 'language': 'en'}, page_content='\n BCB elections set for early October amid conflict of interest concerns | Cricbuzz.com  ✖Live ScoresScheduleArchivesNewsAll Stories  Premium Editorials Latest NewsTopicsSpotlightOpinionsSpecialsStats & AnalysisInterviewsLive BlogsHarsha BhogleSeries  Asia Cup 2025 South Africa tour of England, 2025 Caribbean Premier League 2025 The Hundred Mens Competition 2025 Australia Women tour of India, 2025 ICC Womens World Cup 2025 India tour of England, 2025 South Africa tour of Australia, 2025 The Hundred Womens Competition 2025 Duleep Trophy 2025 All Series »Teams   

In [57]:
### chunking documents
# Splitter settings: upper bound on chunk size and the overlap kept between
# neighbouring chunks.
CHUNK_SIZE=1000
CHUNK_OVERLAP=200

text_splitter=RecursiveCharacterTextSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
)
splits=text_splitter.split_documents(docs)

In [58]:
### inspecting the chunks produced by the text splitter
splits

[Document(metadata={'source': 'https://www.cricbuzz.com/cricket-news/135464/bangladesh-cricket-board-news-bcb-elections-set-for-early-october-amid-conflict-of-interest-concerns', 'title': 'BCB elections set for early October amid conflict of interest concerns | Cricbuzz.com', 'description': 'Though the date is yet to be released, it is understood that the election might take place on October 4', 'language': 'en'}, page_content='BCB elections set for early October amid conflict of interest concerns | Cricbuzz.com  ✖Live ScoresScheduleArchivesNewsAll Stories  Premium Editorials Latest NewsTopicsSpotlightOpinionsSpecialsStats & AnalysisInterviewsLive BlogsHarsha BhogleSeries  Asia Cup 2025 South Africa tour of England, 2025 Caribbean Premier League 2025 The Hundred Mens Competition 2025 Australia Women tour of India, 2025 ICC Womens World Cup 2025 India tour of England, 2025 South Africa tour of Australia, 2025 The Hundred Womens Competition 2025 Duleep Trophy 2025 All Series »Teams   Tes

In [59]:
### initializing vectordatabase
# No persist_directory is given, so Chroma keeps the index in process memory.
# NOTE(review): that in-memory store appears to be shared by every Chroma
# object created in the same kernel, so re-running this cell would add the
# same chunks again and retrieval would return duplicates — confirm against
# the installed chromadb version. Dropping the collection before rebuilding it
# makes the cell idempotent either way.
COLLECTION_NAME="bcb_election_article"
Chroma(collection_name=COLLECTION_NAME,embedding_function=embeddings).delete_collection()

# Embed every chunk and store it in a fresh collection.
vectorstore=Chroma.from_documents(
    documents=splits,
    embedding=embeddings,
    collection_name=COLLECTION_NAME,
)

# Retriever interface over the vector store, used by the RAG chain.
retriever=vectorstore.as_retriever()
retriever

VectorStoreRetriever(tags=['Chroma', 'HuggingFaceEmbeddings'], vectorstore=<langchain_chroma.vectorstores.Chroma object at 0x0000026C9B44D5B0>, search_kwargs={})

In [60]:
### prompt
# System instructions, one sentence per entry; they are joined with single
# spaces. "{context}" is a template placeholder left for the chain to fill in.
_INSTRUCTION_SENTENCES = [
    "You are an assistant for question-answering tasks.",
    "Use the following pieces of retrieved context to answer the question.",
    "If you don't know the answer, say that you don't know.",
    "Use three sentences maximum and keep the answer concise.",
]
system_prompt = " ".join(_INSTRUCTION_SENTENCES) + "\n\n" + "{context}"

# Chat template: the system instructions followed by the user's question,
# which is supplied through the "{input}" placeholder.
chat_messages = [("system", system_prompt), ("human", "{input}")]
prompt = ChatPromptTemplate.from_messages(chat_messages)

In [61]:
### creating chain
# Inner chain: combines the documents it is given with the prompt and calls the LLM.
question_answer_chain=create_stuff_documents_chain(llm=llm,prompt=prompt)

# Outer chain: runs the retriever on the input, then hands the retrieved
# documents to the inner chain.
rag_chain=create_retrieval_chain(
    retriever=retriever,
    combine_docs_chain=question_answer_chain,
)

In [62]:
### asking a question through the RAG chain
# The chain expects the user's question under the "input" key.
question="what this Blog telling about?"
response=rag_chain.invoke({"input":question})
response

{'input': 'what this Blog telling about?',
 'context': [Document(id='be101c3a-5589-4bf7-9283-1531dc18ace0', metadata={'source': 'https://www.cricbuzz.com/cricket-news/135464/bangladesh-cricket-board-news-bcb-elections-set-for-early-october-amid-conflict-of-interest-concerns', 'title': 'BCB elections set for early October amid conflict of interest concerns | Cricbuzz.com', 'description': 'Though the date is yet to be released, it is understood that the election might take place on October 4', 'language': 'en'}, page_content='We use cookies to improve your experience on our site and to show you non-personalized ads. Find out more in our privacy policy and cookie policyOKMOBILE SITE & APPSm.cricbuzz.comAndroidiOSFOLLOW US ONfacebooktwitteryoutubePinterestCOMPANYCareersAdvertiseCricbuzz TV AdsPrivacy Preferences{{ link.name }}© 2025 Cricbuzz.com, Cricbuzz Platforms Limited. All rights reserved | The Times of India | Navbharat TimesMove to top'),
  Document(id='aeec4dd2-c72e-44ee-83e0-80eb3

In [63]:
### showing only the generated answer from the chain's response
response['answer']

'The blog is discussing Cricbuzz’s election process, particularly regarding the upcoming election for the Board of Directors of the Cricket Board of Bangladesh.'