In [None]:
!pip install streamlit

In [None]:
!pip install langchain langchain-community langchain-openai streamlit unstructured

In [None]:
!pip install langchain langchain-community huggingface_hub


In [None]:
!pip install langchain-google-genai langchain

In [32]:
!pip install faiss-cpu

Collecting faiss-cpu
  Downloading faiss_cpu-1.12.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (5.1 kB)
Downloading faiss_cpu-1.12.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl (31.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m31.4/31.4 MB[0m [31m57.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: faiss-cpu
Successfully installed faiss-cpu-1.12.0


In [39]:
import os
import pickle
import time

import langchain
import streamlit as st

# Global LangChain settings (debug tracing)
from langchain.globals import set_debug

# LLM
from langchain_community.llms import HuggingFaceHub, HuggingFaceEndpoint
from langchain_google_genai import ChatGoogleGenerativeAI

# Chains
from langchain.chains import RetrievalQAWithSourcesChain
from langchain.chains.qa_with_sources.loading import load_qa_with_sources_chain

# Text splitting
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Document loading
from langchain_community.document_loaders import UnstructuredURLLoader

# Embeddings
from langchain_openai import OpenAIEmbeddings

# Vectorstore
from langchain_community.vectorstores import FAISS


In [13]:
# Colab-only: secrets are kept in the notebook's secret store instead of being
# hardcoded in a cell.
from google.colab import userdata

# Hugging Face API token, looked up by its secret name.
hf_token = userdata.get("HUGGINGFACEHUB_API_TOKEN")

In [14]:
# Export both API keys as environment variables.
# NOTE(review): presumably this is how clients built without an explicit key
# (e.g. the Gemini chat model) find their credentials — confirm.
os.environ["HUGGINGFACEHUB_API_TOKEN"] = hf_token
os.environ["GOOGLE_API_KEY"] = userdata.get("GOOGLE_API_KEY")


In [None]:
# Smoke-test a Hugging Face hosted model (FLAN-T5 base) with a single prompt.
# NOTE(review): this cell carries no execution count, so it may never have run
# successfully — confirm before relying on `llm`. HuggingFaceHub is also marked
# deprecated in recent langchain-community releases; verify against the
# installed version.
llm = HuggingFaceHub(
    huggingfacehub_api_token=hf_token,
    repo_id="google/flan-t5-base",
    task="text2text-generation",
    model_kwargs=dict(temperature=0.5, max_length=128),
)

response = llm.invoke("Tell me a fun fact about AI")
print(response)

In [15]:
# Gemini chat model; no API key is passed explicitly here.
chat_model = ChatGoogleGenerativeAI(
    model="gemini-1.5-flash",
)


In [17]:
# Sanity check: send one prompt to Gemini and print only the message text.
response = chat_model.invoke(input="Tell me a fun fact about AI")
print(response.content)

Some AI models are now so good at generating human-like text that they can even fool AI detectors designed to identify AI-generated content!  It's a bit of an AI arms race, with detectors constantly trying to catch up to the ever-evolving capabilities of AI text generators.


In [22]:
# Loading data: fetch the news articles that form the knowledge base.
article_urls = [
    "https://timesofindia.indiatimes.com/india/bengaluru-chinnaswamay-stadium-stampede-several-people-died-injured-rcb-victory-parade-karnataka-cm-siddaramaiah-latest-news/articleshow/121623434.cms",
    "https://www.thehindu.com/news/national/rcb-ipl-victory-parade-stampede-death-toll-virat-kohli-chinnaswamy-stadium-live/article69656707.ece",
]

loaders = UnstructuredURLLoader(urls=article_urls)
data = loaders.load()

# By default the loader logs and skips a URL it fails to load instead of
# raising, which would leave the index silently built from partial data.
# Fail loudly here so a missing article is noticed before embedding.
assert len(data) == len(article_urls), (
    f"Loaded {len(data)} of {len(article_urls)} URLs; check the loader logs"
)

# Number of documents loaded (one per URL).
len(data)

2

In [45]:
# Inspect the first loaded document (its metadata and raw page text).
data[0]

Document(metadata={'source': 'https://timesofindia.indiatimes.com/india/bengaluru-chinnaswamay-stadium-stampede-several-people-died-injured-rcb-victory-parade-karnataka-cm-siddaramaiah-latest-news/articleshow/121623434.cms'}, page_content='Edition\n\nIN\n\nIN\n\nUS\n\nEnglish\n\nEnglish\n\nहिन्दी\n\nमराठी\n\nಕನ್ನಡ\n\nதமிழ்\n\nবাংলা\n\nമലയാളം\n\nతెలుగు\n\nગુજરાતી\n\nTOI logo\n\nSign In\n\nTOI\n\nToday\'s ePaper\n\nNews\n\nIndia News\n\nAt least 11 dead, 33 hurt in Bengaluru stampede during RCB victory parade; how chaos & tragedy unfolded\n\nTrending\n\nIndia A vs England Lions\n\nMusk vs Trump\n\nUPSE Prelims Result\n\nComedk UGET Toppers List\n\nBengaluru Stampede News\n\nNirjala Ekadashi\n\nIndia Pakistan News\n\nSanskrit Phrase on IPL Trophy\n\nIndia A vs England Lions\n\nMusk vs Trump\n\nUPSE Prelims Result\n\nComedk UGET Toppers List\n\nBengaluru Stampede News\n\nNirjala Ekadashi\n\nIndia Pakistan News\n\nSanskrit Phrase on IPL Trophy\n\nIndia A vs England Lions\n\nMusk vs Trump\n\

In [24]:
# (2) Break the loaded documents into overlapping chunks:
# chunk_size=1000 with chunk_overlap=200 between neighbouring chunks.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)

# `data` already holds Document objects, so split_documents is used instead of
# split_text (which works on plain strings).
docs = text_splitter.split_documents(documents=data)

In [26]:
# Number of chunks produced by the splitter.
len(docs)


81

In [28]:
# Inspect the first chunk; it keeps the `source` metadata of its parent document.
docs[0]

Document(metadata={'source': 'https://timesofindia.indiatimes.com/india/bengaluru-chinnaswamay-stadium-stampede-several-people-died-injured-rcb-victory-parade-karnataka-cm-siddaramaiah-latest-news/articleshow/121623434.cms'}, page_content="Edition\n\nIN\n\nIN\n\nUS\n\nEnglish\n\nEnglish\n\nहिन्दी\n\nमराठी\n\nಕನ್ನಡ\n\nதமிழ்\n\nবাংলা\n\nമലയാളം\n\nతెలుగు\n\nગુજરાતી\n\nTOI logo\n\nSign In\n\nTOI\n\nToday's ePaper\n\nNews\n\nIndia News\n\nAt least 11 dead, 33 hurt in Bengaluru stampede during RCB victory parade; how chaos & tragedy unfolded\n\nTrending\n\nIndia A vs England Lions\n\nMusk vs Trump\n\nUPSE Prelims Result\n\nComedk UGET Toppers List\n\nBengaluru Stampede News\n\nNirjala Ekadashi\n\nIndia Pakistan News\n\nSanskrit Phrase on IPL Trophy\n\nIndia A vs England Lions\n\nMusk vs Trump\n\nUPSE Prelims Result\n\nComedk UGET Toppers List\n\nBengaluru Stampede News\n\nNirjala Ekadashi\n\nIndia Pakistan News\n\nSanskrit Phrase on IPL Trophy\n\nIndia A vs England Lions\n\nMusk vs Trump\n\n

In [30]:
from langchain_google_genai import GoogleGenerativeAIEmbeddings

In [36]:
# Embed every chunk and store the vectors in a FAISS index.

# Embedding client backed by Google's "models/embedding-001" model.
embeddings = GoogleGenerativeAIEmbeddings(
    model="models/embedding-001",
)

# Build the FAISS vector index from the chunks and the embedding client.
vectorindex_google = FAISS.from_documents(documents=docs, embedding=embeddings)

In [37]:
# Display the vector store object to confirm the index was created.
vectorindex_google

<langchain_community.vectorstores.faiss.FAISS at 0x7ad917066e40>

In [41]:
# Question-answering chain: the FAISS index supplies the relevant chunks and
# Gemini writes the answer together with the source URLs.
chain = RetrievalQAWithSourcesChain.from_llm(
    llm=chat_model,
    retriever=vectorindex_google.as_retriever(),
)

In [44]:
# Question to answer from the indexed articles.
query = "What did the Prime Minister Narendra Modi tell about the RCB  Stampede issue"

# Turn on LangChain's global debug tracing so every intermediate chain/LLM call
# is printed. set_debug is the supported API for this setting.
set_debug(True)

# Chain.__call__ is deprecated; invoke() is its replacement and accepts the same
# input dict. return_only_outputs=True drops the echoed inputs so the cell
# displays just the 'answer' and 'sources' keys.
chain.invoke({"question": query}, return_only_outputs=True)

[32;1m[1;3m[chain/start][0m [1m[chain:RetrievalQAWithSourcesChain] Entering Chain run with input:
[0m{
  "question": "What did the Prime Minister Narendra Modi tell about the RCB  Stampede issue"
}
[32;1m[1;3m[chain/start][0m [1m[chain:RetrievalQAWithSourcesChain > chain:MapReduceDocumentsChain] Entering Chain run with input:
[0m[inputs]
[32;1m[1;3m[chain/start][0m [1m[chain:RetrievalQAWithSourcesChain > chain:MapReduceDocumentsChain > chain:LLMChain] Entering Chain run with input:
[0m{
  "input_list": [
    {
      "context": "siddu.jpg\n\nCredit: X/CMO\n\nJune 04, 2025 19:46\n\nMishap in Bengaluru is absolutely heartrending: PM Modi\n\nJune 04, 2025 19:45\n\nGovt did stop roadshow in order to avoid stampede or any such situation: BCCI Vice-President\n\nJune 04, 2025 19:29\n\nLack of proper planning, govt must take full responsibility: H.D. Kumaraswamy\n\nJune 04, 2025 19:26\n\nRCB’s grand party to celebrate IPL triumph turns tragic due to crowd chaos\n\nChaotic celebra

{'answer': 'The provided text states that Prime Minister Modi described the Bengaluru mishap as "absolutely heartrending".  However,  the provided sources do not offer further details on his statement about the RCB stampede.\n\n',
 'sources': 'https://www.thehindu.com/news/national/rcb-ipl-victory-parade-stampede-death-toll-virat-kohli-chinnaswamy-stadium-live/article69656707.ece'}