In [2]:
import os
import streamlit as st
import pickle
import time
import langchain
from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings  # ✅ Gemini LLM & Embeddings
from langchain.chains import RetrievalQAWithSourcesChain
from langchain.chains.qa_with_sources.loading import load_qa_with_sources_chain
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import UnstructuredURLLoader
from langchain.vectorstores import FAISS


In [3]:
# Resolve the Gemini API key without tying the notebook to one machine:
#   1. prefer GOOGLE_API_KEY if it is already set in the environment;
#   2. otherwise read it from a key file, whose location can be overridden
#      with the GEMINI_API_KEY_FILE environment variable (the original
#      local path is kept as the default for backward compatibility).
api_key_path = os.environ.get("GEMINI_API_KEY_FILE", r"C:\Users\gupta\GeminiApikey.txt")

api_key = os.environ.get("GOOGLE_API_KEY", "").strip()
if not api_key:
    # "utf-8-sig" transparently drops a UTF-8 BOM (as written by some Windows
    # editors) that would otherwise end up as garbage at the start of the key.
    with open(api_key_path, "r", encoding="utf-8-sig") as file:
        api_key = file.read().strip()

# Fail here with a clear message instead of later with an opaque auth error.
if not api_key:
    raise ValueError(
        "No Gemini API key found: set GOOGLE_API_KEY or put the key in "
        f"{api_key_path!r} (override the location with GEMINI_API_KEY_FILE)."
    )

# Export the key through the GOOGLE_API_KEY environment variable for the
# Gemini clients created below and in later cells.
os.environ["GOOGLE_API_KEY"] = api_key

# Chat model used by the question-answering chain further down.
llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash", temperature=0.7, max_tokens=500)

In [4]:
from langchain.document_loaders import UnstructuredURLLoader

# Pages to ingest.
# NOTE(review): the "#google_vignette" fragment on the first URL looks like a
# leftover ad-overlay anchor — confirm whether it is actually needed.
loader = UnstructuredURLLoader(urls=[
    "https://www.moneycontrol.com/editors-picks/#google_vignette",
    "https://www.moneycontrol.com/sports/cricket/ipl/",
])

# Fetch the pages, then display how many documents were loaded.
data = loader.load()
len(data)

2

In [5]:
# Splitter configuration: chunks of size 1000 with an overlap of 200 between
# consecutive chunks, so text cut at a boundary still appears in both pieces.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)

# Split the loaded pages into the chunks that will be embedded and indexed.
docs = text_splitter.split_documents(data)

In [6]:
# Display how many chunks the splitter produced from the loaded pages.
len(docs)

42

## Create embeddings for these chunks and save them to a FAISS index

In [7]:
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain.vectorstores import FAISS

# Gemini embedding model used to embed the chunks.
embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")

# Embed all chunks and build a FAISS vector index from them.
vectorindex_gemini = FAISS.from_documents(
    documents=docs,
    embedding=embeddings,
)


In [12]:
# Persist the index under "faiss_index" so it can be read back with
# FAISS.load_local instead of re-embedding every chunk.
# (No explicit pickle import is needed here: save_local is the only call.)
vectorindex_gemini.save_local("faiss_index")


In [17]:
import os
from langchain.vectorstores import FAISS
from langchain_google_genai import GoogleGenerativeAIEmbeddings

# Location the index was written to by save_local.
index_path = "faiss_index"

# Must be the same embedding model that was used to build the index,
# otherwise query vectors and stored vectors are not comparable.
embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")

# Stop here with a clear error if the index is missing. Merely printing a
# message would leave `vectorIndex` undefined (or stale from an earlier run),
# and the following cells would then fail confusingly or use outdated data.
if not os.path.isdir(index_path):
    raise FileNotFoundError("FAISS index not found. Please generate and save it first. ❌")

# SECURITY: allow_dangerous_deserialization=True lets load_local unpickle data
# from disk. That is acceptable only because this notebook created the index
# itself; never enable it for an index obtained from an untrusted source.
vectorIndex = FAISS.load_local(index_path, embeddings, allow_dangerous_deserialization=True)
print("FAISS index loaded successfully! ✅")


FAISS index loaded successfully! ✅


## Retrieve the chunks most similar to a given question and call the LLM to produce the final answer

In [18]:
# Question-answering chain: the FAISS index acts as the retriever and the
# Gemini chat model writes the answer together with its sources.
chain = RetrievalQAWithSourcesChain.from_llm(
    llm=llm,
    retriever=vectorIndex.as_retriever(),
)

# Display the assembled chain.
chain



In [20]:
# Turn on LangChain's global debug flag so the chain's intermediate steps
# are printed while it runs.
langchain.debug = True

query = "GT wins?"

# Run the chain and keep only its outputs (answer and sources).
# The debug trace shows the retrieved chunks being processed by a
# MapReduceDocumentsChain, i.e. the map-reduce method is used.
chain({"question": query}, return_only_outputs=True)

[32;1m[1;3m[chain/start][0m [1m[chain:RetrievalQAWithSourcesChain] Entering Chain run with input:
[0m{
  "question": "GT wins?"
}
[32;1m[1;3m[chain/start][0m [1m[chain:RetrievalQAWithSourcesChain > chain:MapReduceDocumentsChain] Entering Chain run with input:
[0m[inputs]
[32;1m[1;3m[chain/start][0m [1m[chain:RetrievalQAWithSourcesChain > chain:MapReduceDocumentsChain > chain:LLMChain] Entering Chain run with input:
[0m{
  "input_list": [
    {
      "context": "Hyderabad\n\n6 Apr 2025\n\n07:30 pm\n\nSunrisers Hyderabad\n\nGujarat Titans\n\nMatch begins at 19:30 IST (14:00 GMT)\n\nMatch 14\n\nBengaluru\n\n2 Apr 2025\n\n07:30 pm\n\nRoyal Challengers Bengaluru\n\n169/8 (20.0 ov)\n\nGujarat Titans\n\n170/2 (17.5 ov)\n\nGujarat Titans beat Royal Challengers Bengaluru by 8 wickets\n\nMatch 13\n\nLucknow\n\n1 Apr 2025\n\n07:30 pm\n\nLucknow Super Giants\n\n171/7 (20.0 ov)\n\nPunjab Kings\n\n177/2 (16.2 ov)\n\nPunjab Kings beat Lucknow Super Giants by 8 wickets\n\nMatch 12\n\nMu

{'answer': 'Yes, Gujarat Titans (GT) won against Royal Challengers Bengaluru (RCB) by 8 wickets.\n',
 'sources': 'https://www.moneycontrol.com/sports/cricket/ipl/'}