In [None]:
# Installations: install the third-party packages this notebook relies on.
# %pip (rather than !pip) targets the environment of the running kernel.
# NOTE(review): no versions are pinned, so every run installs whatever is current — consider pinning.
# Streamlit is installed here but is not imported in the visible notebook cells.
%pip install streamlit
# LangChain itself, the CPU build of FAISS, and sentence-transformers.
%pip install langchain faiss-cpu sentence-transformers
# `unstructured` — presumably needed by UnstructuredURLLoader; confirm.
%pip install unstructured
%pip install langchain-community
# -U upgrades langchain-google-genai if a version is already installed.
%pip install -U langchain-google-genai
# python-dotenv supplies the `dotenv` module from which load_dotenv is imported.
%pip install python-dotenv

In [None]:
#dependencies
import os
import langchain
from dotenv import load_dotenv
import google.generativeai as genai
from langchain_google_genai import ChatGoogleGenerativeAI
from google.colab import userdata
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import UnstructuredURLLoader
from langchain import OpenAI
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
import pickle
from langchain.chains import RetrievalQAWithSourcesChain


# Integrate API credentials.
# load_dotenv() is called first so that values from a local .env file (if one exists)
# are visible to the os.getenv lookups below.
load_dotenv()

# Read the Gemini key from GEMINI_API_KEYS, falling back to an already-set GOOGLE_API_KEY.
gemini_api_key = os.getenv("GEMINI_API_KEYS") or os.getenv("GOOGLE_API_KEY")
if not gemini_api_key:
    # Assigning None into os.environ raises an opaque TypeError, so fail early with guidance.
    raise RuntimeError(
        "Gemini API key not found: set GEMINI_API_KEYS (or GOOGLE_API_KEY) "
        "in the environment or in a .env file before running this cell."
    )

# Export the key under GOOGLE_API_KEY, the name read back when the LLM is constructed.
os.environ['GOOGLE_API_KEY'] = gemini_api_key


# Build the Gemini chat model used as `llm` by the retrieval chain.
# The settings are collected in one mapping so they can be read and tuned in one place;
# the API key is taken from the GOOGLE_API_KEY environment variable.
_llm_settings = {
    "model": "models/gemini-2.0-flash",
    "temperature": 0.9,
    "max_tokens": 100,
    "google_api_key": os.environ["GOOGLE_API_KEY"],
}
llm = ChatGoogleGenerativeAI(**_llm_settings)

# Collect the article URLs to index: a built-in seed list plus any URLs typed in by the user.
urls = [
    "https://www.moneycontrol.com/news/business/markets/wall-street-rises-as-tesla-soars-on-ai-optimism-11351111.html",
    "https://www.moneycontrol.com/news/business/tata-motors-launches-punch-icng-price-starts-at-rs-7-1-lakh-11098751.html",
    "https://www.thehindu.com/news/national/india-bans-direct-indirect-imports-from-pakistan/article69533634.ece",
]

# Ask how many extra URLs to add. A blank answer means "none"; anything that is not a
# whole number is rejected with a message that names the offending input.
_raw_count = input("How many URLs would you like to input? ").strip()
try:
    num_urls = int(_raw_count) if _raw_count else 0
except ValueError:
    raise ValueError(
        f"Expected a whole number of URLs, got {_raw_count!r}"
    ) from None
# A negative count behaves like zero (range() would be empty anyway); make that explicit.
num_urls = max(num_urls, 0)

for i in range(num_urls):
    url = input(f"Enter URL {i + 1}: ").strip()
    if not url:
        # Blank entries would only produce load failures later, so skip them.
        print(f"Skipping empty entry for URL {i + 1}.")
        continue
    if url in urls:
        # Avoid fetching and indexing the same article twice.
        print(f"URL already in the list, skipping: {url}")
        continue
    urls.append(url)


# Fetch every URL in `urls` and load the page content into `data`.
loaders = UnstructuredURLLoader(urls=urls)
data = loaders.load()

# NOTE(review): the loader appears to skip URLs it cannot fetch instead of raising — confirm.
# If nothing at all was loaded, stop here with a clear message rather than letting the
# later splitting / indexing steps fail on empty input.
if not data:
    raise ValueError(
        "No content could be loaded from the provided URLs; "
        "check the URLs and your network connection."
    )


# Split the loaded content into overlapping chunks (chunk_size=1000, chunk_overlap=200).
# `data` holds documents rather than plain strings, so split_documents is used
# instead of split_text to obtain the chunks.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
docs = text_splitter.split_documents(data)

# Embedding model applied to the chunks: a sentence-transformers model loaded
# through HuggingFaceEmbeddings.
embedding_model = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
)

# Build the FAISS vector index from the chunked documents and the embedding model.
vectorindex_hf = FAISS.from_documents(documents=docs, embedding=embedding_model)

# Persist the vector index to a local file, then read it back into `vectorIndex`.
# NOTE(review): this pickles the whole vector-store object; LangChain's FAISS wrapper also
# offers save_local/load_local — consider switching (confirm against the installed version).
file_path = "vector_index.pkl"
with open(file_path, "wb") as f:
    pickle.dump(vectorindex_hf, f)

# Retrieve the stored vector index.
# SECURITY: pickle.load can execute arbitrary code while deserializing; only load
# files that this notebook wrote itself, never files from an untrusted source.
if not os.path.exists(file_path):
    # Fail here with a clear message instead of leaving `vectorIndex` undefined,
    # which would surface later as a NameError when the chain is built.
    raise FileNotFoundError(f"Vector index file not found: {file_path}")
with open(file_path, "rb") as f:
    vectorIndex = pickle.load(f)

# Set up a Retrieval-Augmented Generation (RAG) chain with LangChain:
# expose the reloaded vector index as a retriever, then combine it with the LLM.
_retriever = vectorIndex.as_retriever()
chain = RetrievalQAWithSourcesChain.from_llm(llm=llm, retriever=_retriever)

In [None]:
# Ask a question against the indexed articles.
# LangChain's debug flag is switched off before the chain is invoked.
langchain.debug = False

query = "What did India ban?.Explain the situation in breif"
_chain_inputs = {"question": query}

# Left as the cell's final expression so the notebook displays the returned value.
chain(_chain_inputs, return_only_outputs=True)