In [None]:
import os;
from langchain_google_genai import GoogleGenerativeAI;
from langchain.chains import RetrievalQA;
from langchain.document_loaders import UnstructuredURLLoader;
from langchain_google_genai import GoogleGenerativeAIEmbeddings;
from langchain.vectorstores import FAISS;
from langchain.text_splitter import RecursiveCharacterTextSplitter;
from dotenv import load_dotenv

In [None]:
# Pull settings from the local .env file; the Google key is expected under
# the API_KEY variable (a missing key raises KeyError right here).
load_dotenv(".env")
API_KEY = os.environ["API_KEY"]

# Text-generation model that the QA chain uses to answer questions.
llm = GoogleGenerativeAI(
    model="models/text-bison-001",
    google_api_key=API_KEY,
    temperature=0.9,
    max_output_tokens=500,
)

In [None]:
# Fetch the two Moneycontrol articles that make up the knowledge base.
loaders = UnstructuredURLLoader(
    urls=[
        "https://www.moneycontrol.com/news/business/banks/hdfc-bank-re-appoints-sanmoy-chakrabarti-as-chief-risk-officer-11259771.html",
        "https://www.moneycontrol.com/news/business/markets/market-corrects-post-rbi-ups-inflation-forecast-icrr-bet-on-these-top-10-rate-sensitive-stocks-ideas-11142611.html",
    ]
)
data = loaders.load()

# Break the loaded documents into overlapping chunks; `doc` is what the
# embedding cell indexes.
text_splitters = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=300)
doc = text_splitters.split_documents(data)

In [None]:
# Embed every chunk with Google's embedding model and build a FAISS index.
embeddings = GoogleGenerativeAIEmbeddings(
    model="models/embedding-001",
    google_api_key=API_KEY,
)
vector_index = FAISS.from_documents(doc, embeddings)

# Persist the index to the "faiss_index" folder.
vector_index.save_local("faiss_index")

In [None]:
# Retrieval-QA chain: the retriever pulls chunks from the FAISS index and the
# "stuff" chain type passes them to the LLM together with the question.
# return_source_documents=True keeps the retrieved chunks in the result.
chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=vector_index.as_retriever(),
    input_key="query",
    return_source_documents=True,
)

query = "What is the theme of the articles provided?"

# Calling the chain object directly (chain(query)) is deprecated in LangChain;
# invoke() is the supported entry point and returns the same result dict.
# Inputs are keyed by the chain's input_key ("query").
chain.invoke({"query": query})