In [27]:
import os
import streamlit as st
import pickle
import time
import langchain
from langchain_groq import ChatGroq
from dotenv import load_dotenv
from sentence_transformers import SentenceTransformer
from langchain.chains import RetrievalQAWithSourcesChain
from langchain.chains.qa_with_sources.loading import load_qa_with_sources_chain
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import UnstructuredURLLoader
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import FAISS

In [35]:
# Load variables from a .env file into the process environment.
load_dotenv()

# Read the Groq credential once; the same value is validated and then passed to the client.
groq_api_key = os.getenv("GROQ_API_KEY")
if not groq_api_key:
    raise ValueError("GROQ_API_KEY not found in environment variables. Please create a .env file with your key.")

# Chat model used by the QA chain ("mixtral-8x7b-32768" is an alternative model name).
llm = ChatGroq(
    api_key=groq_api_key,
    model_name="llama3-70b-8192",
    temperature=0.7,  # optional: controls creativity of the responses
)

# Source pages that the question-answering chain will draw on.
loader = UnstructuredURLLoader(urls=[
    "https://en.wikipedia.org/wiki/Independence_Day_(Pakistan)",
    "https://nationaltoday.com/pakistan-independence-day/",
])

# Download and parse the pages into LangChain documents.
data = loader.load()

# Show how many documents were loaded.
len(data)

2

In [36]:
# Splitter configured with a chunk size of 1000 and an overlap of 200 between neighbouring chunks.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)

# `data` already holds document objects, so split_documents is used rather than split_text.
docs = text_splitter.split_documents(data)

# Show how many chunks were produced.
len(docs)

78

In [37]:
from langchain_community.embeddings import HuggingFaceEmbeddings

# LangChain-compatible embedding model backed by "all-MiniLM-L6-v2".
embedding_model = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

# Embed every chunk in `docs` and build the FAISS index from the result in one call.
vector_index = FAISS.from_documents(
    documents=docs,
    embedding=embedding_model,
)

In [None]:
# Persist the in-memory vector index to a local pickle file.
file_path = "vector_index.pkl"
with open(file_path, mode="wb") as index_file:
    # Serialise the whole index object; this file acts as the local vector database.
    pickle.dump(vector_index, index_file)

In [39]:
# Reload the vector index from the pickle file at `file_path`.
# NOTE(security): pickle.load can execute arbitrary code contained in the file;
# only load index files that this notebook produced itself.
if not os.path.exists(file_path):
    # Fail here with a clear message instead of silently leaving `vectorIndex`
    # undefined, which would only surface later as a NameError.
    raise FileNotFoundError(
        f"{file_path} not found. Run the cell that pickles the vector index first."
    )

with open(file_path, "rb") as f:
    vectorIndex = pickle.load(f)

In [40]:
# Build the question-answering chain: the reloaded index is exposed as a retriever
# and the Groq chat model produces the answer along with its sources.
chain = RetrievalQAWithSourcesChain.from_llm(
    llm=llm,
    retriever=vectorIndex.as_retriever(),
)

# Display the configured chain.
chain



In [41]:
# Question to ask against the indexed pages.
query = "when is the independence of pakistan?"

# Enable LangChain's global debug flag so each intermediate chain step is printed.
langchain.debug = True

# Calling the chain object directly (Chain.__call__) is deprecated; invoke() is the
# supported entry point and takes the same input mapping.
# return_only_outputs=True keeps only the chain's outputs in the returned dict.
# The "context" entries in the debug trace are the chunks retrieved as most similar to the query.
chain.invoke({"question": query}, return_only_outputs=True)

[32;1m[1;3m[chain/start][0m [1m[chain:RetrievalQAWithSourcesChain] Entering Chain run with input:
[0m{
  "question": "when is the independence of pakistan?"
}
[32;1m[1;3m[chain/start][0m [1m[chain:RetrievalQAWithSourcesChain > chain:MapReduceDocumentsChain] Entering Chain run with input:
[0m[inputs]
[32;1m[1;3m[chain/start][0m [1m[chain:RetrievalQAWithSourcesChain > chain:MapReduceDocumentsChain > chain:LLMChain] Entering Chain run with input:
[0m{
  "input_list": [
    {
      "context": "On 14 August 1947, the new Dominion of Pakistan became independent and Muhammad Ali Jinnah was sworn in as its first governor general in Karachi.[24] Independence was marked with widespread celebration, but the atmosphere remained heated given the communal riots prevalent during independence in 1947.[7]\n\nThe date of independence\n\n[edit]\n\nSince the transfer of power took place on the midnight of 14 and 15 August, the Indian Independence Act 1947 recognized 15 August as the birthda

{'answer': 'FINAL ANSWER: August 14, 1947.\n',
 'sources': 'https://en.wikipedia.org/wiki/Independence_Day_(Pakistan), https://nationaltoday.com/pakistan-independence-day/'}