In [58]:
import getpass
import os
import pickle
import time

import langchain
import pandas as pd
import streamlit as st
import torch

from langchain import HuggingFaceHub
from langchain.chains import RetrievalQAWithSourcesChain
from langchain.chains.qa_with_sources.loading import load_qa_with_sources_chain
from langchain.document_loaders import UnstructuredURLLoader
from langchain.globals import set_debug
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain_core.messages import HumanMessage
from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAI, GoogleGenerativeAIEmbeddings
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_huggingface import HuggingFaceEndpoint,ChatHuggingFace
from langchain_huggingface import HuggingFacePipeline, ChatHuggingFace
from sentence_transformers import SentenceTransformer
from transformers import pipeline

In [25]:
# Credentials must not be written into the notebook. Each token is taken from
# the environment when it is already set; otherwise it is requested with a
# hidden prompt and stored only in this process's environment.
for _secret_name in ("HUGGINGFACEHUB_API_TOKEN", "GOOGLE_API_KEY"):
    if not os.environ.get(_secret_name):
        os.environ[_secret_name] = getpass.getpass(f"{_secret_name}: ")


In [26]:
# Hugging Face endpoint configuration for the Mistral instruct model.
# Alternative tried earlier: repo_id="tiiuae/falcon-7b-instruct" with
# task="text-generation".
endpoint_options = {
    "repo_id": "mistralai/Mistral-7B-Instruct-v0.2",
    "task": "conversational",
}
llm_endpoint = HuggingFaceEndpoint(**endpoint_options)

In [40]:
# Chat model passed to the QA chain. To use the Hugging Face endpoint instead:
# llm = ChatHuggingFace(llm=llm_endpoint)
llm = ChatGoogleGenerativeAI(
    model="gemini-1.5-flash",
    temperature=0.7,
)

In [41]:
# Embedding model used when the FAISS index is built from the document chunks.
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
)

In [42]:
# Web pages that make up the corpus.
source_urls = [
    "https://en.wikipedia.org/wiki/Alien",
    "https://en.wikipedia.org/wiki/Pakistan",
    "https://en.wikipedia.org/wiki/India",
]
loader = UnstructuredURLLoader(urls=source_urls)
docs = loader.load()

In [43]:
# Split the loaded pages into chunks of at most 200 characters with no
# overlap, trying paragraph, line, sentence and word boundaries in that order.
# NOTE(review): the debug output further down shows very short contexts
# (e.g. "countries and Pakistan"); a larger chunk_size / non-zero overlap may
# give the LLM more useful context - confirm before changing.
text_splitter = RecursiveCharacterTextSplitter(
    separators=["\n\n", "\n", ".", " "],
    chunk_size=200,
    chunk_overlap=0,
)
# NOTE(review): `docs` is rebound here, so re-running this cell on its own
# splits the already-split chunks again.
docs = text_splitter.split_documents(docs)

In [44]:

# Build a FAISS vector index from the document chunks using the embedding model.
vector_index_hugging_face = FAISS.from_documents(
    documents=docs,
    embedding=embeddings,
)

In [45]:
# Serialise the FAISS index to a pickle file in the working directory.
file_path = "vector_index.pkl"
with open(file_path, mode="wb") as index_file:
    pickle.dump(vector_index_hugging_face, index_file)

In [46]:
# Reload the pickled index from disk into `vector_index`.
# SECURITY: pickle.load can execute arbitrary code contained in the file, so
# only load a file that this notebook wrote itself, never an untrusted one.
if not os.path.exists(file_path):
    # Fail here with a clear message instead of silently leaving
    # `vector_index` undefined (or stale) and failing in a later cell.
    raise FileNotFoundError(
        f"{file_path!r} not found; run the cell that writes the index first."
    )
with open(file_path, "rb") as f:
    vector_index = pickle.load(f)

In [47]:
# Retriever over the reloaded index, asking for 5 chunks per query.
retriever = vector_index.as_retriever(search_kwargs={"k": 5})

# Question-answering chain with sources, using the "map_reduce" chain type.
# Source documents are included in the result and verbose logging is on.
chain_map_reduce = RetrievalQAWithSourcesChain.from_chain_type(
    llm=llm,
    chain_type="map_reduce",
    retriever=retriever,
    return_source_documents=True,
    verbose=True,
)


In [48]:
# Question put to the retrieval QA chain.
prompt = "what is the relation of india and pakistan"

In [53]:
# Turn on LangChain's global debug logging so every chain/LLM step is printed.
# Uses the supported setter rather than assigning the deprecated
# `langchain.debug` module attribute.
set_debug(True)

In [54]:
# Run the chain on the question. `.invoke` replaces the deprecated direct
# call (`chain(...)`); `return_only_outputs=True` keeps the input keys out of
# the returned dict, as before.
answer = chain_map_reduce.invoke({"question": prompt}, return_only_outputs=True)

[32;1m[1;3m[chain/start][0m [1m[chain:RetrievalQAWithSourcesChain] Entering Chain run with input:
[0m{
  "question": "what is the relation of india and pakistan"
}
[32;1m[1;3m[chain/start][0m [1m[chain:RetrievalQAWithSourcesChain > chain:MapReduceDocumentsChain] Entering Chain run with input:
[0m[inputs]
[32;1m[1;3m[chain/start][0m [1m[chain:RetrievalQAWithSourcesChain > chain:MapReduceDocumentsChain > chain:LLMChain] Entering Chain run with input:
[0m{
  "input_list": [
    {
      "context": "countries and Pakistan",
      "question": "what is the relation of india and pakistan"
    },
    {
      "context": "Main article: China–Pakistan relations",
      "question": "what is the relation of india and pakistan"
    },
    {
      "context": ". The region has been the subject of a bitter dispute between India and Pakistan since they became independent in 1947",
      "question": "what is the relation of india and pakistan"
    },
    {
      "context": "... has been the

In [56]:
# Display the generated answer text from the chain result.
answer["answer"]

'The provided text mentions a "bitter dispute" between India and Pakistan since their independence in 1947, and that the region has been the subject of this dispute since the partition of the Indian subcontinent in 1947.  More information is needed for a complete answer.\n\n'