In [None]:
import math
import os
import time
from getpass import getpass

import numpy as np
import streamlit as st

from langchain.chains import RetrievalQA
# NOTE(review): `langchain.models` does not look like a real LangChain module and
# `LLM` is not used anywhere below -- confirm, then repair or drop this import.
from langchain.models import LLM
from langchain.prompts import PromptTemplate
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFDirectoryLoader
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.embeddings import HuggingFaceBgeEmbeddings
from langchain_community.llms import HuggingFaceHub
from langchain_community.llms.huggingface_pipeline import HuggingFacePipeline
from langchain_community.vectorstores import FAISS

In [None]:
# Load every PDF found under ./credit_card.
loader = PyPDFDirectoryLoader("./credit_card")
documents = loader.load()

# Split the loaded documents into overlapping chunks
# (chunk_size=1000, chunk_overlap=200).
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)
final_documents = text_splitter.split_documents(documents)

# Display the first chunk as the cell output.
final_documents[0]

In [None]:
# Number of chunks produced by the text splitter.
len(final_documents)

In [None]:
# Embedding model (Hugging Face BGE wrapper), configured to run on CPU and to
# return normalized embeddings.
# Alternative model noted by the author: sentence-transformers/all-MiniLM-l6-v2
huggingface_embeddings = HuggingFaceBgeEmbeddings(
    model_name="BAAI/bge-small-en-v1.5",
    model_kwargs={"device": "cpu"},
    encode_kwargs={"normalize_embeddings": True},
)

In [None]:
# Embed the first chunk once and reuse the vector for both prints, so the
# embedding model is not run twice on the same text.
sample_embedding = np.array(
    huggingface_embeddings.embed_query(final_documents[0].page_content)
)
print(sample_embedding)
print(sample_embedding.shape)

In [None]:
# Build the FAISS vector store from the chunk embeddings.
# NOTE(review): only the first MAX_INDEXED_CHUNKS chunks are indexed, so any
# later chunk can never be retrieved. Presumably a cap to keep CPU embedding
# time down -- confirm, then raise it or index everything.
MAX_INDEXED_CHUNKS = 120
vectorstore = FAISS.from_documents(
    final_documents[:MAX_INDEXED_CHUNKS],
    huggingface_embeddings,
)

In [None]:
# Sanity-check the index with a plain similarity search and print the text of
# the first chunk it returns.
query = "What factors affect customer satisfaction?"
relevant_docments = vectorstore.similarity_search(query)

print(relevant_docments[0].page_content)

In [None]:
# Expose the vector store as a retriever (similarity search, k=3).
retriever = vectorstore.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 3},
)
print(retriever)

In [None]:
# Never hardcode the token in the notebook. Keep HUGGINGFACEHUB_API_TOKEN if the
# environment already provides a non-empty value (assigning "" here would wipe
# it); otherwise prompt for it without echoing the input.
if not os.environ.get("HUGGINGFACEHUB_API_TOKEN"):
    os.environ["HUGGINGFACEHUB_API_TOKEN"] = getpass("Hugging Face Hub API token: ")

In [None]:
# Query Mistral-7B through the HuggingFaceHub wrapper.
# NOTE(review): presumably relies on the HUGGINGFACEHUB_API_TOKEN environment
# variable being set before this cell runs -- confirm.
hface = HuggingFaceHub(
    repo_id="mistralai/Mistral-7B-v0.1",
    model_kwargs={"temperature": 0.1, "max_length": 500},
)

query = "What is American Express?"
hface.invoke(query)

In [None]:
# Hugging Face models can also be run locally through the HuggingFacePipeline class.
# Note: this rebinds `hface`, replacing whatever an earlier cell assigned to it.
hface = HuggingFacePipeline.from_model_id(
    model_id="mistralai/Mistral-7B-v0.1",
    task="text-generation",
    pipeline_kwargs={"temperature": 0, "max_new_tokens": 300},
)

# `llm` is a second name for the same pipeline object; `query` is whatever an
# earlier cell last assigned.
llm = hface
llm.invoke(query)

In [None]:
# Raw prompt text for the QA chain. {context} and {question} are the two
# placeholders declared as input variables when the PromptTemplate is built.
prompt_template = """
Use the following piece of context to answer the question asked.
Please try to provide the answer only based on the context

{context}
Question:{question}

Helpful Answers:
"""

In [None]:
# Wrap the raw template string; "context" and "question" are its input variables.
prompt = PromptTemplate(
    input_variables=["context", "question"],
    template=prompt_template,
)

In [None]:
# Wire the LLM, the retriever and the custom prompt into a RetrievalQA chain
# (chain_type "stuff"), asking it to return the source documents as well.
retrievalQA = RetrievalQA.from_chain_type(
    llm=hface,
    retriever=retriever,
    chain_type="stuff",
    chain_type_kwargs={"prompt": prompt},
    return_source_documents=True,
)

In [None]:
# Question to put to the retrieval QA chain.
query = "What are the key findings in 2023"

In [None]:
# Run the retrieval QA chain on the current query and print the value stored
# under the "result" key of its output.
result = retrievalQA.invoke({"query": query})
print(result["result"])