In [None]:
# Prints a fixed message; presumably a quick check that the kernel runs cells.
print("Good to GO!!!")

In [None]:
from langchain import PromptTemplate
from langchain.chains import RetrievalQA
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.prompts import PromptTemplate
from langchain.llms import CTransformers

In [None]:
def load_pdf(data):
    """Load the PDF files found in a directory.

    Args:
        data: Directory path handed to ``DirectoryLoader``.

    Returns:
        Whatever ``DirectoryLoader.load()`` yields for the files matching
        the ``*.pdf`` glob, each read with ``PyPDFLoader``.
    """
    pdf_loader = DirectoryLoader(data, glob="*.pdf", loader_cls=PyPDFLoader)
    return pdf_loader.load()

In [None]:
# Load the PDFs from the relative 'Data/' directory.
extracted_data = load_pdf('Data/')

In [None]:
# Peek at the text of the item at index 15.
# NOTE(review): assumes at least 16 items were loaded; a smaller corpus
# would fail on this index.
print(extracted_data[15].page_content)

In [None]:
def text_chunking(data, chunk_size=500, chunk_overlap=20):
    """Split loaded documents into smaller, slightly overlapping chunks.

    Args:
        data: Documents to split; passed straight to ``split_documents``.
        chunk_size: Maximum chunk size given to the splitter. Defaults to
            500, the value that used to be hard-coded here.
        chunk_overlap: Overlap between neighbouring chunks given to the
            splitter. Defaults to 20, the value that used to be hard-coded.

    Returns:
        The chunks produced by
        ``RecursiveCharacterTextSplitter.split_documents``.
    """
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return splitter.split_documents(data)

In [None]:
# Split the loaded documents into chunks for embedding.
text_chunks = text_chunking(extracted_data)

In [None]:
# Display the text of the first chunk.
text_chunks[0].page_content

In [None]:
def download_hugging_face_embeddings():
    """Create the embedding model used throughout this notebook.

    Returns:
        A ``HuggingFaceEmbeddings`` instance configured with the
        ``sentence-transformers/all-MiniLM-L6-v2`` model.
    """
    return HuggingFaceEmbeddings(
        model_name='sentence-transformers/all-MiniLM-L6-v2',
    )

In [None]:
# Instantiate the embedding model once and reuse it in the cells below.
embedding_model = download_hugging_face_embeddings()

In [None]:
# Display the embedding model object.
embedding_model

In [None]:
# Embed a sample string; presumably a smoke test of the embedding model.
query_result = embedding_model.embed_query("Hello World!")

In [None]:
# Display the value returned by embed_query for the sample string.
query_result

In [None]:
# Relative directory passed to Chroma as its persist_directory.
persist_directory='db'

In [None]:
# Embed the chunks and index them in a Chroma store backed by
# `persist_directory`. The chunk list is passed as-is: the previous
# element-by-element copy of it added nothing.
# NOTE(review): re-running this cell presumably adds the same chunks to the
# existing store a second time — confirm before re-executing.
vector_store = Chroma.from_documents(
    text_chunks,
    embedding=embedding_model,
    persist_directory=persist_directory,
)

In [None]:
# Retriever over the vector store, created without any search_kwargs.
retriever  = vector_store.as_retriever()

In [None]:
# Run a sample question through the retriever to inspect what it returns.
docs = retriever.get_relevant_documents("Who was Harry's First Teacher?")

In [None]:
# Display the documents retrieved for the sample question.
docs

In [None]:
# Prompt text for the QA chain. {context} and {question} are the two
# placeholders; the PromptTemplate built from this string declares both as
# input variables.
prompt_template="""
Use the following pieces of information to answer the user's question.
If you don't know the answer or there is no context provided , just say don't know , don't try to make up an answer.

Context: {context}
Question: {question}

Only return the helpful answer below and nothing else.
Helpful answer:
"""

In [None]:
# Wrap the raw template text in a PromptTemplate that expects the
# 'context' and 'question' variables.
prompt = PromptTemplate(
    input_variables=['context', 'question'],
    template=prompt_template,
)
# Keyword arguments handed to RetrievalQA.from_chain_type as chain_type_kwargs.
chain_type_kwargs = dict(prompt=prompt)

In [None]:
from huggingface_hub import hf_hub_download

# Hugging Face Hub repository and file name of the model weights to fetch.
repo_id = "TheBloke/Llama-2-7B-Chat-GGML"
filename = "llama-2-7b-chat.ggmlv3.q4_0.bin"

# The returned value is printed below and later passed to CTransformers as
# the model path.
file_path = hf_hub_download(repo_id=repo_id, filename=filename)

print(f"File downloaded to: {file_path}")

In [None]:
# Load the downloaded model file through CTransformers as a 'llama' model,
# with the generation settings given in `config`.
llm = CTransformers(
    model=file_path,
    model_type='llama',
    config=dict(max_new_tokens=512, temperature=0.8),
)

In [None]:
# Build the retrieval QA chain: a dedicated retriever created with
# search_kwargs k=2 feeds the 'stuff' chain, which uses the custom prompt
# from chain_type_kwargs. Source documents are returned alongside the answer.
qa = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=vector_store.as_retriever(search_kwargs=dict(k=2)),
    chain_type='stuff',
    chain_type_kwargs=chain_type_kwargs,
    return_source_documents=True,
)

In [None]:
# Interactive question loop: read a prompt, run it through the QA chain and
# print the answer. Type "exit" or "quit" (any case) to stop; blank input is
# skipped instead of being sent to the model.
while True:
    try:
        user_input = input("Input Prompt:")
    except (EOFError, KeyboardInterrupt):
        # End the session cleanly on Ctrl-D / Ctrl-C (or a kernel interrupt)
        # rather than surfacing a traceback.
        print()
        break

    command = user_input.strip().lower()
    if not command:
        # Nothing to ask; avoid a pointless retrieval + generation round trip.
        continue
    if command in {"exit", "quit"}:
        break

    result = qa({'query': user_input})
    print("Response:", result['result'])