In [None]:
import pandas as pd
import numpy as np

import os
import sys

from langchain.chains import RetrievalQA
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.llms import LlamaCpp
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.document_loaders import PyPDFDirectoryLoader
from langchain_community.llms import LlamaCpp

In [17]:
# loader = PyPDFDirectoryLoader("/data_files/")
# Directory holding the source PDFs. Set the CHATBOT_DATA_DIR environment
# variable to run this notebook on another machine; when it is unset the
# original local path is used, so existing behaviour is unchanged.
DATA_DIR = os.environ.get(
    "CHATBOT_DATA_DIR",
    "D:\\Projects\\All Purpose Chatbot\\Mistral_Implementation\\data_files",
)

loader = PyPDFDirectoryLoader(DATA_DIR)
# `data` is the loader's output; it is consumed by the text splitter below.
data = loader.load()

In [None]:
# Display the loader output for a quick visual check.
# NOTE(review): this shows every loaded document; consider slicing
# (e.g. the first few items) if the PDF corpus is large.
data

## Chunking

In [19]:
# Chunking parameters, kept as named constants so they are easy to tune.
CHUNK_SIZE = 1000
CHUNK_OVERLAP = 20

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
)

# Break the loaded documents into chunks for embedding.
text_chunks = text_splitter.split_documents(data)

In [None]:
# Number of chunks produced by the splitter.
len(text_chunks)

In [None]:
# Inspect the type of a single element of the splitter output.
type(text_chunks[0])

In [None]:
# Preview the first chunk.
text_chunks[0]

In [None]:
# Preview the second chunk.
text_chunks[1]

## Embedding model, vector store, and LLM setup

In [None]:
# Hugging Face model identifier used to embed the text chunks.
EMBEDDING_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"

embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME)

In [21]:
# Build the FAISS index from the chunks, embedding them with `embeddings`.
vector_store = FAISS.from_documents(text_chunks, embedding=embeddings)

In [None]:
# Location of the GGUF model file. Set the CHATBOT_MODEL_PATH environment
# variable to run this notebook on another machine; when it is unset the
# original local path is used, so existing behaviour is unchanged.
MODEL_PATH = os.environ.get(
    "CHATBOT_MODEL_PATH",
    "D:\\Projects\\All Purpose Chatbot\\Mistral_Implementation\\mistral-7b-instruct-v0.1.Q2_K.gguf",
)

# Local llama.cpp-backed LLM used by the QA chain below.
llm = LlamaCpp(
    streaming=True,
    model_path=MODEL_PATH,
    temperature=0.75,
    top_p=1,
    verbose=True,
    n_ctx=4096,
)

In [12]:
# from langchain.prompts import PromptTemplate

In [13]:
# prompt_template = PromptTemplate(
#     input_variables=["context", "question"],
#     template=(
#         "You are a knowledgeable assistant. Answer the question based on the following context.\n\n"
#         "Context:\n{context}\n\n"
#         "Question: {question}\n\n"
#         "If the answer is not in the context, say 'Sorry, I don't have answer for that.'."
#     )
# )

In [22]:
# Retriever over the FAISS index, configured with search_kwargs {"k": 2}.
retriever = vector_store.as_retriever(search_kwargs={"k": 2})

# Retrieval-augmented QA chain combining the local LLM with the retriever.
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
)

In [15]:
# qa = RetrievalQA.from_chain_type(llm = llm, 
#                                  chain_type = "stuff", 
#                                  retriever = vector_store.as_retriever(search_kwargs = {"k": 2}),
#                                  chain_type_kwargs = {"prompt": prompt_template}
#                                  )

In [None]:
question = "Who made Saturn V?"
# `Chain.run` is deprecated; `invoke` takes the chain's input dict and returns
# a dict whose "result" entry is the answer string.
qa.invoke({"query": question})["result"]

In [None]:
question = "I want to know about buying any property?"
# `Chain.run` is deprecated; `invoke` takes the chain's input dict and returns
# a dict whose "result" entry is the answer string.
qa.invoke({"query": question})["result"]

In [None]:
question = "Who is Megha?"
# `Chain.run` is deprecated; `invoke` takes the chain's input dict and returns
# a dict whose "result" entry is the answer string.
qa.invoke({"query": question})["result"]

In [None]:
question = "Who are Rahul and Dhruv?"
# `Chain.run` is deprecated; `invoke` takes the chain's input dict and returns
# a dict whose "result" entry is the answer string.
qa.invoke({"query": question})["result"]

In [None]:
question = "How can we buy real estate property from Dhruv?"
# `Chain.run` is deprecated; `invoke` takes the chain's input dict and returns
# a dict whose "result" entry is the answer string.
qa.invoke({"query": question})["result"]

In [None]:
question = "How to rent a room?"
# `Chain.run` is deprecated; `invoke` takes the chain's input dict and returns
# a dict whose "result" entry is the answer string.
qa.invoke({"query": question})["result"]

In [None]:
question2 = "What was the name of the station that Saturn V launched and how many stages did it have?"
# `Chain.run` is deprecated; `invoke` takes the chain's input dict and returns
# a dict whose "result" entry is the answer string.
qa.invoke({"query": question2})["result"]

In [None]:
question2 = "Who is Sachin Tendulkar?"
# `Chain.run` is deprecated; `invoke` takes the chain's input dict and returns
# a dict whose "result" entry is the answer string.
qa.invoke({"query": question2})["result"]

In [19]:
# question2 = "Who is Sachin Tendulkar?"
# qa.run(question2)

## Chatbot Loop

In [20]:
# while True:
#   user_input = input(f"Input Prompt: ")
#   if user_input == 'exit':
#     print('Exiting')
#     sys.exit()
#   if user_input == '':
#     continue
#   result = qa({'query': user_input})
#   print(f"Answer: {result['result']}")

**TODO:** Replace the manual question cells above with an automated loop over a list of test questions.

## Logging intermediate steps

In [21]:
# # Function to log intermediate outputs
# def log_intermediate_steps(qa_chain, question):
#     retriever = qa_chain.retriever
#     docs = retriever.get_relevant_documents(question)
#     context = "\n".join([doc.page_content for doc in docs])
#     prompt = qa_chain.llm_chain.prompt.format(context=context, question=question)
    
#     print("Retrieved Documents:\n", context)
#     print("Generated Prompt:\n", prompt)
    
#     return context, prompt

In [23]:
def log_intermediate_steps(retriever, question, prompt_template=None):
    """Print and return the retrieved context and the prompt built from it.

    Args:
        retriever: Object exposing ``get_relevant_documents(question)``. An
            object without that method but with a ``.retriever`` attribute
            (e.g. a QA chain) is also accepted and unwrapped.
        question: The user query string.
        prompt_template: Optional template with ``{context}`` and
            ``{question}`` placeholders. Anything offering
            ``.format(context=..., question=...)`` works, including a plain
            ``str``. When omitted, a notebook-level ``prompt_template`` is
            used if one is defined, otherwise a built-in default.

    Returns:
        A ``(context, prompt)`` tuple of strings: the newline-joined page
        contents of the retrieved documents and the formatted prompt.
    """
    # Accept the whole chain as well as a bare retriever, so a call such as
    # log_intermediate_steps(qa, question) does not fail with AttributeError.
    if not hasattr(retriever, "get_relevant_documents") and hasattr(retriever, "retriever"):
        retriever = retriever.retriever

    if prompt_template is None:
        # Keep honouring a global `prompt_template` when that cell has been
        # run, but do not raise NameError when it has not.
        prompt_template = globals().get("prompt_template")
    if prompt_template is None:
        prompt_template = (
            "You are a knowledgeable assistant. Answer the question based on the following context.\n\n"
            "Context:\n{context}\n\n"
            "Question: {question}\n\n"
            "If the answer is not in the context, say 'Sorry, I don't have answer for that.'."
        )

    docs = retriever.get_relevant_documents(question)
    context = "\n".join(doc.page_content for doc in docs)
    prompt = prompt_template.format(context=context, question=question)

    print("Retrieved Documents:\n", context)
    print("Generated Prompt:\n", prompt)

    return context, prompt

In [None]:
question = "Who made Saturn V?"
# log_intermediate_steps calls get_relevant_documents on its first argument,
# so hand it the chain's retriever rather than the chain itself.
context, prompt = log_intermediate_steps(qa.retriever, question)
# `Chain.run` is deprecated; `invoke` takes the chain's input dict and returns
# a dict whose "result" entry is the answer string.
answer = qa.invoke({"query": question})["result"]

print("Answer:\n", answer)