### Import all necessary libraries

In [None]:
import os
import warnings
from dotenv import load_dotenv


from langchain.llms import OpenAI
from langchain.chains import RetrievalQA
from langchain.vectorstores import Chroma
from langchain.prompts import PromptTemplate
from langchain.embeddings import OpenAIEmbeddings
from langchain.memory import ConversationBufferMemory
from langchain.document_loaders import DirectoryLoader, PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Silence every warning category for the rest of the session.
warnings.filterwarnings("ignore")

# Load variables from a .env file (if one is found) into the process
# environment so that later os.getenv() lookups can see them.
load_dotenv()

### Initialize the parameters

In [None]:
# Splitter settings handed to RecursiveCharacterTextSplitter further down.
chunk_size = 1000
chunk_overlap = 200
# Root directory searched for PDF files by DirectoryLoader.
PDFs_PATH = "../../data/PDFs"
# Directory handed to Chroma.from_documents as its persist_directory.
persist_directory = "../../db"

### Load the OpenAI API key

In [None]:
# Read the OpenAI API key from the process environment.
# os.getenv() never raises for a missing variable -- it returns None -- so a
# try/except around it can never fire. Check the value explicitly and fail
# fast with a clear message instead of carrying a None key forward.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
if not OPENAI_API_KEY:
    raise RuntimeError(
        "OPENAI_API_KEY is not set; export it or add it to the .env file"
    )

### Utils

In [None]:
import yaml
import joblib


def dump(value=None, filename=None):
    """Serialize ``value`` to ``filename`` using ``joblib.dump``.

    Args:
        value: Object to serialize. ``None`` is rejected.
        filename: Destination forwarded to ``joblib.dump``. ``None`` is rejected.

    Raises:
        ValueError: If ``value`` or ``filename`` is ``None``.
    """
    if value is None or filename is None:
        # ValueError is a subclass of Exception, so callers that catch the
        # generic Exception raised previously keep working.
        raise ValueError("Value and filename must be provided")

    joblib.dump(value=value, filename=filename)


def load(filename=None):
    """Deserialize and return the object stored at ``filename`` via ``joblib.load``.

    Args:
        filename: Source forwarded to ``joblib.load``. ``None`` is rejected.

    Returns:
        The object returned by ``joblib.load``.

    Raises:
        ValueError: If ``filename`` is ``None``.
    """
    if filename is None:
        # ValueError is a subclass of Exception, so callers that catch the
        # generic Exception raised previously keep working.
        raise ValueError("Filename should be passed")

    # SECURITY: joblib.load unpickles the file, which can execute arbitrary
    # code. Only call this on files produced by a trusted source.
    return joblib.load(filename=filename)


def config():
    """Parse ``./config.yml`` and return its contents.

    The path is relative to the current working directory.

    Returns:
        Whatever ``yaml.safe_load`` produces for the file.
    """
    # Decode explicitly as UTF-8 so parsing does not depend on the platform's
    # default locale encoding.
    with open("./config.yml", "r", encoding="utf-8") as file:
        return yaml.safe_load(file)

### Extract the dataset

In [None]:
# Collect every file matching "**/*.pdf" under PDFs_PATH, reading each one
# with PyPDFLoader; multithreaded loading is enabled.
loader = DirectoryLoader(
    path=PDFs_PATH,
    glob="**/*.pdf",
    use_multithreading=True,
    loader_cls=PyPDFLoader
)

documents = loader.load()

# Splitter configured with the chunk_size / chunk_overlap parameters set above.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=chunk_size,
    chunk_overlap=chunk_overlap
)

# NOTE: `documents` is rebound here -- from this point on it holds the split
# chunks, not the documents returned by loader.load().
documents = text_splitter.split_documents(documents = documents)

### Store the document chunks in the vector DB

In [None]:
# Build a Chroma vector store from the split chunks, embedding them with
# OpenAIEmbeddings and pointing the store at persist_directory.
# NOTE(review): whether this alone writes the store to disk, or an explicit
# persist() call is also needed, depends on the installed Chroma version -- confirm.
vectordb = Chroma.from_documents(
    documents=documents,
    embedding=OpenAIEmbeddings(),
    persist_directory=persist_directory
)

### Load the persisted DB from disk

In [None]:
def access_to_db(self):
    """Open the Chroma store configured for this object and cache it.

    The store directory is read from ``self.CONFIG["path"]["DATABASE_PATH"]``
    and queries are embedded with ``OpenAIEmbeddings``. The resulting store is
    kept on ``self.database`` and also returned.
    """
    # Embeddings are created before the config lookup, matching the order in
    # which the call arguments were originally evaluated.
    embedder = OpenAIEmbeddings()
    db_directory = self.CONFIG["path"]["DATABASE_PATH"]

    self.database = Chroma(
        embedding_function=embedder,
        persist_directory=db_directory,
    )
    return self.database

### Initialise the prompt template

In [None]:
# Prompt text for the QA chain. The {context}, {history} and {question}
# placeholders correspond to the input_variables of the PromptTemplate that
# chatReceipe builds from this string.
template = """
Use the following context and chat history to answer the question:
------
Context:
{context}
------
Chat History:
{history}
------
Question:
{question}
Answer:
"""

In [None]:
def chatReceipe(self):
    """Run an interactive retrieval-augmented Q&A loop on stdin/stdout.

    Builds a ``RetrievalQA`` chain of type ``"stuff"`` over the store returned
    by ``self.access_to_db()``, using the module-level ``template`` as prompt
    and a ``ConversationBufferMemory`` that feeds the ``history`` placeholder.
    It then answers one typed query per iteration until ``self.chat_limit``
    is used up.

    Reads ``self.CONFIG["retriever"]["k"]``,
    ``self.CONFIG["OpenAI"]["temperature"]``, ``self.CONFIG["OpenAI"]["model"]``
    and ``self.chat_limit``, and calls ``self.access_api_key()``.

    Side effects: sets ``self.database``, ``self.retriever``, ``self.prompt``,
    ``self.memory`` and ``self.chain``; decrements ``self.chat_limit`` once per
    answered query; reads from stdin and prints to stdout.
    """
    self.database = self.access_to_db()

    # Retrieve the top-k chunks per query; k comes from the config.
    self.retriever = self.database.as_retriever(
        search_kwargs={"k": self.CONFIG["retriever"]["k"]}
    )

    self.prompt = PromptTemplate(
        input_variables=["context", "question", "history"], template=template
    )
    # memory_key must match the {history} placeholder in the template and
    # input_key the {question} placeholder.
    self.memory = ConversationBufferMemory(
        input_key="question", memory_key="history"
    )

    self.chain = RetrievalQA.from_chain_type(
        llm=OpenAI(
            temperature=self.CONFIG["OpenAI"]["temperature"],
            model_name=self.CONFIG["OpenAI"]["model"],
            openai_api_key=self.access_api_key(),
        ),
        chain_type="stuff",
        retriever=self.retriever,
        chain_type_kwargs={"prompt": self.prompt, "memory": self.memory},
    )

    # `> 0` rather than `!= 0`: a negative or fractional limit would never hit
    # exactly zero and the loop would run forever.
    while self.chat_limit > 0:
        inputs = input("Query: ")
        result = self.chain(inputs=inputs)["result"]

        print("Answer:", result)

        self.chat_limit -= 1

    print("You have crossed the limit for today, Have a nice day !".capitalize())