In [None]:
import os
import shutil
import sys

import openai
# Extend the module search path with the directory two levels above the working directory.
sys.path.append('../..')

from dotenv import find_dotenv, load_dotenv

# Locate a .env file and load its entries into the process environment.
dotenv_file = find_dotenv()
_ = load_dotenv(dotenv_file)

# Configure the OpenAI client from the environment; a missing variable raises KeyError.
openai.api_key = os.environ['OPENAI_API_KEY']

In [None]:
from langchain.schema import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.memory import ConversationBufferMemory
from langchain.chat_models import ChatOpenAI
from langchain.prompts import PromptTemplate
from langchain.chains import ConversationalRetrievalChain

# Sample knowledge base: three short passages, each wrapped in a Document below.
sample_texts = [
    "Devsloop is a software development firm based in Gujranwala, Pakistan, with a team "
    "of 20 employees. The company is led by CEO Ammad Javaid, with Shehwar Khalid as a "
    "partner. Devsloop has managed Adly clients and has worked on notable projects such "
    "as Beambox, Rapid Translate, Pageflows, Upcall, and Easylama",
    "It has an HR manager named Emama Babur, and salaries are disbursed on the 1st of "
    "each month. Devsloop provide a machine either MAC or Laptop to all employees",
    "My name is Salman Ahmad. I am learning Data Science. I joined the company in "
    "september 2023. I did Graduation from Gift University",
]

# One Document per passage, in the same order as the texts above.
documents = [Document(page_content=text) for text in sample_texts]

# Text Splitting
# Break the documents into chunks of at most 100 characters with a 5-character
# overlap, preferring sentence boundaries and falling back to single spaces.
splitter = RecursiveCharacterTextSplitter(
    chunk_size=100,
    chunk_overlap=5,
    # Raw string: "\." is not a valid Python escape and triggers a warning in a
    # normal string literal. The look-behind splits right after ". " while
    # keeping the full stop attached to the preceding sentence.
    separators=[r"(?<=\. )", " "],
    # Separators are matched as literal text unless this flag is set; without it
    # the look-behind pattern never matches and only " " is ever used.
    is_separator_regex=True,
)

# Apply the splitter to every sample document; `splits` holds the resulting chunks.
splits = splitter.split_documents(documents)

# Defining Directory
# Folder in which the Chroma database files are stored.
persist_directory = 'docs/chroma/'

# Remove old database files if any. Done in Python rather than with a shell
# `rm -rf` so it works on every platform and always targets persist_directory;
# ignore_errors=True keeps the "no error when the folder is absent" behaviour.
shutil.rmtree(persist_directory, ignore_errors=True)

# Embedding model used to turn each chunk into a vector.
embedding = OpenAIEmbeddings()

# Embed the chunks and store them in a Chroma database under persist_directory.
vectordb = Chroma.from_documents(splits, embedding, persist_directory=persist_directory)

# Chat model that writes the answers (gpt-3.5-turbo, temperature 0).
llm = ChatOpenAI(
    model_name="gpt-3.5-turbo",
    temperature=0,
)

# Build prompt
template = """Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer. Use three sentences maximum. Keep the answer as concise as possible. Always say "thanks for asking!" at the end of the answer.
{context}
Question: {question}
Helpful Answer:"""
# Wrap the template text in a PromptTemplate with its two input variables.
QA_CHAIN_PROMPT = PromptTemplate(
    template=template,
    input_variables=["context", "question"],
)

# Memory for chat history, stored under the "chat_history" key as message objects.
memory = ConversationBufferMemory(return_messages=True, memory_key="chat_history")

# Retriever over the Chroma store using MMR search with k=4 and fetch_k=8.
mmr_search_kwargs = {'k': 4, 'fetch_k': 8}
retriever = vectordb.as_retriever(search_type="mmr", search_kwargs=mmr_search_kwargs)

# Conversational retrieval chain: ties together the LLM, the retriever, the chat
# memory and the custom answering prompt.
qa = ConversationalRetrievalChain.from_llm(
    llm,
    retriever=retriever,
    memory=memory,
    combine_docs_chain_kwargs={"prompt": QA_CHAIN_PROMPT},
)

# Put a question about the custom data to the chatbot.
question = "How many employees work in devsloop?"
result = qa(
    {"question": question}
)

# Last expression of the cell: display the generated answer.
result['answer']