## This notebook creates a database and a retriever from a file that the chatbot will use to answer questions

In [1]:
# Imports used to load a document into a Chroma DB and query it
from langchain.document_loaders import TextLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores.chroma import Chroma
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_core.prompts import PromptTemplate
from dotenv import load_dotenv

In [2]:
# Load environment variables from a local .env file
# (presumably this supplies the OpenAI API key — confirm).
load_dotenv()

# Split the text on newlines, targeting chunks of about 200 characters
# with no overlap between consecutive chunks.
text_splitter = CharacterTextSplitter(
    separator="\n",
    chunk_size=200,
    chunk_overlap=0,
)

# One embeddings client for the whole notebook; it is passed to the vector
# store below rather than constructing a second, identical instance.
embeddings = OpenAIEmbeddings()

# Read the raw facts file and cut it into chunks ready for embedding.
loader = TextLoader("./data/facts.txt")
docs = loader.load()
splits = text_splitter.split_documents(docs)

# Embed the chunks and store them in a Chroma collection persisted on disk.
# NOTE(review): re-running this cell against an existing ./app/chroma_db
# likely appends the same chunks again (duplicates) — confirm, and clear the
# directory or load the existing store if this cell must be idempotent.
vectorstore = Chroma.from_documents(
    documents=splits,
    embedding=embeddings,
    persist_directory="./app/chroma_db",
)

# Retriever interface over the vector store, consumed by the RAG chain.
retriever = vectorstore.as_retriever()

# Flush the collection to the persist directory.
vectorstore.persist()

In [3]:

# Chat model that generates the final answer; temperature is pinned to 0.
llm = ChatOpenAI(
    temperature=0,
    model_name="gpt-3.5-turbo",
)


def format_docs(docs):
    """Merge retrieved documents into a single context string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` attribute.

    Returns:
        The ``page_content`` of every document, in order, separated by a
        blank line. An empty iterable yields an empty string.
    """
    blank_line = "\n\n"
    contents = [document.page_content for document in docs]
    return blank_line.join(contents)

# Prompt sent to the model: the user's question followed by the retrieved context.
# (Fixes the "follwoing" typo that was previously sent verbatim to the LLM.)
template = 'Please answer the following question: {question} based on the provided context: {context}'

prompt = PromptTemplate.from_template(template)


# RAG pipeline:
#   1. the input question is fed to the retriever, and the retrieved documents
#      are flattened into one string by format_docs -> {context};
#      the question itself is passed through unchanged -> {question}
#   2. both values fill the prompt template
#   3. the chat model answers the prompt
#   4. the model's message is reduced to a plain string
rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

# Smoke test: ask a general question about the indexed file.
print(rag_chain.invoke("What is the text about?"))

The text is about interesting and random facts.
