In [1]:
import os
import openai

from langchain_chroma import Chroma
from langchain_community.document_loaders import TextLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings
from langchain_openai import ChatOpenAI

from dotenv import load_dotenv

# Read key/value pairs from a local .env file (if any) into the process environment.
load_dotenv()

# Hand the key to the openai client; a missing variable raises KeyError here,
# before any API call is attempted.
openai.api_key = os.environ["OPENAI_API_KEY"]

In [2]:
# Notebook configuration: input file and chat model identifier.
data_path = "data/sorted_chat.txt"  # chat log that gets chunked and indexed
llm_name = "gpt-3.5-turbo"          # model name passed to ChatOpenAI

### Load data

In [3]:
# Load the chat log as a single document.
# The log contains Cyrillic text, so the encoding is pinned to UTF-8 instead of
# relying on the platform default (which is not UTF-8 on every OS/locale).
# NOTE(review): assumes the file on disk is UTF-8 encoded — confirm.
loader = TextLoader(data_path, encoding="utf-8")
documents = loader.load()

# Split it into chunks of up to 500 characters, with 50 characters of overlap
# between neighbouring chunks.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
docs = text_splitter.split_documents(documents)
len(docs)

1905

In [4]:
# Embedding model used both when building the index and when querying it.
embedding_function = OpenAIEmbeddings()

# Directory where the Chroma collection is stored on disk.
persist_directory = "docs/chroma/"

### Create vector database and save it to disk

In [5]:
# Build the persisted Chroma index only once.
# Chroma.from_documents adds to whatever collection already lives in
# persist_directory, so re-running this cell would store every chunk again
# (and pay for the embeddings again). Reuse the saved collection when it is
# already populated; delete persist_directory to force a rebuild after the
# source data or the chunking parameters change.
db = Chroma(persist_directory=persist_directory, embedding_function=embedding_function)
if db._collection.count() == 0:
    # Nothing stored yet: embed the chunks and save them to disk.
    db = Chroma.from_documents(docs, embedding_function, persist_directory=persist_directory)

print(f"Number of chuncks: {db._collection.count()}")

Number of chuncks: 1905


### Check RAG chunks

In [6]:
# Sanity-check retrieval with a Russian query ("Is there yogurt in the hotel?").
query = 'Есть ли в отеле йогурт?'

# MMR search: return k=3 chunks chosen out of the fetch_k=5 candidates.
# The hits get their own name so the `docs` list of split chunks is not
# overwritten; otherwise re-running the indexing cell would see only 3 documents.
retrieved_chunks = db.max_marginal_relevance_search(query, k=3, fetch_k=5)

# Print each retrieved chunk followed by a separator line.
for chunk in retrieved_chunks:
    print(chunk.page_content)
    print("#####################")

2024-02-14 03:14:44 Natalia None: Отель в составе испанской сети, обеспечить поставки йогуртов вполне возможно, хамон можно доставить, а йогурты нет, ну, странно. В Riu под управлением тех же испанцев еще несколько лет назад проблем не было. Понятно, что инфодемия подкосила многих, но надо налаживать какие-то базовые моменты хотя бы.
2024-02-14 05:09:18 Артём Б None: А сколько у вас детишек? И как часто вы ездите за границу?
#####################
2024-04-09 16:15:33 Anna Bakshaeva: Ребят, кто-то ездил на утренние экскурсии, брали завтрак с собой из отеля? Есть здесь как в других отелях боксы?
2024-04-09 18:21:13 Elena Kazakova: Мы ездили в Тринидад в 6 утра. Брали с завтрака йогурт в бутылочках и круассаны, а с ужина бутерброды с колбасой. Кофе можете взять с собой в баре. Боксов нет!
#####################
2024-03-27 22:46:10 Оксана Власова: Добрый вечер.Поделитесь,пожалуйста,где покупали ром?Сигары?
2024-03-28 04:04:52 Viktory M: Сколько вас было человек за эту сумму?
Всем хорошего дн

### Load vectordb from persist_directory saved above

In [7]:
# Re-open the collection stored in persist_directory, using the same embedding
# function for queries, and show how many entries it holds.
vectordb = Chroma(
    embedding_function=embedding_function,
    persist_directory=persist_directory,
)
print(vectordb._collection.count())

1905


### Initialize LLM

In [8]:
# Chat model shared by both chains below; temperature 0.2 as configured here.
llm = ChatOpenAI(
    temperature=0.2,
    model_name=llm_name,
)

### Run without memory

In [9]:
# Build prompt
# {context} receives the retrieved chat excerpts and {question} the user query.
# The stray double quote that used to follow {context} was removed so the model
# no longer receives an unbalanced quote after the retrieved text.
from langchain.prompts import PromptTemplate
template = """
You are a travel assistant for a major worldwide tourism company. You have access to customers chat of a given hotel. Here is the data: {context}
Question: {question}
Helpful Answer:
"""

QA_CHAIN_PROMPT = PromptTemplate(input_variables=["context", "question"], template=template)

# Run chain
from langchain.chains import RetrievalQA

query = 'Are there yogurts?'

# Single-turn QA: chunks are retrieved with MMR search and passed to the custom
# prompt; the source documents are returned alongside the answer.
qa_chain = RetrievalQA.from_chain_type(
    llm,
    retriever=vectordb.as_retriever(search_type="mmr"),
    return_source_documents=True,
    chain_type_kwargs={"prompt": QA_CHAIN_PROMPT},
)

result = qa_chain.invoke({"query": query})
result["result"]

"Yes, there are yogurts available in the hotel's restaurant. However, the guest mentioned that they are not from Spain, but rather some other type that they found to be chemically processed."

### Run with memory 

In [10]:
# Conversation memory for the chain below: history is kept under the
# "chat_history" key and returned as message objects (return_messages=True).
from langchain.memory import ConversationBufferMemory

memory = ConversationBufferMemory(return_messages=True, memory_key="chat_history")

In [11]:
from langchain.chains import ConversationalRetrievalChain

# MMR retriever over the persisted collection.
retriever = vectordb.as_retriever(search_type="mmr")

# Conversational chain wired to the LLM, the retriever and the memory object.
qa = ConversationalRetrievalChain.from_llm(llm, retriever=retriever, memory=memory)

In [12]:
# First turn of the conversation.
question = "Are there yogurts?"
result = qa.invoke(dict(question=question))

print(result["answer"])

Yes, there are yogurts available in the hotel restaurant.


In [13]:
# Follow-up turn: "they" is only meaningful given the previous question,
# so this exercises the chain's memory.
question = 'Are they good?'
result = qa.invoke(dict(question=question))

print(result["answer"])

Based on the context provided, it is unclear whether the yogurts are good or not. One person mentioned that there are yogurts available, but there is a question about their edibility.


In [14]:
# Third turn: "what else" again refers back to the earlier turns.
question = 'What else can I eat for breakfast?'
result = qa.invoke(dict(question=question))

print(result["answer"])

Based on the provided information, other options available for breakfast might include yogurt in bottles, croissants, sandwiches with sausage, and smoothies made from papaya and guava. Additionally, there might be some chemical-tasting yogurts, water, lemonades, and tea. It seems like there is a limited selection of non-alcoholic beverages and breakfast items available.
