In [31]:
import os
from langchain_ollama import OllamaEmbeddings
import faiss
import numpy as np
from langchain.document_loaders import CSVLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
import warnings
warnings.filterwarnings("ignore")

In [32]:
# Read the cleaned hotel-bookings CSV through LangChain's CSVLoader.
# NOTE(review): the chunk previews further down start with ': 0', which
# suggests the CSV has an unnamed first column (possibly a saved index) -
# confirm against the file.
loader = CSVLoader(file_path='data/hotel_bookings_cleaned.csv')
data = loader.load()

In [33]:
# Splitter used for every chunking step in this notebook: 500-character
# chunks with a 100-character overlap between neighbouring chunks.
# NOTE(review): the preview output shows row 0 spread across two chunks, and
# the second chunk no longer carries the hotel / arrival-date fields.
# Consider a chunk_size large enough to keep one CSV row per chunk - confirm
# against the retrieval quality you need.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)

# Preview only: split just the first five rows instead of the whole dataset.
# Chunks are emitted document by document in input order, so as long as each
# row yields at least one chunk the first five chunks here are the same ones
# the full split would produce, without paying for a full pass whose result
# is thrown away.
text_splitter.split_documents(data[:5])[:5]

[Document(metadata={'source': 'data/hotel_bookings_cleaned.csv', 'row': 0}, page_content=': 0\nhotel: Resort Hotel\nis_canceled: 0\nlead_time: 342\narrival_date_year: 2015\narrival_date_month: July\narrival_date_week_number: 27\narrival_date_day_of_month: 1\nstays_in_weekend_nights: 0\nstays_in_week_nights: 0\nadults: 2\nchildren: 0\nbabies: 0\nmeal: BB\ncountry: PRT\nmarket_segment: Direct\ndistribution_channel: Direct\nis_repeated_guest: 0\nprevious_cancellations: 0\nprevious_bookings_not_canceled: 0\nreserved_room_type: C\nassigned_room_type: C\nbooking_changes: 3\ndeposit_type: No Deposit\nagent: 9'),
 Document(metadata={'source': 'data/hotel_bookings_cleaned.csv', 'row': 0}, page_content='reserved_room_type: C\nassigned_room_type: C\nbooking_changes: 3\ndeposit_type: No Deposit\nagent: 9\ndays_in_waiting_list: 0\ncustomer_type: Transient\nadr: 0\nrequired_car_parking_spaces: 0\ntotal_of_special_requests: 0\nreservation_status: Check-Out\nreservation_status_date: 01-07-2015\nis_cor

In [34]:
# Split every loaded row into chunks; `documents` is the list that gets
# embedded and indexed by FAISS.from_documents in a later cell.
documents = text_splitter.split_documents(data)
# Display only the first five chunks as a quick sanity check.
documents[:5]

[Document(metadata={'source': 'data/hotel_bookings_cleaned.csv', 'row': 0}, page_content=': 0\nhotel: Resort Hotel\nis_canceled: 0\nlead_time: 342\narrival_date_year: 2015\narrival_date_month: July\narrival_date_week_number: 27\narrival_date_day_of_month: 1\nstays_in_weekend_nights: 0\nstays_in_week_nights: 0\nadults: 2\nchildren: 0\nbabies: 0\nmeal: BB\ncountry: PRT\nmarket_segment: Direct\ndistribution_channel: Direct\nis_repeated_guest: 0\nprevious_cancellations: 0\nprevious_bookings_not_canceled: 0\nreserved_room_type: C\nassigned_room_type: C\nbooking_changes: 3\ndeposit_type: No Deposit\nagent: 9'),
 Document(metadata={'source': 'data/hotel_bookings_cleaned.csv', 'row': 0}, page_content='reserved_room_type: C\nassigned_room_type: C\nbooking_changes: 3\ndeposit_type: No Deposit\nagent: 9\ndays_in_waiting_list: 0\ncustomer_type: Transient\nadr: 0\nrequired_car_parking_spaces: 0\ntotal_of_special_requests: 0\nreservation_status: Check-Out\nreservation_status_date: 01-07-2015\nis_cor

In [35]:
from langchain_community.vectorstores import FAISS

In [36]:
# Build the FAISS vector store from the chunked documents, using the Ollama
# "nomic-embed-text" model as the embedding function.
embeddings = OllamaEmbeddings(model="nomic-embed-text")
db = FAISS.from_documents(documents=documents, embedding=embeddings)

In [37]:
# Persist the FAISS store to disk.
# NOTE(review): the target folder is an empty string, which presumably
# resolves to the current working directory - confirm, and consider a
# dedicated folder name so the index files do not mix with the notebook's
# other files.
db.save_local('')

In [38]:
# Sanity-check retrieval directly against the vector store, before any LLM
# is involved.
query = "In which year adr is maximum"
result = db.similarity_search(query=query)

# Show the text of the first returned chunk only.
top_hit = result[0]
top_hit.page_content

'reserved_room_type: D\nassigned_room_type: D\nbooking_changes: 0\ndeposit_type: No Deposit\nagent: 240\ndays_in_waiting_list: 0\ncustomer_type: Transient\nadr: 139.5\nrequired_car_parking_spaces: 1\ntotal_of_special_requests: 1\nreservation_status: Check-Out\nreservation_status_date: 08-07-2016\nis_corporate: 0\nrevenue: 1953'

In [39]:
# langchain_community.llms.Ollama is deprecated in favour of the dedicated
# langchain_ollama package, which this notebook already relies on for
# OllamaEmbeddings. Use its OllamaLLM class for the text-generation model.
from langchain_ollama import OllamaLLM

# Local llama2 model served by Ollama; consumed by the document chain below.
llm = OllamaLLM(model="llama2")
llm

Ollama()

In [40]:
## Design a chat prompt template
# The template exposes two placeholders: {context} and {input}. Later cells
# hand `prompt` to create_stuff_documents_chain and pass the user's question
# under the "input" key.
# NOTE(review): "Think step by step ... detailed answer" and "NO PREAMBLE"
# pull in opposite directions, and nothing tells the model what to do when
# the context does not contain the answer - consider revisiting the wording.
from langchain_core.prompts import ChatPromptTemplate
prompt = ChatPromptTemplate.from_template("""
Answer the following question based only on the provided context.
Think step by step before providing a detailed answer.
I will tip you $1000 if the user finds the answer helpful.
NO PREAMBLE
<context>           
{context}
</context> 
Question: {input}
""")


In [41]:
from langchain.chains.combine_documents import create_stuff_documents_chain

# Combine the LLM and the prompt template into a single chain; the template's
# {context} placeholder is where the supplied documents end up.
document_chain = create_stuff_documents_chain(llm=llm, prompt=prompt)

In [42]:
# Expose the FAISS store as a retriever; no search kwargs are passed, so the
# store's defaults apply.
retriever = db.as_retriever()
# Display the retriever's repr to inspect its configuration.
retriever

VectorStoreRetriever(tags=['FAISS', 'OllamaEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x00000232C7775370>, search_kwargs={})

In [43]:
from langchain.chains import create_retrieval_chain

# End-to-end RAG pipeline: the retriever supplies documents and the document
# chain turns them, together with the question, into an answer.
retrieval_chain = create_retrieval_chain(
    retriever=retriever,
    combine_docs_chain=document_chain,
)

In [46]:
# Run the full retrieval + generation pipeline for a single question.
question = "Highest Revenue in which year "
response = retrieval_chain.invoke({"input": question})

In [47]:
# Show only the generated answer text from the chain's response.
# NOTE(review): the prompt restricts the model to the provided context, so
# this answer presumably reflects only the retrieved chunks rather than an
# aggregate over the whole CSV - verify before trusting it.
response['answer']

'Based on the provided context, the highest revenue is $1408.96, which belongs to the reservation made in 2016 for the following reasons:\n\n1. The arrival date is in September 2016, which is the highest revenue-generating month based on the provided data.\n2. The reservation was made through a corporate distribution channel, which generally generates higher revenue than other channels.\n3. The reservation has no cancellations or previous bookings not canceled, indicating that the guest is likely to honor their reservation and generate revenue for the hotel.\n4. The reserved room type is "G", which is the highest-revenue generating room type based on the provided data.\n\nTherefore, the answer to the question is 2016.'