In [1]:
import os
from langchain_ollama import OllamaEmbeddings
import faiss
import numpy as np
from langchain.document_loaders import CSVLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [2]:
# Load data
loader = CSVLoader('hotel_bookings_cleaned.csv')
data = loader.load()

In [3]:
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=20)
text_splitter.split_documents(data)[:5]

[Document(metadata={'source': 'hotel_bookings_cleaned.csv', 'row': 0}, page_content=': 0\nhotel: Resort Hotel\nis_canceled: 0\nlead_time: 342\narrival_date_year: 2015\narrival_date_month: July\narrival_date_week_number: 27\narrival_date_day_of_month: 1\nstays_in_weekend_nights: 0\nstays_in_week_nights: 0\nadults: 2\nchildren: 0.0\nbabies: 0\nmeal: BB\ncountry: PRT\nmarket_segment: Direct\ndistribution_channel: Direct\nis_repeated_guest: 0\nprevious_cancellations: 0\nprevious_bookings_not_canceled: 0\nreserved_room_type: C\nassigned_room_type: C\nbooking_changes: 3\ndeposit_type: No Deposit\nagent: 9\ndays_in_waiting_list: 0\ncustomer_type: Transient\nadr: 0.0\nrequired_car_parking_spaces: 0\ntotal_of_special_requests: 0\nreservation_status: Check-Out\nreservation_status_date: 2015-07-01\nis_corporate: 0\nrevenue: 0.0'),
 Document(metadata={'source': 'hotel_bookings_cleaned.csv', 'row': 1}, page_content=': 1\nhotel: Resort Hotel\nis_canceled: 0\nlead_time: 737\narrival_date_year: 2015\n

In [None]:
documents = text_splitter.split_documents(data)
documents

In [5]:
from langchain_community.vectorstores import FAISS

In [8]:
db = FAISS.from_documents(documents,OllamaEmbeddings(model="nomic-embed-text"))

In [9]:
query = "Show me total revenue for July 2017."
result = db.similarity_search(query)
result[0].page_content

': 32300\nhotel: Resort Hotel\nis_canceled: 0\nlead_time: 312\narrival_date_year: 2017\narrival_date_month: July\narrival_date_week_number: 27\narrival_date_day_of_month: 2\nstays_in_weekend_nights: 4\nstays_in_week_nights: 10\nadults: 2\nchildren: 0.0\nbabies: 0\nmeal: BB\ncountry: GBR\nmarket_segment: Offline TA/TO\ndistribution_channel: TA/TO\nis_repeated_guest: 0\nprevious_cancellations: 0\nprevious_bookings_not_canceled: 0\nreserved_room_type: A\nassigned_room_type: A\nbooking_changes: 0\ndeposit_type: No Deposit\nagent: 243\ndays_in_waiting_list: 0\ncustomer_type: Contract\nadr: 104.85\nrequired_car_parking_spaces: 0\ntotal_of_special_requests: 1\nreservation_status: Check-Out\nreservation_status_date: 2017-07-16\nis_corporate: 0\nrevenue: 1467.8999999999999'

In [10]:
from langchain_community.llms import Ollama

llm = Ollama(model="llama2")
llm

  llm = Ollama(model="llama2")


Ollama()

In [11]:
## Design a chat prompt template
from langchain_core.prompts import ChatPromptTemplate
prompt = ChatPromptTemplate.from_template("""
Answer the following question based only on the provided context.
Think step by step before providing a detailed answer.
I will tip you $1000 if the user finds the answer helpful.
NO PREAMBLE
<context>           
{context}
</context> 
Question: {input}
""")


In [12]:
from langchain.chains.combine_documents import create_stuff_documents_chain

document_chain = create_stuff_documents_chain(llm,prompt)

In [13]:
retriever = db.as_retriever()
retriever

VectorStoreRetriever(tags=['FAISS', 'OllamaEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x000001723494FB20>, search_kwargs={})

In [14]:
from langchain.chains import create_retrieval_chain
retrieval_chain = create_retrieval_chain(retriever,document_chain)

In [16]:
response = retrieval_chain.invoke({"input":"Show me total revenue for July 2017."})

In [17]:
response['answer']

" Based on the provided context, the total revenue for July 2017 is $2,687.899999999999.\n\nHere's how I calculated it:\n\n* Revenue for hotel ID 32300: $1467.899999999999\n* Revenue for hotel ID 32400: $988.00\n* Revenue for hotel ID 19167: $270.00\n\nTotal revenue = $1467.899999999999 + $988.00 + $270.00 = $2,687.8999999999999"