Set up a Redis vector store that works with LlamaIndex for the FAQ, amenities, and services info.

Load environment variables from .env file

In [1]:
import os
from dotenv import load_dotenv
# Load variables from the .env file one directory above this notebook; REDIS_URL is
# read from the environment further down. The call's return value is the cell output.
load_dotenv('../.env')

True

Set up the embedding model

In [2]:
from llama_index.core import Settings
from llama_index.embeddings.huggingface import HuggingFaceEmbedding

# Use the BAAI/bge-small-en-v1.5 HuggingFace model as the embedding model on LlamaIndex's
# Settings object. The Redis schema defined below declares a 384-dim vector field for it,
# so the two must be changed together.
Settings.embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")

Connect to Redis Cloud

In [3]:
from llama_index.vector_stores.redis import RedisVectorStore
from llama_index.core import VectorStoreIndex, StorageContext
from redisvl.schema import IndexSchema 

# Read the Redis connection string from the environment and fail fast with an
# actionable message if it is missing, instead of handing None to the vector store.
redis_conn_string = os.getenv("REDIS_URL")
if not redis_conn_string:
    raise RuntimeError(
        "REDIS_URL is not set; define it in ../.env or export it before running this notebook."
    )

# Index definition: index name, key prefix, and the fields Redis should index.
schema = IndexSchema.from_dict(
    {
        "index": {"name": "blue_horizon_vector", "prefix": "blue_horizon"},
        # customize fields that are indexed
        "fields": [
            # required fields for llamaindex
            {"type": "tag", "name": "id"},
            {"type": "tag", "name": "doc_id"},
            {"type": "text", "name": "text"},
            # custom vector field for bge-small-en-v1.5 embeddings
            {
                "type": "vector",
                "name": "vector",
                "attrs": {
                    # must match the output size of the embedding model configured above
                    "dims": 384,
                    "algorithm": "hnsw",
                    "distance_metric": "cosine",
                },
            },
        ],
    }
)

# overwrite=True replaces an index of the same name if one already exists
# (the recorded run logs "Index already exists, overwriting.").
vector_store = RedisVectorStore(schema=schema, redis_url=redis_conn_string, overwrite=True)
# Storage context that routes the index built later in this notebook to the Redis store.
storage_context = StorageContext.from_defaults(vector_store=vector_store)

[32m16:08:08[0m [34mredisvl.index.index[0m [1;30mINFO[0m   Index already exists, overwriting.


Load the FAQ

In [4]:
import pandas as pd
# Load the FAQ knowledge base (indexed by faq_id) from a pickled DataFrame.
# NOTE(review): unpickling can execute arbitrary code — only load pickle files from a trusted source.
df_faq = pd.read_pickle('../data/pandas/faq_knowledge_base.pkl')
# Last expression of the cell: renders the frame as the cell output.
df_faq

Unnamed: 0_level_0,category,subcategory,question,answer,keywords,last_updated,helpful_votes,views
faq_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
FAQ000001,booking,reservations,How do I make a reservation?,You can make a reservation through our website...,"book, reserve, reservation, booking",2024-03-09,957,3813
FAQ000002,booking,check-in/out,What is the check-in/check-out time?,Check-in time is 3:00 PM and check-out time is...,"check in, check out, arrival, departure",2024-10-15,967,2793
FAQ000003,booking,modifications,Can I modify my booking?,"Yes, you can modify your booking through our w...","change, modify, update, edit",2024-12-05,443,1929
FAQ000004,booking,cancellations,What is the cancellation policy?,Free cancellation is available up to 48 hours ...,"cancel, refund, cancellation",2024-05-20,611,1300
FAQ000005,booking,special requests,Do you offer early check-in/late check-out?,"Subject to availability, early check-in and la...","early check-in, late check-out, extended stay",2024-12-18,239,3954
FAQ000006,amenities,facilities,What amenities are included?,"Our hotel offers complimentary Wi-Fi, fitness ...","amenities, facilities, services",2024-10-24,855,1860
FAQ000007,amenities,dining,Is breakfast included?,"Yes, breakfast is included with most room rate...","breakfast, dining, restaurant",2024-01-10,365,1943
FAQ000008,amenities,wellness,Do you have a fitness center?,"Yes, our fitness center is open 24/7 and featu...","fitness, gym, exercise",2024-10-05,988,1471
FAQ000009,amenities,business,Is there a swimming pool?,"Yes, we have both indoor and outdoor pools ope...","pool, swimming, recreation",2024-10-15,400,2881
FAQ000010,amenities,recreation,Do you offer spa services?,"Our full-service spa offers massages, facials,...","spa, wellness, treatments",2024-01-07,145,766


Prepare data for storage

In [5]:
from llama_index.core.schema import TextNode

# Build one TextNode per FAQ row. The question and answer form the node text;
# the remaining descriptive columns travel along as metadata.
# The loop variable is named `faq_id` (the frame's index) rather than `id`, so the
# builtin id() is not shadowed at notebook-global scope for every later cell.
nodes = []
for faq_id, row in df_faq.iterrows():
    main_text = 'Question:\n' + row['question'] + '\n\nAnswer:\n' + row['answer']
    metadata = {'category': row['category'],
                'subcategory': row['subcategory'],
                'keywords': row['keywords'],
                'last_updated': row['last_updated']}
    # Reuse the FAQ id as the node id so retrieved hits can be traced back to the source row.
    node = TextNode(text=main_text, id_=faq_id, metadata=metadata)
    nodes.append(node)

Store in vector store

In [6]:
# Build the index from the FAQ nodes, using the Redis-backed storage context created above.
index = VectorStoreIndex(nodes=nodes, storage_context=storage_context)

Set up and test retrieval

In [7]:
# Retriever over the index, configured with similarity_top_k=3 (three hits per query).
retriever = index.as_retriever(similarity_top_k=3)

In [8]:
# Smoke test with a paraphrased query; in the recorded run the swimming-pool FAQ (FAQ000009) is the top hit.
retriever.retrieve('Is there a place to swim?')

[NodeWithScore(node=TextNode(id_='FAQ000009', embedding=None, metadata={'category': 'amenities', 'subcategory': 'business', 'keywords': 'pool, swimming, recreation', 'last_updated': '2024-10-15'}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, metadata_template='{key}: {value}', metadata_separator='\n', text='Question:\nIs there a swimming pool?\n\nAnswer:\nYes, we have both indoor and outdoor pools open from 6:00 AM to 10:00 PM.', mimetype='text/plain', start_char_idx=None, end_char_idx=None, metadata_seperator='\n', text_template='{metadata_str}\n\n{content}'), score=0.741773724556),
 NodeWithScore(node=TextNode(id_='FAQ000006', embedding=None, metadata={'category': 'amenities', 'subcategory': 'facilities', 'keywords': 'amenities, facilities, services', 'last_updated': '2024-10-24'}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, metadata_template='{key}: {value}', metadata_separator='\n', text='Question:\nWhat ame

Add amenity information

In [9]:
# Load the amenities catalogue (indexed by amenity_id) from a pickled DataFrame.
# NOTE(review): unpickling can execute arbitrary code — only load pickle files from a trusted source.
df_amenities = pd.read_pickle('../data/pandas/amenities.pkl')
# Preview the first rows only.
df_amenities.head()

Unnamed: 0_level_0,category,name,price,duration,description,availability,location,booking_required,min_notice_hours
amenity_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
AM000001,Spa Services,Swedish Massage,120,60,Experience pure relaxation with our Swedish Ma...,24/7,Spa & Wellness Center,True,2
AM000002,Spa Services,Deep Tissue Massage,140,60,"Indulge in our signature Deep Tissue Massage, ...",6:00-22:00,Spa & Wellness Center,False,4
AM000003,Spa Services,Couples Massage Experience,280,90,Indulge in our signature Couples Massage Exper...,By appointment only,Spa & Wellness Center,False,0
AM000004,Spa Services,Hot Stone Therapy,160,90,Experience pure relaxation with our Hot Stone ...,6:00-22:00,Pool Area,False,1
AM000005,Spa Services,Luxury Facial Treatment,180,90,Indulge in our signature Luxury Facial Treatme...,24/7,Main Building,False,24


In [10]:
# Build one TextNode per amenity row. Name and description form the node text;
# pricing, scheduling and location columns are attached as metadata.
# The loop variable is named `amenity_id` (the frame's index) rather than `id`, so the
# builtin id() is not shadowed at notebook-global scope for every later cell.
nodes = []
for amenity_id, row in df_amenities.iterrows():
    main_text = 'Name:' + row['name'] + '\n\nDescription:\n' + row['description']
    metadata = {'category': row['category'],
                'price': row['price'],
                'duration': row['duration'],
                'availability': row['availability'],
                'location': row['location'],
                # stored as the string 'True'/'False' — presumably so the flag serializes
                # cleanly in the vector store; confirm before changing
                'booking_required': str(row['booking_required']),
                'min_notice_hours': row['min_notice_hours']}
    # Reuse the amenity id as the node id so retrieved hits can be traced back to the source row.
    node = TextNode(text=main_text, id_=amenity_id, metadata=metadata)
    nodes.append(node)

In [11]:
# Add the amenity nodes to the existing index so the same retriever searches FAQ and amenity entries.
index.insert_nodes(nodes)

In [12]:
# Smoke test: in the recorded run the hits include both an FAQ entry (FAQ000010) and an amenity entry (AM000003).
retriever.retrieve('Can I get a massage?')

[NodeWithScore(node=TextNode(id_='FAQ000010', embedding=None, metadata={'category': 'amenities', 'subcategory': 'recreation', 'keywords': 'spa, wellness, treatments', 'last_updated': '2024-01-07'}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, metadata_template='{key}: {value}', metadata_separator='\n', text='Question:\nDo you offer spa services?\n\nAnswer:\nOur full-service spa offers massages, facials, and body treatments. Advance booking is recommended.', mimetype='text/plain', start_char_idx=None, end_char_idx=None, metadata_seperator='\n', text_template='{metadata_str}\n\n{content}'), score=0.7244102358819999),
 NodeWithScore(node=TextNode(id_='AM000003', embedding=None, metadata={'category': 'Spa Services', 'price': 280, 'duration': 90, 'availability': 'By appointment only', 'location': 'Spa & Wellness Center', 'booking_required': 'False', 'min_notice_hours': 0}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, 

Add services information

In [13]:
# Load the services catalogue (indexed by service_id) from a pickled DataFrame.
# NOTE(review): unpickling can execute arbitrary code — only load pickle files from a trusted source.
df_services = pd.read_pickle('../data/pandas/services.pkl')
# Preview the first rows only.
df_services.head()

Unnamed: 0_level_0,service_type,name,description,duration_minutes,price,department,booking_required,min_notice_hours
service_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
SV000001,Spa Treatment,Spa Treatment - 120 min,Swedish massage for 120 minutes using Modern t...,120,264.98,Spa & Wellness,True,4
SV000002,Spa Treatment,Spa Treatment - 60 min,Luxurious 60-minute Deep Tissue treatment feat...,60,232.03,Spa & Wellness,True,4
SV000003,Spa Treatment,Spa Treatment - 120 min,Premium Deep Tissue experience for 120 minutes...,120,251.99,Spa & Wellness,True,24
SV000004,Spa Treatment,Spa Treatment - 90 min,Luxurious 90-minute Hot Stone treatment featur...,90,157.55,Spa & Wellness,True,1
SV000005,Personal Training,Personal Training - 60 min,Custom 60-minute HIIT workout emphasizing Core...,60,133.13,Fitness Center,True,4


In [14]:
# Build one TextNode per service row. Name and description form the node text;
# type, duration, price and booking columns are attached as metadata.
# The loop variable is named `service_id` (the frame's index) rather than `id`, so the
# builtin id() is not shadowed at notebook-global scope for every later cell.
nodes = []
for service_id, row in df_services.iterrows():
    main_text = 'Name:' + row['name'] + '\n\nDescription:\n' + row['description']
    metadata = {'service_type': row['service_type'],
                'duration_minutes': row['duration_minutes'],
                'price': row['price'],
                'department': row['department'],
                # stored as the string 'True'/'False' — presumably so the flag serializes
                # cleanly in the vector store; confirm before changing
                'booking_required': str(row['booking_required']),
                'min_notice_hours': row['min_notice_hours']}
    # Reuse the service id as the node id so retrieved hits can be traced back to the source row.
    node = TextNode(text=main_text, id_=service_id, metadata=metadata)
    nodes.append(node)

In [15]:
# Add the service nodes to the existing index alongside the FAQ and amenity entries.
index.insert_nodes(nodes)

In [16]:
# Smoke test: in the recorded run the hits include an amenity entry (AM000011) and a service entry (SV000024).
retriever.retrieve('I want to do some pilates.')

[NodeWithScore(node=TextNode(id_='AM000011', embedding=None, metadata={'category': 'Fitness Services', 'price': 90, 'duration': 60, 'availability': '24/7', 'location': 'Main Building', 'booking_required': 'False', 'min_notice_hours': 24}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, metadata_template='{key}: {value}', metadata_separator='\n', text='Name:Private Pilates Session\n\nDescription:\nTransform your fitness journey with our Private Pilates Session, a 60-minute session that boosts energy and vitality. Our certified trainers will expert form guidance, ensuring aligned and centered.', mimetype='text/plain', start_char_idx=None, end_char_idx=None, metadata_seperator='\n', text_template='{metadata_str}\n\n{content}'), score=0.741934299469),
 NodeWithScore(node=TextNode(id_='SV000024', embedding=None, metadata={'service_type': 'Group Fitness Class', 'duration_minutes': 45, 'price': 30.93, 'department': 'Fitness Center', 'booking_required': 'True'