An LLM takes in user data and recommends "sessions" to the user; those recommendations are then passed to a vector database to fetch real sessions. We first load our API keys.

In [12]:
import os
from dotenv import load_dotenv

# Load variables from a local .env file so the os.getenv(...) lookups in the
# later cells can find the Qdrant and OpenAI credentials.
load_dotenv()

True

Next, we initialise our vector DB client. The first query takes about 20 seconds because it includes authentication; subsequent queries are much faster.

In [21]:
from qdrant_client import QdrantClient, models

# Connection settings are read from the environment; the collection name is fixed.
QDRANT_URL = os.environ.get("QDRANT_URL")
QDRANT_API_KEY = os.environ.get("QDRANT_API_KEY")
COLLECTION_NAME = "singapore_events"

# Shared client used by every later cell that talks to the vector database.
qdrant_client = QdrantClient(url=QDRANT_URL, api_key=QDRANT_API_KEY)

# Show the collections visible to this client.
print(qdrant_client.get_collections())

collections=[CollectionDescription(name='midjourney'), CollectionDescription(name='star_charts'), CollectionDescription(name='aflow_sessions'), CollectionDescription(name='singapore_events')]


Now we create a function that embeds a piece of text (such as the artist's profile) and a function that fetches the most similar entries from the vector database.

In [14]:
from openai import OpenAI
# Pass the key at construction time instead of patching the attribute afterwards.
# os.getenv returns None when the variable is unset, in which case the OpenAI
# client falls back to reading OPENAI_API_KEY from the environment itself.
openai_client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

In [25]:
def text_to_embedding(text):
    """Return the embedding vector for ``text``.

    Sends ``text`` to the OpenAI embeddings endpoint using the
    ``text-embedding-3-small`` model (float encoding) and returns the
    ``embedding`` of the first item in the response's ``data`` list.
    """
    request = {
        "model": "text-embedding-3-small",
        "input": text,
        "encoding_format": "float",
    }
    response = openai_client.embeddings.create(**request)
    first_item = response.data[0]
    return first_item.embedding

def fetch_recommended_entities(query_entity, collection_name=COLLECTION_NAME, limit=5):
    """Return the payloads of the stored points most similar to ``query_entity``.

    The query text is embedded with ``text_to_embedding`` and the resulting
    vector is used to search ``collection_name``; at most ``limit`` hits are
    requested. Only each hit's ``payload`` is returned, in the order the
    search produced them.
    """
    query_vector = text_to_embedding(query_entity)
    hits = qdrant_client.search(
        collection_name=collection_name,
        query_vector=query_vector,
        limit=limit,
    )

    payloads = []
    for hit in hits:
        payloads.append(hit.payload)
    return payloads

Now let's test the accuracy of these queries. First, we upload the session metadata as embeddings into our vector database.

In [16]:
import json
from pathlib import Path

def read_json(file_path=Path('../assets/datasets/singapore.json')):
    """Load and return the JSON document stored at ``file_path``.

    Args:
        file_path: Path (or path string) of the JSON file to read. Defaults to
            ``../assets/datasets/singapore.json``, which is resolved relative
            to the current working directory.

    Returns:
        The decoded JSON content, i.e. whatever ``json.load`` produces for
        the file.

    Raises:
        FileNotFoundError: If ``file_path`` does not exist.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # Decode as UTF-8 explicitly: without it open() falls back to the
    # platform's locale encoding, which can mis-decode non-ASCII text.
    with open(file_path, 'r', encoding='utf-8') as file:
        return json.load(file)

# Load the events dataset from read_json's default path and show the raw result.
events = read_json()
print(events)

{'events': [{'id': 'dance001', 'name': 'Contemporary Dance Workshop', 'category': 'dance', 'subcategory': 'contemporary', 'description': 'Professional contemporary dance workshop for intermediate dancers focusing on technique and expression', 'location': 'Goodman Arts Centre', 'area': 'East Coast', 'price_range': '$$', 'actual_price': 75, 'duration_hours': 3, 'suitable_for': ['young_adults', 'adults'], 'intensity_level': 'moderate', 'tags': ['dance', 'workshop', 'contemporary', 'arts']}, {'id': 'dance002', 'name': 'K-Pop Dance Class', 'category': 'dance', 'subcategory': 'kpop', 'description': 'Learn popular K-pop choreographies in this high-energy class', 'location': 'O School', 'area': 'Somerset', 'price_range': '$', 'actual_price': 35, 'duration_hours': 1.5, 'suitable_for': ['teenagers', 'young_adults'], 'intensity_level': 'high', 'tags': ['dance', 'kpop', 'exercise']}, {'id': 'dance003', 'name': 'Street Dance Basics', 'category': 'dance', 'subcategory': 'street', 'description': 'Int

## DO THIS ONE TIME, TO INITIALISE DATABASE

In [17]:
# One-time setup switch. Leave it False for normal runs; set it to True for a
# single run to create the collection and upload every event, then set it back.
RUN_DB_INITIALISATION = False

# Event fields copied verbatim into each point's payload. The event's own 'id'
# is stored under the 'event_id' key instead.
EVENT_PAYLOAD_FIELDS = (
    "name",
    "category",
    "subcategory",
    "description",
    "location",
    "area",
    "price_range",
    "actual_price",
    "duration_hours",
    "suitable_for",
    "intensity_level",
    "tags",
)


def build_event_text(event):
    """Render one event dict as the labelled text block that gets embedded.

    Args:
        event: Mapping with the keys 'name', 'category', 'subcategory',
            'description', 'location', 'area', 'price_range',
            'duration_hours', 'intensity_level', plus the lists
            'suitable_for' and 'tags' (joined with ', ').

    Returns:
        A multi-line string with one "Label: value" line per field. The
        leading newline and the four-space indentation are part of the text
        that is embedded, so they are kept exactly as written here.
    """
    return f"""
    Name: {event['name']}
    Category: {event['category']}
    Subcategory: {event['subcategory']}
    Description: {event['description']}
    Location: {event['location']}
    Area: {event['area']}
    Price Range: {event['price_range']}
    Duration: {event['duration_hours']} hours
    Suitable For: {', '.join(event['suitable_for'])}
    Intensity Level: {event['intensity_level']}
    Tags: {', '.join(event['tags'])}
    """


def build_event_payload(event):
    """Build the payload dict stored alongside an event's vector.

    Args:
        event: Mapping holding 'id' and every key in ``EVENT_PAYLOAD_FIELDS``.

    Returns:
        A dict with 'event_id', then the fields in ``EVENT_PAYLOAD_FIELDS``,
        then 'created_at' set to the current local time in ISO 8601 format.

    Raises:
        KeyError: If the event lacks 'id' or one of the payload fields.
    """
    from datetime import datetime

    payload = {"event_id": event["id"]}
    for field in EVENT_PAYLOAD_FIELDS:
        payload[field] = event[field]
    payload["created_at"] = datetime.now().isoformat()
    return payload


def initialise_events_collection(events, collection_name="singapore_events"):
    """Create the Qdrant collection and upload one embedded point per event.

    Args:
        events: Dataset dict whose 'events' key holds the list of event dicts.
        collection_name: Name of the collection to create and fill.

    Returns:
        The number of points uploaded.

    Note:
        The collection is created unconditionally, so this is meant to run
        once per collection. The vector size of 1536 assumes
        ``text_to_embedding`` returns vectors of that length; confirm this
        if the embedding model is ever changed.
    """
    # Creates qdrant vector database instance
    qdrant_client.create_collection(
        collection_name=collection_name,
        vectors_config=models.VectorParams(size=1536, distance=models.Distance.COSINE),
    )

    # Convert the session metadata into embeddings before anything is uploaded.
    points = []
    for idx, event in enumerate(events["events"]):
        points.append(
            models.PointStruct(
                id=idx,  # Using index as ID
                payload=build_event_payload(event),
                vector=text_to_embedding(build_event_text(event)),
            )
        )

    # Upload vector embeddings into QDRANT DB in one request instead of one
    # round trip per point; skip the call entirely when there is nothing to send.
    if points:
        qdrant_client.upsert(collection_name=collection_name, points=points)
    return len(points)


if RUN_DB_INITIALISATION:
    initialise_events_collection(events, collection_name=COLLECTION_NAME)

Now that the vector database has been successfully set up, let's test the similarity of a session with other sessions.

In [26]:
# Free-text query; fetch_recommended_entities embeds it and searches the
# default collection for the closest stored events.
test_prompt = 'I want a to dance.'
recommendations = fetch_recommended_entities(test_prompt)

# Each recommendation is the payload dict of one matching point.
print(recommendations)

[{'event_id': 'dance006', 'name': 'Urban Dance Choreography', 'category': 'dance', 'subcategory': 'urban', 'description': 'Learn trending urban dance routines', 'location': 'Steps Dance Studio', 'area': 'Bugis', 'price_range': '$$', 'actual_price': 40, 'duration_hours': 1.5, 'suitable_for': ['young_adults'], 'intensity_level': 'high', 'tags': ['dance', 'urban', 'contemporary'], 'created_at': '2024-12-23T20:31:36.886075'}, {'event_id': 'fashion001', 'name': 'Dance Fashion Styling', 'category': 'fashion', 'subcategory': 'styling', 'description': 'Learn to style outfits for different dance styles and performances', 'location': 'Style Theory Studio', 'area': 'Orchard', 'price_range': '$$', 'actual_price': 120, 'duration_hours': 3, 'suitable_for': ['young_adults', 'dancers'], 'intensity_level': 'low', 'tags': ['fashion', 'dance', 'style', 'workshop'], 'created_at': '2024-12-23T20:31:42.804780'}, {'event_id': 'dance014', 'name': 'Heels Dance Class', 'category': 'dance', 'subcategory': 'heels