LLM takes in user data & recommends "sessions" to the users -> pass into vector database to fetch real sessions. We first initialise our API Keys.

In [1]:
import os
from dotenv import load_dotenv

load_dotenv()

True

Then, we will initialise our vector DB client, the first query will take about 20 seconds as there will be authentication, subsequent queries will be much shorter.

In [19]:
from qdrant_client import QdrantClient, models

QDRANT_URL=os.getenv("QDRANT_URL")
QDRANT_API_KEY=os.getenv("QDRANT_API_KEY")
COLLECTION_NAME=os.getenv("COLLECTION_NAME")

qdrant_client = QdrantClient(
	url=QDRANT_URL, 
	api_key=QDRANT_API_KEY,
)

print(qdrant_client.get_collections())

collections=[CollectionDescription(name='aflow_sessions')]


Now, we create a method to embed the artist's profile and a fetch method to fetch the data.

In [10]:
from openai import OpenAI
openai_client = OpenAI()
openai_client.api_key=os.getenv("OPENAI_API_KEY")

EMBEDDING_MODEL=os.getenv("EMBEDDING_MODEL")
CHAT_MODEL=os.getenv("CHAT_MODEL")

In [11]:
def text_to_embedding(text):
    embeddings = openai_client.embeddings.create(
        model=EMBEDDING_MODEL,
        input=text,
        encoding_format="float"
    )
    return embeddings.data[0].embedding

def fetch_recommended_sessions(artist_profile, collection_name=COLLECTION_NAME, limit=5):
    artist_embedding = text_to_embedding(artist_profile)
    similar_sessions = qdrant_client.search(
        collection_name=collection_name,
        query_vector=artist_embedding,
        limit=limit
    )
    return [session.payload["session_name"] for session in similar_sessions]

def get_all_session_similarities(collection_name=COLLECTION_NAME):
    all_sessions = qdrant_client.search(
        collection_name=collection_name,
        query_vector=[],
        limit=1000
    )
    return [(session.payload['id'], session.score) for session in all_sessions]



Now lets test the accuracy of these queries. First, we upload the session metadata as embeddings into our vector database.

In [16]:
import json
from pathlib import Path

DEFAULT_PATH = '../../assets/datasets/session_list_070125.json'

def read_json(file_path=Path(DEFAULT_PATH)):
	with open(file_path, 'r') as file:
		sessions = json.load(file)
	return sessions

sessions = read_json()
print(sessions['Boost Spotify bio'])

{'id': 8, 'category': 'Content Creation', 'keywords': ['bio writing', 'artist profile', 'brand messaging', 'Spotify optimization', 'audience engagement', 'platform optimization', 'content strategy'], 'summary': 'Generate an engaging, algorithm-friendly Spotify biography in under 30 minutes. Delivers a polished bio that connects with fans while boosting visibility on the platform.', 'intended_target_audience': {'demographic': 'Independent Artist', 'interests': ['platform optimization', 'brand development', 'fan engagement', 'discoverability'], 'preferences': ['quick execution', 'guided process']}}


## DO THIS ONE TIME, TO INITIALISE DATABASE

In [17]:
COLLECTION_NAME=os.getenv("COLLECTION_NAME")

# Creates qdrant vector database instance
qdrant_client.create_collection(
    collection_name=COLLECTION_NAME,
    vectors_config=models.VectorParams(size=1536, distance=models.Distance.COSINE)
)

True

In [18]:
# Convert the session metadata into embeddings
session_embeddings = {}

# Iterate through each session
for session_name, session_data in sessions.items():
	# Convert session data to string for embedding
	session_text = f"""
	Category: {session_data['category']}
	Keywords: {', '.join(session_data['keywords'])}
	Summary: {session_data['summary']}
	Target Audience: {session_data['intended_target_audience']['demographic']}
	Interests: {', '.join(session_data['intended_target_audience']['interests'])}
	Preferences: {', '.join(session_data['intended_target_audience']['preferences'])}
	"""
	
	# Get embedding
	embedding = text_to_embedding(session_text)

	# Store result
	session_embeddings[session_name] = {
		'id': session_data['id'],
		'metadata': {
			'category': session_data['category'],
			'keywords': session_data['keywords'],
      'summary': session_data['summary'],
      'intended_target_audience': session_data['intended_target_audience'],
      'interests': session_data['intended_target_audience']['interests'],
			'preferences': session_data['intended_target_audience']['preferences']
    },
		'embedding': embedding
	}

# Upload vector embeddings into QDRANT DB
from datetime import datetime

for idx, (session_name, session_data) in  enumerate(session_embeddings.items()):
	qdrant_client.upsert(
		collection_name=os.getenv("COLLECTION_NAME"),
		points=[
			models.PointStruct(
				id=idx,
				payload={
					"id": session_data["id"],
					"session_name":session_name,
					"metadata":session_data["metadata"],
					"created_at":datetime.now().isoformat()
				},
				vector=session_data["embedding"]
			)
		]
	)

Now that the vector database has been successfully setup, lets test the similarity of a session with other sessions.

In [15]:
test_prompt = 'The artist wants a session to boost his spotify bio, so that he can gain a larger following on spotify, he also wants to create a press release for his next single, coming out in just 2 weeks.'
recommendations = fetch_recommended_sessions(test_prompt)

print(recommendations)

['Boost Spotify bio', 'Create Spotify pitch', 'How 2 claim your Spotify profile', 'Wrapped promo 2024', 'Create label pitch']
