In [82]:
import pandas as pd
df = pd.read_csv('../../spotify-tracks-dataset.csv')
df = df[df['artists'].notna()] # drop rows without an artist: NaN values blow up serialization
# Index a random subset of 1500 tracks; more records make indexing slower.
# A fixed random_state keeps the subset (and therefore the search results
# further down) reproducible across kernel restarts.
data = df.sample(1500, random_state=42).to_dict('records')
len(data)

1500

In [83]:
from qdrant_client import models, QdrantClient
from sentence_transformers import SentenceTransformer

In [84]:
# Sentence-embedding model shared by indexing and querying: it encodes the
# track names on upload and the user prompt at search time, and its embedding
# dimension is used as the collection's vector size.
encoder = SentenceTransformer('all-MiniLM-L6-v2') # Model to create embeddings

In [85]:
# Create the vector database client.
# ":memory:" requests an in-memory instance, so no external Qdrant server is
# contacted here; presumably the indexed data is lost when the kernel
# restarts and the collection must be rebuilt — confirm against qdrant-client docs.
qdrant = QdrantClient(":memory:") # Create in-memory Qdrant instance

In [96]:
# Create collection to store spotify music.
# Drop any collection left over from a previous run first so this cell can be
# re-executed safely and always starts from an empty collection.
if qdrant.collection_exists("top_tracks"):
    qdrant.delete_collection("top_tracks")
qdrant.create_collection(
    collection_name="top_tracks",
    vectors_config=models.VectorParams(
        size=encoder.get_sentence_embedding_dimension(), # Vector size is defined by used model
        distance=models.Distance.COSINE
    )
)

True

In [97]:
# Vectorize! Embed all track names in one batched encoder call (much faster
# than invoking the model once per track), then upload every track with its
# embedding. The point id is the track's position in `data`.
track_vectors = encoder.encode([doc["track_name"] for doc in data])
qdrant.upload_points(
    collection_name="top_tracks",
    points=[
        models.PointStruct(
            id=idx,
            vector=vector.tolist(),
            payload=doc,  # the full track record is stored alongside the vector
        )
        for idx, (doc, vector) in enumerate(zip(data, track_vectors))  # data holds the sampled tracks
    ]
)

In [98]:
# Natural-language request from the user; it is embedded with the same encoder
# as the track names and used as the query vector for the search.
user_prompt = "Suggest me an amazing Hip Hop Album"

In [99]:
# Search time: embed the user's prompt and retrieve the three closest tracks
# from the "top_tracks" collection.
hits = qdrant.search(
    query_vector=encoder.encode(user_prompt).tolist(),
    collection_name="top_tracks",
    limit=3,
)

# Show each match's stored track record together with its similarity score.
for hit in hits:
    print(hit.payload, "score:", hit.score)

{'Unnamed: 0': 48124, 'track_id': '19FsxX4RthRMZGfXkImdCb', 'artists': 'The Notorious B.I.G.', 'album_name': 'Life After Death (2014 Remastered Edition)', 'track_name': 'Notorious Thugs - 2014 Remaster', 'popularity': 68, 'duration_ms': 366880, 'explicit': True, 'danceability': 0.704, 'energy': 0.873, 'key': 11, 'loudness': -4.057, 'mode': 0, 'speechiness': 0.144, 'acousticness': 0.121, 'instrumentalness': 5.12e-06, 'liveness': 0.24, 'valence': 0.742, 'tempo': 154.914, 'time_signature': 4, 'track_genre': 'hardcore'} score: 0.497637899139216
{'Unnamed: 0': 5277, 'track_id': '4UyTPpfDrZJj38mw0O9Dsf', 'artists': 'Rustage;Broken', 'album_name': 'Bounce Back (Luffy Rap)', 'track_name': 'Bounce Back (Luffy Rap)', 'popularity': 49, 'duration_ms': 182520, 'explicit': False, 'danceability': 0.829, 'energy': 0.885, 'key': 9, 'loudness': -3.672, 'mode': 0, 'speechiness': 0.213, 'acousticness': 0.392, 'instrumentalness': 0.0, 'liveness': 0.0653, 'valence': 0.674, 'tempo': 125.098, 'time_signature'

In [90]:
# Keep only the payloads (the stored track records) of the matches; these are
# what gets handed to the language model as context.
search_results = [match.payload for match in hits]

In [None]:
# Now time to connect to the local large language model
from openai import OpenAI

# OpenAI-compatible endpoint on localhost. The API key is a placeholder;
# presumably the local server does not validate it — confirm for your server.
client = OpenAI(
    base_url="http://127.0.0.1:8080/v1",
    api_key = "sk-no-key-required"
)
completion = client.chat.completions.create(
    model="LLaMA_CPP",
    messages=[
        {"role": "system", "content": "You are chatbot, a music specialist. Your top priority is to help guide users into selecting amazing albums and tracks and guide them with their requests."},
        # Reuse the exact prompt that drove the vector search so retrieval and
        # generation always answer the same question.
        {"role": "user", "content": user_prompt},
        # The retrieved track records are supplied as context via an
        # assistant-role message.
        {"role": "assistant", "content": str(search_results)}
    ]
)
print(completion.choices[0].message)

NotFoundError: File Not Found