In [None]:
%pip install langchain openai chromadb tiktoken jq

import os
import csv
import json
import time
import random
from openai import OpenAI
from langchain.document_loaders import JSONLoader
from langchain.document_loaders.csv_loader import CSVLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Chroma

# Credentials must never live in the notebook source: anything saved here ends
# up in version control and in every shared copy. The key is read from the
# environment instead (export OPENAI_API_KEY before starting the kernel).
# NOTE: any key that was ever stored in this file in plain text should be
# revoked and replaced.
api_key = os.environ.get("OPENAI_API_KEY")
if not api_key:
    raise RuntimeError(
        "OPENAI_API_KEY is not set; export it before starting the kernel."
    )
client = OpenAI(api_key=api_key)

# Input Data

In [2]:
def to_str(playlist):
    """Render a playlist dict as the plain-text block that is sent to the model.

    Args:
        playlist: Mapping with a 'name' entry and a 'tracks' iterable; every
            track provides 'pos', 'track_name' and 'artist_name'.

    Returns:
        A string made of the playlist name line, a "Tracks:" heading and one
        "<pos + 1>. <track_name> by <artist_name>" line per track. Every line,
        including the last, ends with a newline.
    """
    lines = [f"Playlist Name: {playlist['name']}\n", "Tracks:\n"]
    lines.extend(
        f"{song['pos']+1}. {song['track_name']} by {song['artist_name']}\n"
        for song in playlist['tracks']
    )
    return "".join(lines)

In [3]:
# load dataset
input_file = 'processed_5000-5999.json'
# Pin the encoding: without it open() falls back to the platform locale, so
# non-ASCII track or artist names could be mis-decoded (or fail to decode) on
# machines whose default is not UTF-8.
with open(input_file, 'r', encoding='utf-8') as playlist_file:
    # `data['playlists']` is indexed by the walkthrough cells further down.
    data = json.load(playlist_file)

# RAG

In [40]:
def metadata_func(record: dict, metadata: dict) -> dict:
    """Copy selected track attributes from a raw record into loader metadata.

    Passed to JSONLoader as its `metadata_func` hook (see initialize_db).

    Args:
        record: One raw track entry; any of the copied fields may be absent.
        metadata: Metadata dict to extend. It is modified in place.

    Returns:
        The same `metadata` object, with "Moods", "Keywords", "artist_name",
        "genre", "topic" and "release_date" set; a field missing from
        `record` is stored as an empty string.
    """
    copied_fields = (
        "Moods",
        "Keywords",
        "artist_name",
        "genre",
        "topic",
        "release_date",
    )
    for field in copied_fields:
        metadata[field] = record.get(field, "")
    return metadata

In [11]:
def initialize_db(file_path, chunk_size=1000, chunk_overlap=0, persist_directory="./chroma_db"):
    """Build a Chroma vector store from a JSON file of tracks.

    Args:
        file_path: Path of the JSON file; documents are taken from the
            elements selected by the jq expression '.track_data[]'.
        chunk_size: Forwarded to CharacterTextSplitter.
        chunk_overlap: Forwarded to CharacterTextSplitter.
        persist_directory: Forwarded to Chroma.from_documents.

    Returns:
        The Chroma store created from the split documents.
    """
    # Each track becomes one document: "track_name" is the text content and
    # metadata_func attaches the remaining attributes as metadata.
    track_loader = JSONLoader(
        file_path=file_path,
        jq_schema='.track_data[]',
        content_key="track_name",
        metadata_func=metadata_func,
    )
    raw_documents = track_loader.load()

    # Split the loaded documents with the requested chunking parameters.
    splitter = CharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
    chunks = splitter.split_documents(raw_documents)

    # Embed with OpenAI embeddings and index the chunks in Chroma.
    embedder = OpenAIEmbeddings()
    return Chroma.from_documents(chunks, embedder, persist_directory=persist_directory)

In [21]:
def retrieve(query, db, k=15):
    """Return the text of the documents most similar to `query`.

    Args:
        query: Text to search for.
        db: Vector store exposing `as_retriever`.
        k: Number of documents requested from the similarity search.

    Returns:
        The `page_content` of every retrieved document, newline-joined in
        the order the retriever returned them.
    """
    hits = db.as_retriever(
        search_type="similarity",
        search_kwargs={"k": k},
    ).get_relevant_documents(query)
    return '\n'.join(hit.page_content for hit in hits)

# Prompt

In [41]:
# System prompt for the first model call (see recommend): turns a rendered
# playlist into a five-entry listener profile -- genres, artists, languages,
# contextual uses and mood. The profile text is later reused verbatim as the
# similarity-search query and as the start of the second model call's input.
feature_prompt = """
Analyze a user's playlist to deduce musical preferences. Given the playlist name, and a list of tracks with corresponding artist names:
1. Determine the user's top three favourite music genres based on the tracks' styles.
2. Identify the user's top three favourite artists from the playlist.
3. Ascertain the user's preferred language of music, indicating regional music preferences if any.
4. Infer the contextual use of the playlist by identifying any patterns that suggest particular events, locations, or themes (e.g., workout songs, travel music, 90s hits).
5. Assess the overall mood of the playlist. Categorize and summarize the mood based on the tone and tempo of the songs into 5 words.
Compile the findings into a profile summary that shows the user's musical tastes and the intended experience of the playlist.

Strictly follow the desired output format, and do not add any other entries except the following 5 entries:
Music genres: ...
Artists: ...
Languages: ...
Contextual uses: ...
Mood: ...
"""

# System prompt for the second model call (see recommend): asks for 10 songs,
# each formatted as '[song_name] by [artist_name]'.
# NOTE(review): recommend() appends a "Context:" section of retrieved track
# names to the user message, but this prompt never mentions that section or
# says how it should be used -- confirm whether that is intended.
recommendation_prompt = """
As a music recommender system, provide tailored song recommendations based on detailed user inputs. Given:
- The user's specific music preferences, such as favorite genres and artists.
- The user's language preferences for music.
- The context or setting in which the user typically listens to the playlist (e.g., during workouts, for relaxation, while commuting).
- The user's mood preferences for songs
Recommend 10 unique, real, published songs that align with the user's musical preferences.
Present the recommendations in a list format, with each entry following the '[song_name] by [artist_name]' structure.
"""

# GPT

In [8]:
def generate(system_prompt, user_input):
    """Send one system + user message pair to the chat model and return its reply.

    Args:
        system_prompt: Text placed in the "system" message.
        user_input: Text placed in the "user" message.

    Returns:
        The first choice's message content with surrounding whitespace
        stripped, or None when anything in the call raises (the error is
        printed, not re-raised).
    """
    try:
        conversation = [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_input},
        ]
        completion = client.chat.completions.create(
            model="gpt-4",
            messages=conversation,
            temperature=0,  # using greedy decoding
            max_tokens=256,
            top_p=1,
            frequency_penalty=0,
            presence_penalty=0,
        )
        reply = completion.choices[0].message.content
        return reply.strip()
    except Exception as err:
        # Best effort by design: report the failure and let the caller see None.
        print(f"An error occurred: {err}")
        return None

# Evaluate

In [44]:
def recommend(playlist):
    """Run the full profile -> retrieval -> recommendation pipeline for one playlist.

    Reads the module-level vector store `db`, so that name must be bound
    before this function is called.

    Args:
        playlist: Playlist dict in the shape accepted by to_str.

    Returns:
        A tuple `(input_playlist, recommendations)`: the rendered playlist
        text and the model's recommendation text. `recommendations` is None
        when either model call failed.
    """
    # get input playlist string
    input_playlist = to_str(playlist)

    # get features from gpt
    query = generate(feature_prompt, input_playlist)
    if query is None:
        # generate() has already printed the error. Without a profile there is
        # nothing to search for, and passing None on would raise in the
        # retrieval/concatenation steps and abort a whole save_output batch.
        return input_playlist, None

    # get relevant context via rag
    context = retrieve(query, db)

    # get song recommendations from gpt
    user_input = f"{query}\nContext:\n{context}"
    recommendations = generate(recommendation_prompt, user_input)

    return input_playlist, recommendations

In [43]:
def save_output(playlists, output_path='output.csv'):
    """Write one CSV row of (playlist text, recommendation text) per playlist.

    Args:
        playlists: Iterable of playlist dicts accepted by recommend.
        output_path: Destination CSV file; defaults to 'output.csv', the path
            that was previously hardcoded. The file is overwritten.

    Returns:
        None. The file holds a 'playlist,recommendation' header followed by
        one row per playlist; a recommendation of None is written by the csv
        module as an empty field.
    """
    # UTF-8 is pinned so non-ASCII track or artist names cannot fail to encode
    # under a narrower platform default; newline='' is what csv requires.
    with open(output_path, 'w', newline='', encoding='utf-8') as output_file:
        writer = csv.writer(output_file)
        writer.writerow(['playlist', 'recommendation'])

        for playlist in playlists:
            # Local names chosen so the builtin `input` is not shadowed.
            playlist_text, recommendations = recommend(playlist)
            writer.writerow([playlist_text, recommendations])
# Main

In [12]:
# Build the vector store from the song file. `db` is a module-level name that
# recommend() and the walkthrough cells below read directly, so this cell has
# to run before any of them.
db = initialize_db('processed_songs_2w.json')

In [32]:
# Manual walkthrough of the pipeline for a single playlist (index 5 of the
# loaded dataset): start by rendering it as prompt text.
playlist1 = data['playlists'][5]
user_input = to_str(playlist1)
print(user_input)

Playlist Name: Oldies
Tracks:
1. Spirit In The Sky by Norman Greenbaum
2. More Than a Feeling by Boston
3. You Win Again by Bee Gees
4. Runaround Sue by Dion
5. My Girl by The Temptations
6. She's Gone by Daryl Hall & John Oates
7. Johnny B. Goode by Chuck Berry
8. Smokin' by Boston
9. The Book Of Love by The Monotones
10. September by Earth, Wind & Fire
11. Don't You Worry 'Bout A Thing by Stevie Wonder
12. Sweet Child O' Mine by Guns N' Roses
13. Fire And Rain by James Taylor
14. Stand By Me by Ben E. King
15. I Wish by Stevie Wonder
16. Five Months, Two Weeks, Two Days by Keely Smith
17. Missing You by John Waite
18. Back In Black by AC/DC



In [33]:
# Ask the model for the listener profile of the rendered playlist.
# generate() returns None on failure, in which case this prints "None".
query = generate(feature_prompt, user_input)
print(query)

Music genres: Rock, Soul, Pop
Artists: Boston, Stevie Wonder, Bee Gees
Languages: English
Contextual uses: Nostalgia, Relaxation, Sing-along
Mood: Uplifting, Nostalgic, Energetic, Soulful, Melancholic


In [34]:
# Use the whole profile text as the similarity-search query; the result is the
# newline-joined content of the retrieved documents (track names).
context = retrieve(query, db)
print(context)

sweet soul music
music scene
music, music, music
melodies and memories
music city usa
sweet music
soul blues
music for lovers
lights & music
street jazz
love music
music is life
american music
american music
words and music


In [38]:
# Append the retrieved track names to the profile, mirroring recommend().
# NOTE: this rebinds `user_input`, which until now held the rendered playlist.
# Re-running the profile-extraction cell after this one would therefore send
# the profile-plus-context text instead of the playlist.
user_input = query + f"\nContext:\n{context}"
print(user_input)

Music genres: Rock, Soul, Pop
Artists: Boston, Stevie Wonder, Bee Gees
Languages: English
Contextual uses: Nostalgia, Relaxation, Sing-along
Mood: Uplifting, Nostalgic, Energetic, Soulful, Melancholic
Context:
sweet soul music
music scene
music, music, music
melodies and memories
music city usa
sweet music
soul blues
music for lovers
lights & music
street jazz
love music
music is life
american music
american music
words and music


In [39]:
# Request the final recommendations from the profile plus retrieved context.
output = generate(recommendation_prompt, user_input)
print(output)

1. "More Than a Feeling" by Boston
2. "Isn't She Lovely" by Stevie Wonder
3. "Stayin' Alive" by Bee Gees
4. "Peace of Mind" by Boston
5. "Superstition" by Stevie Wonder
6. "How Deep Is Your Love" by Bee Gees
7. "Foreplay / Long Time" by Boston
8. "I Just Called to Say I Love You" by Stevie Wonder
9. "Night Fever" by Bee Gees
10. "Rock & Roll Band" by Boston
