In [None]:
%pip install openai ipynb

import os
import csv
import json
import time
import random
from openai import OpenAI

# Read the key from the environment instead of pasting it into the notebook, so
# the secret is never saved with the file or leaked through shared copies.
# Falls back to an empty string (the previous placeholder) when the variable is unset.
api_key = os.environ.get("OPENAI_API_KEY", '')
client = OpenAI(api_key=api_key)

In [None]:
# prompts
feature_prompt = """
Analyze a user's playlist to deduce musical preferences. Given the playlist name, and a list of tracks with corresponding artist names:
1. Determine the user's top three favourite music genres based on the tracks' styles.
2. Identify the user's top three favourite artists from the playlist.
3. Ascertain the user's preferred language of music, indicating regional music preferences if any.
4. Infer the contextual use of the playlist by identifying any patterns that suggest particular events, locations, or themes (e.g., workout songs, travel music, 90s hits).
5. Assess the overall mood of the playlist. Categorize the mood (e.g., calm, sad, happy, energetic) based on the tone and tempo of the songs.
Compile the findings into a profile summary that shows the user's musical tastes and the intended experience of the playlist.

Example:
User: 
Playlist Name: tiktok
Tracks:
- Galway Girl by Ed Sheeran
- Issues by Julia Michaels
- As Long As You Love Me by Justin Bieber/Big Sean
- Thinking Out Loud by Ed Sheeran
- Dancing With Your Ghost by Sasha Alex Sloan
- Peaches by Justin Bieber
- Let Me Down Slowly by Alec Benjamin
- Astronaut In The Ocean by Masked Wolf
- YOUTH by Troye Sivan
- Bet On Me by Walk Off the Earth/D Smoke

Assistant:
Favorite music genres: Pop, Hip-Hop, Indie Pop
Favorite artists: Ed Sheeran, Justin Bieber, Julia Michaels
Preferred languages of music: English
Contextual use of playlist: Casual listening, TikTok videos, social gatherings or parties
Mood of the playlist: A mix of energetic and emotional. 
"""

recommendation_prompt = """
As a sophisticated music recommender system, provide tailored song recommendations based on detailed user inputs. Given:

- A user's existing playlist, including the playlist name, song names and artist names.
- The user's specific music preferences, such as favorite genres and artists.
- The user's language preferences for music.
- The context or setting in which the user typically listens to the playlist (e.g., during workouts, for relaxation, while commuting).
- The overall mood of the playlist, characterized as energetic, calm, emotional, etc.

Recommend 10 unique songs that align with the user's tastes and playlist characteristics. Ensure that these songs are not already present in the user's playlist. Present the recommendations in a list format, with each entry following the '[song_name] by [artist_name]' structure. The recommendations should reflect real, published songs that cater to the user's musical preferences and the intended experience of their playlist.

Example:
User: 
Playlist Name: tiktok
Tracks:
- Galway Girl by Ed Sheeran
- Issues by Julia Michaels
- As Long As You Love Me by Justin Bieber/Big Sean
- Thinking Out Loud by Ed Sheeran
- Dancing With Your Ghost by Sasha Alex Sloan
- Peaches by Justin Bieber
- Let Me Down Slowly by Alec Benjamin
- Astronaut In The Ocean by Masked Wolf
- YOUTH by Troye Sivan
- Bet On Me by Walk Off the Earth/D Smoke

Favorite music genres: Pop, Hip-Hop, Indie Pop
Favorite artists: Ed Sheeran, Justin Bieber, Julia Michaels
Preferred languages of music: English
Contextual use of playlist: Casual listening, TikTok videos, social gatherings or parties
Mood of the playlist: A mix of energetic and emotional. 

Assistant:
- Shape of You by Ed Sheeran
- Attention by Charlie Puth
- Sorry by Justin Bieber
- Stay by The Kid LAROI/Justin Bieber
- Runaway by Sasha Alex Sloan
- Normal No More by Tysm
- Love Story by Taylor Swift
- Strawberries & Cigarettes by Troye Sivan
- Perfect by Ed Sheeran
- Liability by Jasper Zheng
"""

In [None]:
# function that splits the playlist into a train set and a test set
def split_train_test(playlist, test_size=0.1, rng=None):
    """Randomly hold out a fraction of a playlist's tracks.

    Args:
        playlist: dict with a "playlist_name" entry and a "tracks" list.
        test_size: fraction of tracks to hold out. The count is
            ``int(len(tracks) * test_size)``, so playlists with fewer than
            ``1 / test_size`` tracks produce an empty test set.
        rng: optional ``random.Random`` instance used for shuffling, allowing
            reproducible splits. Defaults to the module-level ``random`` generator.

    Returns:
        A ``(train_set, test_set)`` tuple. ``train_set`` is a dict with the
        original "playlist_name" and the remaining "tracks"; ``test_set`` is
        the list of held-out tracks.
    """
    # Shuffle a copy: shuffling playlist['tracks'] directly would silently
    # reorder the caller's data and make repeated runs depend on earlier ones.
    tracks = list(playlist['tracks'])
    shuffler = random if rng is None else rng
    shuffler.shuffle(tracks)

    num_test_tracks = int(len(tracks) * test_size)

    test_set = tracks[:num_test_tracks]
    train_set = {
        "playlist_name": playlist["playlist_name"],
        "tracks": tracks[num_test_tracks:],
    }

    return train_set, test_set

In [None]:
# function that renders a playlist (or a bare track list) as prompt text
def to_str(playlist, test=False):
    """Format tracks as "- <track_name> by <artist_name>" lines.

    When ``test`` is true, ``playlist`` is treated as a plain list of track
    dicts and only the track lines are returned. Otherwise ``playlist`` is a
    dict with "playlist_name" and "tracks", and the track lines are preceded
    by a "Playlist Name: ..." line and a "Tracks:" line.
    """
    if test:
        heading = ''
        entries = playlist
    else:
        heading = f"Playlist Name: {playlist['playlist_name']}\n" + "Tracks:\n"
        entries = playlist['tracks']

    track_lines = ''.join(
        f"- {entry['track_name']} by {entry['artist_name']}\n" for entry in entries
    )
    return heading + track_lines

In [None]:
# function that prompts the LLM to generate a response
def generate(system_prompt, user_input, model="gpt-4", max_tokens=256, temperature=0):
    """Send one system + user message pair to the chat completions API.

    Uses the module-level ``client``.

    Args:
        system_prompt: text sent with the "system" role.
        user_input: text sent with the "user" role.
        model: name of the chat model to query.
        max_tokens: upper bound on the number of generated tokens.
        temperature: sampling temperature; the default of 0 gives greedy decoding.

    Returns:
        The first choice's message content with surrounding whitespace
        removed, or ``None`` if the request (or reading the response) fails.
    """
    try:
        response = client.chat.completions.create(
            model=model,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_input},
            ],
            temperature=temperature,
            max_tokens=max_tokens,
            top_p=1,
            frequency_penalty=0,
            presence_penalty=0,
        )
        return response.choices[0].message.content.strip()
    except Exception as e:
        # Deliberate best-effort: report the failure and let the caller decide
        # what to do with a missing response instead of aborting a long run.
        print(f"An error occurred: {e}")
        return None

In [None]:
# function that checks whether the recommendation is a hit, i.e., a success
def is_hit(output, test_set):
    """Return 1 if any recommended line exactly matches a held-out track, else 0.

    Args:
        output: recommendation text, one "- <track> by <artist>" entry per
            line, or ``None`` / empty when no recommendations were produced.
        test_set: iterable of track dicts with "track_name" and "artist_name".

    Returns:
        1 when at least one cleaned output line equals
        "<track_name> by <artist_name>" for some held-out track, otherwise 0.
    """
    # generate() returns None on failure; without this guard the split below
    # would raise AttributeError and abort the whole evaluation run.
    if not output:
        return 0

    held_out = {f"{track['track_name']} by {track['artist_name']}" for track in test_set}

    for line in output.split('\n'):
        # Remove only the leading bullet. Stripping '-' from both ends would
        # also eat hyphens that belong to the end of a title or artist name.
        candidate = line.strip().lstrip('- ').strip()
        if candidate and candidate in held_out:
            return 1
    return 0

In [None]:
# function that runs the main loop and saves the output to a file
def run_and_save(playlists, filename, delay_between_requests=60):
    """Evaluate recommendations for each playlist and log the results to a CSV file.

    For every playlist: split it into train/test tracks, ask the model for a
    listener profile, ask it for recommendations given playlist + profile, and
    record whether any recommendation matches a held-out track.

    Args:
        playlists: iterable of playlist dicts ("playlist_name", "tracks").
        filename: path of the CSV file to (over)write. Columns are
            'Train Set', 'Test Set', 'Recommendations', 'Hit'.
        delay_between_requests: seconds to wait between consecutive playlists.

    Returns:
        The hit rate (hits / playlists processed), or 0.0 if there were no
        playlists. A playlist whose model calls fail is logged with empty
        recommendations and counted as a miss.
    """
    hit_count = 0
    processed = 0

    # Explicit UTF-8 so non-ASCII track/artist names do not depend on the
    # platform's default encoding.
    with open(filename, 'w', newline='', encoding='utf-8') as output_file:
        writer = csv.writer(output_file)

        # header
        writer.writerow(['Train Set', 'Test Set', 'Recommendations', 'Hit'])

        # main loop
        for playlist in playlists:
            # Wait between playlists only; no need to sleep before the first
            # one or after the last one.
            if processed:
                time.sleep(delay_between_requests)

            # prompt llm for recommendations
            train_set, test_set = split_train_test(playlist)
            train_set_str = to_str(train_set)
            features = generate(feature_prompt, train_set_str)

            # generate() returns None on failure; concatenating None would
            # raise TypeError, so skip the second request in that case.
            recommendations = None
            if features is not None:
                user_input = train_set_str + "\n" + features
                recommendations = generate(recommendation_prompt, user_input)

            # check for hit (a failed request is a miss)
            hit = is_hit(recommendations, test_set) if recommendations else 0
            hit_count += hit
            processed += 1

            # convert test set dict to string
            test_set_str = to_str(test_set, True)

            # write data row
            writer.writerow([train_set_str, test_set_str, recommendations or '', hit])

    # Count rows actually processed: works for any iterable and avoids a
    # ZeroDivisionError on empty input.
    return hit_count / processed if processed else 0.0

In [None]:
# load dataset
input_file = 'cleaned_playlists.json'
# JSON files are UTF-8 by convention; being explicit avoids mis-decoding
# non-ASCII track and artist names on platforms with a different default encoding.
with open(input_file, 'r', encoding='utf-8') as inputFile:
    data = json.load(inputFile)