# PlaylistDivider: 
A program that uses ML to learn each user's own definition of a category, then divides the user's playlists into smaller playlists that follow those categories, for better organization.

* Use the Spotify API in conjunction with the last.fm API (the last.fm API doesn't seem to be working, but this project can still be done with the GetSongBPM API)
* Use pandas and NumPy to manipulate and store the data
* Use scikit-learn for ML

How to scale up:
 Add a login system so that a separate model is kept for each user; each model learns what that user defines as 'happy' or 'sad' and can therefore divide that user's songs more accurately.

**This is the playlist dividing script**

This project is divided into two parts: this script chooses a playlist and then divides it, while the other script trains the model.

https://getsongbpm.com/api

https://www.last.fm/api/account/create
https://www.last.fm/music/Sabrina+Carpenter/_/Tears
https://listenbrainz.readthedocs.io/en/latest/users/api-compat.html
https://www.reddit.com/r/spotifyapi/comments/1ldtwro/track_audio_feature_substitute/
https://www.reddit.com/r/spotifyapi/comments/1h75k49/spotifys_api_changes_hurt_developersheres_a/
https://developer.spotify.com/blog/2024-11-27-changes-to-the-web-api
https://developer.spotify.com/documentation/web-api/reference/get-audio-features

https://www.kaggle.com/docs/api#authentication
https://www.kaggle.com/settings
https://www.kaggle.com/datasets/maharshipandya/-spotify-tracks-dataset
https://www.kaggle.com/datasets/conorvaneden/best-songs-on-spotify-for-every-year-2000-2023
https://www.kaggle.com/datasets/undefinenull/million-song-dataset-spotify-lastfm

https://developer.spotify.com/dashboard/8c862e73e7714036837f2fec988afcda
https://developer.spotify.com/documentation/web-api/concepts/access-token
https://developer.spotify.com/documentation/web-api/concepts/authorization

* https://github.com/bujjujj/PlaylistDivider



In [3]:
import spotipy
from spotipy.oauth2 import SpotifyOAuth
import pylast
from tqdm import tqdm
import time
import joblib
import os
from collections import defaultdict

In [4]:
# Authentication
# All credentials are read from environment variables so that no secret is
# stored in the notebook itself.
SPOTIPY_CLIENT_ID = os.getenv("SPOTIPY_CLIENT_ID")
SPOTIPY_CLIENT_SECRET = os.getenv("SPOTIPY_CLIENT_SECRET")
SPOTIPY_REDIRECT_URI = os.getenv("SPOTIPY_REDIRECT_URI")

LASTFM_API_KEY = os.getenv("LASTFM_API_KEY")
# The shared secret lives in its own variable; reading LASTFM_API_KEY here
# would silently pass the API key as the secret.
LASTFM_API_SECRET = os.getenv("LASTFM_API_SECRET", "")
LASTFM_PSWD = os.getenv("LASTFM_PSWD")

# To generate your password hash:
# password_hash = pylast.md5("YourLastFmPassword")

USERNAME = "bujjujj"
# The hash is not needed for the read-only tag lookups done below, so a missing
# password must not abort the cell.
PASSWORD_HASH = pylast.md5(LASTFM_PSWD) if LASTFM_PSWD else None

# Authenticate with both services
print("Connecting to APIs...")
try:
    sp = spotipy.Spotify(auth_manager=SpotifyOAuth(
        client_id=SPOTIPY_CLIENT_ID,
        client_secret=SPOTIPY_CLIENT_SECRET,
        redirect_uri=SPOTIPY_REDIRECT_URI,
        scope="playlist-modify-public playlist-read-private"
    ))
    # Calling the API once forces the OAuth flow, so bad credentials fail here.
    user_id = sp.current_user()['id']
    network = pylast.LastFMNetwork(api_key=LASTFM_API_KEY, api_secret=LASTFM_API_SECRET)
    print("Successfully connected to Spotify & Last.fm!")
except Exception as e:
    # Raising stops this cell (and a "Run All") without shutting the kernel
    # down, which is what exit() does inside a notebook.
    raise SystemExit(f"Error during authentication: {e}") from e

Connecting to APIs...
Successfully connected to Spotify & Last.fm!


In [5]:
# --- 2. Load the Trained Model ---
# The classifier is produced by the companion training script and is expected
# in the current working directory.

print("Loading the trained song classifier model...")
try:
    # NOTE(review): joblib files are pickle-based, so loading one executes
    # arbitrary code. Only load a file produced by your own training run.
    model = joblib.load('song_classifier.joblib')
except FileNotFoundError as err:
    # Raising stops this cell (and a "Run All") without shutting the kernel
    # down, which is what exit() does inside a notebook.
    raise SystemExit(
        "Error: 'song_classifier.joblib' not found. Please run the training script first."
    ) from err

# Re-use the feature extraction function from the training script
def get_lastfm_features(artist, track_name):
    """Build the weighted tag string used as model input for one song.

    Looks the track up through the module-level ``network`` object and reads
    its top tags. Each tag name is lower-cased, has its spaces replaced by
    hyphens, and is repeated ``int(weight)`` times; the resulting tokens are
    joined with single spaces.

    Args:
        artist: Artist name passed to ``network.get_track``.
        track_name: Track title passed to ``network.get_track``.

    Returns:
        The space-separated feature string, or ``""`` if any exception is
        raised while fetching or processing the tags.
    """
    try:
        weighted_tags = network.get_track(artist, track_name).get_top_tags()
        tokens = []
        for entry in weighted_tags:
            repeat = int(entry.weight)
            if repeat > 0:
                token = entry.item.name.lower().replace(" ", "-")
                tokens.extend([token] * repeat)
        return " ".join(tokens)
    except Exception:
        # Best-effort lookup: any failure yields an empty feature string.
        return ""

Loading the trained song classifier model...


In [7]:
# --- 3. Classify a New Playlist ---
# Lets the user pick one of their playlists, then runs every track through the
# trained model and groups "Artist - Title" strings by predicted category.

chosen_playlist = None
# Defined up front so the display and playlist-creation cells still run (with
# nothing to show) when no playlist ends up being selected.
sorted_songs = defaultdict(list)

try:
    # The playlist listing is paginated; follow the 'next' links so playlists
    # beyond the first page are selectable too.
    playlist_page = sp.current_user_playlists()
    playlists = playlist_page['items']
    while playlist_page['next']:
        playlist_page = sp.next(playlist_page)
        playlists.extend(playlist_page['items'])

    if not playlists:
        print("No playlists found in your Spotify account.")
    else:
        print("\nYour Spotify Playlists:")
        for i, playlist in enumerate(playlists):
            print(f"  {i+1}. {playlist['name']} ({playlist['tracks']['total']} tracks)")

        # Loop to get a valid user choice
        while True:
            try:
                choice = int(input("\nEnter the number of the playlist you want to sort: "))
                if 1 <= choice <= len(playlists):
                    chosen_playlist = playlists[choice - 1]
                    break
                else:
                    print("Invalid number. Please try again.")
            except ValueError:
                print("Please enter a valid number.")

except Exception as e:
    print(f"An error occurred while fetching playlists: {e}")

# --- Continue only if a playlist was successfully selected ---
if chosen_playlist:
    playlist_id_to_sort = chosen_playlist['id']
    print(f"\nFetching tracks from '{chosen_playlist['name']}'...")
    results = sp.playlist_items(playlist_id_to_sort)
    tracks_to_classify = results['items']
    # Playlist items are paginated as well; collect every page.
    while results['next']:
        results = sp.next(results)
        tracks_to_classify.extend(results['items'])

    print("Classifying songs...")
    for item in tqdm(tracks_to_classify, desc="Classifying"):
        track = item['track']
        # Skip entries with no track object or no artist.
        if track and track['artists']:
            artist = track['artists'][0]['name']
            name = track['name']

            # Get the song's features
            features = get_lastfm_features(artist, name)

            # An empty feature string means the lookup failed or there were no
            # tags; such songs are left unclassified.
            if features:
                # Use the model to predict the label (category)
                prediction = model.predict([features])
                predicted_label = prediction[0]

                # Add the song to our sorted dictionary
                sorted_songs[predicted_label].append(f"{artist} - {name}")
            # Short pause between Last.fm lookups.
            time.sleep(0.1)


Your Spotify Playlists:
  1. hiphop-workout (432 tracks)
  2. makeout (252 tracks)
  3. alt (252 tracks)
  4. lofi-downtempo (1243 tracks)
  5. instrumental-happy (235 tracks)
  6. ambient-focus (1067 tracks)
  7. atmospheric-room (200 tracks)
  8. acoustic-guitar (180 tracks)
  9. citypop (101 tracks)
  10. feel-good (408 tracks)
  11. edm-club (217 tracks)
  12. rap adjacent (200 tracks)
  13. chase (18 tracks)
  14. 🌬️  (27 tracks)
  15. upbeat ! (162 tracks)
  16. room (200 tracks)
  17. isntuemental but not really (58 tracks)
  18. canonsburg (72 tracks)
  19. 赞美诗 (8 tracks)
  20. jhus MOBILE VEHICLE (64 tracks)
  21. china (1 tracks)
  22. we gather and we sing (88 tracks)
  23. molbo (40 tracks)
  24. FALL CONF CAR (29 tracks)
  25. song (1 tracks)
  26. song (1 tracks)
  27. is nothing sacred? 23' Setlist (17 tracks)
  28. eric nam setlisttt (17 tracks)
  29. waiting room (73 tracks)
  30. KCM 2023-2024 (64 tracks)
  31. 7127 La Plata (41 tracks)
  32. LILIAN's Playlist (25 tr

Classifying: 100%|██████████| 200/200 [00:44<00:00,  4.46it/s]


In [8]:
# --- 4. Display Results ---
# Print every predicted category followed by the songs assigned to it.

print("\n--- Classification Complete ---")
for category in sorted_songs:
    print(f"\nCategory: {category}")
    for entry in sorted_songs[category]:
        print(f"   - {entry}")


--- Classification Complete ---

Category: atmospheric-room
   - Gracie Abrams - Where do we go now?
   - Lizzy McAlpine - ceilings
   - Daniel Caesar - Neu Roses (Transgressor's Song)
   - Mac Miller - Surf
   - Mac Miller - Hand Me Downs
   - Mac Miller - Come Back to Earth
   - Conan Gray - Astronomy
   - Joji - SLOW DANCING IN THE DARK
   - Gracie Abrams - Mess It Up
   - Mac Miller - Congratulations (feat. Bilal)
   - Dijon - Skin
   - V - Sweet Night
   - V - Christmas Tree
   - d4vd - Sleep Well
   - d4vd - Here With Me
   - Labrinth - Still Don't Know My Name
   - JP Saxe - If the World Was Ending (feat. Julia Michaels)
   - Gracie Abrams - I Knew It, I Know You
   - Gracie Abrams - I Love You, I'm Sorry
   - Chezile - Beanie
   - Ed Sheeran - The A Team
   - Daniel Caesar - Streetcar

Category: hiphop-workout
   - Mac Miller - Good News
   - Mac Miller - Jet Fuel
   - JPEGMAFIA - either on or off the drugs

Category: makeout
   - Mac Miller - Wings

Category: chase
   - Twent

In [None]:
# --- 5. Create New Spotify Playlists ---
# This section takes the `sorted_songs` dictionary and creates playlists from it.
# A playlist is only created once at least one of its tracks has been found on
# Spotify, so categories with no matches do not leave empty playlists behind.

print("\n--- Creating Spotify Playlists ---")

# Get the current user's ID
user_id = sp.current_user()['id']

for playlist_name, tracks in sorted_songs.items():
    print(f"\nProcessing playlist: '{playlist_name}'...")

    track_uris_to_add = []

    # 1. Find the URI for each track in the group
    print(f"Searching for {len(tracks)} tracks...")
    for song_title in tqdm(tracks, desc=f"Finding '{playlist_name}' tracks"):
        try:
            # Parse the 'Artist - Track' string to get individual parts.
            # NOTE(review): an artist name that itself contains ' - ' is split
            # at the wrong place; storing track URIs in `sorted_songs` would
            # avoid this search round-trip entirely.
            artist, track = song_title.split(' - ', 1)
        except ValueError:
            # Skip if the song title doesn't contain ' - '
            continue

        # Build the search query
        query = f"artist:{artist.strip()} track:{track.strip()}"
        results = sp.search(q=query, type="track", limit=1)

        if results['tracks']['items']:
            uri = results['tracks']['items'][0]['uri']
            track_uris_to_add.append(uri)

    if not track_uris_to_add:
        print("No tracks were found on Spotify for this category.")
        continue

    # 2. Create the new playlist now that there is something to put in it
    new_playlist = sp.user_playlist_create(
        user=user_id,
        name=f"Sorted by ML: {playlist_name}",
        public=True,
        description=f"Songs classified as '{playlist_name}' by my personal ML model."
    )
    playlist_id = new_playlist['id']

    # 3. Add all found tracks to the new playlist in batches of 100
    print(f"Adding {len(track_uris_to_add)} songs to the playlist...")
    for i in range(0, len(track_uris_to_add), 100):
        batch = track_uris_to_add[i:i+100]
        sp.playlist_add_items(playlist_id, batch)

print("\nAll playlists have been created! Check your Spotify account.")

In [None]:
# 2. SPOTIPY: Fetch and Select a Playlist To Divide
# Lists the user's playlists and stores the chosen one in `chosen_playlist` /
# `playlist_id` for the cells below.
try:
    # The playlist listing is paginated; follow the 'next' links so playlists
    # beyond the first page are selectable too.
    playlist_page = sp.current_user_playlists()
    playlists = playlist_page['items']
    while playlist_page['next']:
        playlist_page = sp.next(playlist_page)
        playlists.extend(playlist_page['items'])

    print("\nYour Spotify Playlists:")
    for i, playlist in enumerate(playlists):
        print(f"  {i+1}. {playlist['name']} ({playlist['tracks']['total']} tracks)")

    choice = int(input("\nEnter the number of the playlist you want to process: "))
    # Reject 0 and negative numbers explicitly: Python's negative indexing
    # would otherwise silently select a playlist counted from the end.
    if not 1 <= choice <= len(playlists):
        raise IndexError(f"choice {choice} is outside 1..{len(playlists)}")
    chosen_playlist = playlists[choice - 1]
    playlist_id = chosen_playlist['id']

except (ValueError, IndexError) as err:
    # Raising stops this cell (and a "Run All") without shutting the kernel
    # down, which is what exit() does inside a notebook.
    raise SystemExit("Invalid selection. Exiting.") from err


Your Spotify Playlists:
  1. hiphop-workout (432 tracks)
  2. makeout (252 tracks)
  3. alt (252 tracks)
  4. lofi-downtempo (1243 tracks)
  5. instrumental-happy (235 tracks)
  6. ambient-focus (1067 tracks)
  7. atmospheric-room (200 tracks)
  8. acoustic-guitar (180 tracks)
  9. citypop (101 tracks)
  10. feel-good (408 tracks)
  11. edm-club (217 tracks)
  12. rap adjacent (200 tracks)
  13. chase (18 tracks)
  14. 🌬️  (27 tracks)
  15. upbeat ! (162 tracks)
  16. room (200 tracks)
  17. isntuemental but not really (58 tracks)
  18. canonsburg (72 tracks)
  19. 赞美诗 (8 tracks)
  20. jhus MOBILE VEHICLE (64 tracks)
  21. china (1 tracks)
  22. we gather and we sing (88 tracks)
  23. molbo (40 tracks)
  24. FALL CONF CAR (29 tracks)
  25. song (1 tracks)
  26. song (1 tracks)
  27. is nothing sacred? 23' Setlist (17 tracks)
  28. eric nam setlisttt (17 tracks)
  29. waiting room (73 tracks)
  30. KCM 2023-2024 (64 tracks)
  31. 7127 La Plata (41 tracks)
  32. LILIAN's Playlist (25 tr

In [5]:
# 3. SPOTIPY: Get All Tracks from the Chosen Playlist
# Builds `spotify_tracks`, a list of {'artist': ..., 'name': ...} dicts using
# the first listed artist of every track.
print(f"\nFetching all tracks from '{chosen_playlist['name']}'...")
spotify_tracks = []
results = sp.playlist_items(playlist_id)

# Loop to handle pagination (playlists with >100 songs)
while results:
    for item in results['items']:
        track = item['track']
        # Skip entries with no track object or an empty artist list; indexing
        # artists[0] on those would raise.
        if track and track['artists']:
            spotify_tracks.append({
                'artist': track['artists'][0]['name'],
                'name': track['name'],
            })

    # Move to the next page, or end the loop when there is none
    results = sp.next(results) if results['next'] else None

print(f"Found {len(spotify_tracks)} tracks.")


Fetching all tracks from 'room'...
Found 200 tracks.


In [6]:
# Quick manual check of the Last.fm lookup: print the top tags of one known track.
sample_tags = network.get_track("Sabrina Carpenter", "Tears").get_top_tags()
print(sample_tags)

[TopItem(item=pylast.Tag('sexy', pylast.LastFMNetwork('09355a2144fd5690fdf1de865586968f', '09355a2144fd5690fdf1de865586968f', '', '', '')), weight='100'), TopItem(item=pylast.Tag('female vocalist', pylast.LastFMNetwork('09355a2144fd5690fdf1de865586968f', '09355a2144fd5690fdf1de865586968f', '', '', '')), weight='25'), TopItem(item=pylast.Tag('freaky', pylast.LastFMNetwork('09355a2144fd5690fdf1de865586968f', '09355a2144fd5690fdf1de865586968f', '', '', '')), weight='16'), TopItem(item=pylast.Tag('Disco', pylast.LastFMNetwork('09355a2144fd5690fdf1de865586968f', '09355a2144fd5690fdf1de865586968f', '', '', '')), weight='16'), TopItem(item=pylast.Tag('boogie', pylast.LastFMNetwork('09355a2144fd5690fdf1de865586968f', '09355a2144fd5690fdf1de865586968f', '', '', '')), weight='8'), TopItem(item=pylast.Tag('dance-pop', pylast.LastFMNetwork('09355a2144fd5690fdf1de865586968f', '09355a2144fd5690fdf1de865586968f', '', '', '')), weight='8'), TopItem(item=pylast.Tag('sassy', pylast.LastFMNetwork('09355a

In [14]:
# 3. LAST.FM: Fetch tags for each track
# For every track, look up its Last.fm top tags and record each category of
# `mood_map` whose summed tag weight reaches SCORE_THRESHOLD.
# NOTE(review): `mood_map` is not defined anywhere in this notebook. It is read
# as {category: {'required': [...], 'supporting': [...]}} and must be defined
# before this cell is run.
song_data = []
SCORE_THRESHOLD = 100
print("Fetching tags for each track...")

for track in tqdm(spotify_tracks, desc="Processing tracks"):
    if track['artist'] and track['name']:
        try:
            lfm_track = network.get_track(track['artist'], track['name'])
            top_tags = lfm_track.get_top_tags()

            if top_tags:
                # Lower-cased tag name -> integer weight for this song
                song_tags_with_weights = {tag.item.name.lower(): int(tag.weight) for tag in top_tags}
                song_tag_names = set(song_tags_with_weights.keys())  # A set of just the tag names for easy lookups

                playlist_genres = []

                for genre_category, tag_lists in mood_map.items():
                    required_tags = set(tag_lists['required'])
                    supporting_tags = tag_lists['supporting']

                    # 1. First Check: Does the song have ANY of the required tags?
                    # If the required list is empty OR there's an intersection, proceed.
                    if not required_tags or required_tags.intersection(song_tag_names):

                        # 2. Second Check: sum the weights of every category
                        # tag (required or supporting) that the song carries.
                        all_category_tags = required_tags.union(supporting_tags)
                        category_score = sum(
                            song_tags_with_weights.get(tag_name, 0)
                            for tag_name in all_category_tags
                        )

                        if category_score >= SCORE_THRESHOLD:
                            playlist_genres.append(genre_category)

                # Only songs that matched at least one category are kept.
                if playlist_genres:
                    song_data.append({
                        "artist": track['artist'],
                        "track": track['name'],
                        "playlist_genres": playlist_genres
                    })

            # Pause between successful Last.fm lookups.
            time.sleep(0.5)

        except pylast.WSError as e:
            # The exception must be bound here: without `as e` the message
            # below raises NameError inside the handler and aborts the loop.
            print(f"Skipping track due to API error: {e}")
            continue
        except Exception as e:
            print(f"An unexpected error occurred for {track['name']}: {e}")
            continue

print(f"\nSuccessfully collected tag data for {len(song_data)} songs.")

Fetching tags for each track...


Processing tracks: 100%|██████████| 200/200 [02:00<00:00,  1.66it/s]


Successfully collected tag data for 14 songs.





In [15]:

print(song_data)

[{'artist': 'Daniel Caesar', 'track': "Neu Roses (Transgressor's Song)", 'playlist_genres': ['asian-rnb']}, {'artist': 'Mac Miller', 'track': 'Hand Me Downs', 'playlist_genres': ['asian-rnb']}, {'artist': 'Mac Miller', 'track': 'Good News', 'playlist_genres': ['hiphop']}, {'artist': 'Mac Miller', 'track': 'Wings', 'playlist_genres': ['asian-rnb']}, {'artist': 'Conan Gray', 'track': 'Astronomy', 'playlist_genres': ['acoustic-guitar', 'alt-acoustic']}, {'artist': 'Twenty One Pilots', 'track': 'Chlorine', 'playlist_genres': ['asian-rnb']}, {'artist': 'Gracie Abrams', 'track': 'Mess It Up', 'playlist_genres': ['alt-loud', 'alt-acoustic']}, {'artist': 'd4vd', 'track': 'Sleep Well', 'playlist_genres': ['asian-rnb']}, {'artist': 'd4vd', 'track': 'Here With Me', 'playlist_genres': ['alt-loud', 'alt-acoustic', 'ambient-focus', 'instrumental-happy']}, {'artist': 'Novo Amor', 'track': 'Anchor', 'playlist_genres': ['acoustic-guitar']}, {'artist': 'Labrinth', 'track': "Still Don't Know My Name", 'p

In [None]:
""" 
# 4. Create and save DataFrame
# NOTE(review): this export is currently disabled (it sits inside a string
# literal). Re-enabling it requires `import pandas as pd` in the imports cell.
if song_data:
    df = pd.DataFrame(song_data)

    # Convert the list of genres into a string for easier CSV storage.
    # The column name must match the 'playlist_genres' key used when the
    # records were built; 'playlist genres' (with a space) raises KeyError.
    df['playlist_genres'] = df['playlist_genres'].apply(lambda x: ', '.join(x))

    output_filename = 'grouped_songs.csv'
    df.to_csv(output_filename, index=False)

    print(f"\nSuccess! Your data has been saved to '{output_filename}'.")
    print("Here's a preview of your data:")
    print(df.head())
else:
    print("\nNo tag data was collected. This could be because the songs in your playlist have no tags on Last.fm.")

"""