# More Than a Feeling

This notebook will download music metadata from the Spotify API and use it to train an unsupervised model that associates songs together based on their audio features. The model will be used to create a playlist of songs that are similar to a seed song.

In [None]:
%pip install spotipy

In [1]:
# Import necessary libraries
import os
import sys
import time
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

# Import the Spotipy library
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials

# Import libraries to use MusicBrainz API
import musicbrainzngs as mb

# Import the libraries for the Unsupervised Learning model
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA


In [13]:
# Set the Spotify API credentials
os.environ["SPOTIPY_CLIENT_ID"] = "3d5abd956131402db5573fe68302d1ba"
os.environ["SPOTIPY_CLIENT_SECRET"] = "ccf62c34a64b447b962955498ed27ebd"

# Create the Spotify object
spot = spotipy.Spotify(client_credentials_manager=SpotifyClientCredentials())

In [None]:
# Define the function to create a DataFrame with the song titles and audio features of the top 1000 songs on Spotify
def get_top_1000_songs():
    # Create an empty DataFrame
    df = pd.DataFrame(columns=["song_title", "artist", "popularity", "acousticness", "danceability", "energy", "instrumentalness", "liveness", "loudness", "speechiness", "tempo", "valence"])
    
    # Get the top 1000 songs on Spotify
    for i in range(0, 1000, 50):
        results = spot.search(q="year:2021", type="track", limit=50, offset=i)
        for track in results["tracks"]["items"]:
            song_title = track["name"]
            artist = track["artists"][0]["name"]
            popularity = track["popularity"]
            audio_features = spot.audio_features(track["id"])[0]
            acousticness = audio_features["acousticness"]
            danceability = audio_features["danceability"]
            energy = audio_features["energy"]
            instrumentalness = audio_features["instrumentalness"]
            liveness = audio_features["liveness"]
            loudness = audio_features["loudness"]
            speechiness = audio_features["speechiness"]
            tempo = audio_features["tempo"]
            valence = audio_features["valence"]
            df = df.append({"song_title": song_title, "artist": artist, "popularity": popularity, "acousticness": acousticness, "danceability": danceability, "energy": energy, "instrumentalness": instrumentalness, "liveness": liveness, "loudness": loudness, "speechiness": speechiness, "tempo": tempo, "valence": valence}, ignore_index=True)
    
    return df


In [14]:
def get_top_1000_songs():
    # Create an empty list to store the dataframes
    dfs = []
    
    # Get the top 1000 songs on Spotify
    for i in range(0, 50, 50):
        results = spot.search(q="year:2021", type="track", limit=50, offset=i)
        for track in results["tracks"]["items"]:
            song_title = track["name"]
            artist = track["artists"][0]["name"]
            popularity = track["popularity"]
            audio_features = spot.audio_features(track["id"])[0]
            df = pd.DataFrame({"song_title": [song_title], "artist": [artist], "popularity": [popularity]})
            dfs.append(df)
    
    # Concatenate the dataframes into a single dataframe
    df = pd.concat(dfs, ignore_index=True)
    
    return df

In [15]:
# View the DataFrame
df = get_top_1000_songs()
df.head()

Max Retries reached


SpotifyException: http status: 429, code:-1 - /v1/audio-features/?ids=3HTa0ufQtxHATYMKDpl5HD:
 Max Retries, reason: too many 429 error responses

In [None]:
%pip install musicbrainzngs

In [29]:
# Set the MusicBrainz API credentials
mb.set_useragent("Music Recommendation System", "1.0", "zenferno@gmail.com")    

# Define the function to get 1000 most popular songs from the MusicBrainz API using pandas.concat
def get_top_1000_mb_songs():
    # Create an empty list to store the dataframes
    dfs = []
    
    # Get the top 1000 most popular songs from the MusicBrainz API produced in the U.S.
    for i in range(0, 1000, 100):
        results = mb.search_recordings(limit=100, offset=i, query="date:2023 AND country:US AND type:single")
        for recording in results["recording-list"]:
            song_title = recording["title"]
            artist = recording["artist-credit"][0]["artist"]["name"]
            genre = recording["genre-list"][0]["name"] if "genre-list" in recording else None
            popularity = recording["ext:score"]
            df = pd.DataFrame({"song_title": [song_title], "artist": [artist], "genre": [genre], "popularity": [popularity]})
            dfs.append(df)
            print(song_title, artist, genre, popularity)

    # Concatenate the dataframes into a single dataframe
    df = pd.concat(dfs, ignore_index=True)

    return df


In [30]:
# View the DataFrame
df = get_top_1000_mb_songs()

Good Things Kaylee Bell None 100
Sin The Last Arizona None 100
Good Dreams The Last Arizona None 100
When Summer Rolls Around Kaylee Bell None 100
Nights Like This Kaylee Bell None 100
pheromones XINA None 100
Boots 'n All Kaylee Bell None 100
Stuck Always Last None 100
Nothing Lasts Emily Yacina None 100
1987 DIAMANTE None 100
Outta My Head Jada Facer None 100
You, Every Time Jada Facer None 100
Always and Forever SERRA None 100
Codependency Dial Tone None 100
SUNSHINE - Da'Raja None 100
Juicy Lucy Kareem Rahma None 100
No, There's No Hope For You Remo Drive None 100
hesitation rosie None 100
Lil Birdie DJ Harrison None 100
Heaven Levitation Room None 100
Broke Down Engine Woody Platt None 100
BREAK & TEAR! GR!N None 100
Forgive Me Father J-BLEV None 100
Castaway The Elovaters None 100
Come And Get It The Elovaters None 100
BEACHY Daddy Yankee None 100
LA HORA Y EL DÍA Daddy Yankee None 100
BEACHY (Play‐N‐Skillz House Remix) Daddy Yankee None 100
Until I Can’t JONJEN None 100
Iconic (

In [31]:
# Get the columns from MusicBrainz API
df.columns

Index(['song_title', 'artist', 'genre', 'popularity'], dtype='object')

In [33]:
# search the df for a specific song from 2023
df[df["song_title"].str.contains("Illuminate")]

Unnamed: 0,song_title,artist,genre,popularity
998,Illuminate (dub mix),Avoure,,100
999,Illuminate,Avoure,,100


In [12]:
# output the df to a csv file
df.to_csv("musicbrainz.csv")