In [2]:
#Analyze and extract various audio track features from multiple spotify playlists into one .csv file.  
import spotipy
import pandas as pd
import json
import itertools

In [3]:
username = "Aleksander Tidemann"
playlists = {
    "ja" : "2BY4yc5VgroCuO22NktiV0?si=FuDm_0VSQbO0zDigCtgomg",
    "nei" : "0u50QEFaOhco85LsYbhdk1?si=PWX0899_TGuapmDhzLl2fg"
}

#Authenticate my user from Spotify
CLIENT_ID = "5424fe3bb2b646c8a2144d03d41f22c7"
CLIENT_SECRET = "f13ad8573f3444908afdf6a6199db827"
token = spotipy.oauth2.SpotifyClientCredentials(client_id=CLIENT_ID, client_secret=CLIENT_SECRET)
sp = spotipy.Spotify(auth_manager=token)

# Functions for gathering features

## Collect scalar features, single numbers per song

In [4]:
def analyze_features(playlist):
    #variables specifying the features we want to extract and where to store them.
    playlist_features = {}
    playlist_features_list = ["track_name", 
                              "duration_ms", "energy", "danceability", "loudness", "valence", "tempo", "time_signature"]
    df_playlist = pd.DataFrame(columns = playlist_features_list)

    for track in playlist:
        #Basic metadata
        #playlist_features["artist"] = track["track"]["album"]["artists"][0]["name"]
        #playlist_features["album"] = track["track"]["album"]["name"]
        playlist_features["track_name"] = track["track"]["name"]

        audio_features = sp.audio_features(track["track"]["id"])[0]
        for feature in playlist_features_list[1:]:
            playlist_features[feature] = audio_features[feature]

        #store and concatonate in dataframes
        track_df = pd.DataFrame(playlist_features, index=[0])
        df_playlist = pd.concat([df_playlist, track_df], ignore_index=True)
        
    return df_playlist

## Collect vectorial and scalar features, multiple values times per track

In [5]:
def analyze_analysis(playlist, df_features):
    #Dicts specifying the analysis features we want to extract, and a dict to store them in. 
    #The is dynamic so audio features can be added if wanted. Complete list avaliable at:
    #https://developer.spotify.com/documentation/web-api/reference/tracks/get-audio-analysis/
    analysis_features_gathered = {}
    analysis_feature_list = {
        "segments" : [
            "pitches",
            "timbre"
        ],
        "sections" : [
            "key",
            "mode"
        ]
    }

    #Colunm names and framwork for our final dataframe
    framework = []
    for i, (key, val) in enumerate(analysis_feature_list.items()):
        for idx in range(len(val)):
            framework.append(val[idx])
    df_framework = pd.DataFrame(columns=framework)

    for count, track in enumerate(playlist):
        #First store the track duration in milliseconds
        track_dur = df_features["duration_ms"][count]
        
        #gather the desired values (expressed in the analysis_feature_list) from each track 
        #and store them in our new analysis_features_gathered dictionary.
        audio_analysis = sp.audio_analysis(track["track"]["id"])
        for i, (key, val) in enumerate(analysis_feature_list.items()):
            for item in range(len(val)):
                #Initialize the key to the correct type. Always store within list.
                analysis_features_gathered[val[item]] = []  
                for x, (y, z) in enumerate(audio_analysis.items()):
                    if y == key:
                        for w in range(len(z)):
                            analysis_features_gathered[val[item]].append(z[w][val[item]])
                            
        #Now we want to configure and calculate different kinds of averages of the lists and numbers
        #we have gathered per segment and section of each track.
        for i, (key, val) in enumerate(analysis_features_gathered.items()):
            p = []
            if type(val[0]) == list: #and is therefore ONLY either timbre or pitch in the segment key. else it would be an int or float.
                for w in range(len(val[0])):
                    store = []
                    for item in val:
                        store.append(item[w])
                    p.append(round(sum(store) / len(store), 2))
                analysis_features_gathered[key] = []
                analysis_features_gathered[key].append(p)
            else:
                if key == "key":
                    #precentage value indicating how many key changes happen in the track, based on its duration.
                    val = [k for k, g in itertools.groupby(val)] #remove consecutive duplicate values.
                    ms_per_chord_change = track_dur/len(val) #how many miliseconds per chord change on average.
                    change_percentage = (100/track_dur)*ms_per_chord_change
                    p.append(abs(change_percentage-100)) #invert values for a more intuitive reading.
                    analysis_features_gathered[key] = p
                else:
                    p.append(round(sum(val)/len(val), 2))
                    analysis_features_gathered[key] = p

        #store and concatonate in dataframes
        df_analysis_features = pd.DataFrame(analysis_features_gathered)
        df_framework = pd.concat([df_framework, df_analysis_features], ignore_index=True)
    
    return df_framework

# Gather the specified features from the Spotify playlists

In [6]:
def main(playlist_dict):
    #gather items from multiple playlists into one dataframe.
    for i, (key, val) in enumerate(playlist_dict.items()):
        playlist = sp.user_playlist(username, val)["tracks"]["items"]
        df_playlist_features = analyze_features(playlist)
        df_playlist_analysis = analyze_analysis(playlist, df_playlist_features) #ADD THE PREVIOUS DATAFRAME HERE.
        df_analysis_combined = pd.concat([df_playlist_features, df_playlist_analysis], ignore_index=False, axis=1)
        
        #rename some coloumns
        df_analysis_combined = df_analysis_combined.rename(columns = {"pitches": "pitch_avg", 
                                                                      "timbre" : "timbre_avg", 
                                                                      "key": "key_change_percentage", 
                                                                      "mode": "mode_avg"}, inplace=False)
        #Add playlist key
        df_analysis_combined['playlist'] = key
        
        if i == 0:
            playlist_dict_df = df_analysis_combined
        else:
            playlist_dict_df = pd.concat([playlist_dict_df, df_analysis_combined], ignore_index=True)
            
    return playlist_dict_df

# Collect the features and export to file

In [7]:
df_multiple_playlists = main(playlists)
df_multiple_playlists.head()
# df_multiple_playlists.to_csv(r"./datasets/bach.csv", index=False)

# A nice way to view dictionary data. inspect the raw datax 
#print(json.dumps(track, indent=4 sort_keys=True))

SpotifyException: http status: 400, code:-1 - Unsupported URL / URI., reason: None

# Create the rating/target of each track in a seperate dataset

In [7]:
col = {
    'track_name':[],
    'rating': []
}

for i in range(len(df_multiple_playlists)):
    pl = df_multiple_playlists['playlist'][i]
    track = df_multiple_playlists['track_name'][i]
    col['track_name'].append(track)
    rating = input(f'Rating of "{track}" in playlist "{pl}" : ')
    col['rating'].append(rating)
        
df_ratings = pd.DataFrame(col, columns=['track_name','rating'])
df_ratings.to_csv("./datasets/test_ratings.csv", index=True)


Rating of "Concerto No. 4 in G major, BWV 1049: II. Andante" in playlist "ja" : 7
Rating of "Violin Concerto in A Minor, BWV 1041: II. Andante" in playlist "ja" : 8
Rating of "Mass in B Minor, BWV 232: Agnus Dei: Agnus Dei (Alto)" in playlist "ja" : 7
Rating of "The Well-Tempered Clavier,BWV 846: Prelude I in C Major" in playlist "ja" : 8
Rating of "Bach: Fugee, BWV 543" in playlist "ja" : 8
Rating of "Matthäus Passion, BWV 244: Recitativo, "Da hub er an, sich zu verfluchen"" in playlist "ja" : 8
Rating of "Matthäus Passion, BWV 244: Aria, "Erbarme dich"" in playlist "ja" : 9
Rating of "Suite No.3 In D, BWV 1068: 2. Air" in playlist "ja" : 9
Rating of "Gottes Zeit ist die allerbeste Zeit, Sonatina az "Actus tragicus"-ból, BWV 106" in playlist "ja" : 7
Rating of "Bach, JS: Concerto for 4 Pianos in A Minor, BWV 1065: II. Largo" in playlist "ja" : 7
Rating of "Goldberg Variations, BWV 988: Aria" in playlist "ja" : 8
Rating of "Fugue, In C Minor, BWV 961" in playlist "ja" : 7
Rating of "To

Rating of "Aria mit 30 Veränderungen, BWV 988 "Goldberg Variations" - Arranged for String Trio by Dmitry Sitkovetsky: Var. 8 a 2 Clav." in playlist "nei" : 3
Rating of "Mache dich, mein Geist, bereit, BWV 115: IV. Air "Bete aber auch dabei"" in playlist "nei" : 3
Rating of "French Suite No.2 in C minor, BWV 813: 4. Air" in playlist "nei" : 5
Rating of "Jauchzet Gott in allen Landen Cantata, BWV 51 (Additional Instrumentation By W.F. Bach): 1. "Jauchzet Gott in allen Landen"" in playlist "nei" : 2
Rating of "Von Himmel hoch, da komm' ich her, BWV 606" in playlist "nei" : 4
Rating of "Jesu meine Freude Motet, BWV 227: So nun der Geist" in playlist "nei" : 5
Rating of "Aria mit 30 Veränderungen, BWV 988 "Goldberg Variations" - Arranged for String Trio by Dmitry Sitkovetsky: Var. 18 Canone alla Sesta a 1 Clav." in playlist "nei" : 3
Rating of "Aria mit 30 Veränderungen, BWV 988 "Goldberg Variations" - Arranged for Harp by Catrin Finch: Var. 22 Alla breve a 1 Clav." in playlist "nei" : 3
Ra