In [8]:
import pandas as pd
from dotenv import load_dotenv
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials

# Read variables from a local .env file into the process environment before
# the client is built; no credentials are passed explicitly below.
# NOTE(review): presumably SpotifyClientCredentials picks up the client id /
# secret from the environment -- confirm against the spotipy docs.
load_dotenv()

# Shared Spotify Web API client used by the cells that follow.
sp = spotipy.Spotify(
    auth_manager=SpotifyClientCredentials(),
)

In [6]:
# Collect one row per playlist track: basic metadata plus Spotify audio features.
# Each row is a plain list whose value order must match the column labels used
# when `data` is turned into a DataFrame.
data = []

# The playlist endpoint is paginated. `results` keeps the first page (as before);
# `page` walks through it and every following page so that playlists longer than
# one page are collected completely.
results = sp.playlist_items("5Rh7ikX5dteMXfc8tmeBJy")
page = results
while page:
    # Keep only entries that carry a track with an id; anything else cannot be
    # looked up for audio features.
    # NOTE(review): presumably these are removed/unavailable tracks (None) and
    # local files (no id) -- confirm against the Web API docs.
    tracks = [
        item["track"]
        for item in page["items"]
        if item.get("track") and item["track"].get("id")
    ]

    # One batched audio-features request per page instead of one request per track.
    # NOTE(review): relies on the page size not exceeding the batch limit of the
    # audio-features endpoint (both 100 by default, per the docs) -- confirm.
    track_ids = [track["id"] for track in tracks]
    features = (sp.audio_features(track_ids) or []) if track_ids else []

    for track, track_features in zip(tracks, features):
        if track_features is None:
            # No audio features available for this track; skip it rather than
            # crash on the subscripts below.
            continue

        data.append([
            track["id"],                        # string
            track["name"],                      # string
            track['artists'][0]['name'],        # string   (first listed artist only)
            track['album']['name'],             # string
            track["popularity"],                # integer  0-100  % (current trendiness)
            track["duration_ms"],               # integer
            track_features["acousticness"],     # float    0-1    probability (whether the track uses acoustic rather than electric sounds)
            track_features["danceability"],     # float    0-1    probability (suitability for dancing)
            track_features["energy"],           # float    0-1    % (intensity / activity)
            track_features["instrumentalness"], # float    0-1    probability (whether the track contains no vocals)
            track_features["key"],              # integer  -1-11  (-1: none identified, 0: C, ...)
            track_features["liveness"],         # float    0-1    probability (whether the song is a live performance)
            track_features["loudness"],         # float           dB
            track_features["mode"],             # integer  0,1    (0: minor or 1: major)
            track_features["speechiness"],      # float    0-1    % (how many words are spoken rather than sung)
            track_features["tempo"],            # float           BPM
            track_features["time_signature"],   # integer  3-7    time signature (x/4, example: 3/4)
            track_features["valence"],          # float    0-1    spectrum (0: sad / angry, 1: happy)
            # TODO add explicit
            # TODO add release year
        ])

    # Follow the "next" link until the last page has been processed.
    page = sp.next(page) if page.get("next") else None

In [14]:
# Tabulate the collected rows; the labels are listed in the same order as the
# values appended to each row of `data`.
df = pd.DataFrame(
    data,
    columns=[
        'id', 'name', 'artist', 'album', 'popularity', 'duration',
        'acousticness', 'danceability', 'energy', 'instrumentalness',
        'key', 'liveness', 'loudness', 'mode', 'speechiness', 'tempo',
        'time_signature', 'valence',
    ],
)

# Feature frame for similarity work: `df` stays untouched, `updated` is a copy
# without the columns judged unhelpful for comparing songs.
updated = df.drop(
    columns=[
        'artist',      # irrelevant
        'album',       # irrelevant
        'popularity',  # popularity isn't suitable to measure similarity of songs
        'duration',    # not relevant to measure song similarity
        'liveness',    # not relevant to measure song similarity
        # TODO loudness?
        # TODO mode?
        # TODO do we need tempo AND time_signature?
    ],
)

# Show which columns remain after pruning.
print(updated.columns)

Index(['id', 'name', 'acousticness', 'danceability', 'energy',
       'instrumentalness', 'key', 'loudness', 'mode', 'speechiness', 'tempo',
       'time_signature', 'valence'],
      dtype='object')
