https://www.linkedin.com/pulse/extracting-your-fav-playlist-info-spotifys-api-samantha-jones/

https://towardsdatascience.com/reverse-engineering-spotify-wrapped-ai-using-python-452b58ad1a62

https://towardsdatascience.com/extracting-song-data-from-the-spotify-api-using-python-b1e79388d50

# Import and Settings
### Python - 3.9.10

In [3]:
import spotipy
import numpy as np
import pandas as pd
import seaborn as sb
from creds import cid, secret
import matplotlib.pyplot as mpl
from spotipy.oauth2 import SpotifyClientCredentials

In [4]:
# Spotify API app credentials. `cid` (client id) and `secret` (client secret)
# are imported from the local `creds` module; to hard-code them instead,
# uncomment and fill in:
# cid = 'your_client id'
# secret = 'your_client secret'

# Build the credentials manager from the app credentials, then hand it to the
# Spotify client that the rest of the notebook uses for every API call.
client_credentials_manager = SpotifyClientCredentials(
    client_id=cid,
    client_secret=secret,
)
sp = spotipy.Spotify(
    client_credentials_manager=client_credentials_manager,
)


In [5]:
# Share link of the playlist to analyse.
playlist_link = "https://open.spotify.com/playlist/5ABHKGoOzxkaa28ttQV9sE?si=cb97d0c0919449a7"

# The playlist id is the last path segment of the link, with the
# "?si=..." query string stripped off.
last_segment = playlist_link.rsplit("/", 1)[-1]
playlist_URI = last_segment.partition("?")[0]

In [6]:
# Columns of the resulting DataFrame. The first group is read from the
# playlist item's track object; the second group is read from the track's
# audio-features object. Keeping them separate avoids a positional slice.
metadata_columns = ["track_id", "track_name", "artist", "album"]
audio_feature_columns = [
    "tempo",
    "duration_ms",
    "danceability",
    "energy",
    "key",
    "loudness",
    "mode",
    "speechiness",
    "instrumentalness",
    "liveness",
    "valence",
    "time_signature",
]
playlist_features_list = metadata_columns + audio_feature_columns

# The playlist endpoint is paginated, so follow the "next" links until every
# item has been collected (a single call only returns the first page).
playlist = []
page = sp.playlist_items(playlist_URI, additional_types=("track",))
while page:
    playlist.extend(page["items"])
    page = sp.next(page)  # returns None once there is no further page

# Items without a track object or without a track id cannot be looked up in
# the audio-features endpoint, so they are skipped instead of crashing.
tracks = [
    item["track"]
    for item in playlist
    if item.get("track") and item["track"].get("id")
]

# Request audio features in batches rather than one HTTP call per track.
# The Spotify Web API accepts at most 100 ids per audio-features request.
audio_features_batch_size = 100
rows = []
for start in range(0, len(tracks), audio_features_batch_size):
    batch = tracks[start:start + audio_features_batch_size]
    batch_features = sp.audio_features([track["id"] for track in batch])
    for track, audio_features in zip(batch, batch_features):
        # The API yields None for tracks it has no analysis for.
        if audio_features is None:
            continue
        row = {
            "track_id": track["id"],
            "track_name": track["name"],
            # NOTE(review): this is the first *album* artist, which may
            # differ from the track artist (e.g. on compilations) - confirm
            # that this is the intended column content.
            "artist": track["album"]["artists"][0]["name"],
            "album": track["album"]["name"],
        }
        row.update(
            (feature, audio_features[feature])
            for feature in audio_feature_columns
        )
        rows.append(row)

# Build the frame once from all rows; concatenating inside the loop copies
# the whole frame on every iteration.
df = pd.DataFrame(rows, columns=playlist_features_list)


JSONDecodeError: Expecting value: line 1 column 1 (char 0)

# Column Info & Conversions

In [None]:
# Preview the first rows of the playlist DataFrame as a quick sanity check.
df.head()

# Alternative inspections, kept disabled; uncomment the one that is needed.
# Summary statistics of the columns:
# df.describe()

# Per-column flag telling whether any value is missing:
# df.isna().any()

# Data type of every column:
# df.dtypes

In [None]:
# Converting and Renaming
# Group the audio-feature columns by the numeric dtype they are cast to.
float_columns = [
    "tempo",
    "duration_ms",
    "danceability",
    "energy",
    "key",
    "loudness",
    "speechiness",
    "instrumentalness",
    "liveness",
    "valence",
]
int_columns = ["mode", "time_signature"]

dtype_map = {
    **dict.fromkeys(float_columns, float),
    **dict.fromkeys(int_columns, int),
}
df = df.astype(dtype_map)

# The duration column is about to hold minutes instead of milliseconds, so
# give it a matching name first.
df = df.rename(columns={"duration_ms": "duration_m"})

# milliseconds -> seconds -> minutes
df["duration_m"] = df["duration_m"].div(1000).div(60)

# Show track names next to their speechiness score.
df[["track_name", "speechiness"]]


# Specific Metrics

In [None]:

# Track(s) with the lowest tempo.
# NOTE: by default a notebook cell only renders the value of its final
# expression, so this selection is evaluated but not displayed.
slowest_mask = df['tempo'].eq(df['tempo'].min())
df.loc[slowest_mask, ['track_name', 'tempo', 'duration_m']]

# Longest song (disabled):
# df[df['duration_m']==df['duration_m'].max()]['track_name']

# Track(s) with the highest speechiness; this is the cell's displayed result.
most_spoken_mask = df['speechiness'].eq(df['speechiness'].max())
df.loc[most_spoken_mask, ['track_name', 'artist', 'speechiness']]
# Lowest speechiness instead (disabled):
# df[df['speechiness']==df['speechiness'].min()][['track_name', 'artist','speechiness']]


# Visualisations

In [None]:
# Histogram of track durations (in minutes).

# Select the dark theme BEFORE the figure exists: a style sheet only changes
# the defaults used by figures/axes created afterwards, and the white bars
# below are invisible on the default white background.
mpl.style.use('dark_background')  # Because dark mode everything

dur_hist = df[['duration_m']]

# Reduce the column (a Series) so the bounds are plain scalars; calling int()
# on a one-element Series is deprecated in recent pandas. Floor/ceil make the
# tick range enclose every duration.
duration_min = int(np.floor(df['duration_m'].min()))
duration_max = int(np.ceil(df['duration_m'].max()))

fig = mpl.figure(figsize=(15, 25))

mpl.suptitle('Duration Distribution',
             horizontalalignment="right",
             fontstyle="normal",
             fontsize=35,
             fontfamily="sans-serif")

for position, column in enumerate(dur_hist.columns, start=1):
    axis = mpl.subplot(6, 3, position)
    axis.set_title(column, fontsize='15')

    values = dur_hist[column]
    # One bin per distinct value, capped at 100 bins.
    bins = min(values.nunique(dropna=False), 100)

    mpl.hist(values, bins=bins, color='white')

# X axis tick rate: every half minute; the stop is pushed out by one step so
# that duration_max itself gets a tick.
mpl.xticks(np.arange(duration_min, duration_max + 0.5, 0.5))
# NOTE(review): the right edge of `rect` is 2, i.e. beyond the normalized
# figure width of 1. Kept as in the original; presumably it widens the single
# subplot of the 6x3 grid - confirm the intended layout.
mpl.tight_layout(rect=[0, 0.05, 2, 0.95])

In [None]:
# Histogram of track tempos.

# Select the dark theme BEFORE the figure exists: a style sheet only changes
# the defaults used by figures/axes created afterwards, and the white bars
# below are invisible on the default white background.
mpl.style.use('dark_background')  # Because dark mode everything

tempo_hist = df[['tempo']]

# Reduce the column (a Series) so the bounds are plain scalars; calling int()
# on a one-element Series is deprecated in recent pandas. The +/- 1 pads the
# tick range on both sides.
tempo_max = int(df['tempo'].max() + 1)
tempo_min = int(df['tempo'].min() - 1)

fig = mpl.figure(figsize=(15, 25))

mpl.suptitle('Tempo Distribution',
             horizontalalignment="left",
             fontstyle="normal",
             fontsize=40,
             fontfamily="sans-serif")

for position, column in enumerate(tempo_hist.columns, start=1):
    axis = mpl.subplot(6, 3, position)
    axis.set_title(column, fontsize='15')

    values = tempo_hist[column]
    # One bin per distinct value, capped at 100 bins.
    bins = min(values.nunique(dropna=False), 100)

    mpl.hist(values, bins=bins, color='white')

# X axis tick rate: one tick every 2 tempo units.
mpl.xticks(np.arange(tempo_min, tempo_max, 2))
# NOTE(review): the right edge of `rect` is 5, i.e. beyond the normalized
# figure width of 1. Kept as in the original; presumably it widens the single
# subplot of the 6x3 grid - confirm the intended layout.
mpl.tight_layout(rect=[0, 0.05, 5, 0.95])

In [None]:
# One histogram per numeric feature, laid out on a shared 6x3 grid.

# Select the dark theme BEFORE the figure exists: a style sheet only changes
# the defaults used by figures/axes created afterwards, and the white bars
# below are invisible on the default white background.
mpl.style.use('dark_background')  # Because dark mode everything

ds2 = df[["tempo",
          "duration_m",
          "danceability",
          "energy",
          "key",
          "loudness",
          "mode",
          "speechiness",
          "instrumentalness",
          "liveness",
          "valence",
          "time_signature",
          ]]

fig = mpl.figure(figsize=(15, 25))

mpl.suptitle('Blanket Histogram',
             horizontalalignment="center",
             fontstyle="normal",
             fontsize=24,
             fontfamily="sans-serif")

# Iterate the columns directly instead of indexing them by position.
for position, column in enumerate(ds2.columns, start=1):
    axis = mpl.subplot(6, 3, position)
    axis.set_title(column, fontsize='15')

    values = ds2[column]
    # One bin per distinct value, capped at 100 bins.
    bins = min(values.nunique(dropna=False), 100)

    mpl.hist(values, bins=bins, color='white')

# Leave room at the top of the figure for the suptitle.
mpl.tight_layout(rect=[0, 0.03, 1, 0.95])

In [None]:
# Correlation of every numeric feature with tempo, printed and shown as a
# bar chart.

# The bars are drawn in white, so make sure the dark theme is active before
# the plot is created (harmless if an earlier cell already selected it).
mpl.style.use('dark_background')

# Only numeric columns can be correlated; the frame also holds text columns
# (track id, name, artist, album), which recent pandas versions refuse to
# skip silently in corrwith().
ds2 = df.select_dtypes(include="number")

correlations = ds2.corrwith(df["tempo"])
# Remove tempo's correlation with itself by label. Filtering on the value
# `!= 1` depends on exact float equality and would also discard any other
# column that happens to correlate perfectly.
correlations = correlations.drop(labels="tempo", errors="ignore")

# Strongest relationship first in both lists: largest positive values, then
# the most negative values.
positive_correlations = correlations[correlations > 0].sort_values(ascending=False)
negative_correlations = correlations[correlations < 0].sort_values(ascending=True)

# Printing values
print('Most Positive Correlations: \n', positive_correlations)
print('\nMost Negative Correlations: \n', negative_correlations)

correlations.plot.bar(figsize=(10, 7),
                      fontsize=12,
                      color='white',
                      rot=30)

mpl.title('Tempo Correlation \n',
          horizontalalignment="center",
          fontstyle="normal",
          fontsize=22,
          fontfamily="sans-serif")

In [None]:
# Correlation of every numeric feature with valence, printed and shown as a
# bar chart.

# The bars are drawn in white, so make sure the dark theme is active before
# the plot is created (harmless if an earlier cell already selected it).
mpl.style.use('dark_background')

# Only numeric columns can be correlated; the frame also holds text columns
# (track id, name, artist, album), which recent pandas versions refuse to
# skip silently in corrwith().
ds2 = df.select_dtypes(include="number")

correlations = ds2.corrwith(df["valence"])
# Remove valence's correlation with itself by label. Filtering on the value
# `!= 1` depends on exact float equality and would also discard any other
# column that happens to correlate perfectly.
correlations = correlations.drop(labels="valence", errors="ignore")

# Strongest relationship first in both lists: largest positive values, then
# the most negative values.
positive_correlations = correlations[correlations > 0].sort_values(ascending=False)
negative_correlations = correlations[correlations < 0].sort_values(ascending=True)

# Printing values
print('Most Positive Correlations: \n', positive_correlations)
print('\nMost Negative Correlations: \n', negative_correlations)

correlations.plot.bar(figsize=(10, 7),
                      fontsize=12,
                      color='white',
                      rot=30)

mpl.title('Valence (Positivity) Correlation \n',
          horizontalalignment="center",
          fontstyle="normal",
          fontsize=22,
          fontfamily="sans-serif")