In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# from sklearn.model_selection import train_test_split
# from sklearn.linear_model import LinearRegression
# from sklearn.metrics import mean_squared_error

"Libraries to help with jupyter notebook usage"
# Increases jupyter notebook display width
from IPython.display import display, HTML
display(HTML("<style>.container { width:90% !important; }</style>"))

from IPython.display import Image # Helps display images in notebook

In [2]:
import sys
"Appends both parent and grandparent dir to current path, to allow importing"
sys.path.append('..')
sys.path.append('../..')

from jupyternotebook_utils import *
from utils import *
from data_processing import *
from spotify_data import *
from spotify_utils import *

In [3]:
spotify_scopes = "playlist-read-private playlist-modify-private playlist-modify-public user-read-recently-played user-top-read user-library-modify"
sp = spotipy.Spotify(auth_manager=SpotifyOAuth(client_id=client_id,
                                               client_secret=client_secret,
                                               redirect_uri="http://127.0.0.1:8000/callback",
                                               scope=spotify_scopes))

audio_features_list = ['danceability', 'energy', 'key', 'loudness', 'speechiness', 
                       'acousticness', 'instrumentalness', 'liveness', 'valence',
                       'tempo', 'duration_ms']

In [4]:
user_playlists = get_user_playlists(sp)

user_playlists

[{'playlist_name': 'COLDPLAY 2024',
  'playlist_uri': 'spotify:playlist:6GmE6FZnc31SRHEfS2tNii',
  'playlist_href': 'https://open.spotify.com/playlist/6GmE6FZnc31SRHEfS2tNii',
  'playlist_track_count': 1},
 {'playlist_name': '//lo-fi beats 🎶🌕',
  'playlist_uri': 'spotify:playlist:3p4ZD7MTwrL4OaiEN1dFfu',
  'playlist_href': 'https://open.spotify.com/playlist/3p4ZD7MTwrL4OaiEN1dFfu',
  'playlist_track_count': 425},
 {'playlist_name': "//vibin' 🎺",
  'playlist_uri': 'spotify:playlist:4YB4UFkYYJhy3iNaHZoGXa',
  'playlist_href': 'https://open.spotify.com/playlist/4YB4UFkYYJhy3iNaHZoGXa',
  'playlist_track_count': 91},
 {'playlist_name': '//productive mario-ing',
  'playlist_uri': 'spotify:playlist:51yNcvKUbFtC6WwJi4KHH5',
  'playlist_href': 'https://open.spotify.com/playlist/51yNcvKUbFtC6WwJi4KHH5',
  'playlist_track_count': 1306},
 {'playlist_name': '//du zi da zi 🏋🏻',
  'playlist_uri': 'spotify:playlist:1f2neve1uJPOCfkahOkdEf',
  'playlist_href': 'https://open.spotify.com/playlist/1f2neve

In [5]:
target_playlist = 'spotify:playlist:4YB4UFkYYJhy3iNaHZoGXa'
playlist_audio_features = get_user_playlist_audio_features(sp, target_playlist)

playlist_audio_features

[{'id': 1,
  'danceability': 0.606,
  'energy': 0.569,
  'key': 1,
  'loudness': -10.08,
  'speechiness': 0.0393,
  'acousticness': 0.208,
  'instrumentalness': 0.000278,
  'liveness': 0.17,
  'valence': 0.603,
  'tempo': 73.789,
  'duration_ms': 218667},
 {'id': 2,
  'danceability': 0.514,
  'energy': 0.612,
  'key': 4,
  'loudness': -5.981,
  'speechiness': 0.0298,
  'acousticness': 0.142,
  'instrumentalness': 0,
  'liveness': 0.0868,
  'valence': 0.342,
  'tempo': 125.99,
  'duration_ms': 185005},
 {'id': 3,
  'danceability': 0.404,
  'energy': 0.806,
  'key': 2,
  'loudness': -4.75,
  'speechiness': 0.0496,
  'acousticness': 0.198,
  'instrumentalness': 0,
  'liveness': 0.114,
  'valence': 0.112,
  'tempo': 148.036,
  'duration_ms': 220730},
 {'id': 4,
  'danceability': 0.333,
  'energy': 0.637,
  'key': 1,
  'loudness': -4.904,
  'speechiness': 0.0581,
  'acousticness': 0.131,
  'instrumentalness': 1.8e-05,
  'liveness': 0.149,
  'valence': 0.132,
  'tempo': 139.898,
  'duration_

In [6]:
df = pd.DataFrame(playlist_audio_features)
df = df[audio_features_list]

before_df = df.copy()
after_df = df.copy()

before_df.describe()

Unnamed: 0,danceability,energy,key,loudness,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms
count,91.0,91.0,91.0,91.0,91.0,91.0,91.0,91.0,91.0,91.0,91.0
mean,0.571055,0.495758,5.67033,-8.488253,0.056271,0.495365,0.002865,0.173495,0.41866,120.773846,203605.582418
std,0.132913,0.210728,3.493025,3.056107,0.063613,0.331843,0.018769,0.116136,0.208643,30.898445,37832.776372
min,0.164,0.115,0.0,-18.502,0.0265,0.000802,0.0,0.0115,0.068,50.827,127249.0
25%,0.4985,0.3385,2.0,-10.1995,0.0319,0.1365,0.0,0.1045,0.2355,100.9725,174628.0
50%,0.583,0.457,6.0,-8.436,0.0381,0.547,0.0,0.126,0.426,114.943,201412.0
75%,0.6575,0.64,9.0,-6.2655,0.0501,0.806,1.9e-05,0.1925,0.54,137.934,229116.0
max,0.836,0.927,11.0,-1.921,0.439,0.966,0.162,0.69,0.864,210.871,315476.0


In [7]:
# Step 1: Min-max normalization for selected features
input_data_ranges = {
    'loudness': (-60, 0),
    'tempo': (0, 250),
    'key': (-1, 11)
}
for feature, (feature_min, feature_max) in input_data_ranges.items():
    after_df[feature] = (after_df[feature] - feature_min) / (feature_max - feature_min)

# Step 2: Keep selected audio features
selected_features = ['danceability', 'energy', 'loudness', 'speechiness', 'acousticness', 'liveness', 'valence', 'tempo']
filtered_data = after_df[selected_features].copy()

# Step 3: Cube root transformation for selected features
cubed_features = ['speechiness', 'acousticness', 'liveness']
for feature in cubed_features:
    filtered_data[feature] = np.cbrt(filtered_data[feature])

# Step 4: Logarithmic transformation for selected features
log_features = ['energy', 'loudness']
for feature in log_features:
    filtered_data[feature] = np.log1p(filtered_data[feature])

# Step 5: IQR-filtering based on thresholds
lower_thresholds =  {'danceability': 0.16850000000000004, 'energy': 0.14936944969853527, 'loudness': 0.5734535346157168, 
                     'speechiness': 0.11183139278235099, 'acousticness': -0.35285124201451823, 'liveness': 0.20276263879283324, 
                     'valence': -0.24249999999999988, 'tempo': 0.16009000000000007}
upper_thresholds = {'danceability': 1.0525, 'energy': 0.8605850164229211, 'loudness': 0.698995265917933, 
                    'speechiness': 0.6924946680943335, 'acousticness': 1.280417950271011, 'liveness': 0.8754111662234604, 
                    'valence': 1.2494999999999998, 'tempo': 0.7998179999999999}

for feature in selected_features:
    filtered_data[feature] = np.where(
        (filtered_data[feature] >= lower_thresholds[feature]) &
        (filtered_data[feature] <= upper_thresholds[feature]),
        filtered_data[feature],
        np.nan
    )

# Step 6: Min-max Normalization
feature_min = {'danceability': 0.169, 'energy': 0.1501426584297195, 'loudness': 0.5734623376139655, 
               'speechiness': 0.27977873676275317, 'acousticness': 0.0, 'liveness': 0.21074564860592623, 
               'valence': 0.0, 'tempo': 0.164644}

feature_max = {'danceability': 0.991, 'energy': 0.6931471805599453, 'loudness': 0.6983917371326527, 
               'speechiness': 0.692435557262704, 'acousticness': 0.9986648849277057, 'liveness': 0.8750340122833274, 
               'valence': 1.0, 'tempo': 0.799812}

for feature in selected_features:
    if feature in feature_min:
        after_df[feature] = (after_df[feature] - feature_min[feature]) / (feature_max[feature] - feature_min[feature])
    
# Step 7: Calculate playlist vector
playlist_vector = filtered_data.mean()
playlist_vector = (playlist_vector / playlist_vector.sum()).round(4)

playlist_vector = playlist_vector.sort_values(ascending=False)

# Printing Playlist Vector and Playlist Audio Features
average_before = before_df[selected_features].mean()
playlist_audio_features_array = average_before.to_numpy().round(4)

print("Playlist Audio Features:")
print(average_before)
print()
print([int(value * 10000) / 10000 for value in playlist_audio_features_array])
print()

print("Playlist Vector:")
print(playlist_vector)

Playlist Audio Features:
danceability      0.571055
energy            0.495758
loudness         -8.488253
speechiness       0.056271
acousticness      0.495365
liveness          0.173495
valence           0.418660
tempo           120.773846
dtype: float64

[0.5711, 0.4958, -8.4883, 0.0563, 0.4954, 0.1734, 0.4187, 120.7738]

Playlist Vector:
acousticness    0.1748
loudness        0.1516
danceability    0.1402
liveness        0.1298
tempo           0.1167
valence         0.1020
energy          0.0987
speechiness     0.0861
dtype: float64


In [9]:
track_uri = 'spotify:track:1LeSp4o3CeNhJz3kCWgi6E'
play_track_in_browser(track_uri)

{'id': '', 'track_uri': 'spotify:track:15CiMy2pHSFj5KTSq5405t', 'track_title': 'Berdiri Bulu Romaku', 'track_url': 'https://open.spotify.com/track/15CiMy2pHSFj5KTSq5405t', 'track_popularity': 31, 'release_date': '1998-12-31', 'album_uri': 'spotify:album:5MeQiAcaROkMoOFfooVi0u'}
