In [None]:
import os

import spotipy
from spotipy.oauth2 import SpotifyClientCredentials

# Build the Spotify client using the client-credentials flow.
# The credentials are read from the environment so that real secrets never
# have to be saved inside the notebook; when the variables are unset the
# placeholder strings are used, exactly as a freshly copied template would.
auth_manager = SpotifyClientCredentials(
    client_id=os.environ.get("SPOTIPY_CLIENT_ID", "YOUR CLIENT ID"),
    client_secret=os.environ.get("SPOTIPY_CLIENT_SECRET", "YOUR CLIENT SECRET"),
)
sp = spotipy.Spotify(auth_manager=auth_manager)

In [None]:
# Loading a *public* Spotify playlist using Spotipy.
# playlist_tracks() returns one page of results at a time, so keep following
# the paging cursor and fold every further page into playlist['items'];
# otherwise long playlists would be silently truncated to the first page.
playlist = sp.playlist_tracks('PUBLIC PLAYLIST ID')
page = playlist
while page and page.get('next'):
    page = sp.next(page)  # returns None once there is no further page
    if page:
        playlist['items'].extend(page['items'])
print(len(playlist['items']))
# Path to Spotify IDs (for reference); guarded so that playlists with fewer
# than three entries do not raise IndexError.
playlist['items'][2]['track']['id'] if len(playlist['items']) > 2 else None

In [None]:
# Creating a list of Spotify track IDs from the playlist.
# Entries with no track object or no track id are skipped: subscripting a
# missing track raises TypeError, and a None id would make ','.join() fail.
# NOTE(review): Spotify is understood to return such entries for removed
# tracks and local files - confirm against the Web API reference.
ids = [
    entry['track']['id']
    for entry in playlist['items']
    if entry.get('track') and entry['track'].get('id')
]
print(ids)
batch = 40  # how many IDs are joined into each comma-separated chunk
for i in range(0, len(ids), batch):
    print(','.join(ids[i:(i+batch)]))

In [None]:
# "Get multiple track" call from reccobeats API
import requests
import json

# Maps each id returned by the ReccoBeats track lookup to a dict holding that
# track's duration, href and Spotify popularity.
qualities = {}

# Index Spotify popularity by track URL once, rather than rescanning the whole
# playlist for every returned track. setdefault() keeps the first occurrence,
# which is what a first-match scan would pick for a duplicated track.
popularity_by_href = {}
for item in playlist['items']:
    track = item.get('track')
    if track and track.get('id'):
        popularity_by_href.setdefault(
            f"https://open.spotify.com/track/{track['id']}",
            track.get('popularity'),
        )

headers = {
    'Accept': 'application/json'
}

for i in range(0, len(ids), batch):
    id_list = ','.join(ids[i:(i+batch)])
    url = f"https://api.reccobeats.com/v1/track?ids={id_list}"

    # The timeout stops a stalled connection from hanging the notebook, and
    # raise_for_status() reports an HTTP failure here instead of letting it
    # surface later as a confusing KeyError on 'content'.
    response = requests.get(url, headers=headers, timeout=30)
    response.raise_for_status()
    data = response.json()

    for element in data['content']:
        qualities[element['id']] = {
            'durationMs': element['durationMs'],
            'href': element['href'],
            # None when the returned href matches no playlist entry.
            'popularity': popularity_by_href.get(element['href']),
        }

print(f"Matched {len(qualities)} of {len(ids)} tracks")

# Fixed list of the matched ids, in insertion order.
recco_ids = list(qualities)

In [None]:
# Loading audio features into a dataframe
import pandas as pd

# Fetch the audio features of every matched track and merge them into the
# per-track dicts in `qualities`, then build the DataFrame from those dicts.
data = []  # decoded audio-feature payloads, one per successfully parsed response
headers = {
    'Accept': 'application/json'
}

# `track_id` rather than `id`, so the builtin id() is not shadowed.
for track_id in recco_ids:
    url = f"https://api.reccobeats.com/v1/track/{track_id}/audio-features"

    # The timeout stops a stalled connection from hanging the notebook.
    response = requests.get(url, headers=headers, timeout=30)

    try:
        data.append(response.json())
    except ValueError:
        # Body was not valid JSON: skip this track instead of aborting the run.
        print(f"Skipping {track_id}: response was not valid JSON (HTTP {response.status_code})")

for item in data:
    # Payloads without an 'id' are ignored (best effort); the membership test
    # avoids a KeyError should the API return an id that was never requested.
    if isinstance(item, dict) and 'id' in item and item['id'] in qualities:
        qualities[item['id']].update(item)

df = pd.DataFrame(qualities.values())
df.head()

In [None]:
# Keep only the rows that have a value in every column. Reassigning instead of
# mutating in place keeps this cell safe to re-run.
df = df.dropna()
print(df.head())
# DataFrame.info() writes its report to stdout itself and returns None, so it
# must not be wrapped in print() (that would emit a stray "None" line).
df.info()

In [None]:
# Loading and testing the best model
import numpy as np
import joblib

# Directory holding the saved scaler and model. Override it with the MODEL_DIR
# environment variable; the default is the location the notebook was written with.
MODEL_DIR = os.environ.get('MODEL_DIR', '/Users/ebro/Downloads/spotipy')

# Model input: the natural log of the track duration plus the audio features.
df['lduration'] = np.log(df['durationMs'])
features = ['lduration', 'danceability', 'energy', 'loudness', 'acousticness', 'instrumentalness', 'valence', 'liveness']

X_df = df[features]

# NOTE(security): joblib.load unpickles the file, which can execute arbitrary
# code - only load artefacts that come from a trusted source.
# NOTE(review): presumably the scaler and the model were fitted on exactly
# these columns in this order - confirm against the training notebook.
scaler = joblib.load(os.path.join(MODEL_DIR, 'scaler.pkl'))
df_scaled = scaler.transform(X_df)

rf = joblib.load(os.path.join(MODEL_DIR, 'rf_model.pkl'))
y_pred = rf.predict(df_scaled)

df['prediction'] = y_pred
print(df['prediction'].value_counts())
df

In [None]:
# Measuring performance using confusion matrix metrics
def _safe_ratio(numerator, denominator):
    """Return numerator / denominator, or NaN when the denominator is zero."""
    return numerator / denominator if denominator else float('nan')


def performance(df, threshold=50):
    """Print accuracy, precision, recall and AUC for the predictions in ``df``.

    A row counts as actually positive when its ``popularity`` is at least
    ``threshold`` and as actually negative when it is below it; it counts as
    predicted positive/negative when ``prediction`` equals 1/0. Rows matching
    neither side of a comparison (e.g. missing popularity) are not counted.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the columns ``prediction`` and ``popularity``.
    threshold : int or float, default 50
        Popularity cutoff separating the positive from the negative class.

    Returns
    -------
    None
        The four metrics are printed, each rounded to three decimals. A metric
        whose denominator is zero is reported as ``nan``.
    """
    predicted_pos = df['prediction'] == 1
    predicted_neg = df['prediction'] == 0
    actual_pos = df['popularity'] >= threshold
    actual_neg = df['popularity'] < threshold

    # Plain Python ints, so a zero denominator is handled by _safe_ratio
    # instead of triggering numpy runtime warnings.
    TP = int((predicted_pos & actual_pos).sum())
    FP = int((predicted_pos & actual_neg).sum())
    TN = int((predicted_neg & actual_neg).sum())
    FN = int((predicted_neg & actual_pos).sum())

    true_positive_rate = _safe_ratio(TP, TP + FN)
    false_positive_rate = _safe_ratio(FP, FP + TN)

    accuracy = round(_safe_ratio(TP + TN, TP + FP + TN + FN), 3)
    precision = round(_safe_ratio(TP, TP + FP), 3)
    recall = round(true_positive_rate, 3)
    # With hard 0/1 predictions the ROC curve has a single operating point, so
    # the area under (0,0) -> (FPR,TPR) -> (1,1) is (1 + TPR - FPR) / 2.
    area_under_curve = round((1 + true_positive_rate - false_positive_rate) / 2, 3)

    print(f"Accuracy: {accuracy}")
    print(f"Precision: {precision}")
    print(f"Recall: {recall}")
    print(f"AUC: {area_under_curve}")


# Print the confusion-matrix metrics for the predictions stored in df.
performance(df)