In [101]:
import itertools
import os

import numpy as np
import pandas as pd

from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

from matplotlib import pyplot as plt
import seaborn as sns

import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
import spotipy.util as util

%matplotlib inline

In [95]:
# Spotify API credentials are read from the environment so that secrets are
# never stored in (or committed with) the notebook. Export SPOTIPY_CLIENT_ID
# and SPOTIPY_CLIENT_SECRET before starting the kernel; a missing variable
# fails fast with a KeyError naming it.
# NOTE(review): any key pair that was ever committed in this notebook should
# be rotated in the Spotify developer dashboard.
CLIENT_ID = os.environ["SPOTIPY_CLIENT_ID"]
CLIENT_SECRET = os.environ["SPOTIPY_CLIENT_SECRET"]

token = SpotifyClientCredentials(client_id=CLIENT_ID, client_secret=CLIENT_SECRET)

# Kept so that any cell reading `cache_token` keeps working; `sp` itself no
# longer depends on this one-shot token.
cache_token = token.get_access_token()

# Hand the credentials manager to the client instead of a fixed token so the
# client can request a fresh access token when the current one expires.
sp = spotipy.Spotify(client_credentials_manager=token)

In [96]:
# Download the playlist object that the rest of the notebook analyses.
playlist_owner = "joycex99"
playlist_id = "0yWeSBDVEwGPLMH7EXI3tX"
sample_playlist = sp.user_playlist(playlist_owner, playlist_id)

In [42]:
def extract_songs(playlist):
    """Collect (song id, song name, popularity) for every track of a playlist.

    Parameters
    ----------
    playlist : dict
        Playlist object whose ``"tracks"`` entry is a page with ``"items"``
        and ``"next"`` keys. Further pages are fetched through the
        module-level ``sp`` client for as long as ``"next"`` is truthy.

    Returns
    -------
    list of tuple
        One ``(id, name, popularity)`` tuple per track, in playlist order.
        Entries whose ``"track"`` is missing or ``None`` are skipped instead
        of raising.
    """
    def _page_info(page):
        # Resolve each item's track first so entries without one can be dropped.
        page_tracks = (item.get("track") for item in page["items"])
        return [
            (track["id"], track["name"], track["popularity"])
            for track in page_tracks
            if track is not None
        ]

    tracks = playlist["tracks"]
    info = _page_info(tracks)
    # Follow the pagination links until the last page has been consumed.
    while tracks["next"]:
        tracks = sp.next(tracks)
        info.extend(_page_info(tracks))
    return info

In [54]:
# Flatten the playlist into a list of (id, name, popularity) tuples.
songs = extract_songs(sample_playlist)
# Sanity check: the number of extracted songs should match the track total
# reported in the playlist object itself.
print(len(songs))
print(f"Verification: playlist had {sample_playlist['tracks']['total']} tracks")
# Only preview the first 30 entries to keep the cell output short.
print(songs[:30])

679
Verification: playlist had 679 tracks
[('3ftfaNstpkuUIMxlYOl1WX', 'Perfect Color', 44), ('4KnnWpUyPfQD6X5SQSTlH3', 'Turn Back Time', 68), ('074Gs4Omblzhjx6XOrIl3b', 'Famous', 5), ('0OlnLZY4cmQzT6ZGttvWBM', 'So Far Away (feat. Jamie Scott & Romy Dya)', 79), ('4jQdq2hql3n64VySrmR8Ro', 'Molino', 34), ('6H0zRPEV1ezBHOidNXSt1D', 'Fractures (feat. Nevve)', 62), ('0S9TAJWUQ4cfK0NHctq7y2', 'Stellar - SAMME Remix', 13), ('53phWBiwb4x7OX29CTRilj', 'Wishing Seed - Dulsae Remix', 13), ('5gIRPQWULwrvIt0F6pY7ph', 'Stranger Things', 20), ('68ZdNEIJ1v0sb0JKlPA1Z3', 'Strobe - ATTLAS Remix', 37), ('4nGW06TRJOIrVRfjnuK4se', 'Out Of My Head', 23), ('5qxLvazzinSf4Cb1N6zIhb', 'With You (feat. Quinn XCII)', 60), ('3jb6ByvjXfeM1p0ISGN584', 'Friends - Recorded at Spotify Studios NYC', 49), ('6jhm26E6XN4lUZkNBrKCqd', 'Simplicity Is Bliss', 44), ('5fchndbL7vo8plyRreaKtJ', 'Landslide', 5), ('0nhZciYtNkzeLJEApTytYW', 'New You', 34), ('3LoLRXZgZO3FhqLdUenZKC', 'Belong - Aguelando Remix', 30), ('0j724c6yAW6wovAj

In [66]:
# Inspect the raw audio-features response for the first extracted song
# (songs[0][0] is that song's id) to see which fields are available.
sp.audio_features(songs[0][0])

[{'acousticness': 0.0469,
  'analysis_url': 'https://api.spotify.com/v1/audio-analysis/3ftfaNstpkuUIMxlYOl1WX',
  'danceability': 0.539,
  'duration_ms': 234572,
  'energy': 0.855,
  'id': '3ftfaNstpkuUIMxlYOl1WX',
  'instrumentalness': 9.5e-05,
  'key': 0,
  'liveness': 0.52,
  'loudness': -4.987,
  'mode': 1,
  'speechiness': 0.0583,
  'tempo': 138.028,
  'time_signature': 4,
  'track_href': 'https://api.spotify.com/v1/tracks/3ftfaNstpkuUIMxlYOl1WX',
  'type': 'audio_features',
  'uri': 'spotify:track:3ftfaNstpkuUIMxlYOl1WX',
  'valence': 0.346}]

In [85]:
def features(songs, batch_size=50):
    """Return a list with each track's audio features.

    Parameters
    ----------
    songs : sequence of tuple
        ``(id, name, popularity)`` tuples; only the first three positions of
        each tuple are read.
    batch_size : int, optional
        Number of track ids sent per ``sp.audio_features`` request. Defaults
        to 50, the per-request limit the original implementation assumed.

    Returns
    -------
    list of dict
        The audio-feature dicts returned by the module-level ``sp`` client,
        each extended with ``'name'`` and ``'popularity'`` keys taken from the
        matching song. Tracks for which the response holds ``None`` instead of
        a feature dict are skipped.

    Raises
    ------
    ValueError
        If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be at least 1")

    all_feats = []
    for start in range(0, len(songs), batch_size):
        batch = songs[start:start + batch_size]
        # `or []` guards against the client returning None for the whole batch.
        batch_feats = sp.audio_features([song[0] for song in batch]) or []
        # Results are positional, so pair every song with its own entry.
        for song, track_feats in zip(batch, batch_feats):
            if track_feats is None:
                # No audio features available for this track.
                continue
            track_feats['name'] = song[1]
            track_feats['popularity'] = song[2]
            all_feats.append(track_feats)
    return all_feats

In [97]:
# Fetch the audio features (plus name and popularity) for every extracted song.
feats = features(songs)
# Spot-check one record; index 100 requires at least 101 entries in `feats`.
feats[100]

{'acousticness': 0.148,
 'analysis_url': 'https://api.spotify.com/v1/audio-analysis/1839RWzPRZCVgvamu4sHSN',
 'danceability': 0.59,
 'duration_ms': 191331,
 'energy': 0.871,
 'id': '1839RWzPRZCVgvamu4sHSN',
 'instrumentalness': 0,
 'key': 11,
 'liveness': 0.0739,
 'loudness': -3.353,
 'mode': 0,
 'name': 'All My Love (feat. Conor Maynard)',
 'popularity': 70,
 'speechiness': 0.0891,
 'tempo': 134.956,
 'time_signature': 4,
 'track_href': 'https://api.spotify.com/v1/tracks/1839RWzPRZCVgvamu4sHSN',
 'type': 'audio_features',
 'uri': 'spotify:track:1839RWzPRZCVgvamu4sHSN',
 'valence': 0.304}

## Training

In [98]:
# One row per track, shuffled once. The shuffle is seeded so the row order is
# identical on every run, and the frame is built and shuffled in a single
# expression so re-running this cell always yields the same `data`.
SHUFFLE_SEED = 42
data = pd.DataFrame(feats).sample(frac=1, random_state=SHUFFLE_SEED)
data.head()

Unnamed: 0,acousticness,analysis_url,danceability,duration_ms,energy,id,instrumentalness,key,liveness,loudness,mode,name,popularity,speechiness,tempo,time_signature,track_href,type,uri,valence
554,0.0293,https://api.spotify.com/v1/audio-analysis/3B54...,0.726,233902,0.769,3B54sVLJ402zGa6Xm4YGNe,0.0101,6,0.104,-5.043,1,Unforgettable,86,0.123,97.985,4,https://api.spotify.com/v1/tracks/3B54sVLJ402z...,audio_features,spotify:track:3B54sVLJ402zGa6Xm4YGNe,0.733
367,0.111,https://api.spotify.com/v1/audio-analysis/0RUX...,0.448,255893,0.885,0RUXHlYhA057fCOe2vqIqu,4e-06,9,0.0476,-3.755,1,Ships In The Night,55,0.0562,178.151,4,https://api.spotify.com/v1/tracks/0RUXHlYhA057...,audio_features,spotify:track:0RUXHlYhA057fCOe2vqIqu,0.134
341,0.253,https://api.spotify.com/v1/audio-analysis/6uBh...,0.69,210091,0.622,6uBhi9gBXWjanegOb2Phh0,0.0,5,0.116,-5.025,0,Stay (with Alessia Cara),84,0.0622,102.04,4,https://api.spotify.com/v1/tracks/6uBhi9gBXWja...,audio_features,spotify:track:6uBhi9gBXWjanegOb2Phh0,0.544
321,0.0144,https://api.spotify.com/v1/audio-analysis/0fYV...,0.526,195200,0.862,0fYVliAYKHuPmECRs1pbRf,0.0597,2,0.229,-6.003,1,Renegades,76,0.0905,90.052,4,https://api.spotify.com/v1/tracks/0fYVliAYKHuP...,audio_features,spotify:track:0fYVliAYKHuPmECRs1pbRf,0.528
654,0.000339,https://api.spotify.com/v1/audio-analysis/3GnL...,0.905,177604,0.596,3GnLo84IkdSWCPYt6tnLll,2.1e-05,1,0.0897,-7.496,0,HUMBLE.,12,0.118,149.996,4,https://api.spotify.com/v1/tracks/3GnLo84IkdSW...,audio_features,spotify:track:3GnLo84IkdSWCPYt6tnLll,0.422


In [99]:
# Names of the audio-feature columns used as model inputs.
# NOTE(review): this assignment rebinds the name `features`, which an earlier
# cell defines as the function that downloads audio features. After this cell
# has run, calling `features(songs)` again raises a TypeError until the cell
# defining that function is re-executed.
features = ["acousticness", "danceability", "energy", 
            "loudness", "speechiness", "tempo", "valence"]

In [100]:
# Hold out 15% of the tracks for evaluation. The split is seeded so the same
# rows land in train and test on every run, which keeps the results reproducible.
SPLIT_SEED = 42
train, test = train_test_split(data, test_size=0.15, random_state=SPLIT_SEED)

# Every row must end up in exactly one of the two partitions.
assert len(train) + len(test) == len(data)

# Inputs are the selected audio-feature columns; the target is popularity.
x_train, y_train = train[features], train["popularity"]
x_test, y_test = test[features], test["popularity"]
print("Training size: {}, Test size: {}".format(len(train), len(test)))

Training size: 577, Test size: 102


In [103]:
# Fit an ordinary least-squares model of popularity on the audio features.
# `fit` returns the estimator itself, so construction and fitting are chained.
lin_mod = LinearRegression().fit(x_train, y_train)
# Display the fitted estimator as the cell output.
lin_mod



LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)