# Predicting Spotify Playlist Adds 

## Part 1: Data Collection

This notebook extracts all relevant data from Spotify's API using the `spotify_api_access.py` module located in the same directory. Specifically, we'll authenticate and retrieve user tokens (for myself and my friends), then compile datasets of track features and artist information.

In [1]:
#import libraries
from pathlib import Path

import numpy as np
import pandas as pd

#import API keys (import_ipynb must be imported first so the spotify_secret notebook can be imported)
import import_ipynb
import spotify_secret

#import Spotify Access module
from spotify_api_access import SpotifyAccess

importing Jupyter notebook from spotify_secret.ipynb


In [2]:
#load the credentials required to access the API from the local spotify_secret notebook,
#so that no key is written in this notebook's source
user_id = spotify_secret.user_1()
scope = spotify_secret.scope()
cid = spotify_secret.cid()
secret = spotify_secret.secret_key()
#redirect URI handed to SpotifyAccess below
#NOTE(review): presumably has to match the redirect URI registered for the Spotify app -- confirm
redirect = 'http://localhost/'

In [3]:
#initialize access to Spotify's API for the first user
user_1 = SpotifyAccess(user_id=user_id, cid=cid, secret=secret, redirect_uri=redirect, scope=scope)
#authenticate spotify client --> enter redirected URL when prompted
user_1_sp = user_1.get_spotify_auth()
#display the returned spotipy client object
user_1_sp

<spotipy.client.Spotify at 0x111694fd0>

In [4]:
#get all playlists of the user as a {playlist name: playlist id} dictionary
#NOTE(review): the result is keyed by playlist name, so playlists sharing a name would
#presumably collapse into a single entry -- confirm in spotify_api_access
user_1_playlists = user_1.get_user_playlist_id(user_id)
user_1_playlists

{'iizy': '2j4RCgMRrt7tUnmcOTaBx7',
 'speakeasy': '0eWnoY26ImSfXi9BCydus7',
 'blueberries and raspberries': '6psNk66updZmTzDG0ApNpQ',
 'maple': '2XQn1FuSKQnC1hY3DZzmfs',
 'honeydew melon': '1ZP1YO6sQDv8jINH4UC8ft',
 'honey baked feels': '0pHRlOLgIZAUFvZzeOFBZu',
 'Strawberry': '1LWz8108lCzceCpuTXYn8Y',
 'honey': '6sR95OkNQk3s0BwdBlWUhG',
 'beets': '4Q56pfYEikvJYVcSbmhoQB',
 'rotten fruits': '0Raj4pYhETvcLO0wNgdm19',
 'エDM': '37kqXkx2Z9UtucF97tQXAG',
 'happz': '5hxSDMJy2JeI2dBzvALRb2',
 'lax': '7x0DIb3PSj7Pn29psrRswa',
 'future': '41Yr2mVgIcCClo1pXKZfqF',
 'techy': '3qV0DAmHJVTqr8JAIkHEzP',
 'bass': '6sVu23APEWncCHiuR4Rja7',
 'wub wub': '7jC3tOF5DYiHqDuH6iXRFk',
 'Ezoo 2017 Sat & Sun': '3ATIYr0apkZkndoV4YLpYk',
 "electric zoo '18": '68ZktIZNBWFxcDrMkqIJEU',
 'throwback': '7oYrf8cQUBs1lsKT6l6P3D',
 'jazz it up': '5QUx4mRywzDiTg0viNHy4m',
 'nah': '48nnBlCkbgTmY68G3nUg1D',
 'a wavey journey': '6Fp4VF75uKm56xVQOC1gN6',
 "relaxin'": '31KXzJ2evXlzfTBcxrFoK2',
 'Drift n High': '02wL7mQMueZzvur8

In [5]:
#collect the track objects of every playlist (one list of tracks per playlist)
user_1_track_objects = [
    user_1.get_all_track_objects(playlist_id)
    for playlist_id in user_1_playlists.values()
]
#flatten the per-playlist lists into one single list of track objects
user_1_track_obj_list = [
    track_obj
    for playlist_tracks in user_1_track_objects
    for track_obj in playlist_tracks
]

In [6]:
#pull the high level information of every track out of the track objects
#(artist_name and artist_id are lists, because a track can have several artists)
user_1_track_info = user_1.get_track_info(user_1_track_obj_list)
#the track info is keyed by track id: build one row per track, then turn that key
#into a regular 'track_id' column
user_1_df = (
    pd.DataFrame.from_dict(
        user_1_track_info,
        orient='index',
        columns=['track_name', 'artist_name', 'artist_id', 'release_date', 'album_type', 'popularity'],
    )
    .reset_index()
    .rename(columns={'index': 'track_id'})
)
user_1_df.head()

Unnamed: 0,track_id,track_name,artist_name,artist_id,release_date,album_type,popularity
0,0sR0deixp6xTZ5Yx9g0pr0,Release Me,[Miette Hope],[4K61UysqQc1VRj8VsY76Qw],2018-06-22,single,34
1,0doiRAg2YNQvamY6oMPwkw,Dancing in the Street,[Stephen Day],[4cnFw4bkIWVGKUBsr93OS5],2018-04-27,single,4
2,0NuIfPuBhUAt4aA4HP0rUu,You Seemed so Happy,[The Japanese House],[3IunaFjvNKj98JW89JYv9u],2019-03-01,album,53
3,5EXD5rXJ4IVb8g4xSwT0fc,If You Were the Rain,[Stephen Day],[4cnFw4bkIWVGKUBsr93OS5],2016-04-08,single,47
4,68ViVsxqymVrKMnAbVsEhU,Talking Slow,[Dylan Jordan],[0vQwQkQbLxrapE7TG9o5SB],2018-12-12,single,48


In [7]:
#for each of the tracks, extract the audio features (they are merged into the track dataframe in the next cell)
#create a list of track ids
user_1_track_ids = user_1_df['track_id'].tolist()
#build a dataframe from the audio features returned for those track ids
user_1_audio_feat = pd.DataFrame(user_1.get_audio_features(user_1_track_ids))
user_1_audio_feat.head()

Unnamed: 0,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,type,id,uri,track_href,analysis_url,duration_ms,time_signature
0,0.517,0.48,11,-7.944,0,0.0425,0.683,0.0,0.076,0.539,134.012,audio_features,0sR0deixp6xTZ5Yx9g0pr0,spotify:track:0sR0deixp6xTZ5Yx9g0pr0,https://api.spotify.com/v1/tracks/0sR0deixp6xT...,https://api.spotify.com/v1/audio-analysis/0sR0...,212236,4
1,0.632,0.677,11,-8.105,1,0.0911,0.263,1e-05,0.0977,0.512,119.763,audio_features,0doiRAg2YNQvamY6oMPwkw,spotify:track:0doiRAg2YNQvamY6oMPwkw,https://api.spotify.com/v1/tracks/0doiRAg2YNQv...,https://api.spotify.com/v1/audio-analysis/0doi...,199027,4
2,0.519,0.888,11,-6.232,1,0.117,0.0406,0.0,0.227,0.416,149.938,audio_features,0NuIfPuBhUAt4aA4HP0rUu,spotify:track:0NuIfPuBhUAt4aA4HP0rUu,https://api.spotify.com/v1/tracks/0NuIfPuBhUAt...,https://api.spotify.com/v1/audio-analysis/0NuI...,162486,4
3,0.481,0.412,9,-8.413,1,0.0401,0.611,5.9e-05,0.0998,0.326,84.358,audio_features,5EXD5rXJ4IVb8g4xSwT0fc,spotify:track:5EXD5rXJ4IVb8g4xSwT0fc,https://api.spotify.com/v1/tracks/5EXD5rXJ4IVb...,https://api.spotify.com/v1/audio-analysis/5EXD...,220588,4
4,0.501,0.533,2,-10.11,1,0.0379,0.202,0.00206,0.113,0.151,138.92,audio_features,68ViVsxqymVrKMnAbVsEhU,spotify:track:68ViVsxqymVrKMnAbVsEhU,https://api.spotify.com/v1/tracks/68ViVsxqymVr...,https://api.spotify.com/v1/audio-analysis/68Vi...,259640,4


In [8]:
#attach the audio features to every track (left join keeps all track rows), then
#discard the audio feature metadata columns that carry no feature information
#NOTE: user_1_df is reassigned here, so re-running only this cell merges the audio
#features a second time (duplicated, suffixed columns) -- re-run from the track info cell instead
user_1_df = pd.merge(
    user_1_df,
    user_1_audio_feat,
    how='left',
    left_on='track_id',
    right_on='id',
).drop(columns=['type', 'id', 'uri', 'track_href', 'analysis_url'])
user_1_df.head()

Unnamed: 0,track_id,track_name,artist_name,artist_id,release_date,album_type,popularity,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature
0,0sR0deixp6xTZ5Yx9g0pr0,Release Me,[Miette Hope],[4K61UysqQc1VRj8VsY76Qw],2018-06-22,single,34,0.517,0.48,11,-7.944,0,0.0425,0.683,0.0,0.076,0.539,134.012,212236,4
1,0doiRAg2YNQvamY6oMPwkw,Dancing in the Street,[Stephen Day],[4cnFw4bkIWVGKUBsr93OS5],2018-04-27,single,4,0.632,0.677,11,-8.105,1,0.0911,0.263,1e-05,0.0977,0.512,119.763,199027,4
2,0NuIfPuBhUAt4aA4HP0rUu,You Seemed so Happy,[The Japanese House],[3IunaFjvNKj98JW89JYv9u],2019-03-01,album,53,0.519,0.888,11,-6.232,1,0.117,0.0406,0.0,0.227,0.416,149.938,162486,4
3,5EXD5rXJ4IVb8g4xSwT0fc,If You Were the Rain,[Stephen Day],[4cnFw4bkIWVGKUBsr93OS5],2016-04-08,single,47,0.481,0.412,9,-8.413,1,0.0401,0.611,5.9e-05,0.0998,0.326,84.358,220588,4
4,68ViVsxqymVrKMnAbVsEhU,Talking Slow,[Dylan Jordan],[0vQwQkQbLxrapE7TG9o5SB],2018-12-12,single,48,0.501,0.533,2,-10.11,1,0.0379,0.202,0.00206,0.113,0.151,138.92,259640,4


In [9]:
#look up the genres and follower count of every artist
#artist_id holds a list per track, so explode it to one id per row before de-duplicating
artist_ids_1 = user_1_df['artist_id'].explode().unique().tolist()
#query the artist information through get_artist_info
user_1_genres, user_1_followers = user_1.get_artist_info(artist_ids_1)
#combine both results into one dataframe and expose their shared index as the 'artist_id' column
user_1_artist_info = (
    pd.DataFrame({'artist_genre': user_1_genres, 'artist_follower_count': user_1_followers})
    .reset_index()
    .rename(columns={'index': 'artist_id'})
)

retrying ...1secs


In [10]:
#preview the artist info: we can observe that some artists do not have genres (empty list in artist_genre)
user_1_artist_info.head()

Unnamed: 0,artist_id,artist_genre,artist_follower_count
0,4K61UysqQc1VRj8VsY76Qw,[],7547
1,4cnFw4bkIWVGKUBsr93OS5,"[indie cafe pop, indie r&b, indiecoustica]",20710
2,3IunaFjvNKj98JW89JYv9u,"[art pop, electropop, indie pop, indie poptimi...",351533
3,0vQwQkQbLxrapE7TG9o5SB,[],21188
4,5dCvSnVduaFleCnyy98JMo,"[k-indie, k-pop, korean pop, korean r&b]",428238


In [11]:
#check the shape (rows, columns) of user 1's merged track dataframe
user_1_df.shape

(2499, 20)

In [12]:
#repeat these steps to extract data from second user (same app credentials, different user id)
user_id_2 = spotify_secret.user_2()
#initialize access to Spotify's API with second user
user_2 = SpotifyAccess(user_id=user_id_2, cid=cid, secret=secret, redirect_uri=redirect, scope=scope)
#authenticate spotify client --> enter redirected URL when prompted
user_2_sp = user_2.get_spotify_auth()
#display the returned spotipy client object
user_2_sp

<spotipy.client.Spotify at 0x1264bb850>

In [13]:
#extract all playlists of the second user as a {playlist name: playlist id} dictionary
#NOTE(review): the result is keyed by playlist name, so playlists sharing a name would
#presumably collapse into a single entry -- confirm in spotify_api_access
user_2_playlists = user_2.get_user_playlist_id(user_id_2)
user_2_playlists

{'(^^^)': '1vh3B5nXJmS759DY9jS836',
 '☁️': '4152m4RTAhEGM7W6bExEea',
 'xxx type beat ': '6UJO9WgYcu3rNSQ21WNmcs',
 'ny rap': '5LdmIdfdw1Xm16Wk0cLKnM',
 'caribana 2020': '6OeaKeQTUF5Ks7UfSoLyW5',
 'majin buul v2': '4JdGenhfufj8tHUwFBYqKJ',
 'yoshis island': '4d0xX2sjtwJmmtLUKiDcIW',
 'This Is Major Lazer': '37i9dQZF1DX0MiLwiDL8lJ',
 'rap': '2FqpG8lhxivLKRg14jfpbz',
 'happy': '4NK6DhrEk4PNDS4WRAl2Z7',
 'gym': '7CSgX0etFnIcGsHq9dEBLm',
 'tiktok made me do it': '2iRiPjRd0MhC09BRb3FCBC',
 'caribbean': '4d24pLBqcMUfSld85aydNa',
 'uh': '4zpETkptsR0yshSbxEK2FB',
 'majin buul v1': '4Dw4WNIJiP5gs0NculQ8Sr',
 'headbang shit': '71Elpc5Vpioj8kKKwmLQDh',
 'el playlisto': '05iS97228w22TicN0LWKxD',
 'r&b': '6UuxBp5KsG9GrRCPpIRguf',
 'edm': '3ZSQrvxZ9PoT6FpaZAa6HV',
 'k r&b/hiphop ': '4LIbYMEtxZWAy4HvwH5ukd'}

In [14]:
#collect the track objects of every playlist (one list of tracks per playlist)
user_2_track_objects = [
    user_2.get_all_track_objects(playlist_id)
    for playlist_id in user_2_playlists.values()
]
#flatten the per-playlist lists into one single list of track objects
user_2_track_obj_list = [
    track_obj
    for playlist_tracks in user_2_track_objects
    for track_obj in playlist_tracks
]

In [15]:
#pull the high level information of every track out of the track objects
#(artist_name and artist_id are lists, because a track can have several artists)
user_2_track_info = user_2.get_track_info(user_2_track_obj_list)
#the track info is keyed by track id: build one row per track, then turn that key
#into a regular 'track_id' column
user_2_df = (
    pd.DataFrame.from_dict(
        user_2_track_info,
        orient='index',
        columns=['track_name', 'artist_name', 'artist_id', 'release_date', 'album_type', 'popularity'],
    )
    .reset_index()
    .rename(columns={'index': 'track_id'})
)
user_2_df.head()

Unnamed: 0,track_id,track_name,artist_name,artist_id,release_date,album_type,popularity
0,01c9sDv4paB121N6l2BFKY,Babylon,"[Jauz, Tisoki]","[5ttgIeUVka6FLyi00Uu5h8, 0XW7mqhbaQnRtHmwfAVg64]",2018-08-31,album,35
1,48iZQ89FuDSmqRk1AjRcrP,OK!,"[Jauz, San Holo]","[5ttgIeUVka6FLyi00Uu5h8, 0jNDKefhfSbLR9sFvcPLHo]",2016-10-10,single,42
2,6DlpWCFx1fsv0LCa3AQ81D,Infected,"[Tiësto, Jauz]","[2o5jDhtHVPhrJdv3cEQ99Z, 5ttgIeUVka6FLyi00Uu5h8]",2016-07-15,single,43
3,03IxJiB8ZOH9hEQZF5mCNY,Feel The Volume,[Jauz],[5ttgIeUVka6FLyi00Uu5h8],2014-11-11,single,55
4,4BNWc6VNhfANsYMeTxGw4R,Get To Me,[Jauz],[5ttgIeUVka6FLyi00Uu5h8],2019-10-11,single,46


In [16]:
#for each of the tracks, extract the audio features (they are merged into the track dataframe in the next cell)
#create a list of track ids
user_2_track_ids = user_2_df['track_id'].tolist()
#build a dataframe from the audio features returned for those track ids
user_2_audio_feat = pd.DataFrame(user_2.get_audio_features(user_2_track_ids))
user_2_audio_feat.head()

Unnamed: 0,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,type,id,uri,track_href,analysis_url,duration_ms,time_signature
0,0.611,0.92,1,-3.531,1,0.144,0.0232,0.0015,0.147,0.451,145.005,audio_features,01c9sDv4paB121N6l2BFKY,spotify:track:01c9sDv4paB121N6l2BFKY,https://api.spotify.com/v1/tracks/01c9sDv4paB1...,https://api.spotify.com/v1/audio-analysis/01c9...,293000,4
1,0.615,0.885,10,-3.281,0,0.0397,0.0174,0.00422,0.101,0.112,127.939,audio_features,48iZQ89FuDSmqRk1AjRcrP,spotify:track:48iZQ89FuDSmqRk1AjRcrP,https://api.spotify.com/v1/tracks/48iZQ89FuDSm...,https://api.spotify.com/v1/audio-analysis/48iZ...,188438,4
2,0.605,0.974,5,-3.996,0,0.169,0.0764,0.312,0.128,0.082,128.03,audio_features,6DlpWCFx1fsv0LCa3AQ81D,spotify:track:6DlpWCFx1fsv0LCa3AQ81D,https://api.spotify.com/v1/tracks/6DlpWCFx1fsv...,https://api.spotify.com/v1/audio-analysis/6Dlp...,225000,4
3,0.725,0.918,10,-4.912,0,0.0676,0.153,0.0328,0.113,0.201,125.009,audio_features,03IxJiB8ZOH9hEQZF5mCNY,spotify:track:03IxJiB8ZOH9hEQZF5mCNY,https://api.spotify.com/v1/tracks/03IxJiB8ZOH9...,https://api.spotify.com/v1/audio-analysis/03Ix...,230400,4
4,0.574,0.899,1,-3.619,1,0.0483,0.321,0.0,0.0927,0.387,128.06,audio_features,4BNWc6VNhfANsYMeTxGw4R,spotify:track:4BNWc6VNhfANsYMeTxGw4R,https://api.spotify.com/v1/tracks/4BNWc6VNhfAN...,https://api.spotify.com/v1/audio-analysis/4BNW...,256875,4


In [17]:
#attach the audio features to every track (left join keeps all track rows), then
#discard the audio feature metadata columns that carry no feature information
#NOTE: user_2_df is reassigned here, so re-running only this cell merges the audio
#features a second time (duplicated, suffixed columns) -- re-run from the track info cell instead
user_2_df = pd.merge(
    user_2_df,
    user_2_audio_feat,
    how='left',
    left_on='track_id',
    right_on='id',
).drop(columns=['type', 'id', 'uri', 'track_href', 'analysis_url'])
user_2_df.head()

Unnamed: 0,track_id,track_name,artist_name,artist_id,release_date,album_type,popularity,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature
0,01c9sDv4paB121N6l2BFKY,Babylon,"[Jauz, Tisoki]","[5ttgIeUVka6FLyi00Uu5h8, 0XW7mqhbaQnRtHmwfAVg64]",2018-08-31,album,35,0.611,0.92,1,-3.531,1,0.144,0.0232,0.0015,0.147,0.451,145.005,293000,4
1,48iZQ89FuDSmqRk1AjRcrP,OK!,"[Jauz, San Holo]","[5ttgIeUVka6FLyi00Uu5h8, 0jNDKefhfSbLR9sFvcPLHo]",2016-10-10,single,42,0.615,0.885,10,-3.281,0,0.0397,0.0174,0.00422,0.101,0.112,127.939,188438,4
2,6DlpWCFx1fsv0LCa3AQ81D,Infected,"[Tiësto, Jauz]","[2o5jDhtHVPhrJdv3cEQ99Z, 5ttgIeUVka6FLyi00Uu5h8]",2016-07-15,single,43,0.605,0.974,5,-3.996,0,0.169,0.0764,0.312,0.128,0.082,128.03,225000,4
3,03IxJiB8ZOH9hEQZF5mCNY,Feel The Volume,[Jauz],[5ttgIeUVka6FLyi00Uu5h8],2014-11-11,single,55,0.725,0.918,10,-4.912,0,0.0676,0.153,0.0328,0.113,0.201,125.009,230400,4
4,4BNWc6VNhfANsYMeTxGw4R,Get To Me,[Jauz],[5ttgIeUVka6FLyi00Uu5h8],2019-10-11,single,46,0.574,0.899,1,-3.619,1,0.0483,0.321,0.0,0.0927,0.387,128.06,256875,4


In [18]:
#look up the genres and follower count of every artist
#artist_id holds a list per track, so explode it to one id per row before de-duplicating
artist_ids_2 = user_2_df['artist_id'].explode().unique().tolist()
#query the artist information through get_artist_info
user_2_genres, user_2_followers = user_2.get_artist_info(artist_ids_2)
#combine both results into one dataframe and expose their shared index as the 'artist_id' column
user_2_artist_info = (
    pd.DataFrame({'artist_genre': user_2_genres, 'artist_follower_count': user_2_followers})
    .reset_index()
    .rename(columns={'index': 'artist_id'})
)

In [20]:
#observe user 2's artist info (genres list and follower count per artist id)
user_2_artist_info.head()

Unnamed: 0,artist_id,artist_genre,artist_follower_count
0,5ttgIeUVka6FLyi00Uu5h8,"[bass house, brostep, edm, electro house, elec...",325977
1,0XW7mqhbaQnRtHmwfAVg64,"[brostep, electro house, electronic trap, zaps...",37132
2,0jNDKefhfSbLR9sFvcPLHo,"[edm, electro house, electronic trap, electrop...",550744
3,2o5jDhtHVPhrJdv3cEQ99Z,"[big room, brostep, dance pop, dutch edm, edm,...",4935665
4,548YUkLaLzti0BLqaWpn1W,"[bass house, brostep, electro house, electroni...",43906


In [21]:
#check the shape (rows, columns) of user 2's merged track dataframe
user_2_df.shape

(1452, 20)

In [22]:
#save all datasets as csv files inside the local 'data' folder
#create the folder first: to_csv raises an error when the target directory does not exist,
#which would otherwise lose the results of all the API calls above on a fresh checkout
output_dir = Path('data')
output_dir.mkdir(parents=True, exist_ok=True)
#NOTE: list-valued columns (artist_name, artist_id, artist_genre) are written as their string
#representation, so they have to be parsed back into lists after reading the csv files
user_1_df.to_csv(output_dir / 'user_1.csv', index=False)
user_2_df.to_csv(output_dir / 'user_2.csv', index=False)
user_1_artist_info.to_csv(output_dir / 'user_1_artist_info.csv', index=False)
user_2_artist_info.to_csv(output_dir / 'user_2_artist_info.csv', index=False)