In [1]:
import requests
from Secret import *
import base64, json
import pandas as pd
import time

In [2]:
authUrl= 'https://accounts.spotify.com/api/token'
authHeader = {}
authData= {}

In [3]:
#Function to retrieve access token
def getAccessToken(clientID, clientSecret):
    message= f"{clientID}:{clientSecret}"
    
    message_bytes= message.encode('ascii')
    base64_bytes= base64.b64encode(message_bytes)
    base64_message = base64_bytes.decode('ascii')
    
    print(base64_message)
    
    
    authHeader['Authorization'] = "Basic " + base64_message
    authData['grant_type'] = "client_credentials"
    res = requests.post(authUrl, headers= authHeader, data=authData)
    
    print(res)
    
    responseObject = res.json()
    print(json.dumps(responseObject, indent = 2))
    
    accessToken= responseObject['access_token']
    
    return accessToken

In [4]:
#Function to retrieve user's playlist
def getPlaylist(token, userID, limit, offset):
    playlistEndpoint= f"https://api.spotify.com/v1/users/{userID}/playlists?country=us&limit={limit}&offset={offset}"
    getHeader = {
        "Authorization": "Bearer " + token
        }
    res=requests.get(playlistEndpoint, headers=getHeader)
    playlistObject=res.json()
    return playlistObject

In [5]:
#Function to retreive Spotify's playlists based on genre
def getGenrePlaylist(token, categoryID, limit, offset):
    playlistEndpoint= f"https://api.spotify.com/v1/browse/categories/{categoryID}/playlists?limit={limit}&offset={offset}"
    getHeader = {"Accept": "application/json",
                "Content-Type": "application/json",
                "Authorization": "Bearer " + token
                }
    res=requests.get(playlistEndpoint, headers=getHeader)
    playlistObject=res.json()
    return playlistObject

In [6]:
#Function to retrieve tracks from playlists
def getPlaylistTracks(token, playlistID):
    playlistEndPoint= f"https://api.spotify.com/v1/playlists/{playlistID}/tracks"
    
    getHeader = {
        "Authorization": "Bearer " + token,
        "Content-Type": "application/json"
        }
    res= requests.get(playlistEndPoint, headers=getHeader)
    
    playlistObject= res.json()
    return playlistObject

In [7]:
#Function to retrieve audio features from tracks
def getAudioFeatures(token, trackID):
    trackendpoint= f"https://api.spotify.com/v1/audio-features/{trackID}"
    getHeader = {"Accept": "application/json",
                "Content-Type": "application/json",
                "Authorization": "Bearer " + token
                }
    res=requests.get(trackendpoint, headers=getHeader)
    playlistobject=res.json()
    return playlistobject

Cells below will take a few minutes to run

In [10]:
#API Request to retrieve my playlist tracks based on my user ID

df = pd.DataFrame(columns=['track_id', 'name', 'album', 'artist', 'release_date', 'length', 'popularity', 'danceability', 'acousticness', 'energy','instrumentalness', 'liveness', 'loudness', 'speechiness', 'tempo', 'time_signature']
                 )

token= getAccessToken(clientID, clientSecret)
playlists=getPlaylist(token,userID, 50, 0)

#iterate through user playlists
for p in playlists['items']:
    playlistID= p['id']
    tracklist= getPlaylistTracks(token, playlistID)
    
    #collect information from tracks in playlist
    for t in tracklist['items'][:50]:
        album= t['track']['album']['name']
        release_date=t['track']['album']['release_date']
        for a in t['track']['artists']:
            artist= a['name']
        songName = t['track']['name']
        popularity = t['track']['popularity']
        trackID= t['track']['id']
        features= getAudioFeatures(token, trackID)
        length= features['duration_ms']
        danceability= features['danceability']
        acousticness= features['acousticness']
        energy = features['energy']
        instrumentalness = features['instrumentalness']
        liveness= features['liveness']
        loudness = features['loudness']
        speechiness = features['speechiness']
        tempo = features['tempo']
        time_signature= features['time_signature']
        
    
        df= df.append({'track_id': trackID,'name':songName, 'album':album, 'artist':artist, 'release_date':release_date,'length': length, 'popularity':popularity, 'danceability':danceability, 'acousticness':acousticness, 'energy':energy,
                       'instrumentalness':instrumentalness, 'liveness':liveness, 'loudness': loudness, 'speechiness':speechiness, 'tempo':tempo, 'time_signature':time_signature}, ignore_index=True
                     )
        
df.to_csv('Donald_Playlist_July_22.csv', index=False)

ZGNjMjFhMzdjYzUwNDMxMjhkMjFkOWYxM2FlNjY0OTA6OTBjYmU5MzNmYWY2NDU1NDgxMDQ0ZGZjNmU0MDFhZjA=
<Response [200]>
{
  "access_token": "BQBxQEfy8KnKHWhJNfZCyuZBKI_c9J2uyfCNJIBuvQsqVWYNBPPKSRuC9hIHuazBCqL_fs6kthlOSeR8Vxzy0NKt7topm7wovpgeqmJwT_eF35rOSHU",
  "token_type": "Bearer",
  "expires_in": 3600
}


In [12]:
#API Request to retrieve my friend Tarush's most recent fifty playlists to build a track library

df2 = pd.DataFrame(columns=['track_id', 'name', 'album', 'artist', 'release_date', 'length', 'popularity', 'danceability', 'acousticness', 'energy','instrumentalness', 'liveness', 'loudness', 'speechiness', 'tempo', 'time_signature']
                 )

userID='121047889' #obtain user ID from Spotify Profile
token= getAccessToken(clientID, clientSecret)
playlists=getPlaylist(token,userID, 50, 0)

#iterate through user playlists
for p in playlists['items']:
    playlistID= p['id']
    tracklist= getPlaylistTracks(token, playlistID)
    
    #collect information from tracks in playlist
    for t in tracklist['items'][:50]:
        album= t['track']['album']['name']
        release_date=t['track']['album']['release_date']
        for a in t['track']['artists']:
            artist= a['name']
        songName = t['track']['name']
        popularity = t['track']['popularity']
        trackID= t['track']['id']
        features= getAudioFeatures(token, trackID)
        length= features['duration_ms']
        danceability= features['danceability']
        acousticness= features['acousticness']
        energy = features['energy']
        instrumentalness = features['instrumentalness']
        liveness= features['liveness']
        loudness = features['loudness']
        speechiness = features['speechiness']
        tempo = features['tempo']
        time_signature= features['time_signature']
        
    
        df2= df2.append({'track_id': trackID,'name':songName, 'album':album, 'artist':artist, 'release_date':release_date,'length': length, 'popularity':popularity, 'danceability':danceability, 'acousticness':acousticness, 'energy':energy,
                       'instrumentalness':instrumentalness, 'liveness':liveness, 'loudness': loudness, 'speechiness':speechiness, 'tempo':tempo, 'time_signature':time_signature}, ignore_index=True
                     )
        
df2.to_csv('Tarush_Playlist_July_22.csv', index=False)

ZGNjMjFhMzdjYzUwNDMxMjhkMjFkOWYxM2FlNjY0OTA6OTBjYmU5MzNmYWY2NDU1NDgxMDQ0ZGZjNmU0MDFhZjA=
<Response [200]>
{
  "access_token": "BQAzIJ5AK5kxfk1oTlo_d_hKN38tdkIjB2cuPALp9cmqWXaoBVGQGZZN-JH9yP9uo9684QraMnfEbXyUZY2jdDkPvJPt_EEqPjMbzd1b8n6wTiYdMZ4",
  "token_type": "Bearer",
  "expires_in": 3600
}


In [47]:
#API Requests for Genre Playlists to get recommendations from

df3 = pd.DataFrame(columns=['track_id', 'name', 'album', 'artist', 'release_date', 'length', 'popularity', 'danceability', 'acousticness', 'energy','instrumentalness', 'liveness', 'loudness', 'speechiness', 'tempo', 'time_signature']
                 )

token= getAccessToken(clientID, clientSecret)
playlists=getGenrePlaylist(token,"edm_dance", 25, 0)


#iterate through user playlists
for p in playlists['playlists']['items']:
    playlistID= p['id']
    tracklist= getPlaylistTracks(token, playlistID)

    #collect information from tracks in playlist
    for t in tracklist['items']:
        album = t['track']['album']['name']
        release_date = t['track']['album']['release_date']
        for a in t['track']['artists']:
            artist= a['name']
        songName = t['track']['name']
        popularity = t['track']['popularity']
        trackID= t['track']['id']
        features = getAudioFeatures(token, trackID)
        #time.sleep(1.5)
        #print(json.dumps(features, indent=2))
        length = features['duration_ms']
        danceability = features["danceability"]
        acousticness= features['acousticness']
        energy = features['energy']
        instrumentalness = features['instrumentalness']
        liveness= features['liveness']
        loudness = features['loudness']
        speechiness = features['speechiness']
        tempo = features['tempo']
        time_signature= features['time_signature']

        df3 = df3.append({'track_id': trackID,'name':songName, 'album':album, 'artist':artist, 'release_date':release_date,'length': length, 'popularity':popularity, 'danceability':danceability, 'acousticness':acousticness, 'energy':energy,
                        'instrumentalness':instrumentalness, 'liveness':liveness, 'loudness': loudness, 'speechiness':speechiness, 'tempo':tempo, 'time_signature':time_signature}, ignore_index=True
                        )        
df3.to_csv('Electronic_Playlist_July_22.csv', index=False)

In [22]:
#clean data frames for modeling
df=pd.read_csv('Donald_Playlist_July_22.csv')
df2=pd.read_csv('Spotify_Electronic_Playlist_22.csv')
df2.shape

(7308, 16)

In [24]:
# Dropping duplicated songs
df = df.drop_duplicates(subset=['track_id'])
df2 = df2.drop_duplicates(subset=['track_id'])
df2['track_id'].value_counts()

75L0L7Ke5OISvwX0RoONtI    1
1aFaTUEi3pNVmPvHJ4rBTp    1
4xT0BmSuDNp2vkK5v07yba    1
7bg3UimnS4beLnE476hfEE    1
1Ckah0ICBG4AoAY0x6LbSF    1
                         ..
70UlGPMsr7TqIZaMqIdWxq    1
3wPJnbaKlWylBEdHNA0u9D    1
5ctXnsZojAvfDq75D4N1bz    1
1xNcBAoUw8Hz6LqK2jt4Ff    1
0Rz8DdxSqMuYDzTrzXSQNG    1
Name: track_id, Length: 5847, dtype: int64

In [25]:
# Dropping columns that could lead to data leakage
df = df.drop(columns=['name', 'album', 'artist', 'release_date'])
df2 = df2.drop(columns=['name', 'album', 'artist', 'release_date'])
df2.head()

Unnamed: 0,track_id,length,popularity,danceability,acousticness,energy,instrumentalness,liveness,loudness,speechiness,tempo,time_signature
0,408UEGA6FV2nvQjr1nt1VT,240000,51,0.502,0.0543,0.874,1e-06,0.426,-3.742,0.203,141.776,4
1,4XeIiGpUBshIfs9yrBDVZC,359126,77,0.633,0.00764,0.92,0.00177,0.123,-2.812,0.0928,114.997,4
2,6GomT970rCOkKAyyrwJeZi,157445,83,0.848,0.0169,0.821,0.000403,0.0962,-5.408,0.0527,125.051,4
3,50ZFpw2wS6ERvLmW8TINhq,168053,82,0.788,0.223,0.945,3e-06,0.115,-5.091,0.0599,128.036,4
4,08Ecw0ItPxGeHS9Mexr8cs,242905,73,0.716,0.00857,0.655,0.162,0.0572,-5.999,0.0444,125.012,4


In [26]:
df['favorite'] = 1
df2['favorite'] = 0

In [27]:
df2.shape

(5847, 13)

In [28]:
#save dataframes for machine learning model
df.to_csv('DonaldJuly22ML.csv', index=False)
df2.to_csv('SpotifyJuly22ML.csv', index=False)