# Spotify API

### Main Goal: Utilizing the Spotify API, determine other features of each track from our trimmed dataset to determine if those other features can be used to correctly classify the genre.

In [1]:
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
import pandas as pd
import numpy as np

import findspark
findspark.init()
from pyspark import SparkContext
sc = SparkContext() 

### Utilized a Python package called Spotipy

In [2]:
client_credentials_manager = SpotifyClientCredentials(client_id = "ed85fecdaeeb475ebe9a9f54a40eb054",client_secret='8001bae67076404bbb1a5980f2db94b1')
sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)

# Pandas

In [100]:
songs = pd.read_csv("cleaned_6_miheer8000rockpop.csv")

In [101]:
songs.head()

Unnamed: 0,index,genre,lyrics,song,artist
0,192053,Rock,It all started way back then A lifetime of rob...,crime-of-the-century,the-business
1,231708,Rock,Amidst the burning rubble the spirit still liv...,100-years-war,faction
2,350323,Rock,Is there anybody out there? Anyone that's love...,how-do-ya-feel-tonight,bryan-adams
3,37853,Rock,Here is the room that we shared The room that ...,i-haven-t-changed-the-room,barry-manilow
4,107765,Rock,"I do it on a whim, its rhyme without reason wh...",without-reason,fray


In [102]:
songs['song_artist'] = songs['song'] + ":" + songs['artist']
songs.head()

Unnamed: 0,index,genre,lyrics,song,artist,song_artist
0,192053,Rock,It all started way back then A lifetime of rob...,crime-of-the-century,the-business,crime-of-the-century:the-business
1,231708,Rock,Amidst the burning rubble the spirit still liv...,100-years-war,faction,100-years-war:faction
2,350323,Rock,Is there anybody out there? Anyone that's love...,how-do-ya-feel-tonight,bryan-adams,how-do-ya-feel-tonight:bryan-adams
3,37853,Rock,Here is the room that we shared The room that ...,i-haven-t-changed-the-room,barry-manilow,i-haven-t-changed-the-room:barry-manilow
4,107765,Rock,"I do it on a whim, its rhyme without reason wh...",without-reason,fray,without-reason:fray


In [8]:
def clean_artist(artist):
    artist.replace('-', ' ')
    return artist.upper()

### Find_song contains one Spotify API calls. The call is a get request which searches the song and gets the id of the song that Spotify stores and also ensures the artist from our dataset matches the artist from Spotify.
(Takes about an hour to run because of the number of API calls being made.)

In [9]:
def find_song(song_artist):
    song_name, artist = song_artist.split(":")
    artist = clean_artist(artist)
    results = sp.search(q='track:' + song_name, type='track')
    lenTracks = len(results['tracks']['items'])
    if lenTracks == 0:
        return None
    idS = None
    for i in range(lenTracks):
        if results['tracks']['items'][i]['album']['artists'][0]['name'].upper() == artist:
            idS = results['tracks']['items'][i]['id']
    return idS     
# newresults = sp.audio_features(idS)
# print(newresults)


### After finding the ID of the song, we map each id to the audio features of the song to obtain the danceability, acousticness, etc. measures which we will use to classify the genre.

In [10]:
songIDS = songs['song_artist'].map(lambda x: find_song(x)).dropna()

retrying ...1secs


In [20]:
len(songIDS)

1343

In [12]:
audio_feat = songIDS.map(lambda x: sp.audio_features(x))

In [18]:
series = audio_feat.map(lambda x: x[0])

5     {'danceability': 0.376, 'energy': 0.948, 'key'...
10    {'danceability': 0.397, 'energy': 0.446, 'key'...
22    {'danceability': 0.518, 'energy': 0.203, 'key'...
42    {'danceability': 0.57, 'energy': 0.823, 'key':...
66    {'danceability': 0.64, 'energy': 0.232, 'key':...
Name: song_artist, dtype: object

In [14]:
df = pd.DataFrame(series.tolist())
df.T

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,1333,1334,1335,1336,1337,1338,1339,1340,1341,1342
acousticness,0.000124,0.287,0.586,0.0398,0.674,0.0101,0.00275,0.0632,0.322,0.0905,...,0.00856,0.0944,0.15,0.0606,0.226,0.0167,0.00136,0.0506,0.687,0.0337
analysis_url,https://api.spotify.com/v1/audio-analysis/0Ytl...,https://api.spotify.com/v1/audio-analysis/2C0Y...,https://api.spotify.com/v1/audio-analysis/09at...,https://api.spotify.com/v1/audio-analysis/54k4...,https://api.spotify.com/v1/audio-analysis/3MPz...,https://api.spotify.com/v1/audio-analysis/3Cga...,https://api.spotify.com/v1/audio-analysis/7xdd...,https://api.spotify.com/v1/audio-analysis/006R...,https://api.spotify.com/v1/audio-analysis/5ZQV...,https://api.spotify.com/v1/audio-analysis/7jHq...,...,https://api.spotify.com/v1/audio-analysis/4wHh...,https://api.spotify.com/v1/audio-analysis/3uqz...,https://api.spotify.com/v1/audio-analysis/0jyB...,https://api.spotify.com/v1/audio-analysis/5aYy...,https://api.spotify.com/v1/audio-analysis/298M...,https://api.spotify.com/v1/audio-analysis/4AYh...,https://api.spotify.com/v1/audio-analysis/1StV...,https://api.spotify.com/v1/audio-analysis/6gQr...,https://api.spotify.com/v1/audio-analysis/1Yid...,https://api.spotify.com/v1/audio-analysis/67NK...
danceability,0.376,0.397,0.518,0.57,0.64,0.39,0.508,0.352,0.416,0.388,...,0.728,0.723,0.624,0.694,0.516,0.718,0.677,0.691,0.707,0.685
duration_ms,171293,288813,223840,182040,265693,187667,238500,267893,243787,161007,...,165240,192667,292720,273427,294507,272147,337200,223240,194187,259040
energy,0.948,0.446,0.203,0.823,0.232,0.887,0.875,0.791,0.545,0.99,...,0.846,0.69,0.897,0.886,0.545,0.751,0.678,0.927,0.656,0.6
id,0YtlU1KOPcxvNyiJTzqnCd,2C0YMWYyTjhRpEz1Xk42Hk,09atT5JgSXIcq9v9H9qoQp,54k4yt6SAkvlPcTMe4PKeC,3MPzfijILwzlhhvBFKOTEB,3CgaJaolETpSJlY0g7lMmf,7xddswKFyyesNjV8wpCGLo,006RKA1XTkko2TWvLzln9p,5ZQV9gdVC5Vvaw3BCPl2p6,7jHqV4pe8Yoh5A9oYoGRWt,...,4wHhLyYVpwgnAzIGiVoTRj,3uqzRnkdAZZrTX18xdBQ73,0jyByiP8zEfCi9Xjjuy3ce,5aYyJsxgHxHk9BFnHs4UFj,298M3RVPq5D1EhvgDzu7Mi,4AYhsnQ43KBckV3RBagFPJ,1StVq7G98jtRPttTbliku7,6gQrEwP4pFpszAnMfmASns,1Yid1jOsjDdITUUAL9I88s,67NKaMOPOJKlWsYW6mVhSs
instrumentalness,0.000104,0.00866,1.63e-06,0,4.66e-06,3.2e-05,0,5.56e-06,0,0.000229,...,0.00789,8.77e-06,2.96e-06,9.69e-06,0,1.78e-05,0.00478,7.24e-06,0,0.000365
key,9,4,9,8,5,9,9,4,6,3,...,4,4,1,0,10,6,7,4,0,9
liveness,0.119,0.109,0.147,0.0996,0.104,0.365,0.319,0.621,0.105,0.192,...,0.355,0.196,0.126,0.0596,0.0817,0.084,0.0489,0.132,0.163,0.156
loudness,-4.683,-7.411,-14.529,-4.761,-13.337,-4.782,-3.262,-4.929,-6.891,-3.114,...,-6.783,-9.254,-5.234,-6.908,-7.687,-5.965,-6.983,-6.925,-5.588,-14.004


In [46]:
songIDS.head()

5     0YtlU1KOPcxvNyiJTzqnCd
10    2C0YMWYyTjhRpEz1Xk42Hk
22    09atT5JgSXIcq9v9H9qoQp
42    54k4yt6SAkvlPcTMe4PKeC
66    3MPzfijILwzlhhvBFKOTEB
Name: song_artist, dtype: object

### Afterwards, we need to merge the audio features with the original dataset using the ID of the song in order to get the genre.

In [60]:
songNames = songIDS.map(lambda x: (x, sp.track(x)))

In [61]:
songNames = songNames.map(lambda x: (x[0], x[1]['name']))
dfNames = pd.DataFrame(songNames.tolist())
dfNames = dfNames.set_index(0)
dfNames = dfNames.rename(index = str, columns = {1 : 'Song Name'})
dfNames.head()

Unnamed: 0_level_0,Song Name
0,Unnamed: 1_level_1
0YtlU1KOPcxvNyiJTzqnCd,Pale Blue Face
2C0YMWYyTjhRpEz1Xk42Hk,Seperti Yang Kau Minta
09atT5JgSXIcq9v9H9qoQp,Then Again
54k4yt6SAkvlPcTMe4PKeC,Walk To The Bay
3MPzfijILwzlhhvBFKOTEB,I Showed Her


In [104]:
dfCombined = df.join(dfNames, on='id', how='inner')

In [105]:
dfCombined.T

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,1333,1334,1335,1336,1337,1338,1339,1340,1341,1342
acousticness,0.000124,0.287,0.586,0.0398,0.674,0.0101,0.00275,0.0632,0.322,0.0905,...,0.00856,0.0944,0.15,0.0606,0.226,0.0167,0.00136,0.0506,0.687,0.0337
analysis_url,https://api.spotify.com/v1/audio-analysis/0Ytl...,https://api.spotify.com/v1/audio-analysis/2C0Y...,https://api.spotify.com/v1/audio-analysis/09at...,https://api.spotify.com/v1/audio-analysis/54k4...,https://api.spotify.com/v1/audio-analysis/3MPz...,https://api.spotify.com/v1/audio-analysis/3Cga...,https://api.spotify.com/v1/audio-analysis/7xdd...,https://api.spotify.com/v1/audio-analysis/006R...,https://api.spotify.com/v1/audio-analysis/5ZQV...,https://api.spotify.com/v1/audio-analysis/7jHq...,...,https://api.spotify.com/v1/audio-analysis/4wHh...,https://api.spotify.com/v1/audio-analysis/3uqz...,https://api.spotify.com/v1/audio-analysis/0jyB...,https://api.spotify.com/v1/audio-analysis/5aYy...,https://api.spotify.com/v1/audio-analysis/298M...,https://api.spotify.com/v1/audio-analysis/4AYh...,https://api.spotify.com/v1/audio-analysis/1StV...,https://api.spotify.com/v1/audio-analysis/6gQr...,https://api.spotify.com/v1/audio-analysis/1Yid...,https://api.spotify.com/v1/audio-analysis/67NK...
danceability,0.376,0.397,0.518,0.57,0.64,0.39,0.508,0.352,0.416,0.388,...,0.728,0.723,0.624,0.694,0.516,0.718,0.677,0.691,0.707,0.685
duration_ms,171293,288813,223840,182040,265693,187667,238500,267893,243787,161007,...,165240,192667,292720,273427,294507,272147,337200,223240,194187,259040
energy,0.948,0.446,0.203,0.823,0.232,0.887,0.875,0.791,0.545,0.99,...,0.846,0.69,0.897,0.886,0.545,0.751,0.678,0.927,0.656,0.6
id,0YtlU1KOPcxvNyiJTzqnCd,2C0YMWYyTjhRpEz1Xk42Hk,09atT5JgSXIcq9v9H9qoQp,54k4yt6SAkvlPcTMe4PKeC,3MPzfijILwzlhhvBFKOTEB,3CgaJaolETpSJlY0g7lMmf,7xddswKFyyesNjV8wpCGLo,006RKA1XTkko2TWvLzln9p,5ZQV9gdVC5Vvaw3BCPl2p6,7jHqV4pe8Yoh5A9oYoGRWt,...,4wHhLyYVpwgnAzIGiVoTRj,3uqzRnkdAZZrTX18xdBQ73,0jyByiP8zEfCi9Xjjuy3ce,5aYyJsxgHxHk9BFnHs4UFj,298M3RVPq5D1EhvgDzu7Mi,4AYhsnQ43KBckV3RBagFPJ,1StVq7G98jtRPttTbliku7,6gQrEwP4pFpszAnMfmASns,1Yid1jOsjDdITUUAL9I88s,67NKaMOPOJKlWsYW6mVhSs
instrumentalness,0.000104,0.00866,1.63e-06,0,4.66e-06,3.2e-05,0,5.56e-06,0,0.000229,...,0.00789,8.77e-06,2.96e-06,9.69e-06,0,1.78e-05,0.00478,7.24e-06,0,0.000365
key,9,4,9,8,5,9,9,4,6,3,...,4,4,1,0,10,6,7,4,0,9
liveness,0.119,0.109,0.147,0.0996,0.104,0.365,0.319,0.621,0.105,0.192,...,0.355,0.196,0.126,0.0596,0.0817,0.084,0.0489,0.132,0.163,0.156
loudness,-4.683,-7.411,-14.529,-4.761,-13.337,-4.782,-3.262,-4.929,-6.891,-3.114,...,-6.783,-9.254,-5.234,-6.908,-7.687,-5.965,-6.983,-6.925,-5.588,-14.004


### cap_songs adjusts the case sensitive nature of the songs to match the songs on the join operation.

In [106]:
def cap_songs(song):
    parts = song.split('-')
    for i in range(len(parts)):
        parts[i] = parts[i].capitalize()
    return " ".join(parts)

In [107]:
dfCombined = dfCombined.set_index('Song Name')
dfCombined.head()

Unnamed: 0_level_0,acousticness,analysis_url,danceability,duration_ms,energy,id,instrumentalness,key,liveness,loudness,mode,speechiness,tempo,time_signature,track_href,type,uri,valence
Song Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
Pale Blue Face,0.000124,https://api.spotify.com/v1/audio-analysis/0Ytl...,0.376,171293,0.948,0YtlU1KOPcxvNyiJTzqnCd,0.000104,9,0.119,-4.683,1,0.108,97.085,4,https://api.spotify.com/v1/tracks/0YtlU1KOPcxv...,audio_features,spotify:track:0YtlU1KOPcxvNyiJTzqnCd,0.477
Seperti Yang Kau Minta,0.287,https://api.spotify.com/v1/audio-analysis/2C0Y...,0.397,288813,0.446,2C0YMWYyTjhRpEz1Xk42Hk,0.00866,4,0.109,-7.411,1,0.0248,170.541,4,https://api.spotify.com/v1/tracks/2C0YMWYyTjhR...,audio_features,spotify:track:2C0YMWYyTjhRpEz1Xk42Hk,0.199
Then Again,0.586,https://api.spotify.com/v1/audio-analysis/09at...,0.518,223840,0.203,09atT5JgSXIcq9v9H9qoQp,2e-06,9,0.147,-14.529,1,0.0275,103.305,4,https://api.spotify.com/v1/tracks/09atT5JgSXIc...,audio_features,spotify:track:09atT5JgSXIcq9v9H9qoQp,0.144
Walk To The Bay,0.0398,https://api.spotify.com/v1/audio-analysis/54k4...,0.57,182040,0.823,54k4yt6SAkvlPcTMe4PKeC,0.0,8,0.0996,-4.761,1,0.0294,133.117,4,https://api.spotify.com/v1/tracks/54k4yt6SAkvl...,audio_features,spotify:track:54k4yt6SAkvlPcTMe4PKeC,0.93
I Showed Her,0.674,https://api.spotify.com/v1/audio-analysis/3MPz...,0.64,265693,0.232,3MPzfijILwzlhhvBFKOTEB,5e-06,5,0.104,-13.337,1,0.027,147.676,4,https://api.spotify.com/v1/tracks/3MPzfijILwzl...,audio_features,spotify:track:3MPzfijILwzlhhvBFKOTEB,0.263


In [108]:
songs['song'] = songs['song'].map(lambda x: cap_songs(x))
songs.head()

Unnamed: 0,index,genre,lyrics,song,artist,song_artist
0,192053,Rock,It all started way back then A lifetime of rob...,Crime Of The Century,the-business,crime-of-the-century:the-business
1,231708,Rock,Amidst the burning rubble the spirit still liv...,100 Years War,faction,100-years-war:faction
2,350323,Rock,Is there anybody out there? Anyone that's love...,How Do Ya Feel Tonight,bryan-adams,how-do-ya-feel-tonight:bryan-adams
3,37853,Rock,Here is the room that we shared The room that ...,I Haven T Changed The Room,barry-manilow,i-haven-t-changed-the-room:barry-manilow
4,107765,Rock,"I do it on a whim, its rhyme without reason wh...",Without Reason,fray,without-reason:fray


In [109]:
songs = songs.set_index('song')
songs.head()

Unnamed: 0_level_0,index,genre,lyrics,artist,song_artist
song,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Crime Of The Century,192053,Rock,It all started way back then A lifetime of rob...,the-business,crime-of-the-century:the-business
100 Years War,231708,Rock,Amidst the burning rubble the spirit still liv...,faction,100-years-war:faction
How Do Ya Feel Tonight,350323,Rock,Is there anybody out there? Anyone that's love...,bryan-adams,how-do-ya-feel-tonight:bryan-adams
I Haven T Changed The Room,37853,Rock,Here is the room that we shared The room that ...,barry-manilow,i-haven-t-changed-the-room:barry-manilow
Without Reason,107765,Rock,"I do it on a whim, its rhyme without reason wh...",fray,without-reason:fray


In [110]:
songs.head()

Unnamed: 0_level_0,index,genre,lyrics,artist,song_artist
song,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Crime Of The Century,192053,Rock,It all started way back then A lifetime of rob...,the-business,crime-of-the-century:the-business
100 Years War,231708,Rock,Amidst the burning rubble the spirit still liv...,faction,100-years-war:faction
How Do Ya Feel Tonight,350323,Rock,Is there anybody out there? Anyone that's love...,bryan-adams,how-do-ya-feel-tonight:bryan-adams
I Haven T Changed The Room,37853,Rock,Here is the room that we shared The room that ...,barry-manilow,i-haven-t-changed-the-room:barry-manilow
Without Reason,107765,Rock,"I do it on a whim, its rhyme without reason wh...",fray,without-reason:fray


In [111]:
dfCombined.head()

Unnamed: 0_level_0,acousticness,analysis_url,danceability,duration_ms,energy,id,instrumentalness,key,liveness,loudness,mode,speechiness,tempo,time_signature,track_href,type,uri,valence
Song Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
Pale Blue Face,0.000124,https://api.spotify.com/v1/audio-analysis/0Ytl...,0.376,171293,0.948,0YtlU1KOPcxvNyiJTzqnCd,0.000104,9,0.119,-4.683,1,0.108,97.085,4,https://api.spotify.com/v1/tracks/0YtlU1KOPcxv...,audio_features,spotify:track:0YtlU1KOPcxvNyiJTzqnCd,0.477
Seperti Yang Kau Minta,0.287,https://api.spotify.com/v1/audio-analysis/2C0Y...,0.397,288813,0.446,2C0YMWYyTjhRpEz1Xk42Hk,0.00866,4,0.109,-7.411,1,0.0248,170.541,4,https://api.spotify.com/v1/tracks/2C0YMWYyTjhR...,audio_features,spotify:track:2C0YMWYyTjhRpEz1Xk42Hk,0.199
Then Again,0.586,https://api.spotify.com/v1/audio-analysis/09at...,0.518,223840,0.203,09atT5JgSXIcq9v9H9qoQp,2e-06,9,0.147,-14.529,1,0.0275,103.305,4,https://api.spotify.com/v1/tracks/09atT5JgSXIc...,audio_features,spotify:track:09atT5JgSXIcq9v9H9qoQp,0.144
Walk To The Bay,0.0398,https://api.spotify.com/v1/audio-analysis/54k4...,0.57,182040,0.823,54k4yt6SAkvlPcTMe4PKeC,0.0,8,0.0996,-4.761,1,0.0294,133.117,4,https://api.spotify.com/v1/tracks/54k4yt6SAkvl...,audio_features,spotify:track:54k4yt6SAkvlPcTMe4PKeC,0.93
I Showed Her,0.674,https://api.spotify.com/v1/audio-analysis/3MPz...,0.64,265693,0.232,3MPzfijILwzlhhvBFKOTEB,5e-06,5,0.104,-13.337,1,0.027,147.676,4,https://api.spotify.com/v1/tracks/3MPzfijILwzl...,audio_features,spotify:track:3MPzfijILwzlhhvBFKOTEB,0.263


### Inner join the songs on the ID 

In [114]:
dfFinal = dfCombined.join(songs, how='inner')
dfFinal['genre'].value_counts()

Rock    460
Pop     325
Name: genre, dtype: int64

In [115]:
dfFinal.to_csv("additional_features_pop_rock_miheer.csv")