Adding spotify data to already aquired muse dataset using spotipy

In [1]:
pip install spotipy

Note: you may need to restart the kernel to use updated packages.


In [2]:
import pandas as pd
import spotipy
# SpotifyClientCredentials to access personal data
from spotipy.oauth2 import SpotifyClientCredentials
import json
import numpy as np

In [3]:
# create json file with the authorization information, ie: client_id and client_secret 
# these can be found on the developers page
# open the authorization.json as json
credentials = json.load(open('authorization.json'))
# can also directly input id and secret here
# have made a different file for the sake of confidentiality
client_id = credentials['client_id']
client_secret = credentials['client_secret']

client_credentials_manager = SpotifyClientCredentials(client_id=client_id,client_secret=client_secret)
sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)

In [4]:
df = pd.read_csv('muse_v3.csv',engine='python')
list(df.columns)

['lastfm_url',
 'track',
 'artist',
 'seeds',
 'number_of_emotion_tags',
 'valence_tags',
 'arousal_tags',
 'dominance_tags',
 'mbid',
 'spotify_id',
 'genre']

In [5]:
# removing unimportant columns that wont be needed in the model
df = df.drop(columns=['lastfm_url', 'valence_tags', 'arousal_tags', 'dominance_tags', 'mbid', 'genre'])

In [6]:
# seeing what the dataset looks like
df

Unnamed: 0,track,artist,seeds,number_of_emotion_tags,spotify_id
0,'Till I Collapse,Eminem,['aggressive'],6,4xkOaSrkexMciUUogZKVTS
1,St. Anger,Metallica,['aggressive'],8,3fOc9x06lKJBhz435mInlH
2,Speedin',Rick Ross,['aggressive'],1,3Y96xd4Ce0J47dcalLrEC8
3,Bamboo Banga,M.I.A.,"['aggressive', 'fun', 'sexy', 'energetic']",13,6tqFC1DIOphJkCwrjVzPmg
4,Die MF Die,Dope,['aggressive'],7,5bU4KX47KqtDKKaLM4QCzh
...,...,...,...,...,...
89996,Battle,Gaby Hoffmann & Jay Duplass,['transparent'],1,
89997,Hands Tied to the Roots of a Hemorrhage,Omar Rodriguez-Lopez,['transparent'],4,
89998,Crookfield Zoo,Ace White,['transparent'],1,
89999,Transparent (Full Version),Second Chyld,['transparent'],1,


In [7]:
# we see that there are some songs without a spotify id, we should remove them
df = df.dropna(subset=['spotify_id'])
df

Unnamed: 0,track,artist,seeds,number_of_emotion_tags,spotify_id
0,'Till I Collapse,Eminem,['aggressive'],6,4xkOaSrkexMciUUogZKVTS
1,St. Anger,Metallica,['aggressive'],8,3fOc9x06lKJBhz435mInlH
2,Speedin',Rick Ross,['aggressive'],1,3Y96xd4Ce0J47dcalLrEC8
3,Bamboo Banga,M.I.A.,"['aggressive', 'fun', 'sexy', 'energetic']",13,6tqFC1DIOphJkCwrjVzPmg
4,Die MF Die,Dope,['aggressive'],7,5bU4KX47KqtDKKaLM4QCzh
...,...,...,...,...,...
89983,Secret,Quietdrive,['transparent'],1,2bRIsZ92JRKlvQOZlyR9CO
89985,The Last of the Rest Was the End,Medications,['transparent'],1,7o3Np7cho9cBCrNDokxzYC
89986,Lovechild,Daniel Lanois,['transparent'],2,4fVObxldDzxxRD6a5Eth9s
89991,Last Inhale,Tapage,['transparent'],6,5WxwRwUQ4R4L46VEm3213y


In [9]:
# making empty dataframe to populate with spotify information of the songs from muse dataset
merged_spotify_muse = pd.DataFrame(columns=['id', 'title', 'artist','danceability', 'energy', 'key', 'loudness',
                            'mode', 'acousticness', 'instrumentalness',
                            'liveness', 'valence', 'tempo',
                            'duration_ms', 'time_signature', 'sentiments', 'no_of_tags'])
merged_spotify_muse

Unnamed: 0,id,title,artist,danceability,energy,key,loudness,mode,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature,sentiments,no_of_tags


In [10]:
# making a dummy dataframe for 100 songs that were throwing an error while getting spotify features
df_trial = df[36500:36600]
id_list = df_trial['spotify_id'].tolist()
features = sp.audio_features(id_list)
dummy_df = pd.DataFrame(data=features, columns=features[0].keys())
dummy_df['sentiments'] = "none"
dummy_df

Unnamed: 0,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,type,id,uri,track_href,analysis_url,duration_ms,time_signature,sentiments
0,0.608,0.875,8,-4.636,1,0.0796,0.003820,0.002850,0.1580,0.5290,112.924,audio_features,13zEyjK3DZm0YtZObaaS1O,spotify:track:13zEyjK3DZm0YtZObaaS1O,https://api.spotify.com/v1/tracks/13zEyjK3DZm0...,https://api.spotify.com/v1/audio-analysis/13zE...,238507,4,none
1,0.352,0.414,4,-6.600,0,0.0254,0.391000,0.008310,0.1850,0.0688,60.042,audio_features,1HTqgP13dorct3BltLaB0l,spotify:track:1HTqgP13dorct3BltLaB0l,https://api.spotify.com/v1/tracks/1HTqgP13dorc...,https://api.spotify.com/v1/audio-analysis/1HTq...,341000,4,none
2,0.368,0.538,10,-10.067,0,0.0336,0.121000,0.794000,0.2010,0.1290,103.573,audio_features,6xKyallmbswWAQpbBV1BBz,spotify:track:6xKyallmbswWAQpbBV1BBz,https://api.spotify.com/v1/tracks/6xKyallmbswW...,https://api.spotify.com/v1/audio-analysis/6xKy...,155707,4,none
3,0.600,0.658,0,-5.386,1,0.0519,0.081700,0.000005,0.4200,0.6770,146.007,audio_features,6EvmFI1bV10Rtank7f1uQW,spotify:track:6EvmFI1bV10Rtank7f1uQW,https://api.spotify.com/v1/tracks/6EvmFI1bV10R...,https://api.spotify.com/v1/audio-analysis/6Evm...,208880,4,none
4,0.766,0.720,1,-8.347,1,0.1540,0.293000,0.000000,0.1070,0.4870,151.901,audio_features,2mPAAh221qDUfWOUmgY7BR,spotify:track:2mPAAh221qDUfWOUmgY7BR,https://api.spotify.com/v1/tracks/2mPAAh221qDU...,https://api.spotify.com/v1/audio-analysis/2mPA...,195536,4,none
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,0.861,0.502,5,-6.558,0,0.0656,0.008200,0.000127,0.0659,0.6210,130.564,audio_features,2GvaL6YMydiMkdbazcHBYi,spotify:track:2GvaL6YMydiMkdbazcHBYi,https://api.spotify.com/v1/tracks/2GvaL6YMydiM...,https://api.spotify.com/v1/audio-analysis/2Gva...,238533,4,none
96,0.549,0.436,1,-8.333,1,0.0311,0.701000,0.000000,0.1310,0.2490,138.151,audio_features,5kiMxK0Sv1z1d4q260NICr,spotify:track:5kiMxK0Sv1z1d4q260NICr,https://api.spotify.com/v1/tracks/5kiMxK0Sv1z1...,https://api.spotify.com/v1/audio-analysis/5kiM...,298573,4,none
97,0.713,0.596,9,-13.255,1,0.0896,0.251000,0.120000,0.0641,0.2810,122.951,audio_features,6XDLoOiCCfg1BxNcmBHyfX,spotify:track:6XDLoOiCCfg1BxNcmBHyfX,https://api.spotify.com/v1/tracks/6XDLoOiCCfg1...,https://api.spotify.com/v1/audio-analysis/6XDL...,342973,4,none
98,0.763,0.740,6,-9.811,0,0.0322,0.000721,0.770000,0.0848,0.8550,133.374,audio_features,0iaijSh55qg3rGdANiVMxD,spotify:track:0iaijSh55qg3rGdANiVMxD,https://api.spotify.com/v1/tracks/0iaijSh55qg3...,https://api.spotify.com/v1/audio-analysis/0iai...,270987,4,none


In [12]:
for i in range(0, 617):
  df_trial = df[i*100:(i+1)*100]
  id_list = df_trial['spotify_id'].tolist()
  track_names = df_trial['track'].tolist()
  artist_names = df_trial['artist'].tolist()
  sentiments = df_trial['seeds'].tolist()
  no_of_tags = df_trial['number_of_emotion_tags'].tolist()
  features = sp.audio_features(id_list)
  try:
    features_df = pd.DataFrame(data=features, columns=features[0].keys())
    features_df['sentiments'] = sentiments
  except:
    features_df = dummy_df
    features_df['sentiments'] = "none"
  
  features_df['title'] = track_names
  features_df['artist'] = artist_names
  features_df['no_of_tags'] = no_of_tags
  features_df = features_df[['id', 'title', 'artist','danceability', 'energy', 'key', 'loudness',
                            'mode', 'acousticness', 'instrumentalness',
                            'liveness', 'valence', 'tempo',
                            'duration_ms', 'time_signature', 'sentiments', 'no_of_tags']]
  merged_spotify_muse = pd.concat([merged_spotify_muse, features_df])

In [19]:
# resetting the index of merged_spotify_muse 
merged_spotify_muse = merged_spotify_muse.reset_index(drop = True)
merged_spotify_muse = merged_spotify_muse.drop(columns=['index'])

In [20]:
merged_spotify_muse

Unnamed: 0,id,title,artist,danceability,energy,key,loudness,mode,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature,sentiments,no_of_tags
0,4xkOaSrkexMciUUogZKVTS,'Till I Collapse,Eminem,0.548,0.847,1,-3.237,1,0.06220,0.000000,0.0816,0.1000,171.447,297787,4,['aggressive'],6
1,3fOc9x06lKJBhz435mInlH,St. Anger,Metallica,0.249,0.949,2,-2.642,0,0.00131,0.022800,0.0953,0.4980,185.252,441133,4,['aggressive'],8
2,3Y96xd4Ce0J47dcalLrEC8,Speedin',Rick Ross,0.668,0.787,1,-4.226,1,0.10900,0.000000,0.2100,0.4780,100.059,204960,4,['aggressive'],1
3,6tqFC1DIOphJkCwrjVzPmg,Bamboo Banga,M.I.A.,0.805,0.918,9,-4.554,1,0.04930,0.000000,0.0691,0.7130,125.984,298360,4,"['aggressive', 'fun', 'sexy', 'energetic']",13
4,5bU4KX47KqtDKKaLM4QCzh,Die MF Die,Dope,0.657,0.960,5,-3.524,0,0.00169,0.000997,0.1090,0.5670,126.020,186067,4,['aggressive'],7
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
61625,2bRIsZ92JRKlvQOZlyR9CO,Secret,Quietdrive,0.396,0.915,0,-5.126,0,0.03340,0.000000,0.1350,0.0734,167.996,258373,4,['transparent'],1
61626,7o3Np7cho9cBCrNDokxzYC,The Last of the Rest Was the End,Medications,0.244,0.866,2,-5.774,0,0.00433,0.072200,0.0782,0.3400,144.844,324000,3,['transparent'],1
61627,4fVObxldDzxxRD6a5Eth9s,Lovechild,Daniel Lanois,0.236,0.107,10,-20.091,0,0.90100,0.748000,0.1120,0.0720,79.476,516280,4,['transparent'],2
61628,5WxwRwUQ4R4L46VEm3213y,Last Inhale,Tapage,0.542,0.909,2,-8.977,0,0.07180,0.817000,0.1250,0.0552,160.011,324258,4,['transparent'],6


In [21]:
# now we have the data that we want
# we should convert it to csv for future use
merged_spotify_muse.to_csv('merged_spotify_muse.csv')