In [1]:
import os
import json
import chardet

import spotipy
import spotipy.util as util
from spotipy.oauth2 import SpotifyClientCredentials

import pandas as pd
# Show at most 10 rows when a DataFrame is displayed. The fully qualified
# option name is used on purpose: pandas treats the key as a pattern, and the
# bare "max_rows" is ambiguous (OptionError) in versions that also register
# "styler.render.max_rows".
pd.set_option("display.max_rows", 10)

# Root folders, relative to the notebook, for input data and settings files
data_root = "../data/"
settings_root = "../settings/"

In [2]:
# Load the Spotify web API credentials stored as JSON in the settings.env file

settingsfile = os.path.join(settings_root, "settings.env")

with open(settingsfile) as f:
    env_vars = json.load(f)

# Publish every credential as an environment variable
for credential_key in ('SPOTIPY_CLIENT_ID',
                       'SPOTIPY_CLIENT_SECRET',
                       'SPOTIPY_REDIRECT_URI',
                       'SPOTIPY_USER'):
    os.environ[credential_key] = env_vars[credential_key]

In [3]:
# Authorization flow: read the credentials back from the environment and
# request a user token for the given scope

client_id = os.getenv('SPOTIPY_CLIENT_ID')
client_secret = os.getenv('SPOTIPY_CLIENT_SECRET')
redirect_uri = os.getenv('SPOTIPY_REDIRECT_URI')
username = os.getenv('SPOTIPY_USER')
scope = 'user-library-read'

token = util.prompt_for_user_token(
    username=username,
    scope=scope,
    client_id=client_id,
    client_secret=client_secret,
    redirect_uri=redirect_uri,
)

# NOTE(review): when no token is returned `sp` is never bound, so any later
# cell that uses `sp` fails with NameError.
if not token:
    print("Can't get token for", username)
else:
    sp = spotipy.Spotify(auth=token)

Couldn't read cache at: .cache-anaeguia
Couldn't read cache at: .cache-anaeguia


In [4]:
# 1. List of Concerts from NDB 2021 - 2016

datafile = os.path.join(data_root, "NDB_artist_2021_2016.csv")

# Let chardet guess the character encoding from the raw bytes of the file
with open(datafile, mode='rb') as f:
    result = chardet.detect(f.read())

# Parse the CSV with the detected encoding
detected_encoding = result['encoding']
concerts_ndb = pd.read_csv(datafile, encoding=detected_encoding)

In [5]:
# Preview the concert listing exactly as it was read from the CSV
concerts_ndb

Unnamed: 0,edition,date,artist
0,2021,11/06/2021,Wilco
1,2021,13/06/2021,Imelda May
2,2021,15/06/2021,Woodkid
3,2021,15/06/2021,Awir Leon
4,2021,16/06/2021,Estrella Morente
...,...,...,...
203,2016,22/07/2016,Seal
204,2016,23/07/2016,Mariza
205,2016,26/07/2016,Electric Light Orchestra
206,2016,27/07/2016,Rozalén


In [6]:
# Drop the concert date column. Reassigning (instead of mutating in place)
# together with errors='ignore' keeps this cell safe to run more than once:
# a second run no longer raises KeyError because 'date' is already gone.
concerts_ndb = concerts_ndb.drop(columns='date', errors='ignore')

In [7]:
# Number of distinct artists across all the NDB editions in the listing

concerts_ndb['artist'].unique().size

192

In [8]:
# Find duplicates: rows whose artist was already seen in an earlier row

concerts_ndb.loc[concerts_ndb.duplicated(subset=['artist'])].shape

(16, 2)

In [9]:
# Keep the repeated appearances only; the first occurrence of each artist is
# not flagged by duplicated() and is therefore not part of this frame
duplicate = concerts_ndb.loc[concerts_ndb.duplicated(subset=['artist'])]

In [10]:
# List the repeated appearances alphabetically by artist
duplicate.sort_values('artist')

Unnamed: 0,edition,artist
191,2016,Bomba Estéreo
188,2016,Brad Mehldau
122,2018,Cècile McLorin Salvant
201,2016,Cècile McLorin Salvant
80,2019,Diego El Cigala
...,...,...
180,2016,Pink Martini
155,2017,Roger Hodgson
114,2018,Rufus Wainwright
93,2019,Tomatito


In [11]:
# Turn the edition column into one indicator column per edition, so that rows
# can later be merged per artist without losing which editions they played.
# The empty prefix and separator leave the bare edition value as column name.

concerts_ndb = pd.get_dummies(
    concerts_ndb,
    columns=['edition'],
    prefix='',
    prefix_sep='',
)

In [12]:
# Collapse to one row per artist, adding up the indicator of every edition
concerts_ndb = (
    concerts_ndb
    .groupby('artist')
    .agg({edition: 'sum' for edition in ('2016', '2017', '2018', '2019', '2021')})
    .reset_index()
)

In [13]:
# Spot-check one artist that was listed among the duplicates
concerts_ndb[concerts_ndb['artist'].eq('Cècile McLorin Salvant')]

Unnamed: 0,artist,2016,2017,2018,2019,2021
46,Cècile McLorin Salvant,1,0,1,0,1


In [14]:
# Add column concert_id (1-based, derived from the current row index).
# The guard keeps the cell re-runnable: DataFrame.insert raises ValueError
# when a column with that name already exists.

if 'concert_id' not in concerts_ndb.columns:
    concerts_ndb.insert(0, 'concert_id', concerts_ndb.index + 1)

In [15]:
# Missing values per column
concerts_ndb.isna().sum()

concert_id    0
artist        0
2016          0
2017          0
2018          0
2019          0
2021          0
dtype: int64

In [16]:
# (rows, columns) of the per-artist table after the concert_id column was added
concerts_ndb.shape

(192, 7)

In [17]:
# Per-artist table: concert_id, artist and one summed indicator column per edition
concerts_ndb

Unnamed: 0,concert_id,artist,2016,2017,2018,2019,2021
0,1,2Cellos,1,0,0,0,0
1,2,Agnes Obel,0,0,0,0,1
2,3,Alan Parsons,1,0,0,0,0
3,4,Alex Conde,0,0,0,0,1
4,5,Alfa Mist,0,0,0,0,1
...,...,...,...,...,...,...,...
187,188,Youssou N'dour,0,0,0,0,1
188,189,Zahara,0,0,0,1,0
189,190,Zenet,0,0,0,0,1
190,191,Zoé,0,0,1,0,0


In [18]:
def fetch_artist_features(sp, df):
    """
    Look up every artist on Spotify and collect its most relevant features.

    One artist search (``limit=1``) is issued per value of the ``artist``
    column of ``df`` and the first hit, if any, is recorded. Artists without
    a hit are reported on stdout and left out of the result, so the returned
    frame can have fewer rows than ``df``.

    :param sp: Spotify client exposing ``search(q=..., type=..., limit=...)``
    :param df: dataframe of concerts at NDB Festival; needs an ``artist`` column

    :return: dataframe with one row per matched artist and the columns
             ``spid``, ``name``, ``followers``, ``popularity`` and ``genres``
    """
    # One list per output column; all of them grow together on every match,
    # so they always have the same length when the frame is built.
    features = {'spid': [],
                'name': [],
                'followers': [],
                'popularity': [],
                'genres': []}

    # Walk the artist names directly instead of indexing the frame row by row
    # with a manual counter (which built a full row Series for each lookup).
    for name in df['artist']:
        result = sp.search(q='artist:' + name, type='artist', limit=1)
        items = result['artists']['items']

        if not items:
            print("Can't read artist " + name)
            continue

        # Only the top hit is considered; it is whatever the search ranks first.
        artist = items[0]

        features['spid'].append(artist['id'])
        features['name'].append(artist['name'])
        features['followers'].append(artist['followers']['total'])
        features['popularity'].append(artist['popularity'])
        features['genres'].append(artist['genres'])

    return pd.DataFrame(features)

In [19]:
# Fetch the Spotify features for every artist in the per-artist concert table
artists_ndb = fetch_artist_features(sp=sp, df=concerts_ndb)

In [20]:
# Inspect the features returned by Spotify for the festival artists.
# NOTE(review): the top search hit is not always the queried artist; in the
# output below the query "Alan Parsons" came back as "The Alan Parsons Project".
artists_ndb

Unnamed: 0,spid,name,followers,popularity,genres
0,6Fi8CHfO8WGtu3yO8c2Mc4,2CELLOS,770100,63,"[bow pop, cello, classify]"
1,1rKrEdI6GKirxWHxIUPYms,Agnes Obel,618120,65,[chamber pop]
2,2m62cc253Xvd9qYQ8d2X3d,The Alan Parsons Project,897739,66,"[album rock, art rock, classic rock, mellow go..."
3,34gsS5srfZEkRrwauF4Czj,Alex Conde,220,1,[spanish jazz]
4,2i1CPudyCUjL50Wqjv8AMI,Alfa Mist,126161,53,"[british jazz, indie jazz, indie soul, neo r&b..."
...,...,...,...,...,...
187,77zlytAFjPFjUKda8TNIDY,Youssou N'Dour,108524,57,"[afropop, mande pop, mbalax, world]"
188,7uLePkJ2f0MwEcphODfkuu,Zahara,173606,59,"[cantautor, latin rock, spanish indie pop, spa..."
189,2s7td67DdtSXTx2TGzs01i,Zenet,59891,48,"[cantautor, flamenco]"
190,6IdtcAwaNVAggwd6sCKgTI,Zoé,3257718,74,"[latin alternative, latin rock, mexican rock, ..."
