# Data from Top200 SpotifyCharts


Proceso:
 - Sacar top200 de 2017, 2018, 2019 y 2010
 - De canciones duplicadas sumar los Streams y unir, quitar duplicados
 - Juntar Dataframes de años y añadir una nueva columna con el mismo.


## Preparación

In [1]:
# Help: https://github.com/kelvingakuo/fycharts#in
#! pip install fycharts

import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime, timedelta
from datetime import date

#Fechas 2020
fecha_fin_2020 = '2020-05-13' # Empieza a levantarse restricciones de salida
fecha_inicial_2020 = '2020-03-14' #14 Marzo se decreta el estado de alarma, con 5753 casos y 136 fallecidos
fecha_fin2_2020 = datetime.strptime(fecha_fin_2020, '%Y-%m-%d')
fecha_inicial2_2020 = datetime.strptime(fecha_inicial_2020, '%Y-%m-%d')

#Fechas 2019
fecha_fin_2019 = '2019-05-13'
fecha_inicial_2019 = '2019-03-14'
fecha_fin2_2019 = datetime.strptime(fecha_fin_2019, '%Y-%m-%d')
fecha_inicial2_2019 = datetime.strptime(fecha_inicial_2019, '%Y-%m-%d')

print('Días totales seleccionados 2020:', fecha_fin2_2020 - fecha_inicial2_2020)
print('Días totales seleccionados 2019:', fecha_fin2_2019 - fecha_inicial2_2019)

Días totales seleccionados 2020: 60 days, 0:00:00
Días totales seleccionados 2019: 60 days, 0:00:00


In [2]:
# ¡IMPORTANTE! Ejecutar una única vez para descargar los datos, configurado para España
'''
# Descarga año 2020
from fycharts.SpotifyCharts import SpotifyCharts
import sqlalchemy

api = SpotifyCharts()
connector = sqlalchemy.create_engine("sqlite:///spotifycharts_2020.db", echo=False)
api.top200Daily(output_file = "top_200_daily_2020.csv", output_db = connector, webhook = "https://mywebhookssite.com/post/", 
                start = fecha_inicial_2020, end = fecha_fin_2020, region = ["es"])


# Descarga año 2019
from fycharts.SpotifyCharts import SpotifyCharts
import sqlalchemy

api = SpotifyCharts()
connector = sqlalchemy.create_engine("sqlite:///spotifycharts_2019.db", echo=False)
api.top200Daily(output_file = "top_200_daily_2019.csv", output_db = connector, webhook = "https://mywebhookssite.com/post/", 
                start = fecha_inicial_2019, end = fecha_fin_2019, region = ["es"])
'''

'\n# Descarga año 2020\nfrom fycharts.SpotifyCharts import SpotifyCharts\nimport sqlalchemy\n\napi = SpotifyCharts()\nconnector = sqlalchemy.create_engine("sqlite:///spotifycharts_2020.db", echo=False)\napi.top200Daily(output_file = "top_200_daily_2020.csv", output_db = connector, webhook = "https://mywebhookssite.com/post/", \n                start = fecha_inicial_2020, end = fecha_fin_2020, region = ["es"])\n\n\n# Descarga año 2019\nfrom fycharts.SpotifyCharts import SpotifyCharts\nimport sqlalchemy\n\napi = SpotifyCharts()\nconnector = sqlalchemy.create_engine("sqlite:///spotifycharts_2019.db", echo=False)\napi.top200Daily(output_file = "top_200_daily_2019.csv", output_db = connector, webhook = "https://mywebhookssite.com/post/", \n                start = fecha_inicial_2019, end = fecha_fin_2019, region = ["es"])\n'

## Importar la tabla

In [3]:
# Importamos la tabla con los datos y miramos a ver qué tal están
data_top200_2020= pd.read_csv("top_200_daily_2020.csv")
data_top200_2019= pd.read_csv("top_200_daily_2019.csv")
print(data_top200_2020.isnull().sum())
print('2020:', data_top200_2020.shape)
print('2019:',data_top200_2019.shape)
data_top200_2020.head()

Position      0
Track Name    0
Artist        0
Streams       0
date          0
region        0
spotify_id    0
dtype: int64
2020: (12200, 7)
2019: (12200, 7)


Unnamed: 0,Position,Track Name,Artist,Streams,date,region,spotify_id
0,1,Tusa,KAROL G,446086,2020-03-14,es,7k4t7uLgtOxPwTpFmtJNTY
1,2,La Difícil,Bad Bunny,399788,2020-03-14,es,6NfrH0ANGmgBXyxgV2PeXt
2,3,Tattoo,Rauw Alejandro,380503,2020-03-14,es,7na7Bk98usp84FaOJFPv3d
3,4,Diosa,Myke Towers,336910,2020-03-14,es,3JHpk0DOTOzyh0777JFAky
4,5,Rojo,J Balvin,327388,2020-03-14,es,4uziEsK1yiqdauKVDPsmVG


In [4]:
# Importamos con las columnas que nos interesan
data_top200_2020 = pd.read_csv("top_200_daily_2020.csv", parse_dates= ["date"], usecols=['Position','Track Name', 'Artist', 'date', 'Streams', 'date', 'spotify_id'])
data_top200_2019 = pd.read_csv("top_200_daily_2019.csv", parse_dates= ["date"], usecols=['Position','Track Name', 'Artist', 'date', 'Streams', 'date', 'spotify_id'])
#Meto el año en nueva columnas columnas
data_top200_2020['year'] = pd.DatetimeIndex(data_top200_2020['date']).year
data_top200_2019['year'] = pd.DatetimeIndex(data_top200_2019['date']).year

#Concateno los datos de los dos años
data_top200 = pd.concat([data_top200_2020, data_top200_2019], axis=0,)

print(data_top200.dtypes)
print(data_top200.nunique()) # Ojo, debería haber el mismo número de Track Names y spotify ID!!
print(data_top200.shape)

Position               int64
Track Name            object
Artist                object
Streams                int64
date          datetime64[ns]
spotify_id            object
year                   int64
dtype: object
Position        200
Track Name      738
Artist          297
Streams       20942
date            122
spotify_id      807
year              2
dtype: int64
(24400, 7)


In [5]:
data_top200.loc[data_top200['spotify_id'] == '6NfrH0ANGmgBXyxgV2PeXt']

Unnamed: 0,Position,Track Name,Artist,Streams,date,spotify_id,year
1,2,La Difícil,Bad Bunny,399788,2020-03-14,6NfrH0ANGmgBXyxgV2PeXt,2020
201,2,La Difícil,Bad Bunny,372705,2020-03-15,6NfrH0ANGmgBXyxgV2PeXt,2020
401,2,La Difícil,Bad Bunny,389384,2020-03-16,6NfrH0ANGmgBXyxgV2PeXt,2020
601,2,La Difícil,Bad Bunny,390067,2020-03-17,6NfrH0ANGmgBXyxgV2PeXt,2020
801,2,La Difícil,Bad Bunny,392738,2020-03-18,6NfrH0ANGmgBXyxgV2PeXt,2020
...,...,...,...,...,...,...,...
11217,18,La Difícil,Bad Bunny,165292,2020-05-09,6NfrH0ANGmgBXyxgV2PeXt,2020
11426,27,La Difícil,Bad Bunny,151313,2020-05-10,6NfrH0ANGmgBXyxgV2PeXt,2020
11628,29,La Difícil,Bad Bunny,166240,2020-05-11,6NfrH0ANGmgBXyxgV2PeXt,2020
11828,29,La Difícil,Bad Bunny,143671,2020-05-12,6NfrH0ANGmgBXyxgV2PeXt,2020


In [6]:
# Agrupar sumas y convertir en Dataframe
data_top200_sumastreams = data_top200.groupby('spotify_id')['Streams'].sum()
data_top200_sumastreams = pd.DataFrame({'spotify_id':data_top200_sumastreams.index, 'Streams':data_top200_sumastreams.values})
print(data_top200_sumastreams.shape)
data_top200_sumastreams.head()

(807, 2)


Unnamed: 0,spotify_id,Streams
0,00ZBADBKZGwnzGIAA6U9Fb,44107
1,017PF4Q3l4DBUiWoXk4OWT,2814658
2,047WmwIeerHyIUstFAEz5A,3060737
3,04wvWMRKKxK9TGG4IPk32d,90156
4,059bcIhyc2SBwm6sw2AZzd,6447442


Tengo 24400 filas en canciones, de las cuales hay 738 canciones únicas (Dataframe 1, si hago un groupby para que me sume los Streams de las mismas canciones se hace sin problema y tengo un Dataframe 2

Pero ahora no quiero perder las demás columnas haciendo groupby, pero es la forma fácil de hacer las sumas de Streams. Pero entonces quisiera añadir en nueva columna en el dataframe 1 el dato de Streams totales de dicha canción. Lo ideal sería hacer un bucle que mirara si el Track name es el mismo, entonces en una nueva columna añadiría ese dato para esa fila.

SOLUCIONADO CON UN MERGE!!!

In [31]:
#Después de muchos intentos, encontré una forma superfácil de hacerlo
data_top200_merged = pd.merge(data_top200, data_top200_sumastreams, on=['spotify_id'], how='inner')

data_top200_merged.rename(columns = {'Streams_x':'Streams', 'Streams_y':'Streamstotal'},  inplace = True)
print(data_top200_merged.shape)
data_top200_merged

# Comprobaciones
# data_top200_sumastreams[data_top200_sumastreams['spotify_id'] == '7k4t7uLgtOxPwTpFmtJNTY']
# data_top200[data_top200['spotify_id'] == '7k4t7uLgtOxPwTpFmtJNTY']

(24400, 8)


Unnamed: 0,Position,Track Name,Artist,Streams,date,spotify_id,year,Streamstotal
0,1,Tusa,KAROL G,446086,2020-03-14,7k4t7uLgtOxPwTpFmtJNTY,2020,18848892
1,1,Tusa,KAROL G,438199,2020-03-15,7k4t7uLgtOxPwTpFmtJNTY,2020,18848892
2,1,Tusa,KAROL G,474411,2020-03-16,7k4t7uLgtOxPwTpFmtJNTY,2020,18848892
3,1,Tusa,KAROL G,495344,2020-03-17,7k4t7uLgtOxPwTpFmtJNTY,2020,18848892
4,1,Tusa,KAROL G,489980,2020-03-18,7k4t7uLgtOxPwTpFmtJNTY,2020,18848892
...,...,...,...,...,...,...,...,...
24395,190,Cómo Te Atreves,Morat,28963,2019-05-13,7M6CFruBrM5x7u0lTMtm6r,2019,54967
24396,195,Someone You Loved,Lewis Capaldi,25786,2019-05-12,2TIlqbIneP0ZY1O0EzYLlc,2019,55077
24397,187,Someone You Loved,Lewis Capaldi,29291,2019-05-13,2TIlqbIneP0ZY1O0EzYLlc,2019,55077
24398,140,Hola Señorita,Maître Gims,36831,2019-05-13,5vLEmh5EolySKTvXsyWSOg,2019,36831


In [56]:
#Sacar lista de canciones únicas para luego sacar las Features
list_ids = data_top200_merged['spotify_id'].tolist()
len(list_ids)
list_ids[0]
len(list_ids)

24400

In [57]:
#Quitar duplicados 
list_ids = list(set(list_ids))
len(list_ids)


# test_list = random.sample(list_ids, 4)

test_list = list_ids[0:200]
len(list_ids)

807

# Data from API SPOTIFY



Proceso:

- Investigar cómo sacar features en serie con ID de canciones con API de Spotify
- Descargar features de las listas de top200
- Juntar Dataframes de features con el de top200 

In [10]:
# Instalaciones previas
#!pip install spotipy

In [60]:
#Preparación de Spotipy
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
import pandas as pd
import time 

passw = pd.read_csv("pass_spotify.txt", sep = ',', encoding="utf-8")
client_id = passw.columns[0]
client_secret = passw.columns[1]

client_credentials_manager = SpotifyClientCredentials(client_id, client_secret)
sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)

In [61]:
#Ver las features que hay de Tracks
track_ids = sp.user_playlist_tracks(playlist_id='1ebwiiN18Gmhfj6e24pT3X', fields='items(track(id))')
sp.audio_features(track_ids['items'][5]['track']['id'])

[{'danceability': 0.669,
  'energy': 0.829,
  'key': 1,
  'loudness': -3.801,
  'mode': 1,
  'speechiness': 0.49,
  'acousticness': 0.179,
  'instrumentalness': 0,
  'liveness': 0.241,
  'valence': 0.61,
  'tempo': 118.934,
  'type': 'audio_features',
  'id': '2eOuL8KesslTLQERQPu11D',
  'uri': 'spotify:track:2eOuL8KesslTLQERQPu11D',
  'track_href': 'https://api.spotify.com/v1/tracks/2eOuL8KesslTLQERQPu11D',
  'analysis_url': 'https://api.spotify.com/v1/audio-analysis/2eOuL8KesslTLQERQPu11D',
  'duration_ms': 253067,
  'time_signature': 4}]

In [44]:
'''#Ver las features que hay de Artists
track_ids = sp.user_playlist_tracks(playlist_id='1ebwiiN18Gmhfj6e24pT3X', fields='items(track(id))')
sp.audio_features(track_ids['items'][5]['track']['id'])
'''

"#Ver las features que hay de Artists\ntrack_ids = sp.user_playlist_tracks(playlist_id='1ebwiiN18Gmhfj6e24pT3X', fields='items(track(id))')\nsp.audio_features(track_ids['items'][5]['track']['id'])\n"

In [62]:
# Función para sacar las features de canciones que me interesan desde la id
def getTrackFeatures(id):
  meta = sp.track(id)
  features = sp.audio_features(id)

  # meta
  name = meta['name']
  album = meta['album']['name']
  artist = meta['album']['artists'][0]['name']
  release_date = meta['album']['release_date']
  length = meta['duration_ms']
  popularity = meta['popularity']

  # features
  acousticness = features[0]['acousticness']
  danceability = features[0]['danceability']
  energy = features[0]['energy']
  instrumentalness = features[0]['instrumentalness']
  liveness = features[0]['liveness']
  loudness = features[0]['loudness']
  speechiness = features[0]['speechiness']
  valence = features[0]['valence']
  tempo = features[0]['tempo']
  time_signature = features[0]['time_signature']
  id = features[0]['id']

  track = [name, album, artist, release_date, length, popularity,
           acousticness, danceability, energy, instrumentalness,
           liveness, loudness, speechiness, valence, tempo, time_signature, id]
  return track

In [46]:
'''# Loop para sacar features de todas las canciones de una lista
# OJO, tarda bastante dependendo del número de canciones, por lo que antes hago un testeo
import time
list_toextract = test_list

tracks = []

Time1 = datetime.now()


for i in range(len(list_toextract)):
    time.sleep(.5)
    track = getTrackFeatures(list_toextract[i])
    tracks.append(track)
    
Time2 = datetime.now()


Time1_v = datetime.now()

for i in range(len(list_toextract)):
    # time.sleep(.5)
    track = getTrackFeatures(list_toextract[i])
    tracks.append(track)
    
Time2_v = datetime.now()

print("Tiempo 1:", Time2 -Time1)
print("Tiempo 2:", Time2_v -Time1_v)
'''

'# Loop para sacar features de todas las canciones de una lista\n# OJO, tarda bastante dependendo del número de canciones, por lo que antes hago un testeo\nimport time\nlist_toextract = test_list\n\ntracks = []\n\nTime1 = datetime.now()\n\n\nfor i in range(len(list_toextract)):\n    time.sleep(.5)\n    track = getTrackFeatures(list_toextract[i])\n    tracks.append(track)\n    \nTime2 = datetime.now()\n\n\nTime1_v = datetime.now()\n\nfor i in range(len(list_toextract)):\n    # time.sleep(.5)\n    track = getTrackFeatures(list_toextract[i])\n    tracks.append(track)\n    \nTime2_v = datetime.now()\n\nprint("Tiempo 1:", Time2 -Time1)\nprint("Tiempo 2:", Time2_v -Time1_v)\n'

In [63]:
# Ejecuto el script, tarce bastante el proceso, 3 minutos
import time
list_toextract = test_list

tracks = []

Time1 = datetime.now()
for i in range(len(list_toextract)):
    track = getTrackFeatures(list_toextract[i])
    tracks.append(track)
Time2 = datetime.now()

print("Tiempo ejecución:", Time2 -Time1)


Tiempo ejecución: 0:00:33.449013


In [64]:
# Meterlo en Dataframe
data_final = pd.DataFrame(tracks, columns = ['name', 'album', 'artist', 'release_date',
                                             'length', 'popularity','acousticness', 'danceability', 'energy',
                                             'instrumentalness', 'liveness', 'loudness',
                                             'speechiness', 'valence','tempo', 'time_signature', 'id'])

data_final_selected = data_final [['album','release_date','length', 'popularity',
                                             'acousticness', 'danceability', 'energy',
                                             'instrumentalness', 'liveness', 'loudness',
                                             'speechiness', 'valence','tempo', 'time_signature', 'id']]

data_final_selected = data_final_selected.rename(columns = {'id':'spotify_id'})


In [66]:
data_final_selected.shape

(200, 15)

In [32]:
print(data_final_selected.shape)
data_final_selected.head(2)

(200, 15)


Unnamed: 0,album,release_date,length,popularity,acousticness,danceability,energy,instrumentalness,liveness,loudness,speechiness,valence,tempo,time_signature,spotify_id
0,3 (The Purple Album),2018-10-26,205440,65,0.172,0.538,0.409,0.0,0.11,-6.581,0.0505,0.265,171.838,4,3id2EVGKc3eFAMn9nFnB0r
1,Colores,2020-03-19,157733,86,0.013,0.641,0.857,0.00534,0.0695,-5.725,0.301,0.961,122.728,5,6zEgnpM0qYmHLDnh8WPejL


# Unir Dataframes en uno

In [67]:
data_features = pd.merge(data_top200_merged, data_final_selected, on=['spotify_id'], how='inner')
print(data_features.shape)


# Exportar a CSV
data_features.to_csv("data_features.csv", sep = ',')



(6152, 22)


In [70]:
print(data_features.shape)
data_features.head(20)

(6152, 22)


Unnamed: 0,Position,Track Name,Artist,Streams,date,spotify_id,year,Streamstotal,album,release_date,...,acousticness,danceability,energy,instrumentalness,liveness,loudness,speechiness,valence,tempo,time_signature
0,2,La Difícil,Bad Bunny,399788,2020-03-14,6NfrH0ANGmgBXyxgV2PeXt,2020,14996733,YHLQMDLG,2020-02-28,...,0.0861,0.685,0.848,7e-06,0.0783,-4.561,0.0858,0.761,179.87,4
1,2,La Difícil,Bad Bunny,372705,2020-03-15,6NfrH0ANGmgBXyxgV2PeXt,2020,14996733,YHLQMDLG,2020-02-28,...,0.0861,0.685,0.848,7e-06,0.0783,-4.561,0.0858,0.761,179.87,4
2,2,La Difícil,Bad Bunny,389384,2020-03-16,6NfrH0ANGmgBXyxgV2PeXt,2020,14996733,YHLQMDLG,2020-02-28,...,0.0861,0.685,0.848,7e-06,0.0783,-4.561,0.0858,0.761,179.87,4
3,2,La Difícil,Bad Bunny,390067,2020-03-17,6NfrH0ANGmgBXyxgV2PeXt,2020,14996733,YHLQMDLG,2020-02-28,...,0.0861,0.685,0.848,7e-06,0.0783,-4.561,0.0858,0.761,179.87,4
4,2,La Difícil,Bad Bunny,392738,2020-03-18,6NfrH0ANGmgBXyxgV2PeXt,2020,14996733,YHLQMDLG,2020-02-28,...,0.0861,0.685,0.848,7e-06,0.0783,-4.561,0.0858,0.761,179.87,4
5,2,La Difícil,Bad Bunny,376102,2020-03-19,6NfrH0ANGmgBXyxgV2PeXt,2020,14996733,YHLQMDLG,2020-02-28,...,0.0861,0.685,0.848,7e-06,0.0783,-4.561,0.0858,0.761,179.87,4
6,7,La Difícil,Bad Bunny,369416,2020-03-20,6NfrH0ANGmgBXyxgV2PeXt,2020,14996733,YHLQMDLG,2020-02-28,...,0.0861,0.685,0.848,7e-06,0.0783,-4.561,0.0858,0.761,179.87,4
7,5,La Difícil,Bad Bunny,348775,2020-03-21,6NfrH0ANGmgBXyxgV2PeXt,2020,14996733,YHLQMDLG,2020-02-28,...,0.0861,0.685,0.848,7e-06,0.0783,-4.561,0.0858,0.761,179.87,4
8,5,La Difícil,Bad Bunny,318594,2020-03-22,6NfrH0ANGmgBXyxgV2PeXt,2020,14996733,YHLQMDLG,2020-02-28,...,0.0861,0.685,0.848,7e-06,0.0783,-4.561,0.0858,0.761,179.87,4
9,5,La Difícil,Bad Bunny,345791,2020-03-23,6NfrH0ANGmgBXyxgV2PeXt,2020,14996733,YHLQMDLG,2020-02-28,...,0.0861,0.685,0.848,7e-06,0.0783,-4.561,0.0858,0.761,179.87,4


In [36]:
artist_unique = data_features.groupby('Artist')['spotify_id'].nunique()
artist_unique.shape
artist_unique

Artist
6ix9ine           1
Abraham Mateo     1
Aitana            3
Alejandro Sanz    3
Alex Rose         1
                 ..
The Weeknd        2
Trevor Daniel     1
Vanesa Martín     2
Wisin & Yandel    1
Zion & Lennox     1
Name: spotify_id, Length: 124, dtype: int64

# Extracción del género canciones con algoritmo clasificación

Proceso:
 - Investigar sobre donde sacar la info de géneros
 - Encontrar dataset con género de canciones
 - Crear varios algoritmos de clasificación y probar (random forest? SVM?)
 - Clasificar todas las canciones y añadir columna al dataset final.



Podría hacer un algoritmo de clasificación por géneros pero se descarta porque su accuracy es muy bajo según he visto en otros análisis.

Most listened genres: Pop, Rock, Oldies, Hip-hop/Rap, Dance/Electronic, Indie/Alternative, K-Pop, Metal, R&B, Classical
https://celebrityaccess.com/2019/10/05/pop-is-still-the-1-music-genre-worldwide/


Most popular genres:
1	☊	pop
2	☊	dance pop
3	☊	rap
4	☊	pop rap
5	☊	rock
6	☊	post-teen pop
7	☊	latin
8	☊	hip hop
9	☊	trap
10	☊	edm
11	☊	modern rock
12	☊	tropical house
13	☊	pop rock
14	☊	melodic rap
15	☊	reggaeton
16	☊	latin pop
17	☊	classic rock
18	☊	electropop
19	☊	mellow gold
20	☊	album rock
http://everynoise.com/everynoise1d.cgi?vector=popularity&scope=all

## Cómo extraer los genres de spotify directamente

- Sacar el id de artista a través de su Nombre
- Con su id de artista sacar los genres.
- Unir con el dataframe grande.

In [95]:
# Este script coge los artistas con un determinado genre, es el original
API_LIMIT = 50

number_of_tracks = 2000
genre = 'jazz'

search_runs = int(number_of_tracks / API_LIMIT)

search_list = []
for i in range(search_runs):
    print("Call #{} for tracks".format(i+1))
    search_results = sp.search('genre:"{}"'.format(genre), type="track",  limit=API_LIMIT, offset=API_LIMIT*i)

    search_list += [[t["id"], t["name"], t["artists"][0]["id"], t["artists"][0]["name"],
                            t["album"]["name"], t["popularity"]]
                           for t in search_results['tracks']['items']]

df_search = pd.DataFrame(search_list, 
                         columns=["id", "song_name", "artist_id", "artist_name", "album_name", "popularity"])
df_search["popularity_norm"] = df_search["popularity"] / 100.
df_search.head()

Call #1 for tracks


KeyError: 'tracks'

In [135]:
# Mi prueba que al final sale bien!!!!! Peeeeero, cómo hago para que en vez de uno, pase una lista de canciones??
API_LIMIT = 50

number_of_tracks = 2000
artist = ['Earth, Wind & Fire']

search_runs = int(number_of_tracks / API_LIMIT)

search_list = []
for i in range(search_runs):
    print("Call #{} for tracks".format(i+1))
    search_results = sp.search(q = 'artist:"{}"'.format(artist), type="artist",  limit=API_LIMIT, offset=API_LIMIT*i)

    search_list += [[t["id"], t["name"], t["genres"], t['followers']['total'], t['popularity']] for t in search_results['artists']['items']]

df_search = pd.DataFrame(search_list, columns=["id", "name", "genres", 'followers', 'popularity'])
df_search.head()


Call #1 for tracks
Call #2 for tracks
Call #3 for tracks
Call #4 for tracks
Call #5 for tracks
Call #6 for tracks
Call #7 for tracks
Call #8 for tracks
Call #9 for tracks
Call #10 for tracks
Call #11 for tracks
Call #12 for tracks
Call #13 for tracks
Call #14 for tracks
Call #15 for tracks
Call #16 for tracks
Call #17 for tracks
Call #18 for tracks
Call #19 for tracks
Call #20 for tracks
Call #21 for tracks
Call #22 for tracks
Call #23 for tracks
Call #24 for tracks
Call #25 for tracks
Call #26 for tracks
Call #27 for tracks
Call #28 for tracks
Call #29 for tracks
Call #30 for tracks
Call #31 for tracks
Call #32 for tracks
Call #33 for tracks
Call #34 for tracks
Call #35 for tracks
Call #36 for tracks
Call #37 for tracks
Call #38 for tracks
Call #39 for tracks
Call #40 for tracks


Unnamed: 0,id,name,genres,followers,popularity
0,4QQgXkCYTt3BlENzhyNETg,"Earth, Wind & Fire","[disco, funk, jazz funk, motown, quiet storm, ...",2446678,75
1,226jrsA555BFtGpZOYYcb9,Earth Wind & Fire Experience,[],794,16
2,02u5ByuYu2deBv3M54kai9,"Earth, Wind, And Fire",[],24,9
3,5KnTcsGDDJKlMNx4BZROur,"Earth, Wind & Fire, Chicago",[],231,0
4,3P4OTZWCoJVpxCSfj5pOlB,"""Karaoke - Earth, Wind & Fire""",[],15,0


In [130]:
search_results = sp.search('artist:"{}"'.format(artist), type="artist",  limit=API_LIMIT, offset=API_LIMIT*i)
search_results

{'artists': {'href': 'https://api.spotify.com/v1/search?query=artist%3A%22Earth%2C+Wind+%26+Fire%22&type=artist&offset=1950&limit=50',
  'items': [],
  'limit': 50,
  'next': None,
  'offset': 1950,
  'previous': 'https://api.spotify.com/v1/search?query=artist%3A%22Earth%2C+Wind+%26+Fire%22&type=artist&offset=1900&limit=50',
  'total': 6}}

In [131]:
# Para qué info nos da de artista:
artist = 'Earth, Wind & Fire'

search_results = sp.search(q = 'artist:' + 'Earth, Wind & Fire', type="artist")
items = search_results['artists']['items']
if len(items) > 0:
    artist = items[0]
    print(artist['name'], artist['images'][0]['url'])


# search_list += [[t["id"], t["name"], t["artists"][0]["id"], t["artists"][0]["name"], t["album"]["name"], t["popularity"]] for t in search_results['tracks']['items']]
# search_list


#results = spotify.search(q='artist:' + name, type='artist')
search_results



Earth, Wind & Fire https://i.scdn.co/image/9fd0a9822140cce668ee15263e1f73730152dff0


{'artists': {'href': 'https://api.spotify.com/v1/search?query=artist%3AEarth%2C+Wind+%26+Fire&type=artist&offset=0&limit=10',
  'items': [{'external_urls': {'spotify': 'https://open.spotify.com/artist/4QQgXkCYTt3BlENzhyNETg'},
    'followers': {'href': None, 'total': 2446678},
    'genres': ['disco', 'funk', 'jazz funk', 'motown', 'quiet storm', 'soul'],
    'href': 'https://api.spotify.com/v1/artists/4QQgXkCYTt3BlENzhyNETg',
    'id': '4QQgXkCYTt3BlENzhyNETg',
    'images': [{'height': 678,
      'url': 'https://i.scdn.co/image/9fd0a9822140cce668ee15263e1f73730152dff0',
      'width': 999},
     {'height': 434,
      'url': 'https://i.scdn.co/image/82b1b00bee789f4fefb07756f4d54c411a899e76',
      'width': 640},
     {'height': 136,
      'url': 'https://i.scdn.co/image/cde8bbe95db0ede933e3b081c3bc359898f5ce9e',
      'width': 200},
     {'height': 43,
      'url': 'https://i.scdn.co/image/e8cfe44ad187da7e0d4a9699e02039b0dcf6aa66',
      'width': 63}],
    'name': 'Earth, Wind & Fire',

In [57]:
'''
def _get_features_df(sp, track_ids):
    """
    This is an helper method to get track's features with pagination from track ids.
    It returns a Pandas dataframe
    """

    feature_list = []
    i = 0
    while track_ids:
        print("Call #{} for audio features".format(i + 1))
        features_results = sp.audio_features(track_ids[:API_LIMIT])

        feature_list += features_results
        
        track_ids = track_ids[API_LIMIT:]
        i += 1
    return feature_list

_get_features_df(sp, '7k4t7uLgtOxPwTpFmtJNTY')
'''

Call #1 for audio features


[{'danceability': 0.803,
  'energy': 0.715,
  'key': 2,
  'loudness': -3.28,
  'mode': 1,
  'speechiness': 0.298,
  'acousticness': 0.295,
  'instrumentalness': 0.000134,
  'liveness': 0.0574,
  'valence': 0.574,
  'tempo': 101.085,
  'type': 'audio_features',
  'id': '7k4t7uLgtOxPwTpFmtJNTY',
  'uri': 'spotify:track:7k4t7uLgtOxPwTpFmtJNTY',
  'track_href': 'https://api.spotify.com/v1/tracks/7k4t7uLgtOxPwTpFmtJNTY',
  'analysis_url': 'https://api.spotify.com/v1/audio-analysis/7k4t7uLgtOxPwTpFmtJNTY',
  'duration_ms': 200960,
  'time_signature': 4}]

In [77]:
artists_results = sp.artists(artist_ids[:API_LIMIT])
artists_results.keys()
# Para entrar en el diccionario y dentro hay una lista y dentro otro diccionario
artists_results['artists'][0]['genres']



NameError: name 'artist_ids' is not defined

In [78]:
#Pra obtener los artists id
def _get_artists_df(sp, artist_ids):
    """
    This is an helper method to get artist's information with pagination from artist ids.
    It returns a Pandas dataframe
    """

    artist_list = []
    i = 0

    while artist_ids:
        print("Call #{} for artists".format(i + 1))
        artists_results = sp.artists(artist_ids[:API_LIMIT])

        artist_list += [[t["id"], t["genres"], t["popularity"]] for t in artists_results["artists"]]

        artist_ids = artist_ids[API_LIMIT:]
        i += 1

    df_artists = pd.DataFrame(artist_list, columns=["artist_id", "artist_genres", "artist_popularity"])

    df_artists["artist_popularity_norm"] = df_artists["artist_popularity"] / 100.

    return df_artists

# https://github.com/jvichare/rshiny-music-visualization/blob/master/Spotify%20Song%20Query.ipynb

In [79]:
artist_ids = df_search["artist_id"].unique().tolist()
df_artists = _get_artists_df(sp, artist_ids)
df_artists.head(20)

Call #1 for artists
Call #2 for artists
Call #3 for artists
Call #4 for artists
Call #5 for artists
Call #6 for artists
Call #7 for artists
Call #8 for artists
Call #9 for artists
Call #10 for artists
Call #11 for artists
Call #12 for artists
Call #13 for artists
Call #14 for artists


Unnamed: 0,artist_id,artist_genres,artist_popularity,artist_popularity_norm
0,4QQgXkCYTt3BlENzhyNETg,"[disco, funk, jazz funk, motown, quiet storm, ...",75,0.75
1,0iOVhN3tnSvgDbcg25JoJb,"[adult standards, jazz blues, soul, soul blues...",68,0.68
2,19eLuQmk9aCobbVDHc6eek,"[adult standards, dixieland, harlem renaissanc...",72,0.72
3,5V0MlUE1Bft0mbLlND7FJz,"[adult standards, jazz blues, swing, vocal jazz]",71,0.71
4,7G1GBhoKtEPnP86X2PvEYO,"[jazz blues, soul, soul jazz, torch song, voca...",72,0.72
5,2Kx7MNY7cI1ENniW7vT30N,"[adult standards, contemporary vocal jazz, neo...",75,0.75
6,09hVIj6vWgoCDtT03h8ZCa,"[alternative hip hop, conscious hip hop, east ...",70,0.7
7,05YVYeV4HxYp5rrWalvuE1,"[jazz funk, smooth jazz]",59,0.59
8,5zaXYwewAXedKNCff45U5l,"[adult standards, brill building pop, british ...",66,0.66
9,7yk35uHNQclPXFGFoTU44w,"[alternative hip hop, hardcore hip hop, hip ho...",62,0.62


In [72]:
artist_ids

NameError: name 'artist_ids' is not defined