In [1]:
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
import spotipy.util as util
import json
import string
import pandas as pd 
import numpy as np
import ast
pd.set_option('display.max_columns', None)

In [2]:
# Leyendo credenciales de Spotify web API del fichero settings.env

with open('settings.env') as f:
    env_vars = json.loads(f.read())

### Conexion Spotify

In [3]:
spotify = spotipy.Spotify(client_credentials_manager=SpotifyClientCredentials(client_id=env_vars['cid'], 
                                                                              client_secret=env_vars['secret'], requests_timeout = 100))
print(spotify.requests_timeout)

5


### Lectura IDs de Canciones Top e IDs Random

In [310]:
df_top_tracks_id = pd.read_csv( '2000-2022_top_tracks_id.csv', index_col=0)

df_random_tracks_id = pd.read_csv('2000-2022_random_tracks_id.csv', index_col=0)

In [311]:
list_tracks = pd.concat([df_top_tracks_id,df_random_tracks_id], ignore_index=True)['track_id']

In [312]:
list_tracks.size

6277

In [313]:
list_tracks.drop_duplicates(inplace=True)
list_tracks.size

6277

### Extrayendo Informacion de Tracks

Obteniendo información del catálogo de Spotify para una sola pista identificada por su ID de Spotify único.

https://developer.spotify.com/documentation/web-api/reference/#/operations/get-track

In [7]:
def extract_tracks_info(list_tracks):  
    df_tracks_info = pd.DataFrame()  
    for track in list_tracks:
        track = spotify.track(track)
        df_track = pd.json_normalize(track)
        df_tracks_info = pd.concat([df_tracks_info, df_track], ignore_index=True)
    return df_tracks_info

In [314]:
#parto el listado para no tener problemas de limintacion con la API de Spotify

list_track_part_1 = list_tracks[1:1001]
list_track_part_2 = list_tracks[1001:2001]
list_track_part_3 = list_tracks[2001:3001]
list_track_part_4 = list_tracks[3001:4001]
list_track_part_5 = list_tracks[4001:5001]
list_track_part_6 = list_tracks[5001:6278]

In [20]:
df_tracks_info_part1 = extract_tracks_info(list_track_part_1)

In [21]:
df_tracks_info_part2 = extract_tracks_info(list_track_part_2)

In [22]:
df_tracks_info_part3 = extract_tracks_info(list_track_part_3)

In [23]:
df_tracks_info_part4 = extract_tracks_info(list_track_part_4)

In [24]:
df_tracks_info_part5 = extract_tracks_info(list_track_part_5)

In [25]:
df_tracks_info_part6 = extract_tracks_info(list_track_part_6)

In [26]:
(df_tracks_info_part1.shape, df_tracks_info_part2.shape, df_tracks_info_part3.shape, df_tracks_info_part4.shape, df_tracks_info_part5.shape, df_tracks_info_part6.shape)

((1000, 29), (1000, 29), (1000, 29), (1000, 29), (1000, 29), (1276, 29))

In [31]:
# concateno daframes obtenidos con informacion de canciones

df_tracks_info = pd.DataFrame()  
df_tracks_info = pd.concat([df_tracks_info_part1, df_tracks_info_part2], ignore_index=True)
df_tracks_info = pd.concat([df_tracks_info, df_tracks_info_part3], ignore_index=True)
df_tracks_info = pd.concat([df_tracks_info, df_tracks_info_part4], ignore_index=True)
df_tracks_info = pd.concat([df_tracks_info, df_tracks_info_part5], ignore_index=True)
df_tracks_info = pd.concat([df_tracks_info, df_tracks_info_part6], ignore_index=True)

df_tracks_info.shape

(6276, 29)

In [65]:
df_tracks_info.head()

Unnamed: 0,artists,available_markets,disc_number,duration_ms,explicit,href,id,is_local,name,popularity,preview_url,track_number,type,uri,album.album_type,album.artists,album.available_markets,album.external_urls.spotify,album.href,album.id,album.images,album.name,album.release_date,album.release_date_precision,album.total_tracks,album.type,album.uri,external_ids.isrc,external_urls.spotify,id_artist_1,name_artist_1,id_artist_2,name_artist_2
0,[{'external_urls': {'spotify': 'https://open.s...,"[AD, AE, AG, AL, AM, AO, AR, AT, AU, AZ, BA, B...",1,163636,True,https://api.spotify.com/v1/tracks/696DnlkuDOXc...,696DnlkuDOXcMAnKlTgXXK,False,ROXANNE,75,https://p.scdn.co/mp3-preview/fb038785189bb627...,1,track,spotify:track:696DnlkuDOXcMAnKlTgXXK,single,[{'external_urls': {'spotify': 'https://open.s...,"[AD, AE, AG, AL, AM, AO, AR, AT, AU, AZ, BA, B...",https://open.spotify.com/album/6HJDrXs0hpebaRF...,https://api.spotify.com/v1/albums/6HJDrXs0hpeb...,6HJDrXs0hpebaRFKA1sF90,"[{'height': 640, 'url': 'https://i.scdn.co/ima...",ROXANNE,2019-10-10,day,1,album,spotify:album:6HJDrXs0hpebaRFKA1sF90,USSM11914320,https://open.spotify.com/track/696DnlkuDOXcMAn...,0vRvGUQVUjytro0xpb26bs,Arizona Zervas,,
1,[{'external_urls': {'spotify': 'https://open.s...,"[AD, AE, AG, AL, AM, AO, AR, AT, AU, AZ, BA, B...",1,200960,False,https://api.spotify.com/v1/tracks/7k4t7uLgtOxP...,7k4t7uLgtOxPwTpFmtJNTY,False,Tusa,74,,1,track,spotify:track:7k4t7uLgtOxPwTpFmtJNTY,single,[{'external_urls': {'spotify': 'https://open.s...,"[AD, AE, AG, AL, AM, AO, AR, AT, AU, AZ, BA, B...",https://open.spotify.com/album/7mKevNHhVnZER3B...,https://api.spotify.com/v1/albums/7mKevNHhVnZE...,7mKevNHhVnZER3BLgI8O4F,"[{'height': 640, 'url': 'https://i.scdn.co/ima...",Tusa,2019-11-07,day,1,album,spotify:album:7mKevNHhVnZER3BLgI8O4F,USUM71921183,https://open.spotify.com/track/7k4t7uLgtOxPwTp...,790FomKkXshlbRYZFtlgla,KAROL G,0hCNtLu0JehylgoiP8L4Gh,Nicki Minaj
2,[{'external_urls': {'spotify': 'https://open.s...,[KR],1,189486,False,https://api.spotify.com/v1/tracks/2b8fOow8UzyD...,2b8fOow8UzyDFAE27YhOZM,False,Memories,28,,1,track,spotify:track:2b8fOow8UzyDFAE27YhOZM,single,[{'external_urls': {'spotify': 'https://open.s...,[KR],https://open.spotify.com/album/3nR9B40hYLKLcR0...,https://api.spotify.com/v1/albums/3nR9B40hYLKL...,3nR9B40hYLKLcR0Eph3Goc,"[{'height': 640, 'url': 'https://i.scdn.co/ima...",Memories,2019-09-20,day,1,album,spotify:album:3nR9B40hYLKLcR0Eph3Goc,USUM71913350,https://open.spotify.com/track/2b8fOow8UzyDFAE...,04gDigrS5kc9YWfZHwBETP,Maroon 5,,
3,[{'external_urls': {'spotify': 'https://open.s...,[],1,159381,False,https://api.spotify.com/v1/tracks/4TnjEaWOeW0e...,4TnjEaWOeW0eKTKIEvJyCa,False,Falling,3,,1,track,spotify:track:4TnjEaWOeW0eKTKIEvJyCa,single,[{'external_urls': {'spotify': 'https://open.s...,[],https://open.spotify.com/album/1Czfd5tEby3DbdY...,https://api.spotify.com/v1/albums/1Czfd5tEby3D...,1Czfd5tEby3DbdYNdqzrCa,"[{'height': 640, 'url': 'https://i.scdn.co/ima...",Falling,2018-10-05,day,1,album,spotify:album:1Czfd5tEby3DbdYNdqzrCa,USUYG1221109,https://open.spotify.com/track/4TnjEaWOeW0eKTK...,7uaIm6Pw7xplS8Dy06V6pT,Trevor Daniel,,
4,[{'external_urls': {'spotify': 'https://open.s...,"[AD, AE, AG, AL, AM, AO, AR, AT, AU, AZ, BA, B...",1,215280,False,https://api.spotify.com/v1/tracks/21jGcNKet2qw...,21jGcNKet2qwijlDFuPiPb,False,Circles,83,,6,track,spotify:track:21jGcNKet2qwijlDFuPiPb,album,[{'external_urls': {'spotify': 'https://open.s...,"[AD, AE, AG, AL, AM, AO, AR, AT, AU, AZ, BA, B...",https://open.spotify.com/album/4g1ZRSobMefqF6n...,https://api.spotify.com/v1/albums/4g1ZRSobMefq...,4g1ZRSobMefqF6nelkgibi,"[{'height': 640, 'url': 'https://i.scdn.co/ima...",Hollywood's Bleeding,2019-09-06,day,17,album,spotify:album:4g1ZRSobMefqF6nelkgibi,USUM71915699,https://open.spotify.com/track/21jGcNKet2qwijl...,246dkjvS1zLTtiykXe5h60,Post Malone,,


In [4]:
# Para no perder la informacion extraida de las canciones de la API, añado paso intermedio de escritura a un fichero temporal

#df_tracks_info.to_csv('2000-2022_all_tracks_info.csv', encoding='utf-8', index=False)

#### Extrayendo Informacion de Artista

Obteniendo información del catálogo de Spotify para un solo artista identificado por su ID de Spotify único.

https://developer.spotify.com/documentation/web-api/reference/#/operations/get-an-artist

Obtener de la columna "artists" los IDs de los artistas de cada cancion

In [7]:
# lectura de fichero intermedio con extraccion de canciones

# df_tracks_info = pd.read_csv("2000-2022_all_tracks_info.csv", encoding='utf-8')

In [11]:
df_tracks_info.head()

Unnamed: 0,artists,available_markets,disc_number,duration_ms,explicit,href,id,is_local,name,popularity,preview_url,track_number,type,uri,album.album_type,album.artists,album.available_markets,album.external_urls.spotify,album.href,album.id,album.images,album.name,album.release_date,album.release_date_precision,album.total_tracks,album.type,album.uri,external_ids.isrc,external_urls.spotify
0,[{'external_urls': {'spotify': 'https://open.s...,"['AD', 'AE', 'AG', 'AL', 'AM', 'AO', 'AR', 'AT...",1,163636,True,https://api.spotify.com/v1/tracks/696DnlkuDOXc...,696DnlkuDOXcMAnKlTgXXK,False,ROXANNE,75,https://p.scdn.co/mp3-preview/fb038785189bb627...,1,track,spotify:track:696DnlkuDOXcMAnKlTgXXK,single,[{'external_urls': {'spotify': 'https://open.s...,"['AD', 'AE', 'AG', 'AL', 'AM', 'AO', 'AR', 'AT...",https://open.spotify.com/album/6HJDrXs0hpebaRF...,https://api.spotify.com/v1/albums/6HJDrXs0hpeb...,6HJDrXs0hpebaRFKA1sF90,"[{'height': 640, 'url': 'https://i.scdn.co/ima...",ROXANNE,2019-10-10,day,1,album,spotify:album:6HJDrXs0hpebaRFKA1sF90,USSM11914320,https://open.spotify.com/track/696DnlkuDOXcMAn...
1,[{'external_urls': {'spotify': 'https://open.s...,"['AD', 'AE', 'AG', 'AL', 'AM', 'AO', 'AR', 'AT...",1,200960,False,https://api.spotify.com/v1/tracks/7k4t7uLgtOxP...,7k4t7uLgtOxPwTpFmtJNTY,False,Tusa,74,,1,track,spotify:track:7k4t7uLgtOxPwTpFmtJNTY,single,[{'external_urls': {'spotify': 'https://open.s...,"['AD', 'AE', 'AG', 'AL', 'AM', 'AO', 'AR', 'AT...",https://open.spotify.com/album/7mKevNHhVnZER3B...,https://api.spotify.com/v1/albums/7mKevNHhVnZE...,7mKevNHhVnZER3BLgI8O4F,"[{'height': 640, 'url': 'https://i.scdn.co/ima...",Tusa,2019-11-07,day,1,album,spotify:album:7mKevNHhVnZER3BLgI8O4F,USUM71921183,https://open.spotify.com/track/7k4t7uLgtOxPwTp...
2,[{'external_urls': {'spotify': 'https://open.s...,['KR'],1,189486,False,https://api.spotify.com/v1/tracks/2b8fOow8UzyD...,2b8fOow8UzyDFAE27YhOZM,False,Memories,28,,1,track,spotify:track:2b8fOow8UzyDFAE27YhOZM,single,[{'external_urls': {'spotify': 'https://open.s...,['KR'],https://open.spotify.com/album/3nR9B40hYLKLcR0...,https://api.spotify.com/v1/albums/3nR9B40hYLKL...,3nR9B40hYLKLcR0Eph3Goc,"[{'height': 640, 'url': 'https://i.scdn.co/ima...",Memories,2019-09-20,day,1,album,spotify:album:3nR9B40hYLKLcR0Eph3Goc,USUM71913350,https://open.spotify.com/track/2b8fOow8UzyDFAE...
3,[{'external_urls': {'spotify': 'https://open.s...,[],1,159381,False,https://api.spotify.com/v1/tracks/4TnjEaWOeW0e...,4TnjEaWOeW0eKTKIEvJyCa,False,Falling,3,,1,track,spotify:track:4TnjEaWOeW0eKTKIEvJyCa,single,[{'external_urls': {'spotify': 'https://open.s...,[],https://open.spotify.com/album/1Czfd5tEby3DbdY...,https://api.spotify.com/v1/albums/1Czfd5tEby3D...,1Czfd5tEby3DbdYNdqzrCa,"[{'height': 640, 'url': 'https://i.scdn.co/ima...",Falling,2018-10-05,day,1,album,spotify:album:1Czfd5tEby3DbdYNdqzrCa,USUYG1221109,https://open.spotify.com/track/4TnjEaWOeW0eKTK...
4,[{'external_urls': {'spotify': 'https://open.s...,"['AD', 'AE', 'AG', 'AL', 'AM', 'AO', 'AR', 'AT...",1,215280,False,https://api.spotify.com/v1/tracks/21jGcNKet2qw...,21jGcNKet2qwijlDFuPiPb,False,Circles,83,,6,track,spotify:track:21jGcNKet2qwijlDFuPiPb,album,[{'external_urls': {'spotify': 'https://open.s...,"['AD', 'AE', 'AG', 'AL', 'AM', 'AO', 'AR', 'AT...",https://open.spotify.com/album/4g1ZRSobMefqF6n...,https://api.spotify.com/v1/albums/4g1ZRSobMefq...,4g1ZRSobMefqF6nelkgibi,"[{'height': 640, 'url': 'https://i.scdn.co/ima...",Hollywood's Bleeding,2019-09-06,day,17,album,spotify:album:4g1ZRSobMefqF6nelkgibi,USUM71915699,https://open.spotify.com/track/21jGcNKet2qwijl...


In [12]:
#df_artists = pd.json_normalize(df_tracks_info['artists'])
df_artists = df_tracks_info['artists']

In [13]:
# Obteniendo artista 1

df_artist_1 = pd.DataFrame()

for i in range(0, len(df_artists)):
    resultado = ast.literal_eval(df_artists[i])
    df_artist_1 = pd.concat([df_artist_1, pd.json_normalize(resultado[0])], ignore_index=True)

In [14]:
df_artist_1.drop(['href', 'type', 'uri', 'external_urls.spotify'], axis=1, inplace=True)
df_artist_1.columns=['id_artist', 'name_artist']

In [15]:
# Obteniendo artista 2, en este caso se pueden obtener valores nulos

empty_row = {
 'href': np.nan,
 'id': np.nan,
 'name': np.nan,
 'type': np.nan,
 'uri': np.nan,
 'external_urls.spotify': np.nan}

df_artist_2 = pd.DataFrame()

for i in range(0,len(df_artists)):
   resultado = ast.literal_eval(df_artists[i])
   if len(resultado) == 2:
      df_artist_2 = pd.concat([df_artist_2, pd.json_normalize(resultado[1])], ignore_index=True)
   else:
      df_artist_2 = pd.concat([df_artist_2, pd.json_normalize(empty_row)], ignore_index=True )

In [16]:
df_artist_2.drop(['href', 'type', 'uri', 'external_urls.spotify'], axis=1, inplace=True)
df_artist_2.columns=['id_artist', 'name_artist']

In [17]:
# Comprobando longitud de Dataframes de artista 1 y 2

(df_artist_1.shape, df_artist_2.shape)

((6276, 2), (6276, 2))

In [18]:
# Concateno artista 1 y 2 horizontalmente 

df_artists_1_2 = pd.concat([df_artist_1, df_artist_2], axis=1)
df_artists_1_2.columns = ['id_artist_1', 'name_artist_1', 'id_artist_2', 'name_artist_2']
df_artists_1_2.head()

Unnamed: 0,id_artist_1,name_artist_1,id_artist_2,name_artist_2
0,0vRvGUQVUjytro0xpb26bs,Arizona Zervas,,
1,790FomKkXshlbRYZFtlgla,KAROL G,0hCNtLu0JehylgoiP8L4Gh,Nicki Minaj
2,04gDigrS5kc9YWfZHwBETP,Maroon 5,,
3,7uaIm6Pw7xplS8Dy06V6pT,Trevor Daniel,,
4,246dkjvS1zLTtiykXe5h60,Post Malone,,


In [19]:
# Concateno columnas de artista 1 y 2 a Dataframe de tracks

df_tracks_info = pd.concat([df_tracks_info, df_artists_1_2], axis=1)
df_tracks_info.head()

Unnamed: 0,artists,available_markets,disc_number,duration_ms,explicit,href,id,is_local,name,popularity,preview_url,track_number,type,uri,album.album_type,album.artists,album.available_markets,album.external_urls.spotify,album.href,album.id,album.images,album.name,album.release_date,album.release_date_precision,album.total_tracks,album.type,album.uri,external_ids.isrc,external_urls.spotify,id_artist_1,name_artist_1,id_artist_2,name_artist_2
0,[{'external_urls': {'spotify': 'https://open.s...,"['AD', 'AE', 'AG', 'AL', 'AM', 'AO', 'AR', 'AT...",1,163636,True,https://api.spotify.com/v1/tracks/696DnlkuDOXc...,696DnlkuDOXcMAnKlTgXXK,False,ROXANNE,75,https://p.scdn.co/mp3-preview/fb038785189bb627...,1,track,spotify:track:696DnlkuDOXcMAnKlTgXXK,single,[{'external_urls': {'spotify': 'https://open.s...,"['AD', 'AE', 'AG', 'AL', 'AM', 'AO', 'AR', 'AT...",https://open.spotify.com/album/6HJDrXs0hpebaRF...,https://api.spotify.com/v1/albums/6HJDrXs0hpeb...,6HJDrXs0hpebaRFKA1sF90,"[{'height': 640, 'url': 'https://i.scdn.co/ima...",ROXANNE,2019-10-10,day,1,album,spotify:album:6HJDrXs0hpebaRFKA1sF90,USSM11914320,https://open.spotify.com/track/696DnlkuDOXcMAn...,0vRvGUQVUjytro0xpb26bs,Arizona Zervas,,
1,[{'external_urls': {'spotify': 'https://open.s...,"['AD', 'AE', 'AG', 'AL', 'AM', 'AO', 'AR', 'AT...",1,200960,False,https://api.spotify.com/v1/tracks/7k4t7uLgtOxP...,7k4t7uLgtOxPwTpFmtJNTY,False,Tusa,74,,1,track,spotify:track:7k4t7uLgtOxPwTpFmtJNTY,single,[{'external_urls': {'spotify': 'https://open.s...,"['AD', 'AE', 'AG', 'AL', 'AM', 'AO', 'AR', 'AT...",https://open.spotify.com/album/7mKevNHhVnZER3B...,https://api.spotify.com/v1/albums/7mKevNHhVnZE...,7mKevNHhVnZER3BLgI8O4F,"[{'height': 640, 'url': 'https://i.scdn.co/ima...",Tusa,2019-11-07,day,1,album,spotify:album:7mKevNHhVnZER3BLgI8O4F,USUM71921183,https://open.spotify.com/track/7k4t7uLgtOxPwTp...,790FomKkXshlbRYZFtlgla,KAROL G,0hCNtLu0JehylgoiP8L4Gh,Nicki Minaj
2,[{'external_urls': {'spotify': 'https://open.s...,['KR'],1,189486,False,https://api.spotify.com/v1/tracks/2b8fOow8UzyD...,2b8fOow8UzyDFAE27YhOZM,False,Memories,28,,1,track,spotify:track:2b8fOow8UzyDFAE27YhOZM,single,[{'external_urls': {'spotify': 'https://open.s...,['KR'],https://open.spotify.com/album/3nR9B40hYLKLcR0...,https://api.spotify.com/v1/albums/3nR9B40hYLKL...,3nR9B40hYLKLcR0Eph3Goc,"[{'height': 640, 'url': 'https://i.scdn.co/ima...",Memories,2019-09-20,day,1,album,spotify:album:3nR9B40hYLKLcR0Eph3Goc,USUM71913350,https://open.spotify.com/track/2b8fOow8UzyDFAE...,04gDigrS5kc9YWfZHwBETP,Maroon 5,,
3,[{'external_urls': {'spotify': 'https://open.s...,[],1,159381,False,https://api.spotify.com/v1/tracks/4TnjEaWOeW0e...,4TnjEaWOeW0eKTKIEvJyCa,False,Falling,3,,1,track,spotify:track:4TnjEaWOeW0eKTKIEvJyCa,single,[{'external_urls': {'spotify': 'https://open.s...,[],https://open.spotify.com/album/1Czfd5tEby3DbdY...,https://api.spotify.com/v1/albums/1Czfd5tEby3D...,1Czfd5tEby3DbdYNdqzrCa,"[{'height': 640, 'url': 'https://i.scdn.co/ima...",Falling,2018-10-05,day,1,album,spotify:album:1Czfd5tEby3DbdYNdqzrCa,USUYG1221109,https://open.spotify.com/track/4TnjEaWOeW0eKTK...,7uaIm6Pw7xplS8Dy06V6pT,Trevor Daniel,,
4,[{'external_urls': {'spotify': 'https://open.s...,"['AD', 'AE', 'AG', 'AL', 'AM', 'AO', 'AR', 'AT...",1,215280,False,https://api.spotify.com/v1/tracks/21jGcNKet2qw...,21jGcNKet2qwijlDFuPiPb,False,Circles,83,,6,track,spotify:track:21jGcNKet2qwijlDFuPiPb,album,[{'external_urls': {'spotify': 'https://open.s...,"['AD', 'AE', 'AG', 'AL', 'AM', 'AO', 'AR', 'AT...",https://open.spotify.com/album/4g1ZRSobMefqF6n...,https://api.spotify.com/v1/albums/4g1ZRSobMefq...,4g1ZRSobMefqF6nelkgibi,"[{'height': 640, 'url': 'https://i.scdn.co/ima...",Hollywood's Bleeding,2019-09-06,day,17,album,spotify:album:4g1ZRSobMefqF6nelkgibi,USUM71915699,https://open.spotify.com/track/21jGcNKet2qwijl...,246dkjvS1zLTtiykXe5h60,Post Malone,,


In [22]:
# escritura fichero intermedio de informacion de tracks con columnas de artistas

#df_tracks_info.to_csv('2000-2022_all_tracks_info_artists.csv', encoding='utf-8', index=False)

Generando tabla con informacion de artistas para obtener genero

In [20]:
df_all_artists = pd.concat([df_artist_1, df_artist_2])
df_all_artists.head()

Unnamed: 0,id_artist,name_artist
0,0vRvGUQVUjytro0xpb26bs,Arizona Zervas
1,790FomKkXshlbRYZFtlgla,KAROL G
2,04gDigrS5kc9YWfZHwBETP,Maroon 5
3,7uaIm6Pw7xplS8Dy06V6pT,Trevor Daniel
4,246dkjvS1zLTtiykXe5h60,Post Malone


In [21]:
df_all_artists.shape

(12552, 2)

In [364]:
# Eliminando duplicados de artistas del listado total

df_all_artists.drop_duplicates(inplace=True)
df_all_artists.shape

(2753, 2)

In [370]:
# Eliminando nulos del listado total de artistas

df_all_artists.dropna(inplace=True)
df_all_artists.shape

(2752, 2)

In [371]:
df_all_artists.head()

Unnamed: 0,id_artist,name_artist
0,0vRvGUQVUjytro0xpb26bs,Arizona Zervas
1,790FomKkXshlbRYZFtlgla,KAROL G
2,04gDigrS5kc9YWfZHwBETP,Maroon 5
3,7uaIm6Pw7xplS8Dy06V6pT,Trevor Daniel
4,246dkjvS1zLTtiykXe5h60,Post Malone


In [372]:
empty_row = {
 'external_urls': np.nan,
 'followers': np.nan,
 'genres': np.nan,
 'href': np.nan,
 'id' : np.nan,
 'images': np.nan,
 'name': np.nan,
 'popularity': np.nan,
 'type': np.nan,
 'uri': np.nan}

def extract_artist_info(list_artists):
    # artist =spotify.artist('3Xt3RrJMFv5SZkCfUE8C1J')
    df_artists_info = pd.DataFrame()
    for artist in list_artists:
        artist = spotify.artist(artist)
        if type(artist) != type(None):
            df_artists_info = pd.concat([df_artists_info, pd.json_normalize(artist)], ignore_index=True)
        else:
            df_artists_info = pd.concat([df_artists_info, pd.json_normalize(empty_row)], ignore_index=True )
    return df_artists_info    

In [3]:
# Pas intermedio donde se escribe fichero temporal con informacion extraida de la API para los artistas

#df_all_artists.to_csv('all_artist.csv', encoding='utf-8', index=False)

In [374]:
df_all_artists_info = extract_artist_info(df_all_artists['id_artist'])

In [375]:
df_all_artists_info.shape

(2752, 11)

In [376]:
df_all_artists_info.head()

Unnamed: 0,genres,href,id,images,name,popularity,type,uri,external_urls.spotify,followers.href,followers.total
0,"[pop, pop rap, rhode island rap, viral rap]",https://api.spotify.com/v1/artists/0vRvGUQVUjy...,0vRvGUQVUjytro0xpb26bs,"[{'height': 640, 'url': 'https://i.scdn.co/ima...",Arizona Zervas,68,artist,spotify:artist:0vRvGUQVUjytro0xpb26bs,https://open.spotify.com/artist/0vRvGUQVUjytro...,,701427
1,"[reggaeton, reggaeton colombiano, urbano latino]",https://api.spotify.com/v1/artists/790FomKkXsh...,790FomKkXshlbRYZFtlgla,"[{'height': 640, 'url': 'https://i.scdn.co/ima...",KAROL G,87,artist,spotify:artist:790FomKkXshlbRYZFtlgla,https://open.spotify.com/artist/790FomKkXshlbR...,,31789223
2,[pop],https://api.spotify.com/v1/artists/04gDigrS5kc...,04gDigrS5kc9YWfZHwBETP,"[{'height': 640, 'url': 'https://i.scdn.co/ima...",Maroon 5,87,artist,spotify:artist:04gDigrS5kc9YWfZHwBETP,https://open.spotify.com/artist/04gDigrS5kc9YW...,,39483004
3,"[melodic rap, pop, pop rap, viral rap]",https://api.spotify.com/v1/artists/7uaIm6Pw7xp...,7uaIm6Pw7xplS8Dy06V6pT,"[{'height': 640, 'url': 'https://i.scdn.co/ima...",Trevor Daniel,68,artist,spotify:artist:7uaIm6Pw7xplS8Dy06V6pT,https://open.spotify.com/artist/7uaIm6Pw7xplS8...,,735839
4,"[dfw rap, melodic rap, rap]",https://api.spotify.com/v1/artists/246dkjvS1zL...,246dkjvS1zLTtiykXe5h60,"[{'height': 640, 'url': 'https://i.scdn.co/ima...",Post Malone,89,artist,spotify:artist:246dkjvS1zLTtiykXe5h60,https://open.spotify.com/artist/246dkjvS1zLTti...,,39542755


In [377]:
# escritura fichero intermedio de informacion de artistas
# df_all_artists_info.to_csv('all_artists_info.csv', encoding='utf-8', index=False)

### Extrayendo Caracteristicas de Tracks

Obtener información de funciones de audio para un solo track identificado por su ID de Spotify unico.

https://developer.spotify.com/documentation/web-api/reference/#/operations/get-audio-features

In [315]:
def extract_audio_features(list_tracks):
    df_audio_features = pd.DataFrame()
    #track_features=spotify.audio_features("spotify:track:3nqQXoyQOWXiESFLlDF1hG")
    for track in list_tracks:
        track_features =spotify.audio_features(track)
        df_track_features = pd.json_normalize(track_features)
        df_audio_features = pd.concat([df_audio_features, df_track_features], ignore_index=True)
    return df_audio_features

In [316]:
df_track_features_part1 = extract_audio_features(list_track_part_1)

In [317]:
df_track_features_part2 = extract_audio_features(list_track_part_2)

In [318]:
df_track_features_part3 = extract_audio_features(list_track_part_3)

In [319]:
df_track_features_part4 = extract_audio_features(list_track_part_4)

In [320]:
df_track_features_part5 = extract_audio_features(list_track_part_5)

In [321]:
df_track_features_part6 = extract_audio_features(list_track_part_6)

In [322]:
(df_track_features_part1.shape, df_track_features_part2.shape, df_track_features_part3.shape, df_track_features_part4.shape, df_track_features_part5.shape, df_track_features_part6.shape)

((1000, 18), (1000, 18), (1000, 18), (1000, 18), (1000, 18), (1276, 18))

In [324]:
# concateno daframes obtenidos con las caracteristicas de las canciones

df_tracks_features = pd.DataFrame()  
df_tracks_features = pd.concat([df_track_features_part1, df_track_features_part2], ignore_index=True)
df_tracks_features = pd.concat([df_tracks_features, df_track_features_part3], ignore_index=True)
df_tracks_features = pd.concat([df_tracks_features, df_track_features_part4], ignore_index=True)
df_tracks_features = pd.concat([df_tracks_features, df_track_features_part5], ignore_index=True)
df_tracks_features = pd.concat([df_tracks_features, df_track_features_part6], ignore_index=True) 
df_tracks_features.shape

(6276, 18)

In [325]:
df_tracks_features.head()

Unnamed: 0,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,type,id,uri,track_href,analysis_url,duration_ms,time_signature
0,0.621,0.601,6.0,-5.616,0.0,0.148,0.0522,0.0,0.46,0.457,116.735,audio_features,696DnlkuDOXcMAnKlTgXXK,spotify:track:696DnlkuDOXcMAnKlTgXXK,https://api.spotify.com/v1/tracks/696DnlkuDOXc...,https://api.spotify.com/v1/audio-analysis/696D...,163636.0,5.0
1,0.803,0.715,2.0,-3.28,1.0,0.298,0.295,0.000134,0.0574,0.574,101.085,audio_features,7k4t7uLgtOxPwTpFmtJNTY,spotify:track:7k4t7uLgtOxPwTpFmtJNTY,https://api.spotify.com/v1/tracks/7k4t7uLgtOxP...,https://api.spotify.com/v1/audio-analysis/7k4t...,200960.0,4.0
2,0.764,0.32,11.0,-7.209,1.0,0.0546,0.837,0.0,0.0822,0.575,91.019,audio_features,2b8fOow8UzyDFAE27YhOZM,spotify:track:2b8fOow8UzyDFAE27YhOZM,https://api.spotify.com/v1/tracks/2b8fOow8UzyD...,https://api.spotify.com/v1/audio-analysis/2b8f...,189486.0,4.0
3,0.785,0.431,10.0,-8.756,0.0,0.0364,0.123,0.0,0.0887,0.236,127.085,audio_features,4TnjEaWOeW0eKTKIEvJyCa,spotify:track:4TnjEaWOeW0eKTKIEvJyCa,https://api.spotify.com/v1/tracks/4TnjEaWOeW0e...,https://api.spotify.com/v1/audio-analysis/4Tnj...,159382.0,4.0
4,0.695,0.762,0.0,-3.497,1.0,0.0395,0.192,0.00244,0.0863,0.553,120.042,audio_features,21jGcNKet2qwijlDFuPiPb,spotify:track:21jGcNKet2qwijlDFuPiPb,https://api.spotify.com/v1/tracks/21jGcNKet2qw...,https://api.spotify.com/v1/audio-analysis/21jG...,215280.0,4.0


In [326]:
# escritura de salida para no perder extraccion de informacion de canciones

#df_tracks_features.to_csv('2000-2022_all_tracks_features.csv', encoding='utf-8', index=False)

### Union de DataFrames Tracks + Artistas + Caracteristicas de Tracks

In [23]:
# lectura ficheros intermedios generados en paso anteriores de extraccion de datos de la API de Spotify

df_tracks_info = pd.read_csv("2000-2022_all_tracks_info_artists.csv", encoding='utf-8')
df_all_artists_info = pd.read_csv('all_artists_info.csv', encoding='utf-8')
df_tracks_features = pd.read_csv('2000-2022_all_tracks_features.csv', encoding='utf-8')

In [25]:
df_tracks_info.head()

Unnamed: 0,artists,available_markets,disc_number,duration_ms,explicit,href,id,is_local,name,popularity,preview_url,track_number,type,uri,album.album_type,album.artists,album.available_markets,album.external_urls.spotify,album.href,album.id,album.images,album.name,album.release_date,album.release_date_precision,album.total_tracks,album.type,album.uri,external_ids.isrc,external_urls.spotify,id_artist_1,name_artist_1,id_artist_2,name_artist_2
0,[{'external_urls': {'spotify': 'https://open.s...,"['AD', 'AE', 'AG', 'AL', 'AM', 'AO', 'AR', 'AT...",1,163636,True,https://api.spotify.com/v1/tracks/696DnlkuDOXc...,696DnlkuDOXcMAnKlTgXXK,False,ROXANNE,75,https://p.scdn.co/mp3-preview/fb038785189bb627...,1,track,spotify:track:696DnlkuDOXcMAnKlTgXXK,single,[{'external_urls': {'spotify': 'https://open.s...,"['AD', 'AE', 'AG', 'AL', 'AM', 'AO', 'AR', 'AT...",https://open.spotify.com/album/6HJDrXs0hpebaRF...,https://api.spotify.com/v1/albums/6HJDrXs0hpeb...,6HJDrXs0hpebaRFKA1sF90,"[{'height': 640, 'url': 'https://i.scdn.co/ima...",ROXANNE,2019-10-10,day,1,album,spotify:album:6HJDrXs0hpebaRFKA1sF90,USSM11914320,https://open.spotify.com/track/696DnlkuDOXcMAn...,0vRvGUQVUjytro0xpb26bs,Arizona Zervas,,
1,[{'external_urls': {'spotify': 'https://open.s...,"['AD', 'AE', 'AG', 'AL', 'AM', 'AO', 'AR', 'AT...",1,200960,False,https://api.spotify.com/v1/tracks/7k4t7uLgtOxP...,7k4t7uLgtOxPwTpFmtJNTY,False,Tusa,74,,1,track,spotify:track:7k4t7uLgtOxPwTpFmtJNTY,single,[{'external_urls': {'spotify': 'https://open.s...,"['AD', 'AE', 'AG', 'AL', 'AM', 'AO', 'AR', 'AT...",https://open.spotify.com/album/7mKevNHhVnZER3B...,https://api.spotify.com/v1/albums/7mKevNHhVnZE...,7mKevNHhVnZER3BLgI8O4F,"[{'height': 640, 'url': 'https://i.scdn.co/ima...",Tusa,2019-11-07,day,1,album,spotify:album:7mKevNHhVnZER3BLgI8O4F,USUM71921183,https://open.spotify.com/track/7k4t7uLgtOxPwTp...,790FomKkXshlbRYZFtlgla,KAROL G,0hCNtLu0JehylgoiP8L4Gh,Nicki Minaj
2,[{'external_urls': {'spotify': 'https://open.s...,['KR'],1,189486,False,https://api.spotify.com/v1/tracks/2b8fOow8UzyD...,2b8fOow8UzyDFAE27YhOZM,False,Memories,28,,1,track,spotify:track:2b8fOow8UzyDFAE27YhOZM,single,[{'external_urls': {'spotify': 'https://open.s...,['KR'],https://open.spotify.com/album/3nR9B40hYLKLcR0...,https://api.spotify.com/v1/albums/3nR9B40hYLKL...,3nR9B40hYLKLcR0Eph3Goc,"[{'height': 640, 'url': 'https://i.scdn.co/ima...",Memories,2019-09-20,day,1,album,spotify:album:3nR9B40hYLKLcR0Eph3Goc,USUM71913350,https://open.spotify.com/track/2b8fOow8UzyDFAE...,04gDigrS5kc9YWfZHwBETP,Maroon 5,,
3,[{'external_urls': {'spotify': 'https://open.s...,[],1,159381,False,https://api.spotify.com/v1/tracks/4TnjEaWOeW0e...,4TnjEaWOeW0eKTKIEvJyCa,False,Falling,3,,1,track,spotify:track:4TnjEaWOeW0eKTKIEvJyCa,single,[{'external_urls': {'spotify': 'https://open.s...,[],https://open.spotify.com/album/1Czfd5tEby3DbdY...,https://api.spotify.com/v1/albums/1Czfd5tEby3D...,1Czfd5tEby3DbdYNdqzrCa,"[{'height': 640, 'url': 'https://i.scdn.co/ima...",Falling,2018-10-05,day,1,album,spotify:album:1Czfd5tEby3DbdYNdqzrCa,USUYG1221109,https://open.spotify.com/track/4TnjEaWOeW0eKTK...,7uaIm6Pw7xplS8Dy06V6pT,Trevor Daniel,,
4,[{'external_urls': {'spotify': 'https://open.s...,"['AD', 'AE', 'AG', 'AL', 'AM', 'AO', 'AR', 'AT...",1,215280,False,https://api.spotify.com/v1/tracks/21jGcNKet2qw...,21jGcNKet2qwijlDFuPiPb,False,Circles,83,,6,track,spotify:track:21jGcNKet2qwijlDFuPiPb,album,[{'external_urls': {'spotify': 'https://open.s...,"['AD', 'AE', 'AG', 'AL', 'AM', 'AO', 'AR', 'AT...",https://open.spotify.com/album/4g1ZRSobMefqF6n...,https://api.spotify.com/v1/albums/4g1ZRSobMefq...,4g1ZRSobMefqF6nelkgibi,"[{'height': 640, 'url': 'https://i.scdn.co/ima...",Hollywood's Bleeding,2019-09-06,day,17,album,spotify:album:4g1ZRSobMefqF6nelkgibi,USUM71915699,https://open.spotify.com/track/21jGcNKet2qwijl...,246dkjvS1zLTtiykXe5h60,Post Malone,,


In [26]:
# generando dataframe con columnas relevante del Dataframe de Tracks

df_tracks_final = df_tracks_info[['explicit', 'id', 'name', 'album.release_date', 'popularity', 'id_artist_1', 'name_artist_1', 'id_artist_2', 'name_artist_2' ]]
df_tracks_final.columns = ['explicit', 'track_id', 'track_name', 'release_date', 'popularity', 'id_artist_1', 'name_artist_1', 'id_artist_2', 'name_artist_2' ]

In [27]:
# elimino duplicados de dataframe de tracks por si existiera alguno

df_tracks_final = df_tracks_final.drop_duplicates(subset=['track_id'])
df_tracks_final.shape

(6274, 9)

In [28]:
# Union de dataframe de tracks con artista para el artista 1

df_tracks_final = df_tracks_final.merge(df_all_artists_info[['id', 'genres', 'popularity', 'followers.total']], how="left", left_on="id_artist_1", right_on="id", 
suffixes=('_track', '_artist_1'))

df_tracks_final.columns = ['explicit', 'track_id', 'track_name', 'release_date', 'popularity_track', 'id_artist_1', 'name_artist_1', 'id_artist_2', 'name_artist_2', 'id',
       'genres_artist_1', 'popularity_artist_1', 'followers_artist_1']

In [29]:
df_tracks_final.drop(['id'], axis=1, inplace=True)

In [30]:
df_tracks_final.shape

(6274, 12)

In [31]:
# Union de dataframe de tracks con artista para el artista 2

df_tracks_final = df_tracks_final.merge(df_all_artists_info[['id', 'genres', 'popularity', 'followers.total']], how="left", left_on="id_artist_2", 
        right_on="id", suffixes=('_track', '_artist_2'))

In [32]:
df_tracks_final.columns = ['explicit', 'track_id', 'track_name', 'release_date', 'popularity_track', 'id_artist_1', 'name_artist_1', 'id_artist_2', 'name_artist_2',
       'genres_artist_1', 'popularity_artist_1', 'followers_artist_1', 'id', 'genres_artist_2', 'popularity_artist_2', 'followers_artist_2']

df_tracks_final.drop(['id'], axis=1, inplace=True)


In [33]:
df_tracks_final.shape

(6274, 15)

In [34]:
# elimino duplicados de dataframe de caracteristicas de tracks por si existiera alguno

df_tracks_features = df_tracks_features.drop_duplicates(subset=['id'])

In [35]:
# Union de dataframe de tracks con dataframe de caracteristicas

features_columns = ['danceability', 'energy', 'key', 'loudness', 'mode', 'speechiness', 'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo',
       'id',  'duration_ms', 'time_signature']

df_tracks_final = df_tracks_final.merge(df_tracks_features[features_columns], how="left", left_on="track_id", right_on="id", suffixes=('_track', '_feature'))

In [36]:
df_tracks_final.shape

(6274, 29)

In [37]:
# escritura de salida para no perder dataframe final con datos de canciones

df_tracks_final.to_csv('2000-2022_all_tracks_final.csv', encoding='utf-8', index=False)