In [1]:
import pandas as pd
import numpy as np
import requests
import os 
from dotenv import load_dotenv
import requests
from tqdm import tqdm
pd.options.display.max_columns = None

In [2]:
import spotipy 
from spotipy.oauth2 import SpotifyClientCredentials ## para poner mis credenciales

# lastfm
Voy a consultar todos las top canciones de la página de lastfm y voy a sacar los top géneros de estas. Para ello, se obtienen los datos con los métodos `'chart.getTopTracks'` y `'artist.getTopTags'`

Aquí está la docu, para más información: https://www.last.fm/api#getting-started

En primer lugar, voy a introducir mis credenciales como parámetros de entorno.

In [3]:
load_dotenv()

True

In [4]:
api = os.getenv("lastfm-id")
username = os.getenv("lastfm-user")

Para hacer la consulta a la api, tengo que darle como input una serie de parámetros:
- el link al que quiero hacer la petición ( https://ws.audioscrobbler.com/2.0/ )
- `headers`: mis credenciales de usuario de la api / lastfm
- `params`: los parámetros que tengo que meter para poder hacer la consulta a la api. En este caso, serán los siguientes:
  - `api_key`: la clave con la que voy a acceder
  - `method`: los datos a los que quiero acceder. En este caso, `'chart.gettopartists'` para que me de los top artistas de la plataforma.
  - `format`: el formato que quiero que me de de respuesta, pudiendo ser `json` o `xml`

In [5]:
headers = {
    'user-agent': username
}

params = {
    'api_key': api,
    'method': 'chart.getTopTracks',
    'format': 'json'
}

res = requests.get('https://ws.audioscrobbler.com/2.0/', headers=headers, params=params)
res.status_code ## si 200 -> éxito

200

In [6]:
res.json()['tracks']['track'][0] ## el output de la petición es en formato json, en efecto

{'name': 'Kill Bill',
 'duration': '0',
 'playcount': '4259627',
 'listeners': '444103',
 'mbid': '',
 'url': 'https://www.last.fm/music/SZA/_/Kill+Bill',
 'streamable': {'#text': '0', 'fulltrack': '0'},
 'artist': {'name': 'SZA',
  'mbid': '272989c8-5535-492d-a25c-9f58803e027f',
  'url': 'https://www.last.fm/music/SZA'},
 'image': [{'#text': 'https://lastfm.freetls.fastly.net/i/u/34s/2a96cbd8b46e442fc41c2b86b821562f.png',
   'size': 'small'},
  {'#text': 'https://lastfm.freetls.fastly.net/i/u/64s/2a96cbd8b46e442fc41c2b86b821562f.png',
   'size': 'medium'},
  {'#text': 'https://lastfm.freetls.fastly.net/i/u/174s/2a96cbd8b46e442fc41c2b86b821562f.png',
   'size': 'large'},
  {'#text': 'https://lastfm.freetls.fastly.net/i/u/300x300/2a96cbd8b46e442fc41c2b86b821562f.png',
   'size': 'extralarge'}]}

Me creo una función para fijar los parámetros fijos, y que el `method` entre como variable.

In [7]:
## me voy a hacer una función con los datos de la petición
def getLastFMData(params):
    headers = {'user-agent': username} ## defino mi usuario
    url = 'https://ws.audioscrobbler.com/2.0/' ##url a la que voy a contactar

    ## configuro los params para hacer la petición
    params['api_key'] = api
    params['format'] = 'json'

    res = requests.get(url, headers=headers, params=params)
    return res

In [8]:
## voy a probar que la petición funciona
topTracks = getLastFMData({
    'method': 'chart.getTopTracks'
})
topTracks.status_code

200

In [9]:
topTracks.json()['tracks']['track'][0] ## me da un cacharro de tipo json, al que ya me enfrentaré luego

{'name': 'Kill Bill',
 'duration': '0',
 'playcount': '2736926',
 'listeners': '359360',
 'mbid': '',
 'url': 'https://www.last.fm/music/SZA/_/Kill+Bill',
 'streamable': {'#text': '0', 'fulltrack': '0'},
 'artist': {'name': 'SZA',
  'mbid': '272989c8-5535-492d-a25c-9f58803e027f',
  'url': 'https://www.last.fm/music/SZA'},
 'image': [{'#text': 'https://lastfm.freetls.fastly.net/i/u/34s/2a96cbd8b46e442fc41c2b86b821562f.png',
   'size': 'small'},
  {'#text': 'https://lastfm.freetls.fastly.net/i/u/64s/2a96cbd8b46e442fc41c2b86b821562f.png',
   'size': 'medium'},
  {'#text': 'https://lastfm.freetls.fastly.net/i/u/174s/2a96cbd8b46e442fc41c2b86b821562f.png',
   'size': 'large'},
  {'#text': 'https://lastfm.freetls.fastly.net/i/u/300x300/2a96cbd8b46e442fc41c2b86b821562f.png',
   'size': 'extralarge'}]}

Para obtener la info concreta de los artistas, necesito saber la página de la que quiero obtener la info y el número de resultados que quiero obtener por página. https://www.last.fm/api/show/chart.getTopArtists

Los parámetros para mi consulta los puedo sacar del diccionario de mi consulta anterior, en la clave `'@attr'`, de forma que los puedo poner como parámetros de mi función.

In [10]:
topTracks.json()['tracks']['@attr']

{'page': '1', 'perPage': '50', 'totalPages': '642101', 'total': '32105014'}

In [11]:
## voy a intentar obtener la info de los tracks por cada una de las páginas
tqdm.pandas()

responses = [] ## almacenaré cada una de las peticiones por página en una lista

page_inicio = 1
#page_final = int(topTracks.json()['tracks']['@attr']['totalPages']) #sé el número máx de páginas de la consulta que hice anteriormente
page_limit = int(topTracks.json()['tracks']['@attr']['perPage']) #sé el número de artistas que tengo por página
page_final = 10 ## lo introduzco a mano para que no se me pete la consulta

for page in tqdm(range(page_inicio, page_final)):
    if page <= page_final:
        ## cambio los parámetros de entrada 
        params = {
            'method': 'chart.gettoptracks',
            'limit': page_limit, 
            'page': page
        }
        
        ## hago la petición con los nuevos parámetros
        res = getLastFMData(params)

        ## puedo sacar la página por la que voy
        page = int(res.json()['tracks']['@attr']['page'])

        ## añado las respuesta a la lista
        responses.append(res)

        ## le sumo uno a la página para que avance
        page += 1

100%|██████████| 9/9 [00:01<00:00,  4.87it/s]


Voy a juntar las tracks en un dataframe

In [12]:
## voy a fijarme en la estructura de una de las respuestas que he appendeado a responses
## al final, es un diccionario que tiene dentro otro diccionario y da los valores en listas
responses[0].json()['tracks']['track'][0] ## así puedo obtener la información como una lista de listas, por la que puedo iterar y convertir en dataframes

{'name': 'Kill Bill',
 'duration': '0',
 'playcount': '2946397',
 'listeners': '371889',
 'mbid': '',
 'url': 'https://www.last.fm/music/SZA/_/Kill+Bill',
 'streamable': {'#text': '0', 'fulltrack': '0'},
 'artist': {'name': 'SZA',
  'mbid': '272989c8-5535-492d-a25c-9f58803e027f',
  'url': 'https://www.last.fm/music/SZA'},
 'image': [{'#text': 'https://lastfm.freetls.fastly.net/i/u/34s/2a96cbd8b46e442fc41c2b86b821562f.png',
   'size': 'small'},
  {'#text': 'https://lastfm.freetls.fastly.net/i/u/64s/2a96cbd8b46e442fc41c2b86b821562f.png',
   'size': 'medium'},
  {'#text': 'https://lastfm.freetls.fastly.net/i/u/174s/2a96cbd8b46e442fc41c2b86b821562f.png',
   'size': 'large'},
  {'#text': 'https://lastfm.freetls.fastly.net/i/u/300x300/2a96cbd8b46e442fc41c2b86b821562f.png',
   'size': 'extralarge'}]}

In [13]:
pd.DataFrame(responses[0].json()['tracks']['track']).head()

Unnamed: 0,name,duration,playcount,listeners,mbid,url,streamable,artist,image
0,Kill Bill,0,2946397,371889,,https://www.last.fm/music/SZA/_/Kill+Bill,"{'#text': '0', 'fulltrack': '0'}","{'name': 'SZA', 'mbid': '272989c8-5535-492d-a2...",[{'#text': 'https://lastfm.freetls.fastly.net/...
1,Anti-Hero,0,10077122,560069,,https://www.last.fm/music/Taylor+Swift/_/Anti-...,"{'#text': '0', 'fulltrack': '0'}","{'name': 'Taylor Swift', 'mbid': '20244d07-534...",[{'#text': 'https://lastfm.freetls.fastly.net/...
2,SOS,0,995490,307189,,https://www.last.fm/music/SZA/_/SOS,"{'#text': '0', 'fulltrack': '0'}","{'name': 'SZA', 'mbid': '272989c8-5535-492d-a2...",[{'#text': 'https://lastfm.freetls.fastly.net/...
3,As It Was,0,20588530,909911,,https://www.last.fm/music/Harry+Styles/_/As+It...,"{'#text': '0', 'fulltrack': '0'}","{'name': 'Harry Styles', 'mbid': '', 'url': 'h...",[{'#text': 'https://lastfm.freetls.fastly.net/...
4,Shirt,0,3121081,359910,,https://www.last.fm/music/SZA/_/Shirt,"{'#text': '0', 'fulltrack': '0'}","{'name': 'SZA', 'mbid': '272989c8-5535-492d-a2...",[{'#text': 'https://lastfm.freetls.fastly.net/...


In [14]:
pags = [pd.DataFrame(response.json()['tracks']['track']) for response in responses]
tracks = pd.concat(pags)
print(tracks.shape)
tracks.head()

(500, 9)


Unnamed: 0,name,duration,playcount,listeners,mbid,url,streamable,artist,image
0,Kill Bill,0,2946397,371889,,https://www.last.fm/music/SZA/_/Kill+Bill,"{'#text': '0', 'fulltrack': '0'}","{'name': 'SZA', 'mbid': '272989c8-5535-492d-a2...",[{'#text': 'https://lastfm.freetls.fastly.net/...
1,Anti-Hero,0,10077122,560069,,https://www.last.fm/music/Taylor+Swift/_/Anti-...,"{'#text': '0', 'fulltrack': '0'}","{'name': 'Taylor Swift', 'mbid': '20244d07-534...",[{'#text': 'https://lastfm.freetls.fastly.net/...
2,SOS,0,995490,307189,,https://www.last.fm/music/SZA/_/SOS,"{'#text': '0', 'fulltrack': '0'}","{'name': 'SZA', 'mbid': '272989c8-5535-492d-a2...",[{'#text': 'https://lastfm.freetls.fastly.net/...
3,As It Was,0,20588530,909911,,https://www.last.fm/music/Harry+Styles/_/As+It...,"{'#text': '0', 'fulltrack': '0'}","{'name': 'Harry Styles', 'mbid': '', 'url': 'h...",[{'#text': 'https://lastfm.freetls.fastly.net/...
4,Shirt,0,3121081,359910,,https://www.last.fm/music/SZA/_/Shirt,"{'#text': '0', 'fulltrack': '0'}","{'name': 'SZA', 'mbid': '272989c8-5535-492d-a2...",[{'#text': 'https://lastfm.freetls.fastly.net/...


In [15]:
tracks.info()
tracks.describe().T

<class 'pandas.core.frame.DataFrame'>
Int64Index: 500 entries, 0 to 49
Data columns (total 9 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   name        500 non-null    object
 1   duration    500 non-null    object
 2   playcount   500 non-null    object
 3   listeners   500 non-null    object
 4   mbid        500 non-null    object
 5   url         500 non-null    object
 6   streamable  500 non-null    object
 7   artist      500 non-null    object
 8   image       500 non-null    object
dtypes: object(9)
memory usage: 39.1+ KB


Unnamed: 0,count,unique,top,freq
name,500,448,Die For You,3
duration,500,86,0,386
playcount,500,459,2946397,2
listeners,500,458,371889,2
mbid,500,112,,382
url,500,450,https://www.last.fm/music/SZA/_/Kill+Bill,2
streamable,500,1,"{'#text': '0', 'fulltrack': '0'}",500
artist,500,161,"{'name': 'Taylor Swift', 'mbid': '20244d07-534...",59
image,500,1,[{'#text': 'https://lastfm.freetls.fastly.net/...,500


In [16]:
#tracks.to_csv('tracks.csv')

Voy a crearme una columna con los artistas limpios, para poder hacer la búsqueda de los tags.

In [17]:
tracks.head()

Unnamed: 0,name,duration,playcount,listeners,mbid,url,streamable,artist,image
0,Kill Bill,0,2946397,371889,,https://www.last.fm/music/SZA/_/Kill+Bill,"{'#text': '0', 'fulltrack': '0'}","{'name': 'SZA', 'mbid': '272989c8-5535-492d-a2...",[{'#text': 'https://lastfm.freetls.fastly.net/...
1,Anti-Hero,0,10077122,560069,,https://www.last.fm/music/Taylor+Swift/_/Anti-...,"{'#text': '0', 'fulltrack': '0'}","{'name': 'Taylor Swift', 'mbid': '20244d07-534...",[{'#text': 'https://lastfm.freetls.fastly.net/...
2,SOS,0,995490,307189,,https://www.last.fm/music/SZA/_/SOS,"{'#text': '0', 'fulltrack': '0'}","{'name': 'SZA', 'mbid': '272989c8-5535-492d-a2...",[{'#text': 'https://lastfm.freetls.fastly.net/...
3,As It Was,0,20588530,909911,,https://www.last.fm/music/Harry+Styles/_/As+It...,"{'#text': '0', 'fulltrack': '0'}","{'name': 'Harry Styles', 'mbid': '', 'url': 'h...",[{'#text': 'https://lastfm.freetls.fastly.net/...
4,Shirt,0,3121081,359910,,https://www.last.fm/music/SZA/_/Shirt,"{'#text': '0', 'fulltrack': '0'}","{'name': 'SZA', 'mbid': '272989c8-5535-492d-a2...",[{'#text': 'https://lastfm.freetls.fastly.net/...


In [18]:
def sacarArtista(col):
    return  col["name"]
def sacarMBID(col):
    return  col["mbid"]
def sacarStreameable(col):
    return  col["fulltrack"]

In [19]:
tracks['artist_clean'] = tracks['artist'].apply(sacarArtista)
tracks['MBID'] = tracks['artist'].apply(sacarMBID)
tracks['Streamable'] = tracks['streamable'].apply(sacarStreameable)

In [20]:
tracks.head()

Unnamed: 0,name,duration,playcount,listeners,mbid,url,streamable,artist,image,artist_clean,MBID,Streamable
0,Kill Bill,0,2946397,371889,,https://www.last.fm/music/SZA/_/Kill+Bill,"{'#text': '0', 'fulltrack': '0'}","{'name': 'SZA', 'mbid': '272989c8-5535-492d-a2...",[{'#text': 'https://lastfm.freetls.fastly.net/...,SZA,272989c8-5535-492d-a25c-9f58803e027f,0
1,Anti-Hero,0,10077122,560069,,https://www.last.fm/music/Taylor+Swift/_/Anti-...,"{'#text': '0', 'fulltrack': '0'}","{'name': 'Taylor Swift', 'mbid': '20244d07-534...",[{'#text': 'https://lastfm.freetls.fastly.net/...,Taylor Swift,20244d07-534f-4eff-b4d4-930878889970,0
2,SOS,0,995490,307189,,https://www.last.fm/music/SZA/_/SOS,"{'#text': '0', 'fulltrack': '0'}","{'name': 'SZA', 'mbid': '272989c8-5535-492d-a2...",[{'#text': 'https://lastfm.freetls.fastly.net/...,SZA,272989c8-5535-492d-a25c-9f58803e027f,0
3,As It Was,0,20588530,909911,,https://www.last.fm/music/Harry+Styles/_/As+It...,"{'#text': '0', 'fulltrack': '0'}","{'name': 'Harry Styles', 'mbid': '', 'url': 'h...",[{'#text': 'https://lastfm.freetls.fastly.net/...,Harry Styles,,0
4,Shirt,0,3121081,359910,,https://www.last.fm/music/SZA/_/Shirt,"{'#text': '0', 'fulltrack': '0'}","{'name': 'SZA', 'mbid': '272989c8-5535-492d-a2...",[{'#text': 'https://lastfm.freetls.fastly.net/...,SZA,272989c8-5535-492d-a25c-9f58803e027f,0


In [21]:
tracks[['name', 'artist_clean']].duplicated().sum()

50

In [22]:
tracks.shape

(500, 12)

In [23]:
## voy a quitar duplicados basados en el nombre de la pista y el artista
tracks.drop_duplicates(subset = ['name', 'artist_clean'], keep='first', inplace=True)

In [24]:
tracks.shape

(450, 12)

Extraigo ahora los top géneros de cada canción con `'artist.getTopTags'` ( https://www.last.fm/api/show/track.getTags ). Tendré que dar estos parámetros a la función:
- `artist` (Required (unless mbid)] : The artist name
- `track` (Required (unless mbid)] : The track name

In [25]:
res = getLastFMData({
    'method': 'track.getTopTags',
    'artist':'Harry Styles',
    'track':'As It Was'
})
res.json()['toptags']['tag'][0]['name']

'pop'

In [26]:
[tag['name'] for tag in res.json()['toptags']['tag'][:3]]

['pop', 'rock', 'indie pop']

In [27]:
def getTrackTags(artist, track):
    ## hago la colsulta para obtener los datos de tags por artista
    res = getLastFMData({
        'method': 'track.getTopTags',
        'artist':  artist,
        'track': track,
    })

    # por si me da un timeout
    if res.status_code != 200:
        return None

    # extraigo los top 5 tags de cada artistas
    try:
        #tags = [tag['name'] for tag in res.json()['toptags']['tag'][:3]]
        return res.json()['toptags']['tag'][0]['name'] ## no va muy bien esta parte y solo me extrae tres géneros... 
    except:
        return np.nan

In [28]:
tqdm.pandas()

## esto es un apply pero le pongo la barra chula para que me dé el status
tracks['tags'] = tracks.progress_apply(lambda x: getTrackTags(x.artist_clean, x.name), axis=1)

100%|██████████| 450/450 [01:15<00:00,  5.93it/s]


In [29]:
tracks['tags'].unique()

array([nan, 'rap', '80s'], dtype=object)

In [30]:
tracks[tracks['tags'].isnull() == False]

Unnamed: 0,name,duration,playcount,listeners,mbid,url,streamable,artist,image,artist_clean,MBID,Streamable,tags
48,See You Again (feat. Kali Uchis),0,12128431,788155,,"https://www.last.fm/music/Tyler,+the+Creator/_...","{'#text': '0', 'fulltrack': '0'}","{'name': 'Tyler, the Creator', 'mbid': 'f6beac...",[{'#text': 'https://lastfm.freetls.fastly.net/...,"Tyler, the Creator",f6beac20-5dfe-4d1f-ae02-0b0a740aafd6,0,rap
1,This Charming Man - 2011 Remaster,0,6193028,545182,,https://www.last.fm/music/The+Smiths/_/This+Ch...,"{'#text': '0', 'fulltrack': '0'}","{'name': 'The Smiths', 'mbid': '40f5d9e4-2de7-...",[{'#text': 'https://lastfm.freetls.fastly.net/...,The Smiths,40f5d9e4-2de7-4f2d-ad41-e31a9a9fea27,0,80s


In [31]:
tracks[tracks['tags'].isnull() == True].head()

Unnamed: 0,name,duration,playcount,listeners,mbid,url,streamable,artist,image,artist_clean,MBID,Streamable,tags
0,Kill Bill,0,2946397,371889,,https://www.last.fm/music/SZA/_/Kill+Bill,"{'#text': '0', 'fulltrack': '0'}","{'name': 'SZA', 'mbid': '272989c8-5535-492d-a2...",[{'#text': 'https://lastfm.freetls.fastly.net/...,SZA,272989c8-5535-492d-a25c-9f58803e027f,0,
1,Anti-Hero,0,10077122,560069,,https://www.last.fm/music/Taylor+Swift/_/Anti-...,"{'#text': '0', 'fulltrack': '0'}","{'name': 'Taylor Swift', 'mbid': '20244d07-534...",[{'#text': 'https://lastfm.freetls.fastly.net/...,Taylor Swift,20244d07-534f-4eff-b4d4-930878889970,0,
2,SOS,0,995490,307189,,https://www.last.fm/music/SZA/_/SOS,"{'#text': '0', 'fulltrack': '0'}","{'name': 'SZA', 'mbid': '272989c8-5535-492d-a2...",[{'#text': 'https://lastfm.freetls.fastly.net/...,SZA,272989c8-5535-492d-a25c-9f58803e027f,0,
3,As It Was,0,20588530,909911,,https://www.last.fm/music/Harry+Styles/_/As+It...,"{'#text': '0', 'fulltrack': '0'}","{'name': 'Harry Styles', 'mbid': '', 'url': 'h...",[{'#text': 'https://lastfm.freetls.fastly.net/...,Harry Styles,,0,
4,Shirt,0,3121081,359910,,https://www.last.fm/music/SZA/_/Shirt,"{'#text': '0', 'fulltrack': '0'}","{'name': 'SZA', 'mbid': '272989c8-5535-492d-a2...",[{'#text': 'https://lastfm.freetls.fastly.net/...,SZA,272989c8-5535-492d-a25c-9f58803e027f,0,


In [32]:
tracks.head()

Unnamed: 0,name,duration,playcount,listeners,mbid,url,streamable,artist,image,artist_clean,MBID,Streamable,tags
0,Kill Bill,0,2946397,371889,,https://www.last.fm/music/SZA/_/Kill+Bill,"{'#text': '0', 'fulltrack': '0'}","{'name': 'SZA', 'mbid': '272989c8-5535-492d-a2...",[{'#text': 'https://lastfm.freetls.fastly.net/...,SZA,272989c8-5535-492d-a25c-9f58803e027f,0,
1,Anti-Hero,0,10077122,560069,,https://www.last.fm/music/Taylor+Swift/_/Anti-...,"{'#text': '0', 'fulltrack': '0'}","{'name': 'Taylor Swift', 'mbid': '20244d07-534...",[{'#text': 'https://lastfm.freetls.fastly.net/...,Taylor Swift,20244d07-534f-4eff-b4d4-930878889970,0,
2,SOS,0,995490,307189,,https://www.last.fm/music/SZA/_/SOS,"{'#text': '0', 'fulltrack': '0'}","{'name': 'SZA', 'mbid': '272989c8-5535-492d-a2...",[{'#text': 'https://lastfm.freetls.fastly.net/...,SZA,272989c8-5535-492d-a25c-9f58803e027f,0,
3,As It Was,0,20588530,909911,,https://www.last.fm/music/Harry+Styles/_/As+It...,"{'#text': '0', 'fulltrack': '0'}","{'name': 'Harry Styles', 'mbid': '', 'url': 'h...",[{'#text': 'https://lastfm.freetls.fastly.net/...,Harry Styles,,0,
4,Shirt,0,3121081,359910,,https://www.last.fm/music/SZA/_/Shirt,"{'#text': '0', 'fulltrack': '0'}","{'name': 'SZA', 'mbid': '272989c8-5535-492d-a2...",[{'#text': 'https://lastfm.freetls.fastly.net/...,SZA,272989c8-5535-492d-a25c-9f58803e027f,0,


Limpio las columnas que no estoy usando.

In [33]:
tracks.drop(['mbid', 'streamable', 'artist', 'image'], axis = 1, inplace=True)
tracks.rename({'artist_clean':'artist'}, axis = 1, inplace=True)
tracks.head()

Unnamed: 0,name,duration,playcount,listeners,url,artist,MBID,Streamable,tags
0,Kill Bill,0,2946397,371889,https://www.last.fm/music/SZA/_/Kill+Bill,SZA,272989c8-5535-492d-a25c-9f58803e027f,0,
1,Anti-Hero,0,10077122,560069,https://www.last.fm/music/Taylor+Swift/_/Anti-...,Taylor Swift,20244d07-534f-4eff-b4d4-930878889970,0,
2,SOS,0,995490,307189,https://www.last.fm/music/SZA/_/SOS,SZA,272989c8-5535-492d-a25c-9f58803e027f,0,
3,As It Was,0,20588530,909911,https://www.last.fm/music/Harry+Styles/_/As+It...,Harry Styles,,0,
4,Shirt,0,3121081,359910,https://www.last.fm/music/SZA/_/Shirt,SZA,272989c8-5535-492d-a25c-9f58803e027f,0,


In [34]:
#tracks.to_csv('tracks_with_tags.csv')

# spotify
Aquí está la docu: https://spotipy.readthedocs.io/en/2.22.0/#

Configuro el cliente y le metó las keys para acceder a la API de Spotify y poder sacar datos de las canciones que tengo

In [35]:
id=os.getenv("id")
secret=os.getenv("secret")

client_credentials_manager = SpotifyClientCredentials(client_id=id,
                                                      client_secret=secret)
sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)

Primero voy a sacar el URI de mis canciones. Para ello uso el método de spotipy.search, que permite buscar con una query el nombre del artista y la canción.

In [36]:
## esto es para obtener una uri --> me voy a sacar los uris para mi data frame
artist = 'los del rio'
track = 'la macarena'
q = 'artist:{} track: {}'.format(artist, track)
results = sp.search(q=q, limit=1, type='track')
print(results['tracks']['items'][0]['uri'])

spotify:track:3mbmmddF7fxUNFkXV91ecm


In [37]:
for index, row in tracks.iterrows():
    print(row['artist'],'---', row['name'])
    break

SZA --- Kill Bill


In [38]:
uris = []
for index, row in tracks.iterrows():
    q = 'artist:{} track: {}'.format(row['artist'], row['name'])
    res = sp.search(q=q, limit=1, type='track')
    try:
        uri = res['tracks']['items'][0]['uri']
        #print(res['tracks']['items'][0]['uri'])
        uris.append(uri)
    except:
        uris.append('')
    #break

Voy a comprobar que la lista donde me he appendeado todo tiene la misma longitud que mi dataframe, para poder añadir los URIs como una columna

In [39]:
uris[:3]

['spotify:track:3OHfY25tqY28d16oZczHc8',
 'spotify:track:0V3wPSX9ygBnCm8psDIegu',
 'spotify:track:5xMw6qCcpd2gBXPGTegC4W']

In [40]:
print(len(uris), tracks.shape)

450 (450, 9)


In [41]:
tracks['URI'] = uris
tracks.head()

Unnamed: 0,name,duration,playcount,listeners,url,artist,MBID,Streamable,tags,URI
0,Kill Bill,0,2946397,371889,https://www.last.fm/music/SZA/_/Kill+Bill,SZA,272989c8-5535-492d-a25c-9f58803e027f,0,,spotify:track:3OHfY25tqY28d16oZczHc8
1,Anti-Hero,0,10077122,560069,https://www.last.fm/music/Taylor+Swift/_/Anti-...,Taylor Swift,20244d07-534f-4eff-b4d4-930878889970,0,,spotify:track:0V3wPSX9ygBnCm8psDIegu
2,SOS,0,995490,307189,https://www.last.fm/music/SZA/_/SOS,SZA,272989c8-5535-492d-a25c-9f58803e027f,0,,spotify:track:5xMw6qCcpd2gBXPGTegC4W
3,As It Was,0,20588530,909911,https://www.last.fm/music/Harry+Styles/_/As+It...,Harry Styles,,0,,spotify:track:4LRPiXqCikLlN15c3yImP7
4,Shirt,0,3121081,359910,https://www.last.fm/music/SZA/_/Shirt,SZA,272989c8-5535-492d-a25c-9f58803e027f,0,,spotify:track:34ZAzO78a5DAVNrYIGWcPm


Ahora con el método `'audio_features'` de spotipy quiero sacar las propiedades que tienen las canciones. Para ello, tengo que aportar:
- `URI`

Y el resultado de cada una de mis consultas, lo almacenaré en una lista de listas que luego appendearé a mi dataframe.

In [42]:
sp.audio_features('spotify:track:3OHfY25tqY28d16oZczHc8')

[{'danceability': 0.644,
  'energy': 0.728,
  'key': 8,
  'loudness': -5.75,
  'mode': 1,
  'speechiness': 0.0351,
  'acousticness': 0.0543,
  'instrumentalness': 0.169,
  'liveness': 0.161,
  'valence': 0.43,
  'tempo': 88.993,
  'type': 'audio_features',
  'id': '3OHfY25tqY28d16oZczHc8',
  'uri': 'spotify:track:3OHfY25tqY28d16oZczHc8',
  'track_href': 'https://api.spotify.com/v1/tracks/3OHfY25tqY28d16oZczHc8',
  'analysis_url': 'https://api.spotify.com/v1/audio-analysis/3OHfY25tqY28d16oZczHc8',
  'duration_ms': 153947,
  'time_signature': 4}]

In [43]:
type(sp.audio_features('spotify:track:3OHfY25tqY28d16oZczHc8'))

list

In [44]:
datos_uris = []
for index, row in tracks.iterrows():
    uri = sp.audio_features(row['URI'])
    datos_uris.append(uri)

Y tengo mis datos sobre las canciones, pero tengo que limpiar un poco para poder hacer el append.

In [51]:
type(pd.DataFrame(datos_uris).iloc[0,0]) ## puedo sacar las claves para nombrar a mis columnas

dict

In [53]:
columnas = list(pd.DataFrame(datos_uris).iloc[0,0].keys())
columnas[:5]

['danceability', 'energy', 'key', 'loudness', 'mode']

In [56]:
df_uris = pd.DataFrame(datos_uris)
df_uris[columnas] = df_uris[0].apply(pd.Series)
df_uris.head()

  df_uris[columnas] = df_uris[0].apply(pd.Series)
  df_uris[columnas] = df_uris[0].apply(pd.Series)
  df_uris[columnas] = df_uris[0].apply(pd.Series)
  df_uris[columnas] = df_uris[0].apply(pd.Series)
  df_uris[columnas] = df_uris[0].apply(pd.Series)


Unnamed: 0,0,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,type,id,uri,track_href,analysis_url,duration_ms,time_signature
0,"{'danceability': 0.644, 'energy': 0.728, 'key'...",0.644,0.728,8.0,-5.75,1.0,0.0351,0.0543,0.169,0.161,0.43,88.993,audio_features,3OHfY25tqY28d16oZczHc8,spotify:track:3OHfY25tqY28d16oZczHc8,https://api.spotify.com/v1/tracks/3OHfY25tqY28...,https://api.spotify.com/v1/audio-analysis/3OHf...,153947.0,4.0
1,"{'danceability': 0.637, 'energy': 0.643, 'key'...",0.637,0.643,4.0,-6.571,1.0,0.0519,0.13,2e-06,0.142,0.533,97.008,audio_features,0V3wPSX9ygBnCm8psDIegu,spotify:track:0V3wPSX9ygBnCm8psDIegu,https://api.spotify.com/v1/tracks/0V3wPSX9ygBn...,https://api.spotify.com/v1/audio-analysis/0V3w...,200690.0,4.0
2,"{'danceability': 0.507, 'energy': 0.657, 'key'...",0.507,0.657,7.0,-7.356,0.0,0.233,0.669,7e-06,0.0947,0.506,119.159,audio_features,5xMw6qCcpd2gBXPGTegC4W,spotify:track:5xMw6qCcpd2gBXPGTegC4W,https://api.spotify.com/v1/tracks/5xMw6qCcpd2g...,https://api.spotify.com/v1/audio-analysis/5xMw...,117773.0,1.0
3,"{'danceability': 0.52, 'energy': 0.731, 'key':...",0.52,0.731,6.0,-5.338,0.0,0.0557,0.342,0.00101,0.311,0.662,173.93,audio_features,4LRPiXqCikLlN15c3yImP7,spotify:track:4LRPiXqCikLlN15c3yImP7,https://api.spotify.com/v1/tracks/4LRPiXqCikLl...,https://api.spotify.com/v1/audio-analysis/4LRP...,167303.0,4.0
4,"{'danceability': 0.824, 'energy': 0.453, 'key'...",0.824,0.453,3.0,-9.604,0.0,0.0968,0.146,0.0273,0.0896,0.552,119.959,audio_features,34ZAzO78a5DAVNrYIGWcPm,spotify:track:34ZAzO78a5DAVNrYIGWcPm,https://api.spotify.com/v1/tracks/34ZAzO78a5DA...,https://api.spotify.com/v1/audio-analysis/34ZA...,181831.0,4.0


Lo junto ahora con mi dataframe inicial y limpio las columnas que no necesite.

In [58]:
tracks = tracks.merge(df_uris, left_on = 'URI', right_on='uri', indicator=True)
tracks['_merge'].value_counts() ## compruebo que el cruce se haga con éxito

both          447
left_only       0
right_only      0
Name: _merge, dtype: int64

In [62]:
tracks.drop([0, 'uri', '_merge'], axis = 1, inplace=True)
tracks.head()

Unnamed: 0,name,duration,playcount,listeners,url,artist,MBID,Streamable,tags,URI,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,type,id,track_href,analysis_url,duration_ms,time_signature
0,Kill Bill,0,2946397,371889,https://www.last.fm/music/SZA/_/Kill+Bill,SZA,272989c8-5535-492d-a25c-9f58803e027f,0,,spotify:track:3OHfY25tqY28d16oZczHc8,0.644,0.728,8.0,-5.75,1.0,0.0351,0.0543,0.169,0.161,0.43,88.993,audio_features,3OHfY25tqY28d16oZczHc8,https://api.spotify.com/v1/tracks/3OHfY25tqY28...,https://api.spotify.com/v1/audio-analysis/3OHf...,153947.0,4.0
1,Anti-Hero,0,10077122,560069,https://www.last.fm/music/Taylor+Swift/_/Anti-...,Taylor Swift,20244d07-534f-4eff-b4d4-930878889970,0,,spotify:track:0V3wPSX9ygBnCm8psDIegu,0.637,0.643,4.0,-6.571,1.0,0.0519,0.13,2e-06,0.142,0.533,97.008,audio_features,0V3wPSX9ygBnCm8psDIegu,https://api.spotify.com/v1/tracks/0V3wPSX9ygBn...,https://api.spotify.com/v1/audio-analysis/0V3w...,200690.0,4.0
2,SOS,0,995490,307189,https://www.last.fm/music/SZA/_/SOS,SZA,272989c8-5535-492d-a25c-9f58803e027f,0,,spotify:track:5xMw6qCcpd2gBXPGTegC4W,0.507,0.657,7.0,-7.356,0.0,0.233,0.669,7e-06,0.0947,0.506,119.159,audio_features,5xMw6qCcpd2gBXPGTegC4W,https://api.spotify.com/v1/tracks/5xMw6qCcpd2g...,https://api.spotify.com/v1/audio-analysis/5xMw...,117773.0,1.0
3,As It Was,0,20588530,909911,https://www.last.fm/music/Harry+Styles/_/As+It...,Harry Styles,,0,,spotify:track:4LRPiXqCikLlN15c3yImP7,0.52,0.731,6.0,-5.338,0.0,0.0557,0.342,0.00101,0.311,0.662,173.93,audio_features,4LRPiXqCikLlN15c3yImP7,https://api.spotify.com/v1/tracks/4LRPiXqCikLl...,https://api.spotify.com/v1/audio-analysis/4LRP...,167303.0,4.0
4,Shirt,0,3121081,359910,https://www.last.fm/music/SZA/_/Shirt,SZA,272989c8-5535-492d-a25c-9f58803e027f,0,,spotify:track:34ZAzO78a5DAVNrYIGWcPm,0.824,0.453,3.0,-9.604,0.0,0.0968,0.146,0.0273,0.0896,0.552,119.959,audio_features,34ZAzO78a5DAVNrYIGWcPm,https://api.spotify.com/v1/tracks/34ZAzO78a5DA...,https://api.spotify.com/v1/audio-analysis/34ZA...,181831.0,4.0
