# Spotify Data Scrapper
This notebook leverages the capabilities of Spotify's API to create a Dataset for the projects described in this repo

In [92]:
#Importing libraries
import pandas as pd
import seaborn as sns
import numpy as np
import spotipy
import spotipy.util as util
from spotipy.oauth2 import SpotifyClientCredentials
sns.set(rc={'figure.figsize':(11.7,8.27)})

#API parameters go here
cid = 'your-cid'
secret = 'your-secret'
redirect_uri='http://localhost:7777/callback'
username = 'your-username'
spotify = spotipy.Spotify(client_credentials_manager=SpotifyClientCredentials(client_id=cid, client_secret=secret))



In [93]:
#Define playlists to scrap (right clicking them in Spotify reveals their playlist ID)
playlists = [
    '4tszLL7NTfLCoIz39Zsiy1',
    '1oUgBxHbwjbscgj0vOWkJY',
    '37i9dQZF1DX2shzuwwKw0y',
    '37i9dQZF1DWXRqgorJj26U',
    '1DeVya9cgtEGZJ0eiREWJJ',
    '37i9dQZF1DWWOaP4H0w5b0',
    '0EfZGNw0JHvoCkNfnMDYAU',
    '37i9dQZF1DZ06evO1sJmec',
    '37i9dQZF1DZ06evO3RbzfW',
    '6L3dKN1gZU74cStCIgA0yS',
    '37i9dQZF1DZ06evO1xnOx2',
    '37i9dQZF1DZ06evO1mfBM4',
    '37i9dQZF1DZ06evO0Mu9Si',
    '3IyNJEsknaSFoUIn8qf1Lr'
    ]

#Structures for dataframe creation
tracks=[]
artists=[]
lista_rep=[]
track_id=[]
dates=[]

#Query playlist by playlist and populate data structures
for playlist in playlists:
    results = spotify.user_playlist_tracks(playlist_id=playlist)
    for item in results['items']:
        tracks.append(item['track']['name'])
        artists.append(item['track']['artists'][0]['name'])
        lista_rep.append(playlist)
        try:
            dates.append(item['track']['release_date'])
        except:
            dates.append(np.nan)
        track_id.append(item['track']['id'])

#Merge structures in a single dictionary
dict ={
    'cancion':tracks,
    'artista':artists,
    'fecha_lanzamiento':dates,
    'playlist':lista_rep,
    'track_id':track_id,
}

#Transform dictionary to Dataframe
df =pd.DataFrame(dict)
df

Unnamed: 0,cancion,artista,fecha_lanzamiento,playlist,track_id
0,Immortal Rites,Morbid Angel,,4tszLL7NTfLCoIz39Zsiy1,5hmek3mrSYvfSElBsPNbxo
1,Chopped in Half,Obituary,,4tszLL7NTfLCoIz39Zsiy1,01cGujYWGF7JchJLSgf6Ta
2,Left Hand Path,Entombed,,4tszLL7NTfLCoIz39Zsiy1,5faD0zZ9fMa3J5ZN3lIWtp
3,Pull the Plug,Death,,4tszLL7NTfLCoIz39Zsiy1,2l0h4aBFLp9HdoaNdCTlbW
4,Into The Grave,Grave,,4tszLL7NTfLCoIz39Zsiy1,4bAIIhqJeOTDcyeo1GvIMo
...,...,...,...,...,...
984,No Es Por Acá,Carin Leon,,3IyNJEsknaSFoUIn8qf1Lr,3bvJftZKZe5QKz433NczyV
985,Solo Un Dia (Ahora Te Amo),Adan Romero,,3IyNJEsknaSFoUIn8qf1Lr,51RcAoNIqe0G3284yxT8G0
986,La Buena y la Mala,Banda Tierra Sagrada,,3IyNJEsknaSFoUIn8qf1Lr,4y0n8xKuEKE0J2sThzswhg
987,Que Te Vaya Bien,Julión Álvarez y su Norteño Banda,,3IyNJEsknaSFoUIn8qf1Lr,1kJXYVVUu7o3B9gaJpoxjm


In [94]:
#Dta structures for song metrics
danceability=[]
energy=[]
key=[]
loudness=[]
speechiness=[]
acousticness=[]
instrumentalness=[]
liveness=[]
valence=[]

#Go song by song and retrieve metrics
for cancion in df['track_id']:
    results = spotify.audio_features(tracks=[cancion])
    danceability.append(results[0]['danceability'])
    energy.append(results[0]['energy'])
    key.append(results[0]['key'])
    loudness.append(results[0]['loudness'])
    speechiness.append(results[0]['speechiness'])
    acousticness.append(results[0]['acousticness'])
    instrumentalness.append(results[0]['instrumentalness'])
    liveness.append(results[0]['liveness'])
    valence.append(results[0]['valence'])

#Merge all metric data in a single dict
dict ={
    'cancion':tracks,
    'artista':artists,
    'fecha_lanzamiento':dates,
    'playlist':lista_rep,
    'track_id':track_id,
    'danceability':danceability,
    'energy':energy,
    'key':key,
    'loudness':loudness,
    'speechiness':speechiness,
    'acousticness':acousticness,
    'instrumentalness':instrumentalness,
    'liveness':liveness,
    'valence':valence
}

#Update dataframe so that it features these metrics
df =pd.DataFrame(dict)
df


Unnamed: 0,cancion,artista,fecha_lanzamiento,playlist,track_id,danceability,energy,key,loudness,speechiness,acousticness,instrumentalness,liveness,valence
0,Immortal Rites,Morbid Angel,,4tszLL7NTfLCoIz39Zsiy1,5hmek3mrSYvfSElBsPNbxo,0.190,0.934,1,-7.796,0.0560,0.000008,0.107,0.3630,0.360
1,Chopped in Half,Obituary,,4tszLL7NTfLCoIz39Zsiy1,01cGujYWGF7JchJLSgf6Ta,0.257,0.989,10,-5.918,0.0867,0.000007,0.521,0.2630,0.385
2,Left Hand Path,Entombed,,4tszLL7NTfLCoIz39Zsiy1,5faD0zZ9fMa3J5ZN3lIWtp,0.166,0.927,7,-8.797,0.0736,0.000004,0.651,0.3440,0.219
3,Pull the Plug,Death,,4tszLL7NTfLCoIz39Zsiy1,2l0h4aBFLp9HdoaNdCTlbW,0.226,0.978,9,-5.729,0.2090,0.000004,0.533,0.0436,0.242
4,Into The Grave,Grave,,4tszLL7NTfLCoIz39Zsiy1,4bAIIhqJeOTDcyeo1GvIMo,0.295,0.915,1,-6.968,0.0954,0.000003,0.910,0.0772,0.343
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
984,No Es Por Acá,Carin Leon,,3IyNJEsknaSFoUIn8qf1Lr,3bvJftZKZe5QKz433NczyV,0.746,0.370,2,-6.309,0.0306,0.456000,0.000,0.1060,0.595
985,Solo Un Dia (Ahora Te Amo),Adan Romero,,3IyNJEsknaSFoUIn8qf1Lr,51RcAoNIqe0G3284yxT8G0,0.692,0.377,5,-7.128,0.0497,0.522000,0.000,0.0965,0.939
986,La Buena y la Mala,Banda Tierra Sagrada,,3IyNJEsknaSFoUIn8qf1Lr,4y0n8xKuEKE0J2sThzswhg,0.805,0.716,0,-4.498,0.0390,0.527000,0.000,0.2470,0.905
987,Que Te Vaya Bien,Julión Álvarez y su Norteño Banda,,3IyNJEsknaSFoUIn8qf1Lr,1kJXYVVUu7o3B9gaJpoxjm,0.665,0.469,4,-5.809,0.0280,0.230000,0.000,0.2340,0.383


In [95]:
#Making sure dimenssions are the same across the board
print(len(tracks))
print(len(artists))
print(len(dates))
print(len(lista_rep))
print(len(track_id))
print(len(danceability))
print(len(energy))
print(len(key))
print(len(loudness))
print(len(speechiness))
print(len(acousticness))
print(len(instrumentalness))
print(len(liveness))
print(len(valence))



989
989
989
989
989
989
989
989
989
989
989
989
989
989


In [96]:
#Mapping playlist ID's to their actual names
playlist_dict ={
'4tszLL7NTfLCoIz39Zsiy1': 'Death Metal',
'1oUgBxHbwjbscgj0vOWkJY': 'Corridos para Pistear',
'37i9dQZF1DX2shzuwwKw0y': 'Corridos Perrones',
'37i9dQZF1DWXRqgorJj26U': 'Classic Rock',
'1DeVya9cgtEGZJ0eiREWJJ': 'Salsas Ching@nas',
'37i9dQZF1DWWOaP4H0w5b0':'Metal Essentials',
'0EfZGNw0JHvoCkNfnMDYAU':'Cumbias de Microbusero',
'37i9dQZF1DZ06evO1sJmec': 'This is Metallica',
'37i9dQZF1DZ06evO3RbzfW': 'This is Rammstein',
'6L3dKN1gZU74cStCIgA0yS': 'Perreo para barrer el piso con la cola',
'37i9dQZF1DZ06evO1xnOx2': 'This is Hozier',
'37i9dQZF1DZ06evO1mfBM4': 'This is Luis Miguel',
'37i9dQZF1DZ06evO0Mu9Si': 'This is Paquita la del Barrio',
'3IyNJEsknaSFoUIn8qf1Lr': 'Banda 2022'}

df['Lista Rep'] = df['playlist'].map(playlist_dict)

In [97]:
#Making sure everything is ok
df.head()

Unnamed: 0,cancion,artista,fecha_lanzamiento,playlist,track_id,danceability,energy,key,loudness,speechiness,acousticness,instrumentalness,liveness,valence,Lista Rep
0,Immortal Rites,Morbid Angel,,4tszLL7NTfLCoIz39Zsiy1,5hmek3mrSYvfSElBsPNbxo,0.19,0.934,1,-7.796,0.056,8e-06,0.107,0.363,0.36,Death Metal
1,Chopped in Half,Obituary,,4tszLL7NTfLCoIz39Zsiy1,01cGujYWGF7JchJLSgf6Ta,0.257,0.989,10,-5.918,0.0867,7e-06,0.521,0.263,0.385,Death Metal
2,Left Hand Path,Entombed,,4tszLL7NTfLCoIz39Zsiy1,5faD0zZ9fMa3J5ZN3lIWtp,0.166,0.927,7,-8.797,0.0736,4e-06,0.651,0.344,0.219,Death Metal
3,Pull the Plug,Death,,4tszLL7NTfLCoIz39Zsiy1,2l0h4aBFLp9HdoaNdCTlbW,0.226,0.978,9,-5.729,0.209,4e-06,0.533,0.0436,0.242,Death Metal
4,Into The Grave,Grave,,4tszLL7NTfLCoIz39Zsiy1,4bAIIhqJeOTDcyeo1GvIMo,0.295,0.915,1,-6.968,0.0954,3e-06,0.91,0.0772,0.343,Death Metal


In [98]:
#Making sure the mapping went fine
df['Lista Rep'].unique()

array(['Death Metal', 'Corridos para Pistear', 'Corridos Perrones',
       'Classic Rock', 'Salsas Ching@nas', 'Metal Essentials',
       'Cumbias de Microbusero', 'This is Metallica', 'This is Rammstein',
       'Perreo para barrer el piso con la cola', ' This is Hozier',
       'This is Luis Miguel', 'This is Paquita la del Barrio',
       'Banda 2022'], dtype=object)

In [99]:
#Exporting results to a CSV for later use
df.to_csv('SpotifyMex.csv')