# Data extraction

## Access data:

Spotipy es una biblioteca de Python que permite interactuar con la API de Spotify. Se utiliza para acceder y manipular datos de Spotify, como obtener información de canciones, artistas, álbumes, listas de reproducción y realizar acciones como reproducir pistas, crear listas de reproducción y mucho más.

In [1]:
# Para acceder a los datos de Spotify necesitamos leer las credenciales que nos da la API: client_id y client_secret
import pandas as pd
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials

# api.txt is expected to hold the client ID on its first line and the
# client secret on its second line.
with open("api.txt") as f:
    # splitlines() drops the line terminators, whatever their style.
    secret_ls = f.read().splitlines()

# strip() instead of slicing off the last character: slicing left a stray
# "\r" with Windows line endings, removed a real character when the line had
# no terminator, and the secret used to keep its trailing newline.
client_id = secret_ls[0].strip()
secret = secret_ls[1].strip()

In [2]:
# Build the credentials manager from the values read from api.txt, then the
# API client that the following cells use as `sp`.
client_credentials_manager = SpotifyClientCredentials(
    client_id=client_id,
    client_secret=secret,
)
sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)

## Import playlists:

In [11]:
def _playlist_id(link):
    """Return the last path segment of a playlist link, without its query string."""
    last_segment = link.rsplit("/", 1)[-1]
    return last_segment.partition("?")[0]


# Share links of the five mood playlists.
happy_pl_link = "https://open.spotify.com/playlist/37i9dQZF1DXdPec7aLTmlC?si=6d0d8a354930406b"  # 1/ "Happy hits!"
sad_pl_link = "https://open.spotify.com/playlist/37i9dQZF1DXdZjf8WgcTKM?si=66918a9ecd434eda"  # 2/ "canciones tristes"
ang_pl_link = "https://open.spotify.com/playlist/37i9dQZF1DX1tyCD9QhIWF?si=45e2a041dbbf4cbb"  # 3/ "Walk like a badass"
spooky_pl_link = "https://open.spotify.com/playlist/37i9dQZF1DX5hol82XuK24?si=29cf60e5500b488a"  # 4/ "Spooky"
focus_pl_link = "https://open.spotify.com/playlist/37i9dQZF1DWZeKCadgRdKQ?si=635dd64ee7fa477b"  # 5/ "Deep Focus"

# Playlist identifiers extracted from the links above.
happy_URI = _playlist_id(happy_pl_link)
sad_URI = _playlist_id(sad_pl_link)
anger_URI = _playlist_id(ang_pl_link)
spooky_URI = _playlist_id(spooky_pl_link)
focus_URI = _playlist_id(focus_pl_link)

# Order matters to the cells below: happy, sad, anger, spooky, focus.
playlist_URI = [happy_URI, sad_URI, anger_URI, spooky_URI, focus_URI]

In [8]:
# Walk the items of the focus playlist. track_uri is overwritten on every
# pass, so after the loop it holds the URI of the last item returned.
focus_items = sp.playlist_tracks(focus_URI)["items"]
for track in focus_items:
    track_uri = track["track"]["uri"]

# Display the audio features of that single track as a sample of the fields
# available per song.
sp.audio_features(track_uri)[0]

{'danceability': 0.348,
 'energy': 0.0549,
 'key': 4,
 'loudness': -21.011,
 'mode': 0,
 'speechiness': 0.0358,
 'acousticness': 0.989,
 'instrumentalness': 0.92,
 'liveness': 0.108,
 'valence': 0.237,
 'tempo': 76.177,
 'type': 'audio_features',
 'id': '23sRVWvfwWdagkqC95eS6n',
 'uri': 'spotify:track:23sRVWvfwWdagkqC95eS6n',
 'track_href': 'https://api.spotify.com/v1/tracks/23sRVWvfwWdagkqC95eS6n',
 'analysis_url': 'https://api.spotify.com/v1/audio-analysis/23sRVWvfwWdagkqC95eS6n',
 'duration_ms': 168185,
 'time_signature': 3}

In [None]:
# Audio-feature fields copied from each track's audio_features() entry
feature_names = ['danceability', 'energy', 'key', 'loudness', 'mode',
                 'speechiness', 'acousticness', 'instrumentalness',
                 'liveness', 'valence', 'tempo', 'duration_ms']

# One dict per song. Assigning scalars to the columns of an empty DataFrame
# (the previous approach) never adds rows, so the rows are collected here and
# the DataFrame is built once at the end.
rows = []

for uri in playlist_URI:
    # The playlist name is the same for all of its songs: fetch it once per
    # playlist rather than once per track.
    playlist_name = sp.playlist(uri)['name']

    # NOTE(review): only the items returned by this single call are used;
    # confirm whether longer playlists need paging.
    for item in sp.playlist_tracks(uri)['items']:
        track = item['track']
        if track is None:
            # Presumably an unavailable/removed entry; nothing to describe.
            continue

        # audio_features() returns a list; a single URI yields one entry.
        features = sp.audio_features(track['uri'])[0]
        if features is None:
            # No audio features available for this track.
            continue

        row = {
            'playlist_name': playlist_name,
            # Only the first listed artist is kept.
            'artist_name': track['artists'][0]['name'],
            'popularity': track['popularity'],
        }
        for feature_name in feature_names:
            row[feature_name] = features[feature_name]
        rows.append(row)

df = pd.DataFrame(rows)
print(df)

In [12]:
# List that accumulates one dict of song information per track
playlist_data = []

# Audio-feature fields copied from each track's audio_features() entry
feature_names = ['danceability', 'energy', 'key', 'loudness', 'mode',
                 'speechiness', 'acousticness', 'instrumentalness',
                 'liveness', 'valence', 'tempo', 'duration_ms']

# Go through every playlist ID
for uri in playlist_URI:
    # The playlist name does not change between tracks, so it is requested
    # once per playlist instead of once per song.
    playlist_name = sp.playlist(uri)['name']

    # NOTE(review): only the items returned by this single call are used;
    # confirm whether longer playlists need paging.
    tracks = sp.playlist_tracks(uri)['items']

    # Go through every song of the playlist
    for item in tracks:
        track = item['track']
        if track is None:
            # Presumably an unavailable/removed entry; nothing to describe.
            continue

        # audio_features() returns a list; a single URI yields one entry.
        features = sp.audio_features(track['uri'])[0]
        if features is None:
            # No audio features available for this track.
            continue

        # One column per variable. The missing comma after the 'Artista'
        # entry made the original cell a SyntaxError.
        song_info = {
            'URI': track['uri'],
            'Playlist': playlist_name,
            # Only the first listed artist is kept.
            'Artista': track['artists'][0]['name'],
            'Popularidad': track['popularity'],
        }

        # Add the selected audio features as further columns
        song_info.update({name: features[name] for name in feature_names})

        playlist_data.append(song_info)

# Build the DataFrame from the list of per-song dicts
df = pd.DataFrame(playlist_data)

print(df)

URI: spotify:track:4uUG5RXrOk84mYEfFvj3cK
Playlist: Happy Hits!
Artista: David Guetta
Features: {'danceability': 0.561, 'energy': 0.965, 'key': 7, 'loudness': -3.673, 'mode': 0, 'speechiness': 0.0343, 'acousticness': 0.00383, 'instrumentalness': 7.07e-06, 'liveness': 0.371, 'valence': 0.304, 'tempo': 128.04, 'type': 'audio_features', 'id': '4uUG5RXrOk84mYEfFvj3cK', 'uri': 'spotify:track:4uUG5RXrOk84mYEfFvj3cK', 'track_href': 'https://api.spotify.com/v1/tracks/4uUG5RXrOk84mYEfFvj3cK', 'analysis_url': 'https://api.spotify.com/v1/audio-analysis/4uUG5RXrOk84mYEfFvj3cK', 'duration_ms': 175238, 'time_signature': 4}

URI: spotify:track:4h9wh7iOZ0GGn8QVp4RAOB
Playlist: Happy Hits!
Artista: OneRepublic
Features: {'danceability': 0.704, 'energy': 0.797, 'key': 0, 'loudness': -5.927, 'mode': 1, 'speechiness': 0.0475, 'acousticness': 0.0826, 'instrumentalness': 0.000745, 'liveness': 0.0546, 'valence': 0.825, 'tempo': 139.994, 'type': 'audio_features', 'id': '4h9wh7iOZ0GGn8QVp4RAOB', 'uri': 'spotif