In [1]:
import pandas as pd
import numpy as np

#-----------------------------------------------#

import time
import os
from dotenv import load_dotenv

#-----------------------------------------------#

from spotipy.oauth2 import SpotifyOAuth
import spotipy
import spotipy.util as util
from spotipy.cache_handler import CacheHandler

#-----------------------------------------------#

In [2]:
# Load the variables defined in a local .env file into the process environment
# so the os.getenv() lookups in the next cell can find the Spotify credentials.
load_dotenv()

True

#### Setting Up Spotify API Client Authentication

In [3]:
# Credentials and the Spotify user name are read from environment variables;
# any of them is None when the corresponding variable is not set.
cid = os.getenv('SPOTIPY_CLIENT_ID')
secret = os.getenv('SPOTIPY_CLIENT_SECRET')
username = os.getenv('USERNAME')
redirect_uri = 'http://localhost:8080'

# Space-separated OAuth scopes: read the user's top items and edit playlists.
scope = ' '.join([
    'user-top-read',
    'playlist-modify-public',
    'playlist-modify-private',
])

# The token cache lives in a per-user file next to the project.
cache_path = os.path.abspath(f"../spotify_data_cache/{username}")

auth_manager = SpotifyOAuth(
    client_id=cid,
    client_secret=secret,
    redirect_uri=redirect_uri,
    scope=scope,
    username=username,
    cache_path=cache_path,
)
sp = spotipy.Spotify(auth_manager=auth_manager)

#### Fetching Top 20 Tracks from Spotify for Current User

In [4]:
# Ask Spotify for the current user's 20 top tracks over the 'short_term' window.
top20_raw = sp.current_user_top_tracks(
    limit=20,
    time_range='short_term',
)

#### Printing Top 20 Spotify Tracks with Artist Names

In [17]:
# Print a 1-based ranking: track name followed by its first listed artist.
for rank, item in enumerate(top20_raw['items'], start=1):
    lead_artist = item['artists'][0]['name']
    print(rank, item['name'], '//', lead_artist)

#### Extracting Track IDs, Names, and Audio Features from Top 20 Spotify Tracks

In [11]:
# Tracks returned by the top-tracks request.
tracks = top20_raw['items']

# Request the audio features of every track in a single API call instead of
# one call per track. The response is positional: one entry per requested ID,
# with None where Spotify has no audio features for that track.
all_audio_features = sp.audio_features([track['id'] for track in tracks]) if tracks else []

# One list per output column; all lists are filled in lockstep inside the loop
# so that position i always refers to the same track.
track_ids = []
track_names = []
artists = []
album_names = []
release_dates = []
popularity = []
acousticness = []
danceability = []
energy = []
instrumentalness = []
liveness = []
loudness = []
speechiness = []
tempo = []
valence = []
explicit = []
duration_ms = []

for track, audio_features in zip(tracks, all_audio_features):
    track_id = track['id']
    track_name = track['name']

    # A track can have several artists; store them as one comma-separated string.
    artist_names = [artist['name'] for artist in track['artists']]
    artist_names_str = ', '.join(artist_names)

    album_name = track['album']['name']
    release_date = track['album']['release_date']
    track_popularity = track['popularity']

    track_ids.append(track_id)
    track_names.append(track_name)
    artists.append(artist_names_str)
    album_names.append(album_name)
    release_dates.append(release_date)
    popularity.append(track_popularity)

    # Fall back to an empty mapping when no features are available so the
    # columns stay aligned and the missing values become None (NaN in pandas)
    # instead of raising a TypeError.
    audio_features = audio_features or {}
    acousticness.append(audio_features.get('acousticness'))
    danceability.append(audio_features.get('danceability'))
    energy.append(audio_features.get('energy'))
    instrumentalness.append(audio_features.get('instrumentalness'))
    liveness.append(audio_features.get('liveness'))
    loudness.append(audio_features.get('loudness'))
    speechiness.append(audio_features.get('speechiness'))
    tempo.append(audio_features.get('tempo'))
    valence.append(audio_features.get('valence'))

    # Whether the track is flagged as explicit (None when the key is absent).
    explicitness = track.get('explicit', None)
    explicit.append(explicitness)

    # Track length in milliseconds, taken from the audio features.
    duration = audio_features.get('duration_ms', None)
    duration_ms.append(duration)

# Assemble the column lists into a single DataFrame, one row per track.
df = pd.DataFrame({
    'Track ID': track_ids,
    'Track Name': track_names,
    'Artists': artists,
    'Album Name': album_names,
    'Release Date': release_dates,
    'Explicit': explicit,
    'Popularity': popularity,
    'Acousticness': acousticness,
    'Danceability': danceability,
    'Energy': energy,
    'Instrumentalness': instrumentalness,
    'Liveness': liveness,
    'Loudness': loudness,
    'Speechiness': speechiness,
    'Tempo': tempo,
    'Valence': valence,
    'Duration_ms': duration_ms
})


In [12]:
# Keep only the track ID and the audio-feature columns.
top20 = df[['Track ID', 'Danceability', 'Energy', 'Loudness', 'Speechiness', 'Acousticness', 'Instrumentalness', 'Liveness', 'Valence', 'Tempo', 'Duration_ms']]

# Save both DataFrames as CSV files.
df.to_csv('../data/my_top_20.csv', index=False)
# `index` is a boolean flag; True writes the row index as the first column.
top20.to_csv('../data/clean_data_top20.csv', index=True)

#### Listing Artist IDs and Names from My Top 20 Spotify Tracks

In [13]:
# Print the first listed artist of every top track and remember its ID.
# Repeated artists are kept here.
print('Artists in my top 20:')
print('=====================')
ids_artists = []
for item in top20_raw['items']:
    lead_artist = item['artists'][0]
    artist_id, artist_name = lead_artist['id'], lead_artist['name']
    print(f'{artist_id}: {artist_name}')
    ids_artists.append(artist_id)

Artists in my top 20:
0jbo7KFNMiIkfBR6ih0yhm: iñigo quintero
790FomKkXshlbRYZFtlgla: KAROL G
45dkTj5sMRSjrmBSBeiHym: Tate McRae
2ayNSoKPCRAfjp6hQ76hRu: Nacho
790FomKkXshlbRYZFtlgla: KAROL G
2wY79sveU1sp5g7SokKOiI: Sam Smith
3psizJPIbIEEctInvdWSZk: GUNTTER
4Uc8Dsxct0oMqx0P6i60ea: Conan Gray
7iK8PXO48WeuP03g8YR51W: Myke Towers
0jbo7KFNMiIkfBR6ih0yhm: iñigo quintero
22wbnEMDvgVIAGdFeek6ET: Benson Boone
39csiig8WOh2SMguk2nuL5: Daniel Leggs
5KsRA81UaMVKvLNiwDySfp: Jonah Kagen
4IWBUUAFIplrNtaOHcJPRM: James Arthur
1TtXnWcUs0FCkaZDPGYHdf: Darell
74KM79TiuVKeVCqs8QtB0B: Sabrina Carpenter
21451j1KhjAiaYKflxBjr1: Zion & Lennox
2jHp7gQArCQrlMvdrIVFCg: Anson Seabra
2C1Pj5TMq8QjqbwQ4tmDtO: Punto40
1SupJlEpv7RS2tPNRaHViT: Nicky Jam


#### Counting Unique Artists in Top 20 Spotify Tracks

In [14]:
# Remove repeated IDs while keeping first-seen order, so the list (and
# everything derived from it) comes out in the same order on every run.
ids_artists = list(dict.fromkeys(ids_artists))
print(f'Número de artistas (sin repeticiones): {len(ids_artists)}')

Número de artistas (sin repeticiones): 18


#### Retrieving and Displaying Similar Artists for Top Spotify Artists

In [15]:
import cachetools
from cachetools import TTLCache

# In-memory cache for related-artist lookups, keyed by artist ID in the helper
# below. Holds at most 100 entries; each expires after ttl=3600
# (presumably seconds, per cachetools' default timer — confirm).
cache = TTLCache(maxsize=100, ttl=3600)

def get_artist_related_artists_with_cache(sp, artist_id):
    """Return the artists related to ``artist_id``, consulting the module cache first.

    Args:
        sp: Client exposing ``artist_related_artists(artist_id)``, whose result
            is a mapping with an ``'artists'`` entry.
        artist_id: Identifier of the artist to look up; also the cache key.

    Returns:
        The value stored under ``'artists'`` in the client's response. On a
        cache hit the previously stored object is returned and a notice is
        printed.
    """
    # Cache miss: query the API once and remember the answer.
    if artist_id not in cache:
        related = sp.artist_related_artists(artist_id)['artists']
        cache[artist_id] = related
        return related

    # Cache hit: report it and hand back the stored result.
    print(f'Usando resultado en caché para {artist_id}')
    return cache[artist_id]

# Look up the related artists of every artist collected so far (through the
# caching helper), print them and gather their IDs.
# The loop variables use their own names so the inner loop does not rebind
# the outer loop variable.
print('Similar Artists:')
print('=====================')
ids_similar_artists = []
for top_artist_id in ids_artists:
    related_artists = get_artist_related_artists_with_cache(sp, top_artist_id)
    for related in related_artists:
        related_id = related['id']
        related_name = related['name']
        print(f'{related_id}: {related_name}')
        ids_similar_artists.append(related_id)


Similar Artists:


#### Appending Similar Artists' IDs to the List of Original Artist IDs

In [None]:
# Append the similar artists' IDs to the existing list in place.
ids_artists += ids_similar_artists

#### Counting Unique Artists Including Similar Artists

In [10]:
# Remove repeated IDs while keeping first-seen order, so the list (and
# everything derived from it) comes out in the same order on every run.
ids_artists = list(dict.fromkeys(ids_artists))
print(f'Number of artists (without repetitions): {len(ids_artists)}')

Number of artists (without repetitions): 18


#### Fetching Top 20 New Releases from Spotify

In [11]:
# Request up to 20 new releases and keep only the 'albums' part of the
# response; the next cell iterates over its 'items'.
new_releases = sp.new_releases(limit=20)['albums']

#### Listing Artists with Recent Album Releases on Spotify

In [12]:
# Print each new release with its first listed artist, and add that artist's
# ID to the running list.
print('')
print('Recent Album Releases:')
print('=====================')
for item in new_releases['items']:
    lead_artist = item['artists'][0]
    artist_id = lead_artist['id']
    artist_name = lead_artist['name']
    album_name, release_date = item['name'], item['release_date']
    print(f'{artist_id}: {artist_name} - // {album_name}, {release_date}')
    ids_artists.append(artist_id)


Recent Album Releases:
1Mw40k757jZuiL0NIJpdO5: GULEED - // Cuando Menos Lo Espera, 2023-07-07
6k8mwkKJKKjBILo7ypBspl: Ana Mena - // bellodrama, 2023-03-24
7iK8PXO48WeuP03g8YR51W: Myke Towers - // LA VIDA ES UNA, 2023-03-23
5XJDexmWFLWOkjOEjOVX3e: Eladio Carrion - // 3MEN2 KBRN, 2023-03-17
790FomKkXshlbRYZFtlgla: KAROL G - // MAÑANA SERÁ BONITO, 2023-02-24
2auC28zjQyVTsiZKNgPRGs: RM - // Indigo, 2022-12-02
6KImCVD70vtIoJWnq6nGn3: Harry Styles - // Harry's House, 2022-05-20
4q3ewBCX7sLwd24euuV69X: Bad Bunny - // Un Verano Sin Ti, 2022-05-06
2R21vXR83lH98kGeO99Y66: Anuel AA - // Las Leyendas Nunca Mueren, 2021-11-26
4dpARuHxo51G3z768sgnrY: Adele - // 30, 2021-11-19
6eUKZXaKkcviH0Ku9w2n3V: Ed Sheeran - // =, 2021-10-29
4gzpq5DPGxSnKTe4SA8HAU: Coldplay - // Music Of The Spheres, 2021-10-15
53KwLdlmrlCelAZMaLVZqU: James Blake - // Friends That Break Your Heart, 2021-10-08
4MzJMcHQBl9SIYSjwWn8QW: Spiritbox - // Eternal Blue, 2021-09-17
1vyhD5VmyZ7KMfW5gqLgo5: J Balvin - // JOSE, 2021-09-10
5

#### Updating and Counting Unique Artist IDs After Adding Recent Releases

In [13]:
# Remove repeated IDs while keeping first-seen order, so the list (and
# everything derived from it) comes out in the same order on every run.
ids_artists = list(dict.fromkeys(ids_artists))
print(f'Number of artists (without repetitions): {len(ids_artists)}')

Number of artists (without repetitions): 36


#### Collecting the Latest Album ID for Each Unique Artist from Spotify

In [14]:
# For every artist, request a single album entry (limit=1) and store its ID.
id_albums = []
nartists = len(ids_artists)
for position, id_artist in enumerate(ids_artists, start=1):
    print(f'Processing artist {position} of {nartists}...')
    album_page = sp.artist_albums(id_artist, limit=1)
    id_albums.extend(album['id'] for album in album_page['items'])
    # Pause one second between requests.
    time.sleep(1)
print('Done!')

Processing artist 1 of 36...


#### Retrieving Track IDs from Albums with a Limit of 3 Tracks Per Album

In [None]:
# Take up to three tracks (limit=3) from each collected album and keep their IDs.
# No delay is applied between requests in this loop.
id_tracks = []
nalbums = len(id_albums)
for position, id_album in enumerate(id_albums, start=1):
    print(f'Processing album {position} of {nalbums}...')
    first_tracks = sp.album_tracks(id_album, limit=3)['items']
    id_tracks += [track['id'] for track in first_tracks]
print(f'Done! Total number of pre-candidate tracks: {len(id_tracks)}')

#### Extracting Track Names and Audio Features for Pre-Candidate Tracks

In [None]:
# Fetch the name and audio features of every pre-candidate track.
# Requests are batched (sp.tracks accepts up to 50 IDs per call, which also
# fits sp.audio_features) so the cell makes two calls per batch instead of
# two calls per track.
track_names = []
features = []
ntracks = len(id_tracks)
batch_size = 50
for start in range(0, ntracks, batch_size):
    batch_ids = id_tracks[start:start + batch_size]
    print(f'Processing tracks {start + 1}-{start + len(batch_ids)} of {ntracks}...')
    batch_tracks = sp.tracks(batch_ids)['tracks']
    batch_features = sp.audio_features(batch_ids)

    # Both responses are positional; an entry is None when Spotify has no
    # data for that ID. Keep only tracks that have both, so `track_names`
    # and `features` stay aligned.
    for track, track_features in zip(batch_tracks, batch_features):
        if track is not None and track_features is not None:
            track_names.append(track['name'])
            features.append(track_features)
print('Donee!')

#### Creating a DataFrame and Exporting Selected Columns to CSV

In [None]:
# One row per candidate track, indexed by track name, with every field of the
# audio-features payload as a column.
candidatos_df = pd.DataFrame(features, index=track_names)

# Keep only the track ID and the audio-feature columns.
candidatos = candidatos_df[['id', 'danceability', 'energy', 'key', 'loudness', 'mode', 'speechiness', 'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo', 'duration_ms']]

# Export the selected columns (not the unfiltered frame) so the file has the
# same columns as `candidatos`. This overwrites any existing file.
candidatos.to_csv('../data/candidatos.csv')

#### Combining and Updating DataFrame for Candidate Tracks

In [None]:
# Columns that make up the candidates dataset.
columns_to_keep = ['id', 'danceability', 'energy', 'key', 'loudness', 'mode', 'speechiness', 'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo', 'duration_ms']

# Work on an independent copy so re-indexing never touches `candidatos_df`.
candidatos = candidatos_df[columns_to_keep].copy()
if len(track_names) == len(candidatos):
    candidatos.index = track_names
else:
    print("Error: Length of track_names does not match length of candidates_df")

csv_file_path = '../data/candidatos.csv'

if os.path.exists(csv_file_path):
    # Merge the freshly fetched candidates with what is already on disk.
    datos_existentes_df = pd.read_csv(csv_file_path, index_col=0)
    datos_combinados_df = (
        pd.concat([candidatos, datos_existentes_df])
        # The stored file may carry extra columns; keep only the agreed ones.
        .loc[:, columns_to_keep]
        # A track is identified by its Spotify ID. Comparing whole rows would
        # miss duplicates whenever any other column differs. The first
        # occurrence (the fresh data) wins.
        .drop_duplicates(subset='id')
    )
else:
    datos_combinados_df = candidatos

datos_combinados_df.to_csv(csv_file_path, index=True)

In [None]:
# Load the candidates file back, using its first column as the row index.
read = pd.read_csv(
    '../data/candidatos.csv',
    index_col=0,
)