In [1]:
import pickle
import json
import pandas as pd
import numpy as np

#-----------------------------------------------#

import requests
import threading
import time
import os
from dotenv import load_dotenv

#-----------------------------------------------#

from spotipy.oauth2 import SpotifyOAuth
import spotipy
import spotipy.util as util

#-----------------------------------------------#

from sklearn.preprocessing import StandardScaler
from sklearn.metrics.pairwise import linear_kernel

In [2]:
load_dotenv()

True

In [3]:
'''Setting up a directory for caching Spotify data and initializig Spotify API credentials and user details using environment variables. 
It prepares for connecting to the Spotify API by defining the cache directory, client ID, client secret, redirect URI, and username.'''

CACHE_DIR = './spotify_data_cache'
cid = os.getenv('SPOTIPY_CLIENT_ID')
secret = os.getenv('SPOTIPY_CLIENT_SECRET')
redirect_uri = 'http://localhost:8080'
username = os.getenv('USERNAME')

In [4]:
''' This code initializes Spotify API authorization with specific scopes for reading user's top tracks and modifying playlists'''

scope = 'user-top-read playlist-modify-public playlist-modify-private'
auth_manager = SpotifyOAuth(client_id=cid, client_secret=secret, redirect_uri=redirect_uri, scope=scope, username=username)
sp = spotipy.Spotify(auth_manager=auth_manager, requests_timeout=10)

In [5]:

def get_top_tracks(sp, time_range='short_term', limit=20):
    
    '''This function, get_top_tracks, retrieves a Spotify user's top tracks, utilizing caching to store and access the data efficiently. 
    It checks for cached data in a specified directory before making an API call, saving any new data retrieved for future use.'''
    
    cache_file_path = os.path.join(CACHE_DIR, 'all_top_tracks.json')

    if os.path.exists(cache_file_path):
        with open(cache_file_path, 'r') as cache_file:
            return json.load(cache_file)

    top_tracks = sp.current_user_top_tracks(time_range=time_range, limit=limit)

    with open(cache_file_path, 'w') as cache_file:
        json.dump(top_tracks, cache_file)

    return top_tracks

top_tracks = get_top_tracks(sp)

In [6]:
for i, item in enumerate(top_tracks['items']):
    print(i+1, item['name'], '//', item['artists'][0]['name'])

1 Schatzi bitte - Dream DJ Team Mix // Bitschu Batschu
2 Another Love // Tom Odell
3 Schmetterling // Baby B3ns
4 Cherries // Layfullstop
5 No Hands (feat. Roscoe Dash & Wale) // Waka Flocka Flame
6 I Miss You // blink-182
7 The Rapture Pt.III // &ME
8 Haifischlaserknarre // Mausio
9 Good Guy // Frank Ocean
10 Put Your Records On // Corinne Bailey Rae
11 Palestine // Ali Bumaye
12 Mbappe // Eladio Carrion
13 Krustenf!cker // Mausio
14 Wir haben Spass und ihr nicht // Anna Ullrich
15 TABU. // Yung Yury
16 Frei sein // DREAM DJ TEAM
17 Mädchen auf dem Pferd // Luca-Dante Spadafora
18 Love Yuh Bad // Popcaan
19 Jorja Interlude // Drake
20 Bounce // Joose The Conqueror


In [7]:
def create_tracks_dataframe(sp, top_tracks):
    '''
    This function processes a list of Spotify track data, extracting each track's ID, 
    name, and audio features, and then compiles this information into a Pandas DataFrame. 
    The DataFrame is indexed by track names for easy reference and analysis. It utilizes caching 
    to store and access the audio features data efficiently.
    '''

    tracks = top_tracks['items']
    track_ids = []  
    track_names = []
    features = []

    for track in tracks:
        track_id = track['id']
        track_name = track['name']

        cache_file_path = os.path.join(CACHE_DIR, 'tracks_features.json')

        if os.path.exists(cache_file_path):
            with open(cache_file_path, 'r') as cache_file:
                audio_features = json.load(cache_file)
        else:
            audio_features = sp.audio_features(track_id)
            with open(cache_file_path, 'w') as cache_file:
                json.dump(audio_features, cache_file)

        track_ids.append(track_id)
        track_names.append(track_name)
        features.append(audio_features[0] if audio_features else None)
    
    top_tracks_df = pd.DataFrame(features, index=track_names)
    
    return top_tracks_df, track_ids

In [8]:
'''Calling the previues function to create the new dataframe'''

top_tracks_df, track_ids = create_tracks_dataframe(sp, top_tracks)

In [9]:
def cleaning_df(track):
    
    '''The function cleaning_df takes a DataFrame track as input and returns a cleaned version, 
    top_tracks_clean, which contains only selected columns.'''
    
    top_tracks_clean = track[['id', 'danceability',	'energy',	'key',	'loudness',	'mode',	'speechiness',	'acousticness',	'instrumentalness',	'liveness',	'valence',	'tempo', 'duration_ms']]
    return top_tracks_clean

In [10]:
'''Calling the cleaning function to apply to the dataset'''

top_tracks_clean = cleaning_df(top_tracks_df)

In [11]:
def getting_id_artists(df):
    '''
    This function processes a DataFrame that contain Spotify track data, 
    and extracts the IDs of artists from its 'items' column. It prints the IDs and names 
    of the artists, and returns a list of these artist IDs.
    '''

    ids_artists = []
    print('|| Artists in my top 20: ||')
    print('===========================')

    for item in df['items']:
        artist_id = item['artists'][0]['id']
        artist_name = item['artists'][0]['name']
        print(f'{artist_id}: {artist_name}')
        ids_artists.append(artist_id)

    return ids_artists

In [12]:
ids_artists = getting_id_artists(top_tracks)

|| Artists in my top 20: ||
20sx3E16MRur3wr3T9lWrt: Bitschu Batschu
2txHhyCwHjUEpJjWrEyqyX: Tom Odell
0geGEoVXjWIz38cw8JcqVP: Baby B3ns
44KO1So8LqWGobs7xNCqe9: Layfullstop
6f4XkbvYlXMH0QgVRzW0sM: Waka Flocka Flame
6FBDaR13swtiWwGhX1WQsP: blink-182
5mIowAJMp7RKNheelruV5z: &ME
5yzLRjAu6ni4Bb3fQDED2q: Mausio
2h93pZq0e7k5yf4dywlkpM: Frank Ocean
29WzbAQtDnBJF09es0uddn: Corinne Bailey Rae
6hMNiBKUQFKpnZ7GTvLVZf: Ali Bumaye
5XJDexmWFLWOkjOEjOVX3e: Eladio Carrion
5yzLRjAu6ni4Bb3fQDED2q: Mausio
50KbwRJgMFxSmT50FamuDN: Anna Ullrich
7tzELpzPjTq26pa5FT9ykz: Yung Yury
78HKXj2z44JthdgoqT9ElL: DREAM DJ TEAM
6k9e4mFFVtEAAhGdvJBtqB: Luca-Dante Spadafora
62DmErcU7dqZbJaDqwsqzR: Popcaan
3TVXtAsR1Inumwj472S9r4: Drake
3er7r6Hdg1VJzFfI4iLPzL: Joose The Conqueror


In [13]:
""" This code converts the ids_artists list to a set to remove any repeated IDs and then back to a list, displaying the count of unique artists.
"""

ids_artists = list(set(ids_artists))
print(f'Number of artists (without repetitions): {len(ids_artists)}')

Number of artists (without repetitions): 19


In [14]:
def get_similar_artists(sp, ids_artists, CACHE_DIR, CACHE_FILE='all_similar_artists.pickle'):
    '''
    This function takes a Spotify client 'sp' and a list of artist IDs 'ids_artists', finds similar 
    artists for each artist in the list, and prints their IDs and names. It utilizes a single .pickle file 
    caching system to efficiently store and access this data for all artists. The function returns a list of the IDs 
    of these similar artists.
    '''

    print('Similar Artists:')
    print('=====================')

    cache_file = os.path.join(CACHE_DIR, CACHE_FILE)

    if os.path.exists(cache_file):
        with open(cache_file, 'rb') as file:
            all_similar_artists = pickle.load(file)
    else:
        all_similar_artists = {}

    ids_similar_artists = []

    for artist_id in ids_artists:
        if artist_id not in all_similar_artists:
            artists = sp.artist_related_artists(artist_id)['artists']
            all_similar_artists[artist_id] = artists
            with open(cache_file, 'wb') as file:
                pickle.dump(all_similar_artists, file)
        else:
            artists = all_similar_artists[artist_id]

        for item in artists:
            similar_artist_id = item['id']
            similar_artist_name = item['name']
            print(f'{similar_artist_id}: {similar_artist_name}')
            ids_similar_artists.append(similar_artist_id)
        
        time.sleep(1)
    return ids_similar_artists

In [15]:
'''Calling the get_similar_artists function to create a new list'''

ids_similar_artists = get_similar_artists(sp, ids_artists, CACHE_DIR)

Similar Artists:
6z1cicLMt9XArxN10q7m8a: Travis Porter
0bfX8pF8kuHNCs57Ms4jZb: Roscoe Dash
5gCRApTajqwbnHHPbr2Fpi: Juicy J
23LbwefIODbyGdRbAz3urj: Yung Joc
6Ha4aES39QiVjR0L2lwuwq: Yo Gotti
5lHRUCqkQZCIWeX7xG4sYT: Rich Homie Quan
31HjiqargV4NAw4GZqUale: Ace Hood
5einkgXXrjhfYCyac1FANB: Lil Scrappy
3jksrX4oBklxR78ft8gv3j: Plies
6GMYJwaziB4ekv1Y6wCDWS: Soulja Boy
7Kp7SzuRuNiPFCy7JIwnLx: Ca$h Out
3ciRvbBIVz9fBoPbtSYq4x: Lil Jon & The East Side Boyz
35sCXuy5gN6Or69rZ9vqBs: Birdman
44PA0rCQXikgOWbfY7Fq7m: Ying Yang Twins
0I5HubncQ8E1MFZOlPDY4J: Trinidad James
3Y365ZqTf6fJ4ZcRnLKPGt: Gorilla Zoe
030sRm1rnXI9MeQ23pdax8: Boyz N Da Hood
7jFaoqWPhYLrKzjzlpXmUO: David Banner
2r8r62VGJKGi463aH1HJUZ: Kirko Bangz
0VKTLKamj4IH8OfQbUL0kq: Dem Franchize Boyz


4BxCuXFJrSWGi1KHcVqaU4: Kodaline
0gadJ2b9A4SKsB1RFkBb66: Passenger
4EzkuveR9pLvDVFNx6foYD: James Bay
4Ly0KABsxlx4fNj63zJTrF: Seafret
2WX2uTcsvV5OnS0inACecP: Birdy
10exVja0key0uqUkk6LJRT: Vance Joy
6AyATGg7mDgBlZ4N5uNog0: SYML
1AgxgADPuRIW1wyaA4OKcB: Tom Rosenthal
0MmnmsAuQKRFpo6vJElcaU: Amber Run
2FXC3k01G6Gw61bmprjgqS: Hozier
2ysnwxxNtSgbb9t1m2Ur4j: George Ezra
5schNIzWdI9gJ1QRK8SBnc: Ben Howard
7EQ0qTo7fWT7DPxmxtSYEc: Bastille
6QrQ7OrISRYIfS5mtacaw2: Jaymes Young
7z2avKuuiMAT4XZJFv8Rvh: Tom Walker
0MeLMJJcouYXCymQSHPn8g: Sleeping At Last
3QSQFmccmX81fWCUSPTS7y: Dean Lewis
16oZKvXb6WkQlVAjwo2Wbg: The Lumineers
4qWnlmXWuGv2TtuxtIWlJX: BANNERS
3w6zswp5THsSKYLICUbDTZ: Gabrielle Aplin
5mYWkDD4b1eM4ZjFq5axxs: Aidonia
0eezS9KmhdjGN436RdTIXu: Mavado
2Gzy8TYJ5xrEMDyUjZuDsK: Masicka
2LIAgeQ5NZurwixfoG3CWZ: Alkaline
3nwYsifpwrKmCIpw4i0HDW: Konshens
28UDeKu2FPrU0T7dpUiSGY: Dexta Daps
0mkixmQkcUeE6egKHW4ojh: Jah Vinci
0af5VM6xubf8EXKvoG35x6: Demarco
2yHxc12dEUiLXNeqUadxBh: Tommy Lee Sparta
2NUz5P

In [16]:
ids_artists.extend(ids_similar_artists)
ids_artists = list(set(ids_artists))
print(f'Number of artists (without repetitions): {len(ids_artists)}')

Number of artists (without repetitions): 358


In [17]:
def get_new_releases(sp, limit=20):
    '''
    This function uses a Spotify client 'sp' to fetch new releases. It uses a .pickle file in the 
    'spotify_data_cache' directory as a cache to store and efficiently access the data. 
    It returns the album data with a default limit that can be adjusted.
    '''

    cache_file = os.path.join(CACHE_DIR, 'new_releases_cache.pickle')

    if os.path.exists(cache_file):
        try:
            with open(cache_file, 'rb') as file:
                new_releases = pickle.load(file)
            return new_releases
        except Exception as e:
            print(f"Error reading from the cache: {e}")

    try:
        new_releases = sp.new_releases(limit=limit)['albums']
        with open(cache_file, 'wb') as file:
            pickle.dump(new_releases, file)
        return new_releases
    except Exception as e:
        print(f"Error fetching new releases: {e}")
        return None

new_releases_data = get_new_releases(sp)


In [18]:
def getting_id_artists(df):

    print('')
    print('Artists with new releases')
    print('=====================')
    for item in df['items']:
        artist_id = item['artists'][0]['id']
        artist_name = item['artists'][0]['name']
        album_name = item['name']  
        release_date = item['release_date']
        print(f'{artist_id}: {artist_name} - // {album_name}, {release_date}')
        ids_artists.append(artist_id)
    return ids_artists

In [19]:
ids_artists = getting_id_artists(new_releases_data)


Artists with new releases
1Mw40k757jZuiL0NIJpdO5: GULEED - // Cuando Menos Lo Espera, 2023-07-07
6k8mwkKJKKjBILo7ypBspl: Ana Mena - // bellodrama, 2023-03-24
7iK8PXO48WeuP03g8YR51W: Myke Towers - // LA VIDA ES UNA, 2023-03-23
5XJDexmWFLWOkjOEjOVX3e: Eladio Carrion - // 3MEN2 KBRN, 2023-03-17
790FomKkXshlbRYZFtlgla: KAROL G - // MAÑANA SERÁ BONITO, 2023-02-24
2auC28zjQyVTsiZKNgPRGs: RM - // Indigo, 2022-12-02
6KImCVD70vtIoJWnq6nGn3: Harry Styles - // Harry's House, 2022-05-20
4q3ewBCX7sLwd24euuV69X: Bad Bunny - // Un Verano Sin Ti, 2022-05-06
2R21vXR83lH98kGeO99Y66: Anuel AA - // Las Leyendas Nunca Mueren, 2021-11-26
4dpARuHxo51G3z768sgnrY: Adele - // 30, 2021-11-19
6eUKZXaKkcviH0Ku9w2n3V: Ed Sheeran - // =, 2021-10-29
4gzpq5DPGxSnKTe4SA8HAU: Coldplay - // Music Of The Spheres, 2021-10-15
53KwLdlmrlCelAZMaLVZqU: James Blake - // Friends That Break Your Heart, 2021-10-08
4MzJMcHQBl9SIYSjwWn8QW: Spiritbox - // Eternal Blue, 2021-09-17
1vyhD5VmyZ7KMfW5gqLgo5: J Balvin - // JOSE, 2021-09-1

In [20]:
ids_artists = list(set(ids_artists))
print(f'Number of artists (without repetitions): {len(ids_artists)}')

Number of artists (without repetitions): 372


In [21]:
def get_album_ids(sp, ids_artists,  CACHE_DIR='./spotify_data_cache', CACHE_FILE='artist_albums_cache.pickle'):
    '''
    Retrieves album IDs for each artist in 'ids_artists' using Spotify client 'sp'.
    Utilizes caching and a 3-second delay to avoid rate limiting.
    Returns a list of album IDs.
    '''

    os.makedirs(CACHE_DIR, exist_ok=True)
    cache_file_path = os.path.join(CACHE_DIR, CACHE_FILE)

    # Load cache if exists
    if os.path.exists(cache_file_path):
        with open(cache_file_path, 'rb') as file:
            cached_data = pickle.load(file)
    else:
        cached_data = {}

    id_albums = []
    nartists = len(ids_artists)

    for i, id_artist in enumerate(ids_artists):
        print(f'Processing artist {i+1} of {nartists}...')

        if id_artist in cached_data:
            id_albums.extend(cached_data[id_artist])
            continue

        try:
            albums = sp.artist_albums(id_artist, limit=1)  # to avoid having a huge list
            album_ids = [album['id'] for album in albums['items']]
            id_albums.extend(album_ids)
            cached_data[id_artist] = album_ids

            time.sleep(1)  # Adds 3-seconds delay

        except requests.exceptions.HTTPError as e:
            if e.response.status_code == 429:
                retry_after = int(e.response.headers.get('Retry-After', 3))
                print(f"Rate limit exceeded, waiting for {retry_after} seconds.")
                time.sleep(retry_after)
                continue
            else:
                print(f"Error processing artist {id_artist}: {e}")

    # Save updated cache
    with open(cache_file_path, 'wb') as file:
        pickle.dump(cached_data, file)

    print('Done!')
    return id_albums

In [22]:
id_albums = get_album_ids(sp, ids_artists)

Processing artist 1 of 372...
Processing artist 2 of 372...
Processing artist 3 of 372...
Processing artist 4 of 372...
Processing artist 5 of 372...
Processing artist 6 of 372...
Processing artist 7 of 372...
Processing artist 8 of 372...
Processing artist 9 of 372...
Processing artist 10 of 372...
Processing artist 11 of 372...
Processing artist 12 of 372...
Processing artist 13 of 372...
Processing artist 14 of 372...
Processing artist 15 of 372...
Processing artist 16 of 372...
Processing artist 17 of 372...
Processing artist 18 of 372...
Processing artist 19 of 372...
Processing artist 20 of 372...
Processing artist 21 of 372...
Processing artist 22 of 372...
Processing artist 23 of 372...
Processing artist 24 of 372...
Processing artist 25 of 372...
Processing artist 26 of 372...
Processing artist 27 of 372...
Processing artist 28 of 372...
Processing artist 29 of 372...
Processing artist 30 of 372...
Processing artist 31 of 372...
Processing artist 32 of 372...
Processing artist

In [23]:
def get_track_ids(sp, id_albums, CACHE_DIR='./spotify_data_cache', CACHE_FILE='album_tracks_cache.pickle'):
    '''
    Retrieves track IDs for each album in 'id_albums' using Spotify client 'sp'.
    Utilizes a pickle file for caching and includes error handling for rate limits.
    Returns a list of track IDs.
    '''

    os.makedirs(CACHE_DIR, exist_ok=True)
    cache_file_path = os.path.join(CACHE_DIR, CACHE_FILE)

    if os.path.exists(cache_file_path):
        with open(cache_file_path, 'rb') as file:
            cached_data = pickle.load(file)
    else:
        cached_data = {}

    id_tracks = []
    nalbums = len(id_albums)
    cache_updated = False  # Flag to track if new data has been added to cache

    for i, id_album in enumerate(id_albums):
        print(f'Processing album {i+1} of {nalbums}...')

        if id_album in cached_data:
            id_tracks.extend(cached_data[id_album])
        else:
            try:
                album_tracks = sp.album_tracks(id_album, limit=1)
                track_ids = [track['id'] for track in album_tracks['items']]
                id_tracks.extend(track_ids)
                cached_data[id_album] = track_ids
                cache_updated = True  # Set flag to true as new data is added

                time.sleep(1)  # Adds a 2-second delay

            except requests.exceptions.HTTPError as e:
                if e.response.status_code == 429:
                    retry_after = int(e.response.headers.get('Retry-After', 2))
                    print(f"Rate limit exceeded, waiting for {retry_after} seconds.")
                    time.sleep(retry_after)
                    continue  # Re-attempt the current album
                else:
                    print(f"Error processing album {id_album}: {e}")

    # Save updated cache only if new data has been added
    if cache_updated:
        with open(cache_file_path, 'wb') as file:
            pickle.dump(cached_data, file)
        print("Cache updated with new data.")

    print(f'Done! Total number of pre-candidate tracks: {len(id_tracks)}')
    return id_tracks


id_tracks = []
nalbums = len(id_albums)
for i, id_album in enumerate(id_albums):
    print(f'Processing album {i+1} of {nalbums}...')
    album_tracks = sp.album_tracks(id_album, limit=3)
    for track in album_tracks['items']:
        id_tracks.append(track['id'])
    
    time.sleep(2)  # Adds 3-seconds delay

print(f'Done! Total number of pre-candidate tracks: {len(id_tracks)}')

In [24]:
id_tracks = get_track_ids(sp, id_albums)

Processing album 1 of 6853...


In [None]:
def get_track_details_and_features(sp, id_tracks, CACHE_DIR='./spotify_data_cache', CACHE_FILE='track_features_cache.pickle'):
    '''
    Retrieves track details and audio features for each track in 'id_tracks' using Spotify client 'sp'.
    Utilizes a pickle file for caching and includes error handling for rate limits.
    Returns a DataFrame with track names and features.
    '''

    os.makedirs(CACHE_DIR, exist_ok=True)
    cache_file_path = os.path.join(CACHE_DIR, CACHE_FILE)

    if os.path.exists(cache_file_path):
        with open(cache_file_path, 'rb') as file:
            cached_data = pickle.load(file)
    else:
        cached_data = {}

    track_names = []
    features = []
    ntracks = len(id_tracks)

    for i, track_id in enumerate(id_tracks):
        print(f'Processing track {i+1} of {ntracks}...')

        if track_id not in cached_data:
            track_details = sp.track(track_id)
            track_name = track_details['name']
            audio_features = sp.audio_features(track_id)

            if audio_features[0] is not None:
                cached_data[track_id] = {'name': track_name, 'features': audio_features[0]}
                with open(cache_file_path, 'wb') as file:
                    pickle.dump(cached_data, file)

            time.sleep(2)  # Adds a 2-second delay to avoid rate limits

        if track_id in cached_data:
            track_names.append(cached_data[track_id]['name'])
            features.append(cached_data[track_id]['features'])

    print('Done!')
    candidates_df = pd.DataFrame(features, index=track_names)
    
    candidates_clean = cleaning_df(candidates_df)

    csv_file_path = './spotify_data_cache/candidates.csv'
    if os.path.exists(csv_file_path):
        existing_data = pd.read_csv(csv_file_path)
        updated_data = pd.concat([existing_data, candidates_clean]).drop_duplicates()
    else:
        updated_data = candidates_clean

    updated_data.to_csv(csv_file_path, index=False)
    print("CSV file updated.")

    return updated_data

In [None]:
updated_data  = get_track_details_and_features(sp, id_tracks)

Processing track 1 of 2238...
Processing track 2 of 2238...
Processing track 3 of 2238...
Processing track 4 of 2238...
Processing track 5 of 2238...
Processing track 6 of 2238...
Processing track 7 of 2238...
Processing track 8 of 2238...
Processing track 9 of 2238...


KeyboardInterrupt: 

In [None]:
top_tracks_clean_mtx = top_tracks_clean.iloc[:,1:].values
candidatos_mtx = updated_data.iloc[:,1:].values

NameError: name 'updated_data' is not defined

In [None]:
scaler = StandardScaler()
t20_scaled = scaler.fit_transform(top_tracks_clean_mtx)
can_scaled = scaler.fit_transform(candidatos_mtx)
t20_norm = np.sqrt((t20_scaled*t20_scaled).sum(axis=1))
can_norm = np.sqrt((can_scaled*can_scaled).sum(axis=1))
nt20 = t20_scaled.shape[0]
ncan = can_scaled.shape[0]
t20 = t20_scaled/t20_norm.reshape(nt20,1)
can = can_scaled/can_norm.reshape(ncan,1)
cos_sim = linear_kernel(t20,can)
cos_sim.shape

In [None]:
def obtener_candidatos(pos, cos_sim, ncands, umbral = 0.8):
    
    # Obtener todas las pistas candidatas por encima de umbral
    
    idx = np.where(cos_sim[pos,:]>=umbral)[0] 
    
    # Y organizarlas de forma descendente (por similitudes de mayor a menor)
    idx = idx[np.argsort(cos_sim[pos,idx])[::-1]] # [::-1] porque por defecto argsort organiza de manera ascendente

    # Si hay más de "ncands", retornar máximo "ncands"
    if len(idx) >= ncands:
        cands = idx[0:ncands]
    else:
        cands = idx
  
    return cands

In [None]:
ids_t20 = []
ids_playlist = []

for i in range(top_tracks_clean_mtx.shape[0]):
    print(top_tracks_clean_mtx.index[i]) 
    ids_t20.append(top_tracks_clean_mtx['id'][i])
    
    cands = obtener_candidatos(i, cos_sim, 5, umbral=0.8)
    
    if len(cands)==0:
        print('     ***No se encontraron pistas relacionadas***')
    else:
        for j in cands:
            id_cand = candidatos_mtx['id'][j]
            ids_playlist.append(id_cand)
            
            print(f'   {candidatos_mtx.index[j]}')

In [None]:
ids_playlist_dep = [x for x in ids_playlist if x not in ids_t20]
ids_playlist_dep = list(set(ids_playlist_dep))

In [None]:
pl = sp.user_playlist_create(user=username, 
                             name='Spotipy Recommender Playlist', 
                             description="Playlist created with the recommendation system")

sp.playlist_add_items(pl['id'],ids_playlist_dep)