In [1]:
#install required API spotify library
!pip install spotipy



In [1]:
import spotipy
import numpy as np
import networkx as nx
import matplotlib.pyplot as plt
import pickle

In [2]:
import os

# Spotify API credentials are read from the environment so that no secret is
# stored in (or shared together with) the notebook. Export SPOTIPY_CLIENT_ID and
# SPOTIPY_CLIENT_SECRET before starting the kernel.
# NOTE(review): any key pair that was ever committed in this notebook should be
# treated as compromised and rotated in the Spotify developer dashboard.
client_id = os.environ.get('SPOTIPY_CLIENT_ID')
client_secret = os.environ.get('SPOTIPY_CLIENT_SECRET')

In [3]:
from spotipy.oauth2 import SpotifyClientCredentials

In [4]:
# Build the credentials manager from the id/secret pair defined above, then the
# API client used by every later cell.
credmanager = SpotifyClientCredentials(
    client_id=client_id,
    client_secret=client_secret,
)
# requests_timeout is expressed in seconds.
sp = spotipy.Spotify(client_credentials_manager=credmanager, requests_timeout=20)

In [5]:
def artist_features(spotify_search_result):
    """Flatten a Spotify artist object into the attributes stored for an artist.

    Missing keys fall back to a placeholder string (name, id, genre) or to 0
    (popularity, followers). ``genres``, ``followers``, ``followers['total']``
    and ``popularity`` are also allowed to be present but null: they are then
    treated as missing instead of raising or leaking ``None`` into the result.

    Parameters:
        spotify_search_result (dict): artist object as returned by the API client

    Returns:
        dict: keys ``artist_name``, ``artist_id``, ``artist_popularity``,
        ``artist_first_genre`` and ``artist_n_followers``
    """
    # `or` collapses both "key absent" and "key present but None/empty".
    genres = spotify_search_result.get('genres') or []
    followers = spotify_search_result.get('followers') or {}
    return {
        'artist_name': spotify_search_result.get('name', 'artist_name_not_available'),
        'artist_id': spotify_search_result.get('id', 'artist_id_not_available'),
        'artist_popularity': spotify_search_result.get('popularity') or 0,
        # Only the first listed genre is kept.
        'artist_first_genre': genres[0] if genres else 'genre_not_available',
        'artist_n_followers': followers.get('total') or 0,
    }

In [6]:
# Sample lookup: search for "Drake" and keep the first artist hit.
drake_hits = sp.search('Drake', type='artist')['artists']['items']
drake_search = drake_hits[0]

In [7]:
# Sanity check: flatten the sample search hit and display the resulting dict.
drake_features = artist_features(drake_search)
drake_features

{'artist_name': 'Drake',
 'artist_id': '3TVXtAsR1Inumwj472S9r4',
 'artist_popularity': 99,
 'artist_first_genre': 'rap',
 'artist_n_followers': 100970883}

In [8]:
from bs4 import BeautifulSoup
import urllib.request

In [9]:
# Use https://kworb.net/spotify/artists.html as the reference list of the
# 3000 most-streamed artists.
# The context manager closes the HTTP response even when reading fails.
with urllib.request.urlopen("https://kworb.net/spotify/artists.html") as fp:
    mybytes = fp.read()
mystr = mybytes.decode("utf8")


In [10]:
def remove_bound(string):
    """Return the text found right after the first '>' of ``str(string)``.

    The result stops at the next '>' or '<', whichever comes first, so for a
    rendered anchor such as ``<a href="...">Name</a>`` it is ``Name``.
    Raises IndexError when the text contains no '>' at all.
    """
    # Element 1 of the split is whatever sits between the first and second '>'.
    after_opening_tag = str(string).split('>', 2)[1]
    return after_opening_tag.split('<', 1)[0]

In [11]:
from bs4 import BeautifulSoup

soup = BeautifulSoup(mystr, 'html.parser')
artists_html = soup.find_all('a')

# Skip the first 14 anchors (headers and unrelated links), then pull the text
# out of each remaining anchor.
artist_names = []
for anchor in artists_html[14:]:
    # Decode the HTML entity for '&' left in the rendered tag text.
    artist_names.append(remove_bound(anchor).replace('&amp;', '&'))

print(f"Artisti trovati: {len(artist_names)}")
print("Primi 10:", artist_names[:10])

Artisti trovati: 3000
Primi 10: ['Drake', 'Taylor Swift', 'Bad Bunny', 'The Weeknd', 'Justin Bieber', 'Ed Sheeran', 'Ariana Grande', 'Travis Scott', 'Eminem', 'Kanye West']


In [12]:
# Display how many names were extracted together with the first ten of them.
(len(artist_names), artist_names[:10])

(3000,
 ['Drake',
  'Taylor Swift',
  'Bad Bunny',
  'The Weeknd',
  'Justin Bieber',
  'Ed Sheeran',
  'Ariana Grande',
  'Travis Scott',
  'Eminem',
  'Kanye West'])

In [13]:
# The crawl below iterates over this name; it refers to the same list object.
artists_name_list = artist_names
print(f'There are {len(artists_name_list)} artists in the initial list.')

There are 3000 artists in the initial list.


In [14]:
# Minimum Spotify popularity an artist needs in order to be added to the graph;
# artists below it are skipped to keep out "noisy" elements (artists with very
# low popularity/relevance).
popularity_threshold = 20

In [15]:
# Empty undirected graph filled by the crawl: nodes are keyed by Spotify artist
# id and an edge links an artist to a collaborator found on its releases.
G = nx.Graph()

In [16]:
def get_collaborators(artist_id, max_albums=5):
    """
    Collect the other artists credited on the tracks of an artist's releases.

    Albums and singles of the artist are listed through the module-level ``sp``
    client; every artist credited on their tracks whose id differs from
    ``artist_id`` is recorded once. If any call fails, the error is printed and
    whatever was collected up to that point is returned.

    Parameters:
        artist_id (str): Spotify ID of the main artist
        max_albums (int): Maximum number of albums to explore (default: 5)

    Returns:
        List[Tuple[str, str]]: (id, name) tuples of the collaborators,
        without duplicates and in no particular order
    """
    found = set()
    try:
        releases = sp.artist_albums(artist_id, include_groups='album,single', limit=max_albums)
        for release in releases['items']:
            for song in sp.album_tracks(release['id'])['items']:
                # Keep everyone credited on the track except the artist itself.
                found.update(
                    (credited['id'], credited['name'])
                    for credited in song['artists']
                    if credited['id'] != artist_id
                )
    except Exception as e:
        print(f"Errore nel recupero collaboratori per {artist_id}: {e}")
    return list(found)

In [17]:
from pathlib import Path

# Resume from the checkpoint written by the crawl loop, when there is one.
# On a first run no checkpoint exists and the empty graph created above is kept.
# SECURITY: pickle.load can execute arbitrary code - only load checkpoint files
# produced by this notebook, never files from an untrusted source.
checkpoint_path = Path("grafo_parziale.pkl")
if checkpoint_path.exists():
    with checkpoint_path.open("rb") as f:
        G = pickle.load(f)
else:
    print(f"No checkpoint found at {checkpoint_path}; starting from an empty graph.")

In [18]:
# Position in artists_name_list from which the crawl loop resumes.
start_index = 2811  # or whatever index you remember (e.g. 620, 640…)

In [19]:
import time

In [20]:
# Resume from the point where the previous run stopped.
for idx, name in enumerate(artists_name_list[start_index:], start=start_index):
    print(f"[{idx+1}/{len(artists_name_list)}] Analizzo artista: {name}")
    try:
        # Only the top search hit for the name is considered.
        search = sp.search(q=name, type='artist', limit=1)['artists']['items']
        if not search:
            continue
        artist = search[0]
        if artist['popularity'] < popularity_threshold:
            continue

        artist_data = artist_features(artist)
        artist_id = artist_data['artist_id']

        # NOTE(review): nodes are also created for collaborators further down, so
        # an artist first met as somebody's collaborator is skipped here and its
        # own releases are never explored - confirm this is intended.
        if artist_id in G:
            continue  # Already processed

        G.add_node(artist_id, **artist_data)

        # Find collaborators
        collaborators = get_collaborators(artist_id)
        for collab_id, collab_name in collaborators:
            if not G.has_node(collab_id):
                try:
                    collab_data = sp.artist(collab_id)
                    collab_feat = artist_features(collab_data)
                    if collab_feat['artist_popularity'] >= popularity_threshold:
                        G.add_node(collab_id, **collab_feat)
                except Exception as collab_err:
                    # Best effort: skip this collaborator but report why. Catching
                    # Exception (not a bare except) keeps Ctrl-C / kernel interrupt
                    # able to stop the crawl.
                    print(f"Errore nel recupero del collaboratore {collab_id}: {collab_err}")
                    continue
            # Edges are only drawn towards collaborators that made it into the graph.
            if G.has_node(collab_id):
                G.add_edge(artist_id, collab_id)

        # Checkpoint whenever the index of a fully processed artist is a multiple of 10
        if idx % 10 == 0:
            with open("grafo_parziale.pkl", "wb") as f:
                pickle.dump(G, f)
            print(f"💾 Grafo salvato a {idx} nodi.")

        # Pause between artists to go easy on the API.
        time.sleep(1)

    except spotipy.exceptions.SpotifyException as se:
        if se.http_status == 429:
            # Rate limited: wait for the advertised delay (60 s when absent) plus a
            # 5 s margin. The current artist is not retried afterwards.
            retry_after = int(se.headers.get("Retry-After", 60))
            print(f"⏳ Rate limit raggiunto. Attendo {retry_after} secondi...")
            time.sleep(retry_after + 5)
            continue
        else:
            print(f"Errore Spotify per {name}: {se}")
            continue

    except Exception as e:
        print(f"Errore generico per {name}: {e}")
        continue

# Final save
with open("grafo_finale.pkl", "wb") as f:
    pickle.dump(G, f)

print("✅ Raccolta completata. Grafo salvato.")

[2812/3000] Analizzo artista: Academy of St. Martin in the Fields
[2813/3000] Analizzo artista: Vengaboys
[2814/3000] Analizzo artista: Charlie Wilson
[2815/3000] Analizzo artista: Yura Yunita
[2816/3000] Analizzo artista: Lali
[2817/3000] Analizzo artista: Monica
[2818/3000] Analizzo artista: Bresh
[2819/3000] Analizzo artista: Hunter Hayes
[2820/3000] Analizzo artista: Sujatha
[2821/3000] Analizzo artista: Suki Waterhouse
[2822/3000] Analizzo artista: Brian Eno
[2823/3000] Analizzo artista: Los Panchos
[2824/3000] Analizzo artista: Eddie Vedder
[2825/3000] Analizzo artista: Poison
[2826/3000] Analizzo artista: Ol' Dirty Bastard
[2827/3000] Analizzo artista: VINAI
[2828/3000] Analizzo artista: Charly Black
[2829/3000] Analizzo artista: Yovie & Nuno
[2830/3000] Analizzo artista: Vigiland
[2831/3000] Analizzo artista: Joey Montana
[2832/3000] Analizzo artista: Uzielito Mix
[2833/3000] Analizzo artista: COIN
[2834/3000] Analizzo artista: Migrantes
[2835/3000] Analizzo artista: Sergio Veg

In [21]:
# Save the graph object to file; the context manager flushes and closes the
# handle, which a bare open(...) passed to pickle.dump never did explicitly.
with open('grafo_finale.pickle', 'wb') as f:
    pickle.dump(G, f)

In [22]:
# Export a bare copy of the graph to GEXF: only the edges are copied, so node
# attributes are dropped and nodes without any edge are not exported.
G_raw = nx.Graph()
G_raw.add_edges_from(G.edges())
nx.write_gexf(G_raw, "grafo_finale.gexf")