###Make spotipy work

In [None]:
!pip install spotipy
!pip install tqdm

In [1]:
%%capture
import networkx as nx
import pandas as pd
import spotipy
from spotipy . oauth2 import SpotifyClientCredentials
import matplotlib.pyplot as plt
from tqdm import tqdm
import time

In [2]:
CLIENT_ID = "83251742b516446a95953a9b9236c75a"
CLIENT_SECRET = "3537256ef68748ad8422e190a7740c58"

auth_manager = SpotifyClientCredentials (client_id = CLIENT_ID, client_secret = CLIENT_SECRET)
sp = spotipy . Spotify ( auth_manager = auth_manager )

###Implement functions

####Final result

In [3]:
# ------- IMPLEMENT HERE ANY AUXILIARY FUNCTIONS NEEDED ------- #
def add_node(graph, name, id):  
  raw = sp.artist(id) 

  genre = ''
  for g in raw['genres']:
    genre = genre+g+','

  graph.add_node(name, 
                 id = raw['id'], 
                 followers = raw['followers']['total'],
                 popularity = raw['popularity'],
                 genres = genre)


def explore_node(graph, name='', id=''):  
  if id=='':    
    id = search_artist(name)
  elif name=='':    
    name = sp.artist(id)['name']
  
  related = sp.artist_related_artists(id)  
  neighbours = [artist['name'] for artist in related['artists']]
  add_node(graph, name, id)

  for partner in neighbours: 
    add_node(graph, partner, search_artist(partner))
    graph.add_edge(name, partner)  
  
  return neighbours

# --------------- END OF AUXILIARY FUNCTIONS ------------------ #


def search_artist(artist_name: str) -> str:
    """
    Search for an artist in Spotify.

    :param artist_name: name to search for.
    :return: spotify artist id.
    """
    # ------- IMPLEMENT HERE THE BODY OF THE FUNCTION ------- #
    searched = sp.search(artist_name, type='artist', limit=1)
    if searched['artists']['items'] != []:
      return searched['artists']['items'][0]['id']  
    # ----------------- END OF FUNCTION --------------------- #


def crawler(seed: str, max_nodes_to_crawl: int, strategy: str = "BFS", out_filename: str = "g.graphml") -> nx.DiGraph:
    """
    Crawl the Spotify artist graph, following related artists.

    :param seed: starting artist id.
    :param max_nodes_to_crawl: maximum number of nodes to crawl.
    :param strategy: BFS or DFS.
    :param out_filename: name of the graphml output file.
    :return: networkx directed graph.
    """
    # ------- IMPLEMENT HERE THE BODY OF THE FUNCTION ------- #
    Graph = nx.DiGraph()
    n_nodes = 1    
    to_explore = explore_node(Graph, id = seed)     
    visited = []    

    while (n_nodes < max_nodes_to_crawl) and (len(to_explore) != 0):
      if strategy=='BFS':        
        current = to_explore.pop(0)         
        while current in visited:
          current = to_explore.pop(0)                     
        to_explore = to_explore + explore_node(Graph, name = current)
        n_nodes+=1      
        visited.append(current) 
      
      elif strategy=='DFS':
        current = to_explore.pop(0)
        while current in visited:
          current = to_explore.pop(0)                   
        to_explore = explore_node(Graph, name = current) + to_explore        
        n_nodes+=1     
        visited.append(current)       

    nx.write_graphml(Graph, out_filename+".graphml")
    nx.draw_spring(Graph)
    return Graph 
    # ----------------- END OF FUNCTION --------------------- #

def get_track_data(graphs: list, out_filename: str) -> pd.DataFrame:
    """
    Get track data for each visited artist in the graph.

    :param graphs: a list of graphs with artists as nodes.
    :param out_filename: name of the csv output file.
    :return: pandas dataframe with track data.
    """
    artists = set()

    for graph in graphs:
        for artist in graph.nodes:
            artists.add((artist, graph.nodes[artist]['id']))

    columns = ['Name', 'ID', 'Popularity']
    data = []

    delay = 10  # Delay in seconds between API calls
    progress_bar = tqdm(total=len(artists), desc="Processing Artists", unit="artist")

    for element in artists:
        singer = element[0]
        singer_id = element[1]

        tracks = sp.artist_top_tracks(singer_id)['tracks']

        progress_bar.set_description(f"Processing Artist: {singer}")
        progress_bar.refresh()

        for track in tracks:
            audio_features = sp.audio_features(track['id'])[0]
            if audio_features is not None:
                info = [
                    track['name'],
                    track['id'],
                    track['popularity'],
                ]
                data.append(info)

        # Save data to CSV file
        table = pd.DataFrame(data, columns=columns).sort_values('Popularity', ascending=False)
        table.to_csv(out_filename + '.csv', index=False)

        # Add delay before making the next API call
        time.sleep(delay)

        progress_bar.update(1)

    progress_bar.close()

    table = pd.DataFrame(data, columns=columns).sort_values('Popularity', ascending=False)
    table.to_csv(out_filename + '.csv', index=False)

    return table
    # ----------------- END OF FUNCTION --------------------- #


if __name__ == "__main__":
    artist_name = "Drake"  # Replace with the name of the artist you want to search for
    seed_artist_id_Drake = search_artist(artist_name)
    artist_name = "French Montana"  # Replace with the name of the artist you want to search for
    seed_artist_id_FM = search_artist(artist_name)
    

####Exercises

#####No need to run

In [None]:
gB = crawler(seed_artist_id_Drake, 200, 'BFS', 'gB')

In [None]:
gD = crawler(seed_artist_id_Drake, 200, 'DFS', 'gD')

#####Run this (and download the results)

In [4]:
import networkx as nx

gB = nx.read_graphml('gB.graphml')
gD = nx.read_graphml('gD.graphml')

In [5]:
D = get_track_data([gB, gD], 'D')

Processing Artist: Joey Fatts:   0%|          | 0/1517 [00:00<?, ?artist/s]Max Retries reached


SpotifyException: http status: 429, code:-1 - /v1/audio-features/?ids=1ccxiPcBci9D6U8l39fD0w:
 Max Retries, reason: too many 429 error responses

In [None]:
hB = crawler(seed_artist_id_FM, 200, 'BFS', 'hB')

###Others

In [None]:
def get_track_data(graphs: list, out_filename: str) -> pd.DataFrame:
    """
    Get track data for each visited artist in the graph.

    :param graphs: a list of graphs with artists as nodes.
    :param out_filename: name of the csv output file.
    :return: pandas dataframe with track data.
    """
    # ------- IMPLEMENT HERE THE BODY OF THE FUNCTION ------- #
    artists = set()

    for graph in graphs:
      for artist in list(graph.nodes):
        artists.add(artist)
    
    columns = ['Name', 'ID', 'Popularity', 'Duration','Artist Name', 'Artist ID', 'Danceability', 'Energy', 'Loudness', 'Speechiness', 'Acousticness', 'Instrumentalness', 'Liveness', 'Valence', 'Tempo', 'Album Name', 'Album ID', 'Album Release Date']
    data = []

    for element in list(artists):
      singer = element
      singer_id = search_artist(singer)

      tracks = sp.artist_top_tracks(singer_id, 'ES')['tracks']

      for track in tracks:  
        audio_features = sp.audio_features(track['id'])[0]

        info = [
          track['name'],
          track['id'],
          track['popularity'],
          track['duration_ms'],
          singer,
          singer_id,
          audio_features['danceability'],
          audio_features['energy'],
          audio_features['loudness'],
          audio_features['speechiness'],
          audio_features['acousticness'],
          audio_features['instrumentalness'],
          audio_features['liveness'],
          audio_features['valence'],
          audio_features['tempo'],
          track['album']['name'],
          track['album']['id'],
          track['album']['release_date']
        ]

        data.append(info)

    table = pd.DataFrame(data, columns=columns).sort_values('Popularity', ascending=False)
    table.to_csv(out_filename, index=False)

    return table
    # ----------------- END OF FUNCTION --------------------- #