# **IMPORT LIBS**

In [1]:
import pandas as pd
import sys
from collections import Counter
from tqdm import tqdm

recommenders_path = 'Recommenders'
sys.path.append(recommenders_path)

graphs_path = 'Graphs'
sys.path.append(graphs_path)

artist_path = 'Artists'
sys.path.append(artist_path)

graphics_path = 'Graphics'
sys.path.append(graphics_path)

from Recommenders.PopularityRecommender import PopularityRecommender
from Recommenders.StrongerConnectionsRecommender import StrongerConnectionsRecommender
from Graphs.BipartiteGraph import BipartiteGraph
from Graphs.SimpleGraph import SimpleGraph

# **Loading the data**

In [2]:
# Load the Spotify artists table from the CSV file; the path is relative to the notebook's working directory.
df = pd.read_csv('../data/spotify_artists.csv')
# A bare expression on the last line of the cell displays the frame as the cell output.
df

Unnamed: 0,external_urls,followers,genres,id,name,popularity,uri,related_artists_ids
0,https://open.spotify.com/artist/4dpARuHxo51G3z...,57657211,"british soul, pop, pop soul, uk pop",4dpARuHxo51G3z768sgnrY,Adele,86,spotify:artist:4dpARuHxo51G3z768sgnrY,"5WUlDfRSoLAfcVSX1WnrxN, 2wY79sveU1sp5g7SokKOiI..."
1,https://open.spotify.com/artist/66CXWjxzNUsdJx...,98814719,pop,66CXWjxzNUsdJxJ2JdwvnR,Ariana Grande,91,spotify:artist:66CXWjxzNUsdJxJ2JdwvnR,"4nDoRrQiYLoBzwC5BhVJzF, 0C8ZW7ezQVs4URX5aX7Kqx..."
2,https://open.spotify.com/artist/13ubrt8QOOCPlj...,14722549,"east coast hip hop, hip hop, rap",13ubrt8QOOCPljQ2FL1Kca,A$AP Rocky,84,spotify:artist:13ubrt8QOOCPljQ2FL1Kca,"5dHt1vcEm9qb8fCyLcB3HL, 2P5sC9cVZDToPxyomzF1UH..."
3,https://open.spotify.com/artist/06HL4z0CvFAxyc...,118971229,pop,06HL4z0CvFAxyc27GXpf02,Taylor Swift,100,spotify:artist:06HL4z0CvFAxyc27GXpf02,"0C8ZW7ezQVs4URX5aX7Kqx, 1McMsnEElThX1knmY4oliG..."
4,https://open.spotify.com/artist/7Ln80lUS6He07X...,25991892,"garage rock, modern rock, permanent wave, rock...",7Ln80lUS6He07XvHI8qqHH,Arctic Monkeys,85,spotify:artist:7Ln80lUS6He07XvHI8qqHH,"77SW9BnxLY8rJ0RciFqkHh, 0epOFNiUfyON9EYx7Tpr6V..."
...,...,...,...,...,...,...,...,...
14783,https://open.spotify.com/artist/7EyzyrMNgqiK8b...,1417807,rap mineiro,7EyzyrMNgqiK8bMrbkOT9l,Sidoka,59,spotify:artist:7EyzyrMNgqiK8bMrbkOT9l,"4oPnjkJcLqOim9KJxvIYMz, 1QBWA6tuiZ0JuDluPqbe71..."
14784,https://open.spotify.com/artist/6gzXCdfYfFe5XK...,2313438,"k-pop, k-pop boy group",6gzXCdfYfFe5XKhPKkYqxV,SUPER JUNIOR,58,spotify:artist:6gzXCdfYfFe5XKhPKkYqxV,"6nVMMEywS5Y4tsHPKx1nIo, 1bkpTEmumLC3xc7HgMsttU..."
14785,https://open.spotify.com/artist/7AAXcP4NpvvLM9...,609413,"sertanejo, sertanejo universitario",7AAXcP4NpvvLM9Xcfy64ij,Hugo Pena & Gabriel,40,spotify:artist:7AAXcP4NpvvLM9Xcfy64ij,"5Dyg6H3QJHQV5c7ojyKWyv, 4ZUahcHoVxr4lsrfdmhjhV..."
14786,https://open.spotify.com/artist/5KvkOKroKLz202...,186296,"folk metal, german metal, melodic death metal,...",5KvkOKroKLz202ioXfGWR2,Equilibrium,49,spotify:artist:5KvkOKroKLz202ioXfGWR2,3t5X2CVDf5mrlIx1SdvWYM


# **Creating the Bipartite Network (genres and artists)**

In [3]:
# Edge list for the bipartite graph: one (artist id, genre, popularity)
# triple for every genre found in an artist's ', '-separated genres string.
edges = [
    (df['id'][row], genre_name, df['popularity'][row])
    for row in range(len(df))
    for genre_name in df['genres'][row].split(', ')
]

In [4]:
# Artist nodes: (artist id, attribute dict) pairs carrying popularity and name.
artists_nodes = []
for row in range(len(df['id'])):
    node_attributes = {
        'popularity': int(df['popularity'][row]),
        'name': str(df['name'][row]),
    }
    artists_nodes.append((df['id'][row], node_attributes))

# Genre nodes: every distinct genre label, split on ',' and stripped of
# surrounding whitespace; rows with a missing genres value are ignored.
genres_nodes = {
    label.strip()
    for genre_string in df['genres'].dropna()
    for label in genre_string.split(',')
}

In [5]:
# Build the bipartite graph: register both node collections first, then the edges.
Bipartite_G = BipartiteGraph()
# artists_nodes holds (id, attribute dict) pairs; genres_nodes is a set of genre labels.
Bipartite_G.add_nodes(artists_nodes, genres_nodes)
# Each edge is an (artist id, genre, popularity) triple.
Bipartite_G.add_edges(edges)

# **Creating the Simple Network (artists)**

In [6]:
# Map every genre label to the list of artist ids tagged with it
# (ids are appended in row order).
genres = {}
for row in range(len(df)):
    artist_genres = df['genres'][row].split(', ')
    artist_id = df['id'][row]
    for genre_name in artist_genres:
        genres.setdefault(genre_name, []).append(artist_id)

In [7]:
# For every genre, emit each unordered pair of its artists exactly once.
# A pair is stored with the smaller id first so the same two artists always
# produce the same tuple, whichever genre they share.
connections = []
for members in genres.values():
    member_count = len(members)
    for first in range(member_count):
        for second in range(first + 1, member_count):
            left, right = members[first], members[second]
            pair = (left, right) if left < right else (right, left)
            connections.append(pair)

In [8]:
# Count how many times each artist pair occurs; that count becomes the edge weight.
counter = Counter(connections)

# Weighted edge list (artist, artist, weight). Note that this rebinds the name
# `edges`, which previously held the bipartite edge list.
edges = [
    (artist_1, artist_2, weight)
    for (artist_1, artist_2), weight in counter.items()
]

In [9]:
# Artist-only graph: nodes carry popularity, name and the first listed genre;
# edges are the weighted artist pairs computed above.
G = SimpleGraph()

artists_nodes = []
for row in range(len(df['id'])):
    node_attributes = {
        'popularity': int(df['popularity'][row]),
        'name': str(df['name'][row]),
        # Only the first entry of the ', '-separated genres string is kept.
        'genre': str(df['genres'][row].split(', ')[0]),
    }
    artists_nodes.append((df['id'][row], node_attributes))

G.add_nodes(artists_nodes)
G.add_edges(edges)

# **Functions**

## **Function to compare the recommenders with Spotify**

In [10]:
def compare_recommendations_spotify(df, recommender):
    """Measure how often a recommender agrees with Spotify's related artists.

    For every row of ``df`` whose ``related_artists_ids`` is a string, the
    recommender is asked for as many recommendations as Spotify lists, and the
    fraction of those recommendations that also appear in Spotify's list is
    recorded. A two-line summary (mean fraction, number of skipped artists)
    is printed.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the columns ``id`` and ``related_artists_ids``; the latter
        holds artist ids joined by ``', '`` or a missing value.
    recommender : object
        Anything exposing ``make_recommendations(artist_id, n)`` that returns
        a sized iterable of artist ids.

    Returns
    -------
    dict
        Maps each evaluated artist id to the matched fraction in ``[0, 1]``.
        Artists without a related-artists string are absent; artists for which
        the recommender returned nothing are scored ``0.0``.
    """
    analysis = {}
    counter_nan = 0

    # Walk the two columns positionally so the function does not depend on the
    # frame having a default 0..n-1 index (e.g. after filtering rows).
    rows = zip(df['id'], df['related_artists_ids'])
    for artist_id, related in tqdm(rows, total=len(df)):

        # Anything that is not a string (NaN, None, numpy float) is a missing value.
        if not isinstance(related, str):
            counter_nan += 1
            continue

        related_artists_ids = related.split(', ')
        number_recommendations = len(related_artists_ids)

        recommended_artists = recommender.make_recommendations(artist_id, number_recommendations)

        # No recommendations means no matches; avoids dividing by zero.
        if len(recommended_artists) == 0:
            analysis[artist_id] = 0.0
            continue

        # Set membership keeps the matching step linear in the list sizes.
        spotify_ids = set(related_artists_ids)
        matches = sum(1 for candidate in recommended_artists if candidate in spotify_ids)

        analysis[artist_id] = matches / len(recommended_artists)

    # With nothing evaluated the mean is undefined; report NaN instead of crashing.
    total_correct_mean = sum(analysis.values()) / len(analysis) if analysis else float('nan')
    print(f'On average, {total_correct_mean:.2%} of the recommendations from the {type(recommender).__name__} is equal to the recommendations made by Spotify')
    print(f'{counter_nan} artists (out of {len(df)}) had no related artists, either because the API did not provide any or because they were removed')

    return analysis

## **Function to compare the created recommenders**

In [11]:
def compare_recommendations_created_recommenders(df, primary_recommender, secondary_recommender, number_recommendations=20):
    """Measure how much one recommender's output overlaps with another's.

    For every artist id in ``df`` the primary recommender is asked for up to
    ``number_recommendations`` artists; the secondary recommender is then asked
    for as many artists as the primary actually returned. The recorded score is
    the fraction of the secondary's recommendations that also appear in the
    primary's list. A two-line summary is printed.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the column ``id``.
    primary_recommender, secondary_recommender : object
        Anything exposing ``make_recommendations(artist_id, n)`` that returns
        a sized iterable of artist ids.
    number_recommendations : int, optional
        How many recommendations to request from the primary recommender
        (default 20, the value that used to be hard-coded).

    Returns
    -------
    dict
        Maps each evaluated artist id to the overlap fraction in ``[0, 1]``.
        Artists for which the primary returned nothing are absent; artists for
        which only the secondary returned nothing are scored ``0.0``.
    """
    analysis = {}
    counter_nan = 0

    # Iterate the id column directly so a non-default frame index is not a problem.
    for artist_id in tqdm(df['id'], total=len(df)):

        primary_artists = primary_recommender.make_recommendations(artist_id, number_recommendations)

        # Without a reference list there is nothing to compare against.
        if len(primary_artists) == 0:
            counter_nan += 1
            continue

        secondary_artists = secondary_recommender.make_recommendations(artist_id, len(primary_artists))

        # No secondary recommendations means no overlap; avoids dividing by zero.
        if len(secondary_artists) == 0:
            analysis[artist_id] = 0.0
            continue

        primary_ids = set(primary_artists)
        matches = sum(1 for candidate in secondary_artists if candidate in primary_ids)

        analysis[artist_id] = matches / len(secondary_artists)

    # With nothing evaluated the mean is undefined; report NaN instead of crashing.
    total_correct_mean = sum(analysis.values()) / len(analysis) if analysis else float('nan')
    print(f'On average, {total_correct_mean:.2%} of the recommendations from the {type(secondary_recommender).__name__} recommender is equal to the recommendations made by {type(primary_recommender).__name__}')
    # The skipped artists are those the primary recommender returned nothing for.
    print(f'{counter_nan} artists (out of {len(df)}) received no recommendations from the {type(primary_recommender).__name__}')

    return analysis

# **Comparing recommenders**

## **Checking how many of the recommendations made by the Popularity Recommender match those from Spotify**

In [12]:
# Score the PopularityRecommender (built on the bipartite artist-genre graph) against Spotify's related artists.
analysis_popularity_recommender = compare_recommendations_spotify(df, PopularityRecommender(Bipartite_G))

100%|██████████| 14788/14788 [00:04<00:00, 3295.06it/s]

On average, 10.06% of the recommendations from the PopularityRecommender is equal to the recommendations made by Spotify
1228 artists (out of 14788) had no related artists, either because the API did not provide any or because they were removed





## **Checking how many of the recommendations made by the Stronger Connections Recommender match those from Spotify**

In [13]:
# Score the StrongerConnectionsRecommender (built on the artist-only graph) against Spotify's related artists.
analysis_stronger_connections_recommender = compare_recommendations_spotify(df, StrongerConnectionsRecommender(G))

100%|██████████| 14788/14788 [00:02<00:00, 7375.27it/s]

On average, 17.89% of the recommendations from the StrongerConnectionsRecommender is equal to the recommendations made by Spotify
1228 artists (out of 14788) had no related artists, either because the API did not provide any or because they were removed





## **Checking how many of the recommendations made by the Popularity Recommender match those from the Stronger Connections Recommender**

In [14]:
# Primary = StrongerConnectionsRecommender, secondary = PopularityRecommender:
# the score is the share of the secondary's recommendations that also appear in the primary's list.
analysis_popularity_recommender_2 = compare_recommendations_created_recommenders(df, StrongerConnectionsRecommender(G), PopularityRecommender(Bipartite_G))

100%|██████████| 14788/14788 [00:06<00:00, 2212.75it/s]

On average, 70.70% of the recommendations from the PopularityRecommender recommender is equal to the recommendations made by StrongerConnectionsRecommender
0 artists (out of 14788) had no related artists, either because the API did not provide any or because they were removed





## **Checking how many of the recommendations made by the Stronger Connections Recommender match those from the Popularity Recommender**

In [15]:
# Roles swapped: primary = PopularityRecommender, secondary = StrongerConnectionsRecommender,
# so the score is the share of the StrongerConnectionsRecommender's recommendations found in the primary's list.
analysis_stronger_connections_recommender_2 = compare_recommendations_created_recommenders(df, PopularityRecommender(Bipartite_G), StrongerConnectionsRecommender(G))

100%|██████████| 14788/14788 [00:06<00:00, 2132.63it/s]

On average, 70.97% of the recommendations from the StrongerConnectionsRecommender recommender is equal to the recommendations made by PopularityRecommender
0 artists (out of 14788) had no related artists, either because the API did not provide any or because they were removed



