In [1]:
import pandas as pd
import sys

# Add the project's local package directories to the module search path.
# These are relative paths, so they resolve against the kernel's working
# directory. NOTE(review): presumably the notebook's own folder — confirm.
recommenders_path = 'Recommenders'
sys.path.append(recommenders_path)

graphs_path = 'Graphs'
sys.path.append(graphs_path)

artist_path = 'Artists'
sys.path.append(artist_path)

# Project-local classes used by the cells below.
from Recommenders.PopularityRecommender import PopularityRecommender
from Graphs.BipartiteGraph import BipartiteGraph

In [2]:
# Load the complete artist table; no popularity filter has been applied to it.
artists_csv = '../data/spotify_artists.csv'
df = pd.read_csv(artists_csv)
df

Unnamed: 0,external_urls,followers,genres,id,name,popularity,uri
0,https://open.spotify.com/artist/4dpARuHxo51G3z...,57657211,"british soul, pop, pop soul, uk pop",4dpARuHxo51G3z768sgnrY,Adele,86,spotify:artist:4dpARuHxo51G3z768sgnrY
1,https://open.spotify.com/artist/66CXWjxzNUsdJx...,98814719,pop,66CXWjxzNUsdJxJ2JdwvnR,Ariana Grande,91,spotify:artist:66CXWjxzNUsdJxJ2JdwvnR
2,https://open.spotify.com/artist/13ubrt8QOOCPlj...,14722549,"east coast hip hop, hip hop, rap",13ubrt8QOOCPljQ2FL1Kca,A$AP Rocky,84,spotify:artist:13ubrt8QOOCPljQ2FL1Kca
3,https://open.spotify.com/artist/06HL4z0CvFAxyc...,118971229,pop,06HL4z0CvFAxyc27GXpf02,Taylor Swift,100,spotify:artist:06HL4z0CvFAxyc27GXpf02
4,https://open.spotify.com/artist/7Ln80lUS6He07X...,25991892,"garage rock, modern rock, permanent wave, rock...",7Ln80lUS6He07XvHI8qqHH,Arctic Monkeys,85,spotify:artist:7Ln80lUS6He07XvHI8qqHH
...,...,...,...,...,...,...,...
20486,https://open.spotify.com/artist/3xx17YXYvPl9PB...,56415,"contemporary vocal jazz, ectofolk, folk, lilith",3xx17YXYvPl9PBx6Jzalk4,Shelby Lynne,48,spotify:artist:3xx17YXYvPl9PBx6Jzalk4
20487,https://open.spotify.com/artist/3jvWpZJpokYCoT...,35290,"alternative emo, emo, midwest emo",3jvWpZJpokYCoT0QV4OJg0,You Blew It!,33,spotify:artist:3jvWpZJpokYCoT0QV4OJg0
20488,https://open.spotify.com/artist/53HxKgLPRmkmUq...,72,,53HxKgLPRmkmUqzOv7qVL3,Z,0,spotify:artist:53HxKgLPRmkmUqzOv7qVL3
20489,https://open.spotify.com/artist/0SYlWdOsemdRbG...,0,,0SYlWdOsemdRbGNNU6FF3h,Z,0,spotify:artist:0SYlWdOsemdRbGNNU6FF3h


In [3]:
# Keep only artists that have a genre entry, then renumber the rows from 0
# so the label-based lookups in later cells line up with range(len(df)).
df = df.dropna(subset=['genres']).reset_index(drop=True)
df

Unnamed: 0,external_urls,followers,genres,id,name,popularity,uri
0,https://open.spotify.com/artist/4dpARuHxo51G3z...,57657211,"british soul, pop, pop soul, uk pop",4dpARuHxo51G3z768sgnrY,Adele,86,spotify:artist:4dpARuHxo51G3z768sgnrY
1,https://open.spotify.com/artist/66CXWjxzNUsdJx...,98814719,pop,66CXWjxzNUsdJxJ2JdwvnR,Ariana Grande,91,spotify:artist:66CXWjxzNUsdJxJ2JdwvnR
2,https://open.spotify.com/artist/13ubrt8QOOCPlj...,14722549,"east coast hip hop, hip hop, rap",13ubrt8QOOCPljQ2FL1Kca,A$AP Rocky,84,spotify:artist:13ubrt8QOOCPljQ2FL1Kca
3,https://open.spotify.com/artist/06HL4z0CvFAxyc...,118971229,pop,06HL4z0CvFAxyc27GXpf02,Taylor Swift,100,spotify:artist:06HL4z0CvFAxyc27GXpf02
4,https://open.spotify.com/artist/7Ln80lUS6He07X...,25991892,"garage rock, modern rock, permanent wave, rock...",7Ln80lUS6He07XvHI8qqHH,Arctic Monkeys,85,spotify:artist:7Ln80lUS6He07XvHI8qqHH
...,...,...,...,...,...,...,...
14999,https://open.spotify.com/artist/1NrqtKwAa1VXMW...,103432,rap kreyol,1NrqtKwAa1VXMWxREFlPPB,Wendyyy,39,spotify:artist:1NrqtKwAa1VXMWxREFlPPB
15000,https://open.spotify.com/artist/2x49HGCVPqbRxe...,76890,indonesian lo-fi pop,2x49HGCVPqbRxecj0PZq2R,Yahya,49,spotify:artist:2x49HGCVPqbRxecj0PZq2R
15001,https://open.spotify.com/artist/3xx17YXYvPl9PB...,56415,"contemporary vocal jazz, ectofolk, folk, lilith",3xx17YXYvPl9PBx6Jzalk4,Shelby Lynne,48,spotify:artist:3xx17YXYvPl9PBx6Jzalk4
15002,https://open.spotify.com/artist/3jvWpZJpokYCoT...,35290,"alternative emo, emo, midwest emo",3jvWpZJpokYCoT0QV4OJg0,You Blew It!,33,spotify:artist:3jvWpZJpokYCoT0QV4OJg0


In [4]:
# Lookup table: Spotify artist ID -> artist name.
# The two columns are paired positionally, so this no longer depends on the
# frame carrying a 0..n-1 label index (the old df['id'][i] lookup was
# label-based and only worked because the index had just been reset).
# As before, if an ID occurs more than once the last name seen wins.
artists_ids = dict(zip(df['id'], df['name']))

### CREATE EDGES AND NODES

Edge structure: `(artist_id, genre, artist_popularity)` — one tuple per artist–genre pair, carrying the artist's popularity as its third element.

In [5]:
# Build one (artist_id, genre, popularity) tuple for every artist/genre pair.
edges = []
for row in range(len(df)):
    raw_genres = df['genres'][row]
    if type(raw_genres) is float:  # NaN is a float: no genre string to split
        continue
    artist, popularity = df['id'][row], df['popularity'][row]
    # Genres are stored as one comma-separated string; strip the padding
    # left around each name after splitting.
    edges.extend((artist, name.strip(), popularity) for name in raw_genres.split(','))

The graph has two types of nodes: artists and genres. Each artist is connected to every genre listed for them in the dataset.

In [6]:
# Artist side of the graph: one node per row, identified by Spotify ID.
artists_nodes = df['id'].tolist()

# Genre side of the graph: every distinct genre name found in the
# comma-separated 'genres' column, with surrounding whitespace removed.
genres_nodes = {
    genre.strip()
    for genre_field in df['genres'].dropna()
    for genre in genre_field.split(',')
}

### CREATE GRAPH

In [7]:
# Assemble the artist/genre graph from the node collections and edge tuples
# built in the previous cells.
# NOTE(review): BipartiteGraph is project code not visible here; presumably
# add_nodes takes the two node partitions and add_edges takes
# (artist_id, genre, popularity) tuples — confirm against Graphs/BipartiteGraph.
G = BipartiteGraph()
G.add_nodes(artists_nodes, genres_nodes)
G.add_edges(edges)

In [11]:
# Collect the genre nodes whose degree is exactly 1.
#
# Membership is tested against `genres_nodes` (the individual genre names that
# were added to the graph). The previous check used list(df['genres']), which
# holds the raw comma-joined strings such as "british soul, pop, pop soul", so
# a genre only matched when some artist listed it as their single genre. It
# also rebuilt that list on every iteration.
# NOTE(review): assumes G.get_nodes() yields node identifiers comparable to the
# strings in genres_nodes and G.get_degree(node) returns an integer — confirm.
lista = [
    node
    for node in G.get_nodes()
    if node in genres_nodes and G.get_degree(node) == 1
]

### POPULARITY RECOMMENDER

In [8]:
# Spotify ID of the seed artist to recommend from (the printed result names this artist "IZA").
artist_id = '3zgnrYIltMkgeejmvMCnes'

In [9]:
# Number of artists to recommend.
number_recommendations = 20
# NOTE(review): PopularityRecommender is project code not visible here;
# presumably it ranks candidates reachable in G by popularity — confirm.
recommender = PopularityRecommender(G, number_recommendations)

In [10]:
# Compute the recommendations for the seed artist, then display them.
# NOTE(review): presumably convert_recommendations uses the artists_ids lookup
# to turn Spotify IDs into names for the printed list — confirm.
recommender.make_recommendations(artist_id)
recommender.convert_recommendations(artist_id, artists_ids)

recommended artists based on IZA:
1: Anitta
2: Alok
3: Pabllo Vittar
4: Matheus & Kauan
5: LUDMILLA
6: Luísa Sonza
7: Matuê
8: Djonga
9: Luccas Carlos
10: Gaab
11: Lourena
12: Emicida
13: Preto no Branco
14: Flora Matos
15: Go Dassisti
16: Vitão
17: Negra Li
18: IVYSON
19: Xênia França
20: Ubunto
