In [1]:
import pandas as pd
import numpy as np
import pickle
import matplotlib.pyplot as plt
import seaborn as sns
import itertools

import networkx as nx
import igviz as ig

In [2]:
# Read every line of the cleaned release-credit file into a list.
# Lines are kept exactly as read (including their line terminators);
# parsing happens in a later cell.
with open('all_artist_release_credits_cleaned.txt', 'r', encoding='utf-8') as f:
    coartists = list(f)
len(coartists)

96600

In [3]:
# De-duplicate the raw credit lines while keeping their first-seen file order.
# dict.fromkeys is used instead of set(): iteration order of a set of strings
# changes between interpreter runs (hash randomization), which would make the
# row order -- and the insertion order of everything built from it -- differ
# from run to run.
# NOTE: this cell rebinds `coartists` from raw strings to parsed lists, so it
# cannot be re-run on its own; re-run the cell that reads the file first.
coartists = list(dict.fromkeys(coartists))

# Split each line on the '<--->' delimiter and drop the final field, then turn
# the last remaining field into an int.
# NOTE(review): presumably the dropped field is the trailing line terminator
# and the int field is a release count -- confirm against the file format.
coartists = [line.split('<--->')[:-1] for line in coartists]
for line in coartists:
    line[-1] = int(line[-1])

len(coartists)

56967

In [4]:
# Load the pickled artist attribute dictionary from disk.
# The `with` block guarantees the file handle is closed even if unpickling
# raises.
# NOTE(security): pickle.load can execute arbitrary code while loading; only
# use this with a file you produced yourself or otherwise trust.
filename = 'cleaned_artists_dict'
with open(filename, 'rb') as infile:
    artist_dict = pickle.load(infile)

In [5]:
# Show the top-level keys of the loaded dictionary; each key is looked up
# below when node attributes are attached to the graph.
artist_dict.keys()

dict_keys(['country_mb', 'tags_mb', 'listeners_lastfm'])

In [6]:
#artist_dict

In [7]:
# Build an undirected, weighted collaboration graph.
# Every row of `coartists` is [artist, artist, ..., count]; each pair of
# artists on a row gets an edge, and the edge weight accumulates the row's
# count divided by the number of artists on that row (so with two artists,
# each pairing is credited with half of the count).
G = nx.Graph()
for credit in coartists:
    if len(credit) <= 1:
        continue
    artists, count = credit[:-1], credit[-1]
    share = count / len(artists)
    for first, second in itertools.combinations(artists, 2):
        if G.has_edge(first, second):
            G[first][second]['weight'] += share
        else:
            G.add_edge(first, second, weight=share)

# Attach the per-artist attributes, mapping node attribute name -> dict key.
node_attribute_sources = (
    ('country', 'country_mb'),
    ('tags', 'tags_mb'),
    ('listeners', 'listeners_lastfm'),
)
for attribute_name, source_key in node_attribute_sources:
    nx.set_node_attributes(G, artist_dict[source_key], attribute_name)

# Nodes that ended up with an empty attribute dict received none of the
# attributes above; collect them, report how many there are, and drop them.
artists_with_no_data = [node for node, attrs in G.nodes(data=True) if not attrs]
print('# artists with no data: ', len(artists_with_no_data))
G.remove_nodes_from(artists_with_no_data)

# Persist the filtered network both as a weighted edge list and as GML.
net_name = 'artists_network_norm_all_attrs'
nx.write_weighted_edgelist(G, net_name+'.edgelist', encoding='utf-8', delimiter='<--->')
nx.write_gml(G, net_name+'.gml')

# artists with no data:  29643


In [81]:
# Report how many nodes and edges the graph currently holds.
print('Nodes: ', G.number_of_nodes())
print('Edges: ', G.number_of_edges())

Nodes:  12761
Edges:  31473


In [82]:
# Spot-check the attribute dict stored on a single node.
G.nodes['kanye west']

{'country': 'united states', 'tags': ['pop'], 'listeners': 0.8158408136514887}

In [83]:
# Collect the distinct 'tags' values found on the graph's nodes.
# Each node's 'tags' value is turned into its string form before being added
# to the set, so an entry is the text of a whole tags value
# (e.g. "['pop']"), not an individual tag.
unique_genre_tags = {str(attrs['tags']) for _, attrs in G.nodes(data=True)}
print('# unique_genre_tags: ',len(unique_genre_tags))

# unique_genre_tags:  54


In [75]:
# Display the collected tag strings for inspection.
unique_genre_tags

{"['00s']",
 "['10s']",
 "['1980s']",
 "['2000s']",
 "['60s']",
 "['70s']",
 "['80s']",
 "['90s']",
 "['alternative country']",
 "['alternative dance']",
 "['alternative hip hop']",
 "['alternative metal']",
 "['alternative pop']",
 "['alternative r&b']",
 "['alternative rap']",
 "['alternative rock']",
 "['alternative']",
 "['art rock']",
 "['classic rock']",
 "['classical']",
 "['contemporary country']",
 "['country blues']",
 "['country pop']",
 "['country rock']",
 "['country']",
 "['dance and electronica']",
 "['dance']",
 "['electro house']",
 "['electronic rock']",
 "['electronic']",
 "['experimental rock']",
 "['folk rock']",
 "['gangsta rap']",
 "['gospel']",
 "['hard rock']",
 "['heavy metal']",
 "['hip hop']",
 "['house']",
 "['jazz']",
 "['metal']",
 "['new metal']",
 "['pop rap']",
 "['pop rock']",
 "['pop']",
 "['punk rock']",
 "['r&b']",
 "['rap rock']",
 "['rap']",
 "['rock']",
 "['soft rock']",
 "['spanish guitar']",
 "['spanish']",
 "['tech house']",
 "['trap']"}

In [None]:
# == 'country'

In [87]:
# Select every artist that has at least one tag containing the substring
# 'country' (so 'country pop', 'alternative country', ... all qualify).
# any() stops at the first matching tag, so an artist is listed exactly once
# even when several of its tags match; appending inside the per-tag loop would
# add it once per matching tag and inflate the printed count.
country_artists = [
    n for n, data in G.nodes(data=True)
    if any('country' in tag for tag in data['tags'])
]

print('# country_artists: ',len(country_artists))

# Induced subgraph on the selected artists: only edges whose two endpoints
# are both in the selection are kept.
G_country = G.subgraph(country_artists)
print('Nodes: ', len(G_country.nodes))
print('Edges: ', len(G_country.edges))

# Save the subgraph as a weighted edge list and as GML.
net_name = 'country_network_norm_all_attrs'
nx.write_weighted_edgelist(G_country, net_name+'.edgelist', encoding='utf-8', delimiter='<--->')
nx.write_gml(G_country, net_name+'.gml')

# country_artists:  282
Nodes:  282
Edges:  132


In [88]:
# Select every artist that has at least one tag containing the substring
# 'rock' (so 'pop rock', 'hard rock', 'classic rock', ... all qualify).
# any() stops at the first matching tag, so an artist is listed exactly once
# even when several of its tags match; appending inside the per-tag loop would
# add it once per matching tag and inflate the printed count.
rock_artists = [
    n for n, data in G.nodes(data=True)
    if any('rock' in tag for tag in data['tags'])
]

print('# rock_artists: ',len(rock_artists))

# Induced subgraph on the selected artists: only edges whose two endpoints
# are both in the selection are kept.
G_rock = G.subgraph(rock_artists)
print('Nodes: ', len(G_rock.nodes))
print('Edges: ', len(G_rock.edges))

# Save the subgraph as a weighted edge list and as GML.
net_name = 'rock_network_norm_all_attrs'
nx.write_weighted_edgelist(G_rock, net_name+'.edgelist', encoding='utf-8', delimiter='<--->')
nx.write_gml(G_rock, net_name+'.gml')

# rock_artists:  2528
Nodes:  2528
Edges:  1636
