## User-Artist Interactions Network

In [7]:
import pandas as pd
import numpy as np
import networkx as nx
import matplotlib.pyplot as plt
from IPython.display import display
import os

In [2]:
# Load the two tab-separated tables used in this notebook: the user-artist
# interaction rows and the artist metadata.
user_artists_path = '../data/hetrec2011-lastfm-2k/user_artists.dat'
artists_path = '../data/hetrec2011-lastfm-2k/artists.dat'
user_artists = pd.read_csv(user_artists_path, sep='\t')
artists = pd.read_csv(artists_path, sep='\t')

# Number of distinct users and distinct artists in the interaction table.
n_unique_users = user_artists['userID'].nunique()
n_unique_artists = user_artists['artistID'].nunique()
print(n_unique_users, n_unique_artists)

# Preview the first five rows of each table.
for frame in (user_artists, artists):
    display(frame.head(5))

1892 17632


Unnamed: 0,userID,artistID,weight
0,2,51,13883
1,2,52,11690
2,2,53,11351
3,2,54,10300
4,2,55,8983


Unnamed: 0,id,name,url,pictureURL
0,1,MALICE MIZER,http://www.last.fm/music/MALICE+MIZER,http://userserve-ak.last.fm/serve/252/10808.jpg
1,2,Diary of Dreams,http://www.last.fm/music/Diary+of+Dreams,http://userserve-ak.last.fm/serve/252/3052066.jpg
2,3,Carpathian Forest,http://www.last.fm/music/Carpathian+Forest,http://userserve-ak.last.fm/serve/252/40222717...
3,4,Moi dix Mois,http://www.last.fm/music/Moi+dix+Mois,http://userserve-ak.last.fm/serve/252/54697835...
4,5,Bella Morte,http://www.last.fm/music/Bella+Morte,http://userserve-ak.last.fm/serve/252/14789013...


In [10]:
# Attach each artist's name to its interaction rows. The inner join keeps only
# rows whose artistID has a matching id in the artists table.
artist_lookup = artists[['id', 'name']]
temp = (
    user_artists
    .merge(artist_lookup, how='inner', left_on='artistID', right_on='id')
    .rename(columns={'name': 'artist_name'})
)

print(temp.shape)
temp.head()


(92834, 5)


Unnamed: 0,userID,artistID,weight,id,artist_name
0,2,51,13883,51,Duran Duran
1,4,51,228,51,Duran Duran
2,27,51,85,51,Duran Duran
3,28,51,10,51,Duran Duran
4,62,51,528,51,Duran Duran


Keep only artists that have more than 100 listeners

In [5]:
# Popularity cutoff: an artist is kept only when it appears in strictly more
# than this many rows of `temp` (the comparison below is `>`, not `>=`).
MIN_LISTENER_ROWS = 100

# For every row, store how many rows share the same artist_name.
# NOTE(review): presumably there is one row per (user, artist) pair, so `cnt`
# is the number of distinct listeners -- confirm against the raw data.
temp['cnt'] = temp.groupby(['artist_name'])['id'].transform('count')

# Keep the rows of sufficiently popular artists and renumber the index from 0.
temp_filtered = temp[temp['cnt'] > MIN_LISTENER_ROWS].reset_index(drop=True)

In [9]:
# Persist the filtered interactions as CSV, creating the target directory
# first if it does not exist yet.
file_path = '../data/intermediate/user_artist_cleaned.csv'
output_dir = os.path.dirname(file_path)
os.makedirs(output_dir, exist_ok=True)

temp_filtered.to_csv(file_path, index=False)

In [12]:
# One row per distinct (artistID, artist_name) pair that survived the filter.
artist_columns = ['artistID', 'artist_name']
top_artists = (
    temp_filtered[artist_columns]
    .drop_duplicates()
    .reset_index(drop=True)
)
print(top_artists.shape)
top_artists.head(5)

(125, 2)


Unnamed: 0,artistID,artist_name
0,51,Duran Duran
1,55,Kylie Minogue
2,56,Daft Punk
3,59,New Order
4,65,Coldplay


In [13]:
# Persist the list of retained artists as CSV, creating the target directory
# first if it does not exist yet.
file_path = '../data/intermediate/top_artists.csv'
output_dir = os.path.dirname(file_path)
os.makedirs(output_dir, exist_ok=True)

top_artists.to_csv(file_path, index=False)

Create the user-artist bipartite network

In [14]:
# Build an undirected graph with two kinds of nodes: user IDs (bipartite=0)
# and artist names (bipartite=1). Each row of temp_filtered contributes one
# user-artist edge whose 'weight' attribute is that row's weight value.
B = nx.Graph()
B.add_nodes_from(list(temp_filtered['userID'].unique()), bipartite=0)
B.add_nodes_from(list(temp_filtered['artist_name'].unique()), bipartite=1)
B.add_weighted_edges_from(
    zip(temp_filtered['userID'], temp_filtered['artist_name'], temp_filtered['weight'])
)

# Print a short summary. `nx.info` was deprecated in NetworkX 2.7 and removed
# in 3.0, so the figures are computed from the graph directly, which works on
# every NetworkX version.
n_nodes = B.number_of_nodes()
n_edges = B.number_of_edges()
print("Type:", type(B).__name__)
print("Number of nodes:", n_nodes)
print("Number of edges:", n_edges)
if n_nodes:
    # In an undirected graph every edge contributes to two node degrees.
    print("Average degree:", round(2 * n_edges / n_nodes, 4))

Name: 
Type: Graph
Number of nodes: 1938
Number of edges: 24809
Average degree:  25.6027


Write the graph in GEXF format so that it can be opened in Gephi

In [15]:
# Export the graph as a GEXF file, creating the target directory first if it
# does not exist yet.
file_path = '../data/networkx graphs/user_artist.gexf'
output_dir = os.path.dirname(file_path)
os.makedirs(output_dir, exist_ok=True)

nx.write_gexf(B, file_path)

User-Artist Network visualization

<img src="../data/images/user_artist.png" alt="drawing" width="500"/>