### Format data as nodes and edges and graph 
This script takes the music library data collected in step 1 and creates the network graph.
Edges between artists are created by matching the first (most prevalent) genre of an artist's songs with the
prevalence of that genre in other artists' songs.

In [1]:
# Import libraries
import pandas as pd
import numpy as np

import seaborn as sns
import matplotlib.pyplot as plt

from pyvis.network import Network
import networkx as nx
from IPython.display import display, HTML

import streamlit as st
import streamlit.components.v1 as components


In [2]:
# Load the artist table collected in step 1. Later cells read the 'Artist',
# 'Genre' and 'SongCount' columns from this frame.
df0 = pd.read_csv('ArtistGenres.csv', encoding='utf-8')

In [3]:
# Add an empty 'Main' column; it is meant to hold each artist's main
# (first-listed) genre, which is later used as the node group and edge key.
df0['Main'] = ''

In [4]:
# Get the first genre from each artist's genre list.
#
# 'Genre' holds a stringified list; drop the surrounding square brackets and
# keep the text before the first comma (any quote characters around the genre
# are kept as-is, exactly as the per-row version did).
#
# The whole column is assigned in one statement instead of the chained
# per-row write df0['Main'][i] = ..., which targets a temporary object under
# pandas copy-on-write and leaves df0 unchanged. With the chained write gone
# there is no SettingWithCopyWarning to silence, so the global
# pd.options.mode.chained_assignment override is no longer needed.
df0['Main'] = df0['Genre'].str.strip('[]').str.split(',').str[0]

In [5]:
# Create the pyvis network (height and width both '900px').
net = Network('900px', '900px', notebook=False)

length_nodes = len(df0)
# One viridis hex colour per artist (not referenced by the node loop below).
colour_list = sns.color_palette("viridis", length_nodes).as_hex()

# Add one node per artist: the id is the row position, the label/tooltip is
# the artist name, the size value is SongCount * 10, and nodes are grouped
# by main genre.
artist_rows = zip(df0['Artist'], df0['SongCount'], df0['Main'])
for node_id, (artist, song_count, main_genre) in enumerate(artist_rows):
    artist_name = str(artist)
    net.add_node(
        str(node_id),
        label=artist_name,
        value=str(song_count * 10),
        title=artist_name,
        group=str(main_genre),
    )

In [6]:
# Calculate edge weights and add the edges.
# Each edge is: from node ID, to node ID, weight.
#
# Open question: should the edges be directional?
#
# For each artist's main (first-listed) genre, create an edge to every other
# artist whose genre list mentions that genre at all.
# The position of the genre in the other artist's list determines the weight:
# start from 500, subtract 1 per character before the match (floored at 1),
# then rescale to an integer in the range 0-10.
# *** replace the character offset with list position (comma separated) ***

for i in range(0, length_nodes):

    a1 = df0['Artist'][i]
    g1 = df0['Main'][i]

    # Literal (non-regex), case-insensitive substring match on the genre string.
    idx = df0[df0['Genre'].str.contains(g1, case=False, regex=False)]
    # Drop rows belonging to the artist itself so no self-edges are created.
    idx_d = idx[idx.Artist != a1]

    # Test idx_d rather than idx: idx normally contains the artist's own row,
    # so only idx_d says whether any *other* artist matched. (The previous
    # `~bool(...)` was a bitwise NOT on a bool, yielding -1 or -2, which are
    # both truthy, so this message could never be printed.)
    if idx_d.empty:
        print('No genre matches found')
        continue

    g1_lower = g1.lower()
    for n, s in idx_d['Genre'].items():
        # Character offset of the genre inside the other artist's genre string.
        # Try an exact-case match first; fall back to a case-insensitive search
        # because the filter above ignores case and an exact lookup would
        # otherwise fail on rows that differ only in capitalisation.
        pos = s.find(g1)
        if pos < 0:
            pos = s.lower().find(g1_lower)
        char_cnt = max(pos, 0)

        # Weight based on genre match position.
        w = max(500 - char_cnt, 1)
        ws = round(((w / 500) * 100) / 10)

        # Create the edge; node ids are the stringified row labels.
        net.add_edge(str(i), str(n), weight=str(ws))

In [7]:
# Save the graph to an HTML file (path is relative to the working directory).
# NOTE(review): this runs before net.toggle_physics(True) in the next cell,
# and net.show() there targets the same file name, so this save is presumably
# overwritten -- confirm whether both writes are needed.
net.save_graph("MusicLibraryNetworkGraph.html")

In [8]:
# Turn the physics setting on, then render the graph to the same HTML file
# name used by save_graph above.
# NOTE(review): notebook=False presumably makes pyvis open the file outside
# the notebook rather than inline -- confirm against the installed pyvis version.
net.toggle_physics(True)
net.show('MusicLibraryNetworkGraph.html', notebook=False)

MusicLibraryNetworkGraph.html
