# NLTK

## Part 2: Community detection 

**Explain the concept of modularity in your own words.**

Modularity quantifies how well a network is divided into communities. It compares the fraction of edges that fall inside communities with the fraction that would be expected in a random network with the same node degree distribution. High (positive) modularity values indicate that there are more connections within communities than would be expected by chance, while values close to zero indicate that the partition is no better than a random one. This makes modularity a useful metric for identifying meaningful communities in networks.

In [4]:
# load artist genres dictionary
import json

def load_artist_genres_dict(path='artists_genres_dictionary.txt'):
    """Load the artist -> genres mapping from a JSON file.

    Args:
        path: Location of the JSON file. Defaults to
            'artists_genres_dictionary.txt' in the current working
            directory, so calls without arguments read the same file as
            before.

    Returns:
        The object decoded from the JSON document. The rest of the notebook
        uses it as a dict mapping artist names to lists of genre strings.

    Raises:
        OSError: If the file cannot be opened (e.g. FileNotFoundError).
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # Be explicit about the encoding: without it, open() falls back to the
    # platform's locale encoding, which can garble or reject artist names
    # containing non-ASCII characters.
    with open(path, 'r', encoding='utf-8') as f:
        return json.load(f)

# Read the mapping once, then show the number of artists followed by the
# full mapping (one value per output line).
artists = load_artist_genres_dict()
print(len(artists), artists, sep='\n')

1833
{'A Thousand Horses': ['country', 'country rock', 'southern rock'], 'A.P. Carter': ['country', 'folk', 'gospel'], 'Aaron Barker': ['country'], 'Aaron Benward': ['country', 'christian music'], 'Aaron Lewis (musician)': ['alternative metal', 'post-grunge', 'nu metal', 'country rock', 'alternative country'], 'Aaron Lines': ['country'], 'Aaron Neville': ['soul', 'country', 'gospel', 'jazz', 'pop'], 'Aaron Pritchett': ['country'], 'Aaron Watson': ['country', 'red dirt', 'texas country'], 'Ace in the Hole Band': ['alternative country', 'neotraditional country', 'western swing'], 'Adam Brand (musician)': ['country'], 'Adam Gregory': ['country music'], 'Adam Harvey': ['country'], 'Adrienne Young': ['bluegrass', 'country', 'old-time music', 'american folk music'], 'Al Anderson (NRBQ)': ['rock', 'country'], 'Al Caiola': ['jazz', 'country', 'rock', 'pop'], 'Al Dexter': ['country'], 'Al Downing (musician)': ['rockabilly', 'rock and roll', 'blues', 'rhythm and blues', 'country', 'gospel'], 'Al

In [12]:
import networkx as nx
import pickle

# Load the artist graph from the pickle file.
# NOTE: pickle.load can execute arbitrary code while deserializing, so this
# must only ever be pointed at a pickle file from a trusted source.
with open('../lab5/artist_graph.pkl', 'rb') as f:
    G = pickle.load(f)

# make the graph undirected
G = G.to_undirected()
print(G.number_of_nodes())

# Tag every node with the first genre listed for its artist and collect the
# nodes that cannot be tagged. Removal is deferred until after the loop
# because nodes must not be deleted while G.nodes() is being iterated.
rm_nodes = []

for node in G.nodes():
    # Graph node names use underscores where the genre dictionary uses spaces.
    genres = artists.get(node.replace('_', ' '))
    if genres:
        G.nodes[node]['genre'] = genres[0]
    else:
        # Artist missing from the dictionary, or listed with no genres at
        # all: it cannot be assigned to a genre community, so drop it.
        rm_nodes.append(node)

G.remove_nodes_from(rm_nodes)

print(G.number_of_nodes())
print(G.nodes(data=True))

2098
1829
[('Haley_&_Michaels', {'len_content': 1370, 'genre': 'country'}), ('Dickey_Betts', {'len_content': 4911, 'genre': 'rock'}), ('Two_Tons_of_Steel', {'len_content': 522, 'genre': 'country'}), ('Bacon_Brothers', {'len_content': 101, 'genre': 'country rock'}), ('Cledus_T._Judd', {'len_content': 5373, 'genre': 'country'}), ('Charlie_Major', {'len_content': 932, 'genre': 'country'}), ('Caryl_Mack_Parker', {'len_content': 752, 'genre': 'country'}), ('Tenille_Arts', {'len_content': 4681, 'genre': 'country'}), ('Tyler_Hubbard', {'len_content': 4453, 'genre': 'country'}), ('Steven_Lee_Olsen', {'len_content': 2802, 'genre': 'country'}), ('O._B._McClinton', {'len_content': 1789, 'genre': 'country'}), ('JT_Hodges', {'len_content': 1991, 'genre': 'country'}), ('Hank_Flamingo', {'len_content': 1029, 'genre': 'country'}), ('Shawn_Camp_(singer)', {'len_content': 1678, 'genre': 'country'}), ('Valerie_June', {'len_content': 4844, 'genre': 'americana'}), ('Roxie_Dean', {'len_content': 995, 'genre

In [18]:
from collections import defaultdict
from networkx.algorithms.community.quality import modularity


# Partition the graph by genre: every distinct value of the 'genre' node
# attribute becomes one community containing all nodes tagged with it.
communities = defaultdict(list)
for node, data in G.nodes(data=True):
    communities[data['genre']].append(node)

# modularity() is given the communities as a list of node lists
communities_list = [*communities.values()]

# Modularity of the genre-based partition
mod = modularity(G, communities_list)
print(f"Modularity of the graph: {mod}")

# calculate modularity based on equation



Modularity of the graph: 0.07113789914793135
