In [22]:
import networkx as nx
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pyvis.network import Network
from tqdm import tqdm
import pickle


In [3]:
# Load the graph and the artist data pickled under ./saved_objects.
# NOTE(review): pickle.load can execute arbitrary code while deserializing;
# only point these paths at files produced by this project.
# G is the graph the global centrality measures below are computed on.
# artist_info is indexed by node id further down; element 0 of each entry is
# what the ranking table prints (presumably the artist name -- confirm).
with open("./saved_objects/main_graph.pkl", "rb") as f:
    G = pickle.load(f)
with open("./saved_objects/artist_info.pkl", "rb") as f:
    artist_info = pickle.load(f)

# Load previous community detection results.
# comm_subgraphs is enumerated later, so position in it is the community index.
with open("./saved_objects/community_subgraphs.pkl", "rb") as f:
    comm_subgraphs = pickle.load(f)
with open("./saved_objects/genres_by_community.pkl", "rb") as f:
    genres_by_comm= pickle.load(f)

# 4. Centrality Analysis

## _Global_

In [4]:

# Rank every node of the full graph by each global centrality measure.
# The networkx functions return a {node: score} dict, so we sort its .items():
# the key x[1] is then the score. Iterating the dict directly would yield only
# the node ids, and x[1] would index into the id itself instead of the score.
# Each result is a list of (node, score) pairs, highest score first.
degree_centrality = sorted(nx.degree_centrality(G).items(), key=lambda x: x[1], reverse=True)
eigenvector_centrality = sorted(nx.eigenvector_centrality(G).items(), key=lambda x: x[1], reverse=True)
pagerank_centrality = sorted(nx.pagerank(G, alpha=0.85).items(), key=lambda x: x[1], reverse=True)

# Too Slow!
# closeness_centrality = nx.closeness_centrality(G)
# betweenness_centrality = nx.betweenness_centrality(G)

## _Local by subgraph_

In [17]:

# Per-community centrality measures.
# The commented-out block computes betweenness, eigenvector and Katz centrality
# on each community subgraph and pickles the three result dicts to the same
# paths that are read back at the bottom of this cell. It is left disabled so
# the cell only reloads the saved results (presumably because the computation
# is slow -- confirm); un-comment it to regenerate the files.
# # Note: this is only intra-community centrality 
# betweenness_comms = {idx: nx.betweenness_centrality(g) for idx, g in enumerate(comm_subgraphs)}
# eigenvector_comms = {idx: nx.eigenvector_centrality_numpy(g) for idx, g in enumerate(comm_subgraphs)}
# katz_comms = {idx: nx.katz_centrality_numpy(g) for idx, g in enumerate(comm_subgraphs)}

# # Save
# with open("./saved_objects/betweenness_by_comm.pkl", "wb") as f:
#     pickle.dump(betweenness_comms, f)
# with open("./saved_objects/eigenvector_by_comm_by_comm.pkl", "wb") as f:
#     pickle.dump(eigenvector_comms, f)
# with open("./saved_objects/katz_by_comm.pkl", "wb") as f:
#     pickle.dump(katz_comms, f)

# Load
# Each object is used below as {community index: {node: score}}.
# NOTE(review): pickle.load can execute arbitrary code; only load trusted files.
with open("./saved_objects/betweenness_by_comm.pkl", "rb") as f:
    betweenness_comms = pickle.load(f)
# The doubled "_by_comm_by_comm" matches the save path above; keep them in sync.
with open("./saved_objects/eigenvector_by_comm_by_comm.pkl", "rb") as f:
    eigenvector_comms = pickle.load(f)
with open("./saved_objects/katz_by_comm.pkl", "rb") as f:
    katz_comms = pickle.load(f)


In [37]:
# Composite centrality: for every node of every community, a weighted sum of
# its intra-community betweenness, eigenvector and Katz scores.
# lambda_1..3 are the respective weights (all equal here).
lambda_1, lambda_2, lambda_3 = 1.0, 1.0, 1.0

# Result shape: {community index: {node: composite score}}.
total_centrality = {
    comm_id: {
        member: (
            lambda_1 * betweenness_comms[comm_id][member]
            + lambda_2 * eigenvector_comms[comm_id][member]
            + lambda_3 * katz_comms[comm_id][member]
        )
        for member in community.nodes()
    }
    for comm_id, community in enumerate(comm_subgraphs)
}

Display a table of the top artists by each centrality measure, for each community subgraph

In [67]:
# Make pandas print every column of the ranking table on a single line
# (no column truncation, no wrapping of wide frames).
pd.set_option("display.expand_frame_repr", False)
pd.set_option("display.max_columns", None)

# Row buffer that the table loop fills and empties once per community,
# and the column order used when turning it into a DataFrame.
rows = list()
columns = ["Artist", "total_composite", "Betweenness", "Eigenvector", "Katz"]

# Function to get the top-ranked artists from a centrality dictionary
def get_top_artists(centrality_dict, subgraph_id, top_n=10):
    """Return the highest-scoring ``(node, score)`` pairs of one community.

    Parameters
    ----------
    centrality_dict : mapping
        ``{subgraph_id: {node: score}}``.
    subgraph_id : hashable
        Key of the community whose nodes are ranked.
    top_n : int, optional
        Maximum number of pairs to return (default 10, the previously
        hard-coded value). Fewer pairs come back when the community has
        fewer nodes.

    Returns
    -------
    list of tuple
        ``(node, score)`` pairs sorted by descending score. Nodes with equal
        scores keep the iteration order of the underlying dict.

    Raises
    ------
    KeyError
        If ``subgraph_id`` is not a key of ``centrality_dict``.
    ValueError
        If ``top_n`` is negative.
    """
    if top_n < 0:
        # A negative slice bound would silently drop items from the tail.
        raise ValueError("top_n must be non-negative")
    ranked = sorted(centrality_dict[subgraph_id].items(), key=lambda x: x[1], reverse=True)
    return ranked[:top_n]

# Print one ranking table per community for the first six communities
# (fewer if fewer exist). total_centrality is keyed 0..n-1 by community index.
for subgraph_id in range(min(6, len(total_centrality))):
    total_top = get_top_artists(total_centrality, subgraph_id)
    betweenness_top = get_top_artists(betweenness_comms, subgraph_id)
    eigenvector_top = get_top_artists(eigenvector_comms, subgraph_id)
    katz_top = get_top_artists(katz_comms, subgraph_id)

    # Community 0 gets ten rows, every other community five -- but never more
    # rows than the shortest ranking holds, so a community with only a few
    # nodes yields a shorter table instead of raising IndexError.
    wanted_rows = 10 if subgraph_id == 0 else 5
    n_rows = min(
        wanted_rows,
        len(total_top),
        len(betweenness_top),
        len(eigenvector_top),
        len(katz_top),
    )

    # Extract artist names and create rows for DataFrame.
    # Each *_top entry is (node, score); artist_info[node][0] is the value shown.
    for idx in range(n_rows):
        artist_total = artist_info[total_top[idx][0]][0]
        artist_betweenness = artist_info[betweenness_top[idx][0]][0]
        artist_eigenvector = artist_info[eigenvector_top[idx][0]][0]
        artist_katz = artist_info[katz_top[idx][0]][0]
        row = {
            'Artist': f'Top {idx + 1} Community {subgraph_id}',
            'total_composite': artist_total,
            'Betweenness': artist_betweenness,
            'Eigenvector': artist_eigenvector,
            'Katz': artist_katz
        }
        rows.append(row)

    # display dataframe
    df = pd.DataFrame(rows, columns=columns)
    print(df)
    print()
    # Empty the shared buffer so the next community starts from a clean table.
    rows.clear()


               Artist    total_composite       Betweenness     Eigenvector               Katz
0   Top 1 Community 0           DJ Drama            Tiësto      Gucci Mane             Netsky
1   Top 2 Community 0          Lil Wayne             Diplo   Ty Dolla $ign   Sick Individuals
2   Top 3 Community 0               T.I.        Steve Aoki       Lil Wayne           Claptone
3   Top 4 Community 0           Yo Gotti      David Guetta  French Montana  Pharrell Williams
4   Top 5 Community 0        Wiz Khalifa        Snoop Dogg          Future           Skrillex
5   Top 6 Community 0         Young Thug             R3HAB        2 Chainz           Flo Rida
6   Top 7 Community 0       Busta Rhymes  Armin van Buuren     Chris Brown            SLANDER
7   Top 8 Community 0      Ty Dolla $ign        Gucci Mane       Rick Ross             Khalid
8   Top 9 Community 0  Pharrell Williams     Ty Dolla $ign      Snoop Dogg               KYLE
9  Top 10 Community 0        Chris Brown       Major Lazer  

Analyzing the number of highest-centrality nodes that must be removed in order to disconnect each community subgraph

In [73]:
def num_to_remove_centrality(G, sorted_nodes):
    """Count how many nodes must be removed before ``G`` stops being one component.

    Nodes are removed from a copy of ``G`` in the order given by
    ``sorted_nodes``; ``G`` itself is left untouched.

    Parameters
    ----------
    G : graph
        Graph to test; only ``copy()`` and ``remove_node()`` are called on it.
    sorted_nodes : sequence
        Nodes of ``G`` in removal order, indexed positionally.

    Returns
    -------
    int
        Number of removals performed when the component count first differs
        from 1. This is 0 if ``G`` does not start as a single component.

    Raises
    ------
    IndexError
        If ``sorted_nodes`` runs out while the graph is still one component.
    """
    remaining = G.copy()
    removed = 0
    while True:
        # Stop as soon as the working copy is no longer exactly one component.
        if nx.number_connected_components(remaining) != 1:
            return removed
        remaining.remove_node(sorted_nodes[removed])
        removed += 1

# For each community, rank its nodes by each centrality measure (highest first)
# and report how many top-ranked nodes must go before the subgraph splits.
for idx, subgraph in enumerate(comm_subgraphs):
    print("Community num", idx)

    # Same computation for the three measures, in the order BC, EC, KC.
    num_rem_BC, num_rem_EC, num_rem_KC = (
        num_to_remove_centrality(
            subgraph,
            sorted(subgraph.nodes(), key=lambda node: scores[idx][node], reverse=True),
        )
        for scores in (betweenness_comms, eigenvector_comms, katz_comms)
    )

    print(f'num to remove betweeness: {num_rem_BC}, num to remove eigenvector: {num_rem_EC}, num to remove katz: {num_rem_KC}')

# Most of the counts are 1, which shows that the highest-centrality nodes are
# very likely to be connected to leaf nodes: removing one isolates its leaf.
# Maybe this tendency is why they are highly central in the first place.

Community num 0
num to remove betweeness: 1, num to remove eigenvector: 1, num to remove katz: 1
Community num 1
num to remove betweeness: 1, num to remove eigenvector: 1, num to remove katz: 1
Community num 2
num to remove betweeness: 1, num to remove eigenvector: 1, num to remove katz: 1
Community num 3
num to remove betweeness: 1, num to remove eigenvector: 1, num to remove katz: 1
Community num 4
num to remove betweeness: 1, num to remove eigenvector: 1, num to remove katz: 1
Community num 5
num to remove betweeness: 1, num to remove eigenvector: 1, num to remove katz: 1
Community num 6
num to remove betweeness: 1, num to remove eigenvector: 1, num to remove katz: 1
Community num 7
num to remove betweeness: 1, num to remove eigenvector: 1, num to remove katz: 1
Community num 8
num to remove betweeness: 1, num to remove eigenvector: 1, num to remove katz: 1
Community num 9
num to remove betweeness: 1, num to remove eigenvector: 3, num to remove katz: 1
Community num 10
num to remove

## _Analyze Weak Tie Edges_