# 3 Analyzing Network Using Centrality Measures

In [1]:
import numpy as np
import pandas as pd
import networkx as nx

from typing import List, Any

In [2]:
# Read the edge list from disk and turn it into a directed graph.
edge_list_df = pd.read_csv("../outputs/edge_list.csv")

# "source"/"target" are networkx's default column names, spelled out here
# so the expected CSV schema is visible to the reader.
graph = nx.from_pandas_edgelist(
    edge_list_df,
    source="source",
    target="target",
    create_using=nx.DiGraph,
)

Subtask 1: Calculate centrality measurements for each node

In [3]:
# Out-degree centrality per node, stored on the graph under the 'out-degree' attribute.
out = nx.out_degree_centrality(graph)
nx.set_node_attributes(graph, values=out, name='out-degree')

In [4]:
# Betweenness centrality per node, stored on the graph under the 'betweenness' attribute.
bb = nx.betweenness_centrality(graph)
nx.set_node_attributes(graph, values=bb, name='betweenness')

In [5]:
# Eigenvector centrality per node, stored on the graph under the 'eigen' attribute.
eigen = nx.eigenvector_centrality(graph)
nx.set_node_attributes(graph, values=eigen, name='eigen')

Subtask 2: Create a dataframe of all the nodes connected to a book of interest

In [6]:
# One record per node: its name under "book-chapter" plus every attribute
# currently stored on that node in the graph.
node_records = [
    {"book-chapter": name, **attributes}
    for name, attributes in graph.nodes(data=True)
]

# Order the table by the "eigen" attribute, largest value first.
nodes_centrality_df = pd.DataFrame(node_records).sort_values("eigen", ascending=False)

nodes_centrality_df.head()

Unnamed: 0,book-chapter,out-degree,betweenness,eigen
280,"('lauret', ' chapter-1')",0.007133,0.032076,0.217647
394,"('munro', ' chapter-1')",0.007133,0.037793,0.205791
462,"('lauret', ' chapter-11')",0.003566,0.010509,0.203417
189,"('yuen', ' chapter-1')",0.004993,0.029097,0.16498
265,"('siriwardena', ' chapter-1')",0.008559,0.040668,0.158386


In [7]:
# For now, pick the node with the highest eigenvector centrality:
# nodes_centrality_df is sorted by "eigen" in descending order, so it is the first row.
choosen_node = nodes_centrality_df["book-chapter"].iat[0]
print(choosen_node)

('lauret', ' chapter-1')


In [8]:
def get_connected_nodes(graph: nx.DiGraph, node: Any) -> List[Any]:
    """Return the successors of ``node`` in ``graph``, excluding ``node`` itself.

    Args:
        graph: Directed graph to query; only ``graph.successors`` is used.
        node: Node whose outgoing neighbours are wanted.

    Returns:
        The successors of ``node`` in the order ``graph.successors`` yields
        them, with a self-loop (``node`` -> ``node``) dropped if present.

    Raises:
        Whatever ``graph.successors`` raises when ``node`` is not in the graph.
    """
    # Query the ``node`` argument rather than a notebook-level global, and use
    # a distinct loop variable so the parameter is not shadowed.
    return [successor for successor in graph.successors(node) if successor != node]


# Successors of the chosen node (the chosen node itself is excluded).
connected_nodes = get_connected_nodes(graph=graph, node=choosen_node)
print(connected_nodes)

["('palmer', ' about-this-book')", "('elger', ' chapter-1')", "('clinton3', ' chapter-1')", "('lauret', ' chapter-3')", "('lauret', ' chapter-5')", "('lauret', ' about-this-book')", "('lauret', ' chapter-11')", "('rotem', ' about-this-book')", "('lauret', ' chapter-9')"]


Subtask 3: Create a dataframe of the nodes connected to the book of interest and their centrality measures.

In [9]:
# Keep only the rows whose "book-chapter" is one of the connected nodes.
is_connected = nodes_centrality_df["book-chapter"].isin(connected_nodes)
connected_to_choosen_node_df = nodes_centrality_df.loc[is_connected]

# Display them ordered by "eigen", largest value first.
connected_to_choosen_node_df.sort_values("eigen", ascending=False)

Unnamed: 0,book-chapter,out-degree,betweenness,eigen
462,"('lauret', ' chapter-11')",0.003566,0.010509,0.203417
400,"('clinton3', ' chapter-1')",0.003566,0.014915,0.13177
613,"('lauret', ' chapter-3')",0.003566,0.010123,0.123035
277,"('lauret', ' about-this-book')",0.005706,0.029768,0.114395
464,"('lauret', ' chapter-9')",0.001427,0.001795,0.099102
359,"('elger', ' chapter-1')",0.000713,0.0,0.079306
617,"('lauret', ' chapter-5')",0.001427,0.003015,0.079306
281,"('palmer', ' about-this-book')",0.0,0.0,0.058126
1223,"('rotem', ' about-this-book')",0.000713,0.001705,0.058126


Subtask 4: Sort the nodes by their centrality measures to create a basic recommendation.

In [10]:
def get_recommended_node(
    nodes_centrality_df: pd.DataFrame,
    connected_nodes: List[Any],
    measure: str = "eigen",
) -> Any:
    """Return the connected node with the highest value of a centrality measure.

    Args:
        nodes_centrality_df: Table with a "book-chapter" column holding node
            names and one column per centrality measure.
        connected_nodes: Candidate node names; only rows whose "book-chapter"
            is in this collection are considered.
        measure: Name of the column to rank candidates by. Defaults to
            "eigen", which is the ranking this function always used.

    Returns:
        The "book-chapter" value of the candidate with the largest ``measure``,
        or ``None`` when no row matches ``connected_nodes`` (for example when
        the list is empty because the node of interest has no successors).

    Raises:
        KeyError: If "book-chapter" or ``measure`` is not a column of
            ``nodes_centrality_df``.
    """
    is_candidate = nodes_centrality_df["book-chapter"].isin(connected_nodes)
    candidates_df = nodes_centrality_df[is_candidate]

    # Nothing to recommend: return None instead of letting ``.iloc[0]`` fail
    # with an opaque "positional indexer is out-of-bounds" IndexError.
    if candidates_df.empty:
        return None

    # A stable sort makes ties deterministic: among equal scores, the row that
    # appears first in ``nodes_centrality_df`` is recommended.
    ranked_df = candidates_df.sort_values(measure, ascending=False, kind="stable")
    return ranked_df["book-chapter"].iloc[0]

In [11]:
# Recommend the most central of the nodes connected to the chosen node.
recommended_node = get_recommended_node(
    nodes_centrality_df=nodes_centrality_df,
    connected_nodes=connected_nodes,
)
print(recommended_node)

('lauret', ' chapter-11')


**Observation:**
* The book chapter with the highest eigenvector centrality was `('lauret', ' chapter-1')`
* When choosing this book chapter, the recommended book chapter, based on eigenvector centrality among its connected chapters, would be `('lauret', ' chapter-11')`