[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/jeljov/NAP2025/blob/main/SNA_Tutorial_Part2.ipynb)

## SNA: Network Centrality Measures

In [None]:
from pathlib import Path

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sb

import networkx as nx

import warnings

### Data load and network creation

We will use David Krackhardt's High-tech Managers Networks data set. The data were collected from 21 management personnel in a high-tech, machine manufacturing firm to assess the effects of a recent management intervention program.

The data set and its description are available at: http://networkdata.ics.uci.edu/netdata/html/krackHighTech.html

We will load edge lists for two networks with the same actors (nodes), but different kinds of ties among them:
* advice ties - one actor (source / ego) tends to ask the other (target / alter) for advice
* friendship ties - one actor (source / ego) has named the other actor (target / alter) as a friend

Both networks are directed. However, we will treat the friendship network as an undirected one, so that we can explore centralities (the main topic of this class) in both kinds of networks.

These edge lists are stored in tabular format in *.txt* files.
To read data from those files, we will use the pandas **read_table()** function.

In [None]:
# Colab-only: prompt the user to upload the edge-list files; the next cell
# reads them by their bare file names.
from google.colab import files

data_files = files.upload()

In [None]:
# Read both space-separated edge lists with the same column layout; the first
# parsed column is named 'empty' and is discarded later in the notebook.
krack_friendship, krack_advice = (
    pd.read_table(file_name, sep=' ', names=['empty', 'source', 'target', 'weight'])
    for file_name in ('Krack-High-Tec-edgelist-Friendship.txt',
                      'Krack-High-Tec-edgelist-Advice.txt')
)

In [None]:
# Offline version: uncomment to read the same two files from a 'data' folder
# inside the current working directory instead of using the Colab upload.

# krack_friendship = pd.read_table(Path.cwd() / 'data' / 'Krack-High-Tec-edgelist-Friendship.txt',
#                                  sep=' ', names=['empty', 'source', 'target', 'weight'])
# krack_advice = pd.read_table(Path.cwd() / 'data' / 'Krack-High-Tec-edgelist-Advice.txt',
#                                  sep=' ', names=['empty', 'source', 'target', 'weight'])

#### Undirected (friendship) network

In [None]:
# Peek at the first rows of the raw friendship edge list
krack_friendship.head()

In [None]:
# Remove the placeholder column. errors='ignore' keeps this cell re-runnable:
# without it, a second run raises KeyError because the column is already gone.
krack_friendship.drop(columns='empty', inplace=True, errors='ignore')

# keep only the rows where an edge exists, that is, where weight > 0
friendship_confirmed = krack_friendship.loc[krack_friendship.weight > 0].copy()
friendship_confirmed.reset_index(drop=True, inplace=True)
friendship_confirmed.info()

In [None]:
# How many confirmed ties carry each distinct weight value
friendship_confirmed.weight.value_counts()

As all weights are the same and equal to 1, the attribute is not informative and can be dropped

In [None]:
# errors='ignore' lets the cell be executed again without a KeyError once the
# column has already been removed
friendship_confirmed.drop(columns='weight', inplace=True, errors='ignore')

We will assume that the friendship is mutual / symmetric relation and create an undirected graph

In [None]:
# Every actor that appears at either end of a confirmed tie becomes a node
nodes = {*friendship_confirmed.source.tolist(), *friendship_confirmed.target.tolist()}

# Undirected graph; nodes are inserted in sorted order
G_friendship = nx.Graph()
G_friendship.add_nodes_from(sorted(nodes))

print(G_friendship)

In [None]:
# Feed (source_node, target_node) tuples straight from the two endpoint columns.
# itertuples avoids creating a Series for every row (as iterrows does), and
# selecting the columns explicitly keeps the tuples two-element even if the
# data frame still carries other columns.
G_friendship.add_edges_from(
    friendship_confirmed[['source', 'target']].itertuples(index=False, name=None)
)
print(G_friendship)

In [None]:
# List every edge as its two end nodes, one edge per line
for edge in G_friendship.edges():
    print(*edge)

In [None]:
def plot_graph(G, graph_name):
    """Draw graph `G` on an 8x8 figure titled `graph_name` and show it.

    Nodes are light blue, edges are semi-transparent and every node is
    labelled. Nothing is returned.
    """
    plt.figure(figsize=(8, 8))

    # fixed seed, so the spring layout is the same on every call
    layout = nx.spring_layout(G, k=0.85, seed=9)

    nx.draw_networkx_nodes(G, pos=layout, node_color='lightblue')
    nx.draw_networkx_edges(G, pos=layout, alpha=0.55)
    nx.draw_networkx_labels(G, pos=layout)

    plt.axis('off')
    plt.title(graph_name)
    plt.show()

In [None]:
# Basic drawing of the undirected friendship network
plot_graph(G_friendship, "The friendship network")

#### Directed (advice) network

We need the same kind of data processing for the advice network

In [None]:
# Remove the placeholder column. errors='ignore' keeps this cell re-runnable:
# without it, a second run raises KeyError because the column is already gone.
krack_advice.drop(columns='empty', inplace=True, errors='ignore')

# keep only the rows where an edge exists (weight > 0); the weight column is
# then dropped from the filtered copy
advice_confirmed = krack_advice.loc[krack_advice.weight > 0].copy()
advice_confirmed.reset_index(drop=True, inplace=True)
advice_confirmed.drop(columns='weight', inplace=True)
advice_confirmed.info()

Since the propensity to ask someone for advice is often not mutual / symmetric, advice network will be created as a directed network, where each connection has its direction

In [None]:
G_advice = nx.DiGraph()

# Every actor that appears at either end of a confirmed tie becomes a node
nodes = set(advice_confirmed.source.tolist()).union(advice_confirmed.target.tolist())

G_advice.add_nodes_from(sorted(nodes))

# Directed edges as (source, target) tuples. itertuples avoids creating a
# Series for every row (as iterrows does), and selecting the two columns
# explicitly keeps the tuples two-element regardless of other columns.
G_advice.add_edges_from(
    advice_confirmed[['source', 'target']].itertuples(index=False, name=None)
)

print(G_advice)

In [None]:
# Basic drawing of the directed advice network
plot_graph(G_advice, "The advice network")

### Node Centrality Measures

#### Degree centrality

Degree centrality indicates the number of immediate contacts a network actor has. More precisely, it gives us the proportion of all the network nodes that the given node is directly connected to.

In [None]:
friendship_degree_cent = nx.degree_centrality(G_friendship)

# list the nodes by descending degree centrality, three decimals each
for node in sorted(friendship_degree_cent, key=friendship_degree_cent.get, reverse=True):
    print(f"{node}: {friendship_degree_cent[node]:.3f}")

In [None]:
def advanced_graph_plot(G,
                        graph_name,
                        node_size_modifiers=None,
                        node_color_modifiers=None):
    """Draw `G` with optional per-node sizes and colors, then show the figure.

    Parameters:
        G: the networkx graph to draw.
        graph_name: text used as the figure title.
        node_size_modifiers: optional mapping node -> number. Each node is
            drawn with size 150 + 1000 * value. When omitted (or empty),
            every node gets the default size of 300.
        node_color_modifiers: optional mapping node -> number. The values are
            mapped to colors through the 'Blues' colormap. When omitted (or
            empty), every node is drawn in 'skyblue'.

    Raises:
        KeyError: if a supplied mapping lacks an entry for some node of `G`.
    """
    plt.figure(figsize=(8,8))

    pos = nx.spring_layout(G, seed=9, k=0.95)

    if node_size_modifiers:
        node_size = [150 + 1000*node_size_modifiers[node] for node in G.nodes()]
    else:
        node_size = 300 #default value

    if node_color_modifiers:
        node_color = [node_color_modifiers[node] for node in G.nodes()]
        cmap = 'Blues'
    else:
        node_color = 'skyblue'
        # A colormap only applies to numeric colors; passing one together with
        # a single named color makes matplotlib emit a "cmap will be ignored"
        # UserWarning, so it is left out in this case.
        cmap = None

    nx.draw_networkx_nodes(G, pos, node_color=node_color, node_size=node_size, cmap=cmap)
    # node_size is forwarded so that, on directed graphs, arrows are placed
    # for the sizes actually drawn rather than for the default node size
    nx.draw_networkx_edges(G, pos, alpha=0.55, node_size=node_size)
    nx.draw_networkx_labels(G, pos, font_color='indigo')

    plt.title(graph_name)

    plt.axis('off')
    plt.show()

In [None]:
# this is to avoid getting a warning about the colormap (cmap) not being used;
# the filter is scoped to this plot so that UserWarnings raised elsewhere in
# the session stay visible
with warnings.catch_warnings():
    warnings.simplefilter("ignore", UserWarning)

    advanced_graph_plot(G_friendship,
                        'The Friendship network: node size denotes degree centrality',
                        node_size_modifiers=friendship_degree_cent)

Since the advice network is a directed network, we distinguish between degree of incoming and outgoing connections and thus, there are two degree centrality measures:
* in-degree centrality - the proportion of network nodes that are directly linked to the given node
* out-degree centrality - the proportion of network nodes that the current node is directly linked to

In [None]:
# Degree centrality split by edge direction: incoming vs. outgoing ties
advice_indegree_cent = nx.in_degree_centrality(G_advice)
advice_outdegree_cent = nx.out_degree_centrality(G_advice)

In [None]:
# One line per node, in sorted node order, with both degree centralities
for n in sorted(G_advice.nodes()):
    print(f"{n}: in-degree:{advice_indegree_cent[n]:.3f}, out-degree: {advice_outdegree_cent[n]:.3f}")

In [None]:
# In-degree drives the node size, out-degree drives the node color
advanced_graph_plot(G_advice,
                    "The Advice network: node size denotes in-degree, node color out-degree centrality",
                    node_size_modifiers=advice_indegree_cent,
                    node_color_modifiers=advice_outdegree_cent)

#### Closeness centrality

We'll start with the friendship network since undirected networks are easier to deal with when it comes to the closeness centrality.

Before computing closeness, we need to check if the network is connected, since the interpretation of closeness depends on whether the network is connected or not.
A network is connected if there is a path between any pair of nodes in the network.

In [None]:
# Is there a path between every pair of nodes in the friendship network?
nx.is_connected(G_friendship)

In [None]:
# Closeness centrality of every node, as a dict keyed by node
friendship_closeness = nx.closeness_centrality(G_friendship)

# examine the distribution of the closeness centrality values
pd.Series(friendship_closeness.values()).describe()

Overall, closeness is relatively high in the whole network and fairly similar across the nodes

In [None]:
# Ignore UserWarnings for this plot only, instead of silencing them for the
# rest of the session
with warnings.catch_warnings():
    warnings.simplefilter("ignore", UserWarning)

    advanced_graph_plot(G_friendship,
                        "The Friendship network: node size denotes closeness centrality",
                        node_size_modifiers=friendship_closeness)

Now, we move to the Advice network, which is a directed network and thus we need to distinguish between incoming and outgoing connections; this further results in two closeness measures:
* **in-closeness** centrality, and
* **out-closeness** centrality

You can think of in-closeness centrality as reflecting the average number of steps one would have to make to get TO a given node FROM all other reachable nodes in the network: closeness is the inverse of that average distance, so the fewer steps needed, the higher the in-closeness. Out-closeness centrality, not surprisingly, measures the same thing with the directionality reversed: it is based on the average number of steps FROM the given node TO any other reachable node in the network.

First, we need to check if the network is connected. Note: in directed networks, we need to differentiate between two modes of connectedness:
* weak mode, which does not consider edge direction
* strong mode, which does consider the direction of edges when looking for a path between any two nodes in the graph

By definition, a graph that is strongly connected is also weakly connected.

In [None]:
# Strong connectivity: paths must follow the direction of the edges
nx.is_strongly_connected(G_advice)

Since we have a strongly connected advice network, we can compute in- and out-closeness.

In NetworkX, for a directed graph, the `closeness_centrality` function computes closeness centrality using inward distance to a node, that is, it computes in-closeness centrality. To use outward distances and compute out-closeness centrality, we need to apply the function to `G.reverse()`.

In [None]:
# On the graph as given the function yields in-closeness; running it on the
# reversed graph yields out-closeness
advice_in_closeness = nx.closeness_centrality(G_advice)
advice_out_closeness = nx.closeness_centrality(G_advice.reverse())

In [None]:
# One line per node with both closeness values, three decimals each
for n in G_advice.nodes():
    print(f"{n}: in-closeness: {advice_in_closeness[n]:.3f}, out-closeness: {advice_out_closeness[n]:.3f}")

In [None]:
# In-closeness drives the node size, out-closeness drives the node color
# (title typo "in-closseness" corrected)
advanced_graph_plot(G_advice,
                    "The Advice network: node size denotes in-closeness, node color out-closeness centrality",
                    node_size_modifiers=advice_in_closeness,
                    node_color_modifiers=advice_out_closeness)

#### Betweenness centrality

Betweenness centrality measures the number of shortest paths between node pairs that go through a specific vertex.

In [None]:
friendship_betwenness = nx.betweenness_centrality(G_friendship)

# Ignore UserWarnings for this plot only, instead of silencing them for the
# rest of the session
with warnings.catch_warnings():
    warnings.simplefilter("ignore", UserWarning)
    advanced_graph_plot(G_friendship,
                        "The Friendship network: node size denotes betweenness centrality",
                        node_size_modifiers=friendship_betwenness)

In [None]:
# print nodes from the highest to the lowest betweenness centrality
for node, bc in sorted(friendship_betwenness.items(), key=lambda item: item[1], reverse=True):
    print(f"{node}: {bc:.3f}")

For directed graphs, such as the advice graph, we use the same function (`betweenness_centrality`) as for undirected graphs, but the results are calculated based on directed shortest paths, that is, the function considers the directions of edges when computing shortest paths.

In [None]:
advice_betwenness = nx.betweenness_centrality(G_advice)

# Ignore UserWarnings for this plot only, instead of silencing them for the
# rest of the session
with warnings.catch_warnings():
    warnings.simplefilter('ignore', UserWarning)
    advanced_graph_plot(G_advice,
                        "The Advice network: node size denotes betweenness centrality",
                        node_size_modifiers=advice_betwenness)

In [None]:
# print nodes from the highest to the lowest betweenness centrality
for node, bc in sorted(advice_betwenness.items(), key=lambda item:item[1], reverse=True):
    print(f"{node}: {bc:.3f}")

#### Eigenvector centrality

Eigenvector centrality helps us mathematically capture the intuitive idea that a person's importance in a network depends heavily on the importance of their social circle.

This measure gives higher scores to nodes that are connected to other highly connected nodes. It reflects the notion that it is not important how many connections one has, but how important one's connections are. Hence, it is often interpreted as a measure of a node's network importance.

Google's PageRank is a well known variant of eigenvector centrality that ranks websites based on the quality of the sites linking to them

In [None]:
# Eigenvector centrality of every node in the friendship network
friendship_eigenvector = nx.eigenvector_centrality(G_friendship)

# print nodes from the highest to the lowest score (values are not rounded)
for n, ec in sorted(friendship_eigenvector.items(), key=lambda item: item[1], reverse=True):
    print(f"{n}: {ec}")

In [None]:
# Ignore UserWarnings for this plot only, instead of silencing them for the
# rest of the session
with warnings.catch_warnings():
    warnings.simplefilter('ignore', UserWarning)

    advanced_graph_plot(G_friendship,
                        "The Friendship network: node color denotes Eigenvector centrality",
                        node_color_modifiers=friendship_eigenvector)

In [None]:
# NOTE(review): for a DiGraph, NetworkX documents this measure as being based
# on a node's incoming edges - confirm against the library docs
advice_eigenvector = nx.eigenvector_centrality(G_advice)

# print nodes from the highest to the lowest score (values are not rounded)
for n, ec in sorted(advice_eigenvector.items(), key=lambda item: item[1], reverse=True):
    print(f"{n}: {ec}")

In [None]:
# Ignore UserWarnings for this plot only, instead of silencing them for the
# rest of the session
with warnings.catch_warnings():
    warnings.simplefilter('ignore', UserWarning)

    advanced_graph_plot(G_advice,
                        "The Advice network: node color denotes Eigenvector centrality",
                        node_color_modifiers=advice_eigenvector)

#### Comparison of centralities across the networks

Since the two networks include the same set of actors (people), but differ in the kinds of relationships they model, we can compare actors' positions (centralities) in these networks to observe how people's network positions differ based on the kinds of relationships they engage in.

To that end, we will first gather all the computed centralities in one data frame

In [None]:
# One record per node of the friendship graph, combining the centralities
# computed on both networks ('f_' = friendship, 'a_' = advice)
all_centralities = [
    {
        'node': str(n),
        'f_degree': friendship_degree_cent[n],
        'f_closeness': friendship_closeness[n],
        'f_betweenness': friendship_betwenness[n],
        'f_eigenvector': friendship_eigenvector[n],
        'a_in_degree': advice_indegree_cent[n],
        'a_out_degree': advice_outdegree_cent[n],
        'a_in_closeness': advice_in_closeness[n],
        'a_out_closeness': advice_out_closeness[n],
        'a_betweenness': advice_betwenness[n],
        'a_eigenvector': advice_eigenvector[n],
    }
    for n in G_friendship.nodes()
]

all_centralities_df = pd.DataFrame(all_centralities)
all_centralities_df.head()

Explore, first, degree centralities across the two networks:

In [None]:
# Long format: one row per (node, centrality type) pair
degree_df = all_centralities_df.melt(id_vars=['node'],
                                     value_vars=['f_degree', 'a_in_degree', 'a_out_degree'],
                                     var_name='Centrality_type',
                                     value_name='Value')
degree_df.head()

In [None]:
# Horizontal grouped bars: one group per node, one bar per centrality type
plt.figure(figsize=(7, 10))

sb.barplot(data=degree_df,
           x='Value',
           y='node',
           hue='Centrality_type',
           orient='h'
)

plt.title('Degree centrality across the two networks: friendship and advice')
plt.legend(title='Centralities')
plt.show()

We can also check betweenness centrality:

In [None]:
# Long format: one row per (node, network) pair
betweenness_df = all_centralities_df.melt(id_vars=['node'],
                                          value_vars=['f_betweenness', 'a_betweenness'],
                                          var_name='Centrality_type',
                                          value_name='Value')

# Horizontal grouped bars: one group per node, one bar per network
plt.figure(figsize=(6, 8))
sb.barplot(x='Value', y='node', hue='Centrality_type', orient='h', data=betweenness_df)
plt.title('Betweenness centrality across the two networks: friendship and advice')
plt.legend(title='Centralities')
plt.show()

Also, Eigenvector centrality speaks a lot in terms of potential for impact

In [None]:
# Long format: one row per (node, network) pair
eigenvector_df = all_centralities_df.melt(id_vars=['node'],
                                          value_vars=['f_eigenvector', 'a_eigenvector'],
                                          var_name='Centrality_type',
                                          value_name='Value')

# Horizontal grouped bars: one group per node, one bar per network
plt.figure(figsize=(7, 9))
sb.barplot(x='Value', y='node', hue='Centrality_type', orient='h', data=eigenvector_df)
plt.title('Eigenvector centrality across the two networks: friendship and advice')
plt.legend(title='Centralities')
plt.show()