# Constructing the network:

In [1]:
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt
import os

Data is extracted from: https://www.kaggle.com/stackoverflow/stack-overflow-tag-network

In [2]:
# Show the directory the sample CSV files are read from: <cwd>/sample_data.
sample_data_dir = os.path.join(os.getcwd(), 'sample_data')
print(sample_data_dir)

C:\Users\alanc\OneDrive\Desktop\graph_ml_concepts\sample_data


In [3]:
# Read in data:
# Both CSV files are looked up in <current working directory>/sample_data.
base_path = os.path.join(os.getcwd(), 'sample_data')

# Load the node table and the link (edge) table, in that order.
df_nodes, df_edges = (
    pd.read_csv(os.path.join(base_path, csv_name))
    for csv_name in ('stack_network_nodes.csv', 'stack_network_links.csv')
)

In [4]:
# Initialize an (undirected) network using networkx:
G = nx.Graph(name="stackoverflow")

# Add nodes to the network in a single bulk call instead of iterating the
# DataFrame row by row. Each node is keyed by the `name` column and carries
# the `group` and `nodesize` columns as node attributes:
G.add_nodes_from(
    (name, {'group': group, 'nodesize': nodesize})
    for name, group, nodesize in zip(
        df_nodes['name'], df_nodes['group'], df_nodes['nodesize']
    )
)

# Add edges to the network in a single bulk call. Each (source, target, value)
# triple becomes one edge, with `value` stored under networkx's default
# edge-attribute name `weight`:
G.add_weighted_edges_from(
    zip(df_edges['source'], df_edges['target'], df_edges['value'])
)

## Compute node properties

In [8]:
# Node whose properties are reported below.
sample_node = 'azure'

# NOTE: every metric in this cell is computed with networkx's default
# arguments (`weight=None` / `distance=None`), so edge weights are not used.

# Compute degree of a node:
# Number of edges attached to the node
print(f"Degree of node: {sample_node} is {G.degree[sample_node]}")

# Compute eigenvector centrality of a node:
# Measure of the importance of a node based on the importance of its neighbours
eigen_centrality = nx.eigenvector_centrality(G)
print(f"Eigenvector centrality of node: {sample_node} is {round(eigen_centrality[sample_node], 4)}")

# Compute betweenness centrality of a node:
# Measure of how frequently a node lies on shortest paths between other pairs of nodes
# (the variable name keeps its original spelling in case other cells reference it)
betweeness_centrality = nx.betweenness_centrality(G)
print(f"Betweenness centrality of node: {sample_node} is {round(betweeness_centrality[sample_node], 4)}")

# Compute closeness centrality of a node:
# Measure of how short the shortest paths are between the given node and the other nodes it can reach
closeness_centrality = nx.closeness_centrality(G)
print(f"Closeness centrality of node: {sample_node} is {round(closeness_centrality[sample_node], 4)}")

# Compute clustering coefficient of a node:
# Measures how connected a node's neighbouring nodes are to each other
clustering_coef = nx.clustering(G)
print(f"Clustering coefficient of node: {sample_node} is {round(clustering_coef[sample_node], 4)}")

Degree of node: azure is 5
Eigenvector centrality of node: azure is 0.0381
Betweenness centrality of node: azure is 0.054
Closeness centrality of node: azure is 0.212
Clustering coefficient of node: azure is 0.5


## Compute edge properties

In [15]:
# Node pairs to score; each entry is a (u, v) tuple.
sample_nodes = [('azure', '.net')]

### Local neighbourhood overlap

# Jaccard coefficient of each pair of nodes:
# Number of neighbours the two nodes share, divided by the number of
# distinct neighbours they have between them
jcs = nx.jaccard_coefficient(G, sample_nodes)
for node_a, node_b, score in jcs:
    rounded_score = round(score, 4)
    print(f"Jaccard coefficient of nodes {node_a} & {node_b} is {rounded_score}")

# Adamic-Adar index of each pair of nodes:
# Sum over the neighbours shared by the two nodes of 1 / log(degree of that
# neighbour), so shared neighbours with few connections count for more
aai = nx.adamic_adar_index(G, sample_nodes)
for node_a, node_b, score in aai:
    rounded_score = round(score, 4)
    print(f"Adamic-Adar index of nodes {node_a} & {node_b} is {rounded_score}")

Jaccard coefficient of nodes azure & .net is 0.1818
Adamic-Adar index of nodes azure & .net is 0.7688
