# <font color="#49699E" size=40>Connection and Clustering in Social Networks</font>
# LEARNING OBJECTIVES
# LEARNING MATERIALS
# INTRODUCTION
## Imports

In [None]:
import pandas as pd
pd.set_option("display.notebook_repr_html", False)
import numpy as np

from sklearn.metrics.pairwise import euclidean_distances
from scipy.cluster import hierarchy

import networkx as nx
from networkx.algorithms.community import k_clique_communities
from networkx.algorithms.triads import triadic_census
import community

import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns

import random

from dcss.plotting import format_axes_commas, custom_seaborn
custom_seaborn()

## Data


In [None]:
# Load the self-reported contact diaries: a space-delimited edge list whose
# 'i' and 'j' columns hold the two endpoints of each reported contact.
contact_diaries = pd.read_csv("../data/SocioPatterns/Contact-diaries-network_data_2013.csv", sep=' ')

# Build an undirected graph from the edge list.
G = nx.from_pandas_edgelist(contact_diaries, 'i', 'j', create_using=nx.Graph())
G.name = 'Reported Contacts (Diary Data)'

# nx.info() was deprecated in NetworkX 2.7 and removed in 3.0, so the summary
# is printed explicitly to keep this cell working on current releases.
print(f'Name: {G.name}')
print(f'Type: {type(G).__name__}')
print(f'Number of nodes: {G.number_of_nodes()}')
print(f'Number of edges: {G.number_of_edges()}')

In [None]:
# Peek at the first five distinct values in the 'j' column.
contact_diaries['j'].unique()[:5]

# MICRO-LEVEL NETWORK STRUCTURE AND PROCESSES
## Dyads and Reciprocity


## Triads and Triadic Closure

## MAN for Triads


In [None]:
from networkx.algorithms.triads import triadic_census

# Triad census of a seeded 50-node directed graph from nx.gn_graph, stored as
# a Series whose index holds the census keys.
gn_50 = nx.gn_graph(n=50, seed=42)
census_50 = triadic_census(gn_50)
sim_50 = pd.Series(census_50)
sim_50

In [None]:
# Seeded 20-node directed graph used as the running example in the next cells.
gn = nx.gn_graph(n=20, seed=42)
gn.name = "Simulated DiGraph with 20 Nodes"


def describe_simulated_network(network):
    """Print the name, node count, edge count, and density of ``network``."""
    summary = [
        f'Network: {network.name}',
        f'Number of nodes: {network.number_of_nodes()}',
        f'Number of edges: {network.number_of_edges()}',
        f'Density: {nx.density(network)}',
    ]
    print('\n'.join(summary))


describe_simulated_network(gn)

In [None]:
# Seed the layout so the node positions are the same on every run.
layout = nx.fruchterman_reingold_layout(gn, seed=12)

draw_options = dict(node_color='darkgray', edge_color='gray', node_size=100, width=1)
nx.draw(gn, pos=layout, **draw_options)

plt.show()

In [None]:
# Triad census of the 20-node simulated network in its current state.
census_20_a = triadic_census(gn)
sim_20_a = pd.Series(census_20_a)
sim_20_a

In [None]:
# Add a single node with no edges, then print the updated summary.
isolate_label = "an isolate"
gn.add_node(isolate_label)
describe_simulated_network(gn)

In [None]:
# Recompute the triad census on the current state of `gn`.
census_20_b = triadic_census(gn)
sim_20_b = pd.Series(census_20_b)
sim_20_b

In [None]:
fig, ax = plt.subplots()

# One scatter layer per census: counts on the x-axis, census keys on the y-axis.
census_layers = [
    (sim_20_a, dict(s=50, alpha=.8, label="Simulated network")),
    (sim_20_b, dict(color='crimson', marker="+", s=80,
                    label="Simulated network + one isolate")),
]
for census, layer_style in census_layers:
    sns.scatterplot(x=census, y=census.index, **layer_style)

ax.set(xlabel='Count', ylabel='Triad configurations')
sns.despine()
plt.legend()
plt.show()

In [None]:
# Start from an empty directed graph and add 20 string-labelled nodes.
clustered_g = nx.null_graph(create_using=nx.DiGraph())
nodes = range(0,20)
clustered_g.add_nodes_from(str(node) for node in nodes)

# Link nodes "0" to "8" to one another in both directions (9 * 8 = 72 edges);
# the remaining nodes receive no edges.
clique_edges = [
    (str(i), str(j))
    for i in range(0,9)
    for j in range(0,9)
    if i != j
]
clustered_g.add_edges_from(clique_edges)

clustered_g.name = 'Simulated DiGraph, Clustered'
describe_simulated_network(clustered_g)

In [None]:
import random

# Comparison network: the same node labels as `nodes` and 72 directed edges,
# but with the edges placed at random instead of concentrated in one group.
n_random_edges = 72

not_clustered_g = nx.null_graph(create_using=nx.DiGraph())

for node in nodes:
    not_clustered_g.add_node(str(node))

# Draw random ordered pairs until the target edge count is reached. Pairs with
# identical endpoints are rejected so that no self-loop uses up one of the 72
# edges; re-drawing an existing edge leaves the edge count unchanged, so the
# loop simply draws again.
while not_clustered_g.number_of_edges() < n_random_edges:
    random_from = str(random.randint(0,19))
    random_to = str(random.randint(0,19))
    if random_from != random_to:
        not_clustered_g.add_edge(random_from, random_to)

not_clustered_g.name = 'Simulated DiGraph, Not Clustered'
describe_simulated_network(not_clustered_g)

In [None]:
# Triad census of each simulated network, as Series indexed by census key.
tc_clustered, tc_not_clustered = (
    pd.Series(triadic_census(graph))
    for graph in (clustered_g, not_clustered_g)
)

In [None]:
fig, ax = plt.subplots()

# Overlay the two censuses: counts on the x-axis, census keys on the y-axis.
clustered_style = dict(s=50, alpha=.8, label="Simulated network, clustered")
not_clustered_style = dict(color='crimson', marker="+", s=80,
                           label="Simulated network, not clustered")

sns.scatterplot(x=tc_clustered, y=tc_clustered.index, **clustered_style)
sns.scatterplot(x=tc_not_clustered, y=tc_not_clustered.index, **not_clustered_style)

ax.set(xlabel='Count', ylabel='Triad configurations')
sns.despine()

plt.legend()
plt.show()

# DETECTING COHESIVE SUBGROUPS AND ASSORTATIVE STRUCTURE


## Cliques and $k$-Clique Communities 


In [None]:
# Detect the k-clique communities of G for k = 5 and report how many exist.
k = 5
ccs = [*k_clique_communities(G, k)]
n_communities = len(ccs)
print(f'Identified {n_communities} {k}-clique communities.')

In [None]:
# Materialise every community as a list and print them one per line.
communities = list(map(list, ccs))
for members in communities:
    print(members)

In [None]:
# Map every node in G to the positions (indices into `communities`) of the
# k-clique communities that contain it; nodes in no community map to [].
# enumerate() supplies each position directly, and the per-community sets make
# each membership test constant-time, instead of re-scanning `communities`
# with .index() and copying each community for every node.
community_sets = [set(members) for members in communities]
kccs = {
    node: [idx for idx, members in enumerate(community_sets) if node in members]
    for node in G.nodes()
}

In [None]:
# Split nodes by how many k-clique communities they belong to. The loop uses
# its own variable names so the clique size stored in `k` is not overwritten.
overlapping_nodes = []
in_any_kclique = []

for node, memberships in kccs.items():
    if len(memberships) > 1:
        overlapping_nodes.append(node)
    if len(memberships) >= 1:
        in_any_kclique.append(node)

# Let the format spec do the rounding: multiplying an already-rounded float
# by 100 can print artefacts such as 56.99999999999999.
share_in_kclique = len(in_any_kclique) / len(G)

print(
    f'{len(overlapping_nodes)} nodes belong to multiple $k$-clique communities: {overlapping_nodes}.'
)
print(
    f'{len(in_any_kclique)} nodes ({share_in_kclique:.0%} of the network) are embedded in at least one $k$-clique community.'
)

In [None]:
# Compute node positions for G through NetworkX's pydot/Graphviz interface.
# This rebinds `layout` (previously the layout of the simulated graph); the
# plotting cells for G below reuse it.
layout = nx.nx_pydot.graphviz_layout(G)

In [None]:
# Edges whose two endpoints both sit in at least one k-clique community.
# A set makes each membership test constant-time.
kclique_members = set(in_any_kclique)
edges_in_kcliques = [
    (u, v) for u, v in G.edges()
    if u in kclique_members and v in kclique_members
]

# Label only the nodes that belong to exactly one community, using that
# community's index as the label. The comprehension keeps the clique size in
# `k` from being overwritten by a loop variable.
labs = {
    node: memberships[0]
    for node, memberships in kccs.items()
    if len(memberships) == 1
}

# Base layer: the whole network in gray.
nx.draw(G,
        layout,
        node_color='darkgray',
        edge_color='lightgray',
        node_size=50,
        width=.5)
# Overlay: community members and the edges among them in crimson.
nx.draw_networkx_nodes(G,
                       layout,
                       node_color='crimson',
                       node_size=50,
                       nodelist=in_any_kclique)
nx.draw_networkx_edges(G,
                       layout,
                       edge_color='crimson',
                       edgelist=edges_in_kcliques)
# Keep the returned value under its own name so `labs` stays the label
# mapping; assigning it also stops the notebook from echoing it.
label_artists = nx.draw_networkx_labels(G,
                                        layout,
                                        labels=labs,
                                        font_size=6,
                                        font_color='white')

## Community Detection using Louvain and Leiden


In [None]:
# Louvain partition of G (a node -> community id mapping) and its modularity.
part = community.best_partition(G)
q = community.modularity(part, G)
rounded_q = np.round(q, 3)
print(f"The modularity of the network is {rounded_q}.")

In [None]:
# Colour every node by its Louvain community id.
colors = [part[node] for node in G]
my_colors = plt.cm.Set2

fig, ax = plt.subplots(figsize=(12, 8))
node_style = dict(node_size=100, node_color=colors, cmap=my_colors)
edge_style = dict(edge_color='lightgray', width=1)
nx.draw_networkx_nodes(G, pos=layout, **node_style)
nx.draw_networkx_edges(G, pos=layout, **edge_style)
plt.axis('off')

plt.show()

In [None]:
# Build the graph induced by the Louvain partition and draw it, using each
# edge's 'weight' attribute as its line width.
inet = community.induced_graph(part, G)
inet.name = "Induced Ego Network"

weights = [attrs['weight'] for _, _, attrs in inet.edges(data=True)]
ipos = nx.nx_pydot.graphviz_layout(inet)

induced_style = dict(
    node_color='black',
    with_labels=True,
    font_color='white',
    font_size=8,
    width=weights,
    edge_color="gray",
)
nx.draw(inet, pos=ipos, **induced_style)

In [None]:
# Print the nodes assigned to one Louvain community. The id is stored in
# `community_id` rather than `community` so the imported `community` module is
# not overwritten (which would break any later call such as
# community.best_partition).
community_id = 14
for node, assigned in part.items():
    if assigned == community_id:
        print(node)

### From Louvain to Leiden

In [None]:
import igraph as ig
import leidenalg as la

In [None]:
# Undirected igraph graph built from the contact data, partitioned with
# Leiden using the modularity partition type.
H = ig.Graph.DataFrame(contact_diaries, directed=False)
part_leiden = la.find_partition(H, partition_type=la.ModularityVertexPartition)

In [None]:
# Start from a copy of the Louvain mapping and overwrite each node's community
# with its Leiden membership. The partition's public `graph` and `membership`
# properties are used instead of the private `_graph` / `_membership` fields.
# NOTE(review): this relies on the igraph vertices carrying a 'name'
# attribute that matches the node ids used in `part` -- confirm for the
# installed igraph version.
partition = part.copy()
leiden_names = part_leiden.graph.vs['name']
for membership, node in zip(part_leiden.membership, leiden_names):
    partition[node] = membership

In [None]:
# Colour every node by the community id currently stored in `partition`.
colors = [partition[node] for node in G]
my_colors = plt.cm.Set2

fig, ax = plt.subplots(figsize=(12, 8))
nx.draw_networkx_nodes(
    G, pos=layout, node_size=100, node_color=colors, cmap=my_colors
)
nx.draw_networkx_edges(G, pos=layout, edge_color='lightgray', width=1)
plt.axis('off')

plt.show()

In [None]:
# Repeat the Leiden partition on a directed igraph graph, passing the edge
# 'weight' attribute as weights.
dH = ig.Graph.DataFrame(contact_diaries, directed = True)
part_leiden = la.find_partition(dH, la.ModularityVertexPartition, weights = dH.es['weight'])

# Overwrite each node's entry in `partition` with its new membership, reading
# the partition through its public `graph` / `membership` properties rather
# than the private `_graph` / `_membership` fields.
for membership, node in zip(part_leiden.membership, part_leiden.graph.vs['name']):
    partition[node] = membership

In [None]:
# Re-colour the nodes from the updated `partition` mapping and redraw.
colors = list(map(partition.__getitem__, G.nodes()))
my_colors = plt.cm.Set2

fig, ax = plt.subplots(figsize=(12, 8))
nx.draw_networkx_nodes(G, pos=layout, node_color=colors, cmap=my_colors, node_size=100)
nx.draw_networkx_edges(G, pos=layout, width=1, edge_color='lightgray')
plt.axis('off')

plt.show()

## Components and $k$-cores
### Components


In [None]:
# Count the connected components of G (displayed as the cell's output).
nx.number_connected_components(G)

In [None]:
# Sizes of the connected components of G, largest first.
comps_sizes = sorted(
    (len(component) for component in nx.connected_components(G)),
    reverse=True,
)
print(comps_sizes)

In [None]:
# Connected components of G, largest first.
components = sorted(nx.connected_components(G), key=len, reverse=True)

# Copy the subgraph before naming it: a bare G.subgraph(...) view shares its
# graph-level attributes with G, so setting `name` on the view would rename G.
giant = G.subgraph(components[0]).copy()
giant.name = "Communication Network, Giant Component"

# nx.info() was deprecated in NetworkX 2.7 and removed in 3.0, so the summary
# is printed explicitly.
print(f'Name: {giant.name}')
print(f'Type: {type(giant).__name__}')
print(f'Number of nodes: {giant.number_of_nodes()}')
print(f'Number of edges: {giant.number_of_edges()}')

### $k$-Cores


In [None]:
# Tabulate the node and edge counts of the k-core of G for k = 1..8.
ks = list(range(1, 9))
nnodes = []
nedges = []

for k in ks:
    kcore = nx.k_core(G, k)
    nnodes += [kcore.number_of_nodes()]
    nedges += [kcore.number_of_edges()]

kdf = pd.DataFrame({
    'Value of K': ks,
    'Number of Nodes': nnodes,
    'Number of Edges': nedges,
})
kdf

In [None]:
# Extract the 4-core and the 5-core of G.
kcore_4, kcore_5 = (nx.k_core(G, core_size) for core_size in (4, 5))

In [None]:
# Base layer: the full network in light gray.
nx.draw(G, layout, node_color = 'lightgray', edge_color = 'lightgray', node_size = 30)

# Overlay the 4-core in gray, then the 5-core in crimson on top of it.
core_layers = ((kcore_4, 'gray'), (kcore_5, 'crimson'))
for core, shade in core_layers:
    nx.draw_networkx_nodes(core, layout, node_size = 30, node_color = shade)
    nx.draw_networkx_edges(core, layout, node_size = 30, edge_color = shade)
plt.show()

# CONCLUSION
## Key Points 
