In [None]:
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt
import numpy as np

In [None]:
# Load the household adjacency matrix of village 1. The CSV has no header
# row, so every line is read as data.
df = pd.read_csv(
    "data/Network Data/Adjacency Matrices/adj_allVillageRelationships_HH_vilno_1.csv",
    header=None,
)

# Pull out the underlying NumPy array and print it as a quick sanity check
adj_all_v1 = df.to_numpy()
print(adj_all_v1)

In [None]:
# Show the adjacency matrix as an image (blue = connection, white = no connection)
plt.figure(figsize=(5, 5))
plt.imshow(adj_all_v1, cmap="Blues", interpolation="none")
# imshow puts row 0 at the top by default; flip the axis so node 0 sits at the bottom
plt.gca().invert_yaxis()
plt.gca().set(title="Adjacency Matrix", xlabel="Node ID", ylabel="Node ID")
plt.show()

In [None]:
# Build the graph whose edges are given by the adjacency matrix, then report its size
G = nx.from_numpy_array(adj_all_v1)
print(
    f"Number of nodes in G: {G.number_of_nodes()}",
    f"Number of edges in G: {G.number_of_edges()}",
    sep="\n",
)

# Compute the average degree of the graph, i.e. the number of edges a node in the network has on average. In an undirected graph every edge adds one to the degree of both of its endpoints, so average degree = 2 * total_no_of_edges / total_no_of_nodes

In [None]:
# Plot the graph for visualisation, placing the nodes at random positions
# (the layout seed is fixed so the picture is reproducible)
plt.figure(figsize=(5, 5))
pos = nx.random_layout(G, seed=127)
nx.draw_networkx_nodes(G, pos, node_color="black", node_size=100)
nx.draw_networkx_edges(G, pos, edge_color="gray", width=0.7)
plt.title("Village 1 network - random layout")
# Render explicitly, like the other plotting cells, so the cell output is the
# figure and not the repr of the object returned by the last draw call
plt.show()

In [None]:
# Is there a better way to visualise the network?
# A spring (force-directed) layout pulls connected nodes together
# (the layout seed is fixed so the picture is reproducible)
plt.figure(figsize=(5, 5))
pos = nx.spring_layout(G, seed=7)
nx.draw_networkx_nodes(G, pos, node_color="black", node_size=100)
nx.draw_networkx_edges(G, pos, edge_color="gray", width=0.7)
plt.title("Village 1 network - spring layout")
# Render explicitly, like the other plotting cells, so the cell output is the
# figure and not the repr of the object returned by the last draw call
plt.show()

In [None]:
# We take the biggest connected component of the network for analysis
G_giant = G.subgraph(max(nx.connected_components(G), key=len)).copy()

# Relabel the nodes of the giant component from 0 to N-1, where N is the number
# of nodes in the giant component. Zero-based consecutive labels let a node
# label double as a position in a length-N array.
node_labels_old = list(G_giant.nodes())
node_labels_new = list(range(len(node_labels_old)))  # exactly one new label per node

node_label_map = dict(zip(node_labels_old, node_labels_new))

G_final = nx.relabel_nodes(G_giant, node_label_map)

# We save the node mapping (both lists are in the same order, row i maps
# node_labels_old[i] -> node_labels_new[i])
node_labels_df = pd.DataFrame(
    {
        "original_node_label": node_labels_old,
        "new_node_label": node_labels_new,
    }
)
node_labels_df

In [None]:
# Print the number of nodes, the number of edges and the average degree of the giant component. Has the average degree changed?

In [None]:
# Now plot the giant component using force layout

In [None]:
# Centrality metrics for every node of the giant component
# (each call returns a dict mapping node -> score)
degree_centrality = nx.degree_centrality(G_final)
closeness_centrality = nx.closeness_centrality(G_final)
betweenness_centrality = nx.betweenness_centrality(G_final)
eigenvector_centrality = nx.eigenvector_centrality(G_final)

centralities = dict(
    Degree=degree_centrality,
    Closeness=closeness_centrality,
    Betweenness=betweenness_centrality,
    Eigenvector=eigenvector_centrality,
)

# One histogram per centrality measure, arranged on a 2x2 grid
fig, axs = plt.subplots(2, 2, figsize=(10, 8))
axs = axs.flatten()

i = 0
for title, data in centralities.items():
    panel = axs[i]
    panel.hist(list(data.values()), bins=20)
    panel.set_xlabel(f"{title}")
    panel.set_ylabel("Freq")
    panel.grid(False)
    i += 1

plt.tight_layout()
plt.show()

In [None]:
# Gather the four centrality scores in one table (one row per relabelled node)
# and attach each node's original label from the saved mapping
centrality_df = (
    pd.DataFrame(
        {
            "node": list(degree_centrality),
            "degree_centrality": list(degree_centrality.values()),
            "closeness_centrality": list(closeness_centrality.values()),
            "betweenness_centrality": list(betweenness_centrality.values()),
            "eigenvector_centrality": list(eigenvector_centrality.values()),
        }
    )
    .merge(node_labels_df, left_on="node", right_on="new_node_label")
    .drop(columns=["new_node_label"])
)

# Show the five nodes with the highest degree centrality
print(
    centrality_df.sort_values(by="degree_centrality", ascending=False)
    .reset_index(drop=True)
    .head(5)
)

In [None]:
# Plot the network again and colour nodes based on their degree centrality
node_color = [degree_centrality[node] for node in G_final.nodes()]

pos = nx.spring_layout(G_final, seed=7, k=0.01, iterations=100)
fig, ax = plt.subplots(figsize=(15, 7))
nodes = nx.draw_networkx_nodes(
    G_final,
    pos,
    node_color=node_color,
    cmap=plt.cm.viridis,
    node_size=500,
    alpha=0.75,
    ax=ax,
)
nx.draw_networkx_edges(G_final, pos, alpha=0.3, ax=ax)
# Target the same axes explicitly, as the node and edge calls do, instead of
# relying on whichever axes happens to be current
nx.draw_networkx_labels(G_final, pos, font_size=8, font_color="white", ax=ax)

# Stand-alone mappable covering the observed centrality range; it drives the colorbar
sm = plt.cm.ScalarMappable(
    cmap=plt.cm.viridis, norm=plt.Normalize(vmin=min(node_color), vmax=max(node_color))
)

cbar = fig.colorbar(sm, ax=ax)
cbar.set_label("Degree centrality")  # say what the colours encode
ax.axis("off")
plt.tight_layout()
plt.show()

In [None]:
# Sort nodes based on closeness centrality and print the top 5 nodes

In [None]:
# Plot the network again and colour nodes based on their closeness centrality - does it look different?

In [None]:
### For all 75 villages compute the avg centralities [Advanced]

In [None]:
# Household characteristics will be added to the centrality dataframe.
# Step 1: load the demographics table
demographics_df = pd.read_stata(
    "data/Demographics and Outcomes/household_characteristics.dta"
)
# Step 2: keep the rows of village 1 only (copied, so the subset can be modified safely)
demographics_v1 = demographics_df.loc[demographics_df["village"].eq(1)].copy()
demographics_v1

In [None]:
# Attach the demographics to the centrality table.
# The join key is adjmatrix_key shifted down by one, matched against the
# original node labels; a left join keeps every node of the centrality table.
demographics_v1["adjusted_key"] = demographics_v1["adjmatrix_key"].sub(1)
node_centrality_and_demographics = centrality_df.merge(
    demographics_v1,
    how="left",
    left_on="original_node_label",
    right_on="adjusted_key",
)
node_centrality_and_demographics

In [None]:
# Mean (and standard deviation) of degree centrality for leaders vs non-leaders
grp_col = "degree_centrality"
leader_groups = node_centrality_and_demographics.groupby("leader")
group_by_leader_status = leader_groups[grp_col].agg(["mean", "std"]).reset_index()

# One bar per leader status; error bars show the within-group standard deviation
plt.bar(
    x=group_by_leader_status["leader"],
    height=group_by_leader_status["mean"],
    yerr=group_by_leader_status["std"],
    capsize=5,
    color=["grey", "indianred"],
    tick_label=["Non-leader", "Leader"],
)

plt.xticks([0, 1])
plt.ylabel(grp_col)
plt.tight_layout()
plt.show()

In [None]:
# Get the average closeness centrality of leaders and non-leaders

In [None]:
# Define diffusion model
def run_simulation(N, T, qN, qP, pP, seed_nodes, beta, seed=42, graph=None):
    """Simulate information diffusion and programme adoption on a network.

    Each step has two phases. In the adoption phase every node that was
    informed in the previous step gets exactly one chance to become a
    participant, with probability given by a logistic function of
    ``beta[0] * pP[node] + beta[1] * (number of participating neighbours)``.
    In the transmission phase every informed node passes the information on
    to all of its not-yet-informed neighbours with probability ``qP`` if it
    participates and ``qN`` otherwise.

    Parameters
    ----------
    N : int
        Number of nodes. Node labels are used as indices into length-``N``
        arrays, so they must be integers in ``0..N-1``.
    T : int
        Time horizon. The loop runs over ``range(1, T)``, i.e. ``T - 1`` steps.
    qN : float
        Per-step transmission probability of an informed non-participant.
    qP : float
        Per-step transmission probability of a participant.
    pP : sequence
        Node characteristic, indexable by node label.
    seed_nodes : sequence of int
        Nodes that are informed at the start. Repeated entries are ignored.
    beta : sequence
        ``beta[0]`` weights ``pP``; ``beta[1]`` weights the number of
        participating neighbours.
    seed : int or None, optional
        Seed of the private random number generator used by this call.
    graph : object, optional
        Anything exposing ``neighbors(node)``. Defaults to the notebook-level
        ``G_final``.

    Returns
    -------
    infection_rate : list of float
        Share of participants after the adoption phase of each step
        (``T - 1`` entries).
    informed_nodes : numpy.ndarray
        1.0 for informed nodes, 0.0 otherwise. Nodes reached only by the
        transmission phase of the very last step are not marked.
    participant_nodes : numpy.ndarray
        1.0 for participants, 0.0 otherwise.
    """
    if graph is None:
        graph = G_final

    # A private generator keeps the simulation reproducible without resetting
    # NumPy's global random state on every call. RandomState(seed) produces
    # the same stream as np.random.seed(seed) followed by np.random.rand().
    rng = np.random.RandomState(seed)

    # 0 - uninformed / non-participant, 1 - informed / participant
    informed_nodes = np.zeros(N)
    participant_nodes = np.zeros(N)

    # Nodes waiting for their single adoption draw. Duplicated seeds are
    # dropped (order preserved) so no node gets more than one shot.
    newly_informed_nodes_list = list(dict.fromkeys(int(node) for node in seed_nodes))
    informed_nodes[newly_informed_nodes_list] = 1

    infection_rate = []

    for _ in range(1, T):
        participant_node_list = []

        # Adoption process: one draw per newly informed node
        for newly_informed_node in newly_informed_nodes_list:
            neighbours = list(graph.neighbors(newly_informed_node))

            # Logistic transform of node characteristic and neighbour effect
            adoption_prob = 1 / (
                1
                + np.exp(
                    -(
                        beta[0] * pP[newly_informed_node]
                        + beta[1] * participant_nodes[neighbours].sum()
                    )
                )
            )
            if rng.rand() <= adoption_prob:
                participant_node_list.append(newly_informed_node)

        # Applied after the loop so adoption decisions within one step do not
        # influence each other
        participant_nodes[participant_node_list] = 1

        infection_rate.append(participant_nodes.sum() / N)

        informed_nodes[newly_informed_nodes_list] = 1
        informed_nodes_list = np.where(informed_nodes == 1)[0]

        newly_informed_nodes_list = []
        queued = set()  # nodes already scheduled for the next adoption phase

        # Transmission process: one draw per informed node
        for informed_node in informed_nodes_list:
            transmit_prob = qP if participant_nodes[informed_node] == 1 else qN
            if rng.rand() <= transmit_prob:
                for neighbour in graph.neighbors(informed_node):
                    # A node reachable from several transmitters is queued once
                    if informed_nodes[neighbour] == 0 and neighbour not in queued:
                        queued.add(neighbour)
                        newly_informed_nodes_list.append(neighbour)

    return infection_rate, informed_nodes, participant_nodes

In [None]:
# --- Simulation parameters ---
np.random.seed(12)  # fixes the randomly generated household characteristics
N = G_final.number_of_nodes()  # number of individuals
T = 25  # time horizon passed to run_simulation
qN = 0.01  # probability that a non-taker transmits info
qP = 0.1  # probability that a taker transmits info
pP = np.random.randint(0, 2, size=N)  # randomly generate household characteristics
f = 0.1  # fraction of nodes that are informed at the start
beta = np.array([1, 1])

centrality_measure = "degree_centrality"

# Seed the diffusion with the round(f * N) most central nodes
seed_nodes = (
    centrality_df.sort_values(by=centrality_measure, ascending=False)
    .head(round(f * N))["node"]
    .values
)
infection_rate, informed_nodes, participant_nodes = run_simulation(
    N, T, qN, qP, pP, seed_nodes, beta
)

node_colors = ["indianred" if participant_nodes[i] == 1 else "black" for i in range(N)]
pos = nx.spring_layout(G_final, seed=7, k=0.01, iterations=100)

# Set the font size before the figure is created so it applies to every text
# element of this figure on the first run as well
plt.rcParams.update({"font.size": 18})
fig, axes = plt.subplots(1, 2, figsize=(15, 6))

# Left panel: share of participants over time
axes[0].plot(infection_rate, marker="o")
axes[0].set_xlabel("T")
axes[0].set_ylabel("Uptake Rate")

# Right panel: who participates at the end of the simulation
nx.draw_networkx_nodes(
    G_final, pos, node_color=node_colors, node_size=300, alpha=0.7, ax=axes[1]
)
nx.draw_networkx_edges(G_final, pos, edge_color="gray", ax=axes[1])
axes[1].set_title("Red = Participant; Black = Does not participate")
axes[1].axis("off")

plt.tight_layout()
plt.show()

In [None]:
# --- Simulation parameters ---
np.random.seed(12)  # fixes the randomly generated household characteristics
N = G_final.number_of_nodes()  # number of individuals
T = 25  # time horizon passed to run_simulation
qN = 0.01  # probability that a non-taker transmits info
qP = 0.1  # probability that a taker transmits info
pP = np.random.randint(0, 2, size=N)  # randomly generate household characteristics
f = 0.1  # fraction of nodes that are informed at the start
beta = np.array([1, 1])

centrality_measure = "degree_centrality"

# Seed the diffusion with the round(f * N) most central nodes
seed_nodes = (
    centrality_df.sort_values(by=centrality_measure, ascending=False)
    .head(round(f * N))["node"]
    .values
)
infection_rate, informed_nodes, participant_nodes = run_simulation(
    N, T, qN, qP, pP, seed_nodes, beta
)

# --- Baseline: the same number of seeds, chosen at random ---
n_random_runs = 100
n_seeds = round(f * N)
# The seed sets come from their own generator so they do not depend on
# NumPy's global random state, which run_simulation may reseed.
seed_set_rng = np.random.RandomState(12)

infection_rate_random = []
for i in range(n_random_runs):
    # Sample without replacement so every seed set has n_seeds distinct nodes
    random_seed = seed_set_rng.choice(N, size=n_seeds, replace=False)
    # A different simulation seed per replicate, so the replicates do not all
    # share one random stream
    infection_rate_rand, informed_nodes_rand, participant_nodes_rand = run_simulation(
        N, T, qN, qP, pP, random_seed, beta, seed=i
    )

    infection_rate_random.append(infection_rate_rand)

# Mean and spread of the uptake rate across replicates, per time step
random_rates = np.array(infection_rate_random)
mean_infection_rate_random = np.mean(random_rates, axis=0)
std_infection_rate_random = np.std(random_rates, axis=0)

node_colors = ["indianred" if participant_nodes[i] == 1 else "black" for i in range(N)]
pos = nx.spring_layout(G_final, seed=7, k=0.01, iterations=100)

# Set the font size before the figure is created so it applies to every text
# element of this figure on the first run as well
plt.rcParams.update({"font.size": 18})
fig, axes = plt.subplots(1, 2, figsize=(15, 6))

# Left panel: centrality-based seeding vs the random baseline (mean +/- std)
axes[0].plot(infection_rate, marker="o", label=centrality_measure)
axes[0].errorbar(
    np.arange(len(mean_infection_rate_random)),
    mean_infection_rate_random,
    yerr=std_infection_rate_random,
    fmt="-o",
    capsize=5,
    label="random",
    elinewidth=1,
)

axes[0].set_xlabel("T")
axes[0].set_ylabel("Uptake Rate")
axes[0].legend()

# Right panel: final participation under centrality-based seeding
nx.draw_networkx_nodes(
    G_final, pos, node_color=node_colors, node_size=300, alpha=0.7, ax=axes[1]
)
nx.draw_networkx_edges(G_final, pos, edge_color="gray", ax=axes[1])
axes[1].set_title("Red = Participant; Black = Not a participant")
axes[1].axis("off")

plt.tight_layout()
plt.show()

In [None]:
# Seed nodes based on other centrality measures - how do they each perform in comparison to each other?
# Input characteristics from the node_centrality_and_demographics dataframe
## Replicate diffusion model for other villages? Which centrality measure works the best on average?