## Sample networks

This notebook generates random networks (small-world, scale-free, and Erdős–Rényi) that simulate real-world citation networks.
Each node is relabeled with a sampled author ID and given a `gender` attribute.

In [1]:
import networkx as nx
import pandas as pd
import pickle
import random

In [2]:
# Load the author table. The CSV has no header row, so the three columns
# are named explicitly: 'wos', 'mag' and 'gender'.
gender_df = pd.read_csv(
    "author2mag_gender.csv",
    names=['wos', 'mag', 'gender'],
    header=None,
)

In [3]:
# filter out unknown gender
# The unknown markers are -1, -2 and the literal "None". Coerce the column to
# numbers before comparing, so the filter works both when pandas kept the
# column as strings and when it parsed it as numeric (recent pandas versions
# read the literal "None" as a missing value, which would make plain string
# comparisons match nothing).
gender_codes = pd.to_numeric(gender_df.gender, errors="coerce")
gender_df = gender_df[gender_codes.notna() & ~gender_codes.isin([-1, -2])]

In [4]:
def assign_attributes(G, seed=42):
    """Relabel the nodes of ``G`` with sampled author IDs and attach genders.

    One row of the module-level ``gender_df`` is drawn per node, without
    replacement. Each node is renamed to that row's ``mag`` value and receives
    a ``gender`` node attribute holding the row's ``gender`` converted to
    ``int``.

    Parameters
    ----------
    G : networkx graph
        Graph to annotate. It is relabeled in place.
    seed : int, optional
        Passed as ``random_state`` to ``DataFrame.sample`` so the draw is
        reproducible.

    Returns
    -------
    The same graph object ``G``, relabeled and annotated.

    Raises
    ------
    ValueError
        Raised by pandas when ``gender_df`` has fewer distinct ``mag`` IDs
        than ``G`` has nodes.
    """
    nodes = list(G.nodes())

    # Sample from distinct mag IDs only: if two nodes were mapped to the same
    # label, relabel_nodes would merge them and the graph would silently
    # shrink. When 'mag' is already unique this leaves the sample unchanged.
    candidates = gender_df.drop_duplicates(subset='mag')
    sample = candidates.sample(len(nodes), random_state=seed)
    sample_nodes = sample['mag'].values

    # Pair the two columns directly instead of calling .iloc once per row.
    sample_genders = {
        mag: {'gender': int(gender)}
        for mag, gender in zip(sample_nodes, sample['gender'].values)
    }
    node_mapping = dict(zip(nodes, sample_nodes))

    nx.relabel_nodes(G, node_mapping, copy=False)
    nx.set_node_attributes(G, sample_genders)

    return G
    
    
def generate_smallworld(n, k=10, p=0.3, seed=42):
    """Build a directed Watts-Strogatz graph whose nodes carry gender attributes.

    Parameters
    ----------
    n, k, p :
        Forwarded unchanged to ``nx.watts_strogatz_graph``.
    seed : int, optional
        Used both for the graph generator and for the author sampling in
        ``assign_attributes``.

    Returns
    -------
    networkx.DiGraph
        The directed graph after ``assign_attributes`` has relabeled and
        annotated its nodes.
    """
    undirected = nx.watts_strogatz_graph(n=n, k=k, p=p, seed=seed)

    # Swap the direction of half edges to diffuse degree
    di_edges = [
        (u, v) if i % 2 == 0 else (v, u)
        for i, (u, v) in enumerate(undirected.edges())
    ]
    G = nx.DiGraph()
    G.add_edges_from(di_edges)
    # A node without any edge never shows up in the edge list. Add the
    # generator's nodes afterwards so none are lost; nodes already present
    # keep their position, so the usual case is unaffected.
    G.add_nodes_from(undirected.nodes())

    return assign_attributes(G, seed=seed)


def generate_ba_graph(n, m, seed=42):
    """Build a directed Barabasi-Albert graph whose nodes carry gender attributes.

    Parameters
    ----------
    n, m :
        Forwarded unchanged to ``nx.barabasi_albert_graph``.
    seed : int, optional
        Used both for the graph generator and for the author sampling in
        ``assign_attributes``.

    Returns
    -------
    networkx.DiGraph
        The directed graph after ``assign_attributes`` has relabeled and
        annotated its nodes.
    """
    undirected = nx.barabasi_albert_graph(n=n, m=m, seed=seed)

    # Swap the direction of half edges to diffuse degree
    di_edges = [
        (u, v) if i % 2 == 0 else (v, u)
        for i, (u, v) in enumerate(undirected.edges())
    ]
    G = nx.DiGraph()
    G.add_edges_from(di_edges)
    # Defensive: a node without any edge never shows up in the edge list.
    # Adding the generator's nodes afterwards keeps every node and leaves the
    # ones already present in place.
    G.add_nodes_from(undirected.nodes())

    return assign_attributes(G, seed=seed)

def generate_er_graph(n, p, seed=42):
    """Build a directed G(n, p) random graph whose nodes carry gender attributes.

    ``n`` and ``p`` are forwarded to ``nx.gnp_random_graph`` (with
    ``directed=True``); ``seed`` is used for both the generator and the
    author sampling done by ``assign_attributes``, whose result is returned.
    """
    random_digraph = nx.gnp_random_graph(n, p, seed=seed, directed=True)
    return assign_attributes(random_digraph, seed=seed)

In [5]:
# Node count shared by the three sample networks.
n = 500

# One graph per model, each generated with its own seed.
G_smallworld = generate_smallworld(n=n, k=4, p=0.2, seed=42)
G_scalefree = generate_ba_graph(n=n, m=4, seed=43)
G_random = generate_er_graph(n=n, p=0.05, seed=44)

In [6]:
def save_graph_edgelist(G, filename, seed=None, weight_range=(1, 30)):
    """Write ``G`` to ``<filename>.edgelist`` and ``<filename>.nodes``.

    The edge list file starts with a ``"<nodes> <edges>"`` header line,
    followed by one ``"<source> <target> <weight>"`` line per edge, where the
    weight is a random integer. The nodes file holds one
    ``"<node> <gender>"`` line per node.

    Parameters
    ----------
    G : networkx graph
        Graph whose nodes all have a ``gender`` attribute.
    filename : str
        Output path without extension.
    seed : int, optional
        When given, weights come from a private generator seeded with this
        value, so repeated calls write identical files. When ``None`` (the
        default) the module-level ``random`` generator is used, as before.
    weight_range : tuple of (int, int), optional
        Inclusive lower and upper bound of the random edge weights.

    Raises
    ------
    KeyError
        If a node has no ``gender`` attribute.
    """
    rng = random if seed is None else random.Random(seed)
    low, high = weight_range

    with open(filename + '.edgelist', 'w') as file:
        file.write("{} {}\n".format(G.number_of_nodes(), G.number_of_edges()))
        file.writelines(
            "{} {} {}\n".format(s, t, rng.randint(low, high))
            for s, t in G.edges()
        )

    with open(filename + '.nodes', 'w') as file:
        file.writelines(
            "{} {}\n".format(node, data['gender'])
            for node, data in G.nodes(data=True)
        )

In [67]:
# NOTE(review): the "_10k" suffix does not match n = 500 used to build the
# graphs in this notebook - confirm the intended size or file names.
# Graphs are written in a fixed order because the edge weights are drawn
# from the shared random generator.
for graph, basename in (
    (G_smallworld, "smallworld_10k"),
    (G_scalefree, "scalefree_10k"),
    (G_random, "randomgraph_10k"),
):
    save_graph_edgelist(graph, basename)