## Imports

In [2]:
import os
import glob

import networkx as nx

## Load datasets

### FA2 paper datasets

In [9]:
def load_network_from_adjacency(filepath: str, is_directed=True, sep=" ") -> nx.Graph:
    with open(filepath) as file:
        G = nx.DiGraph() if is_directed else nx.Graph()
        for line in file:
            if line.startswith('#'): continue
            line = line.rstrip('\n')
            n1, n2 = line.split()
            G.add_edge(n1, n2)
    return G

In [10]:
FA2_ROOT = "/Volumes/ERNESTV/SNACS datasets/FA2 datasets/"

#### Extract Basic Infos

In [11]:
def extract_info(G: nx.Graph):
    nodes = G.number_of_nodes()
    edges = G.number_of_edges()
    avg_degree = sum(dict(G.degree()).values()) / float(nodes)
    return nodes, edges, avg_degree

#### Facebook

In [5]:
fb_path = FA2_ROOT + "facebook/*.edges"
fb_files = glob.glob(fb_path)
fb_networks = []
for file in fb_files:
    fb_networks.append(load_network_from_adjacency(filepath=file, is_directed=False))

In [6]:
for file, network in zip(fb_files, fb_networks):
    n, e, avg_d = extract_info(network)
    print(f"Network {os.path.basename(file)} has {n} nodes, {e} edges, and an average degree of {avg_d}.")
    nx.write_gexf(network, path="gephi/" + os.path.basename(file).replace('.edges', ".gexf"))

Network 0.edges has 333 nodes, 2519 edges, and an average degree of 15.12912912912913.
Network 348.edges has 224 nodes, 3192 edges, and an average degree of 28.5.
Network 414.edges has 150 nodes, 1693 edges, and an average degree of 22.573333333333334.
Network 698.edges has 61 nodes, 270 edges, and an average degree of 8.852459016393443.
Network 107.edges has 1034 nodes, 26749 edges, and an average degree of 51.73887814313346.
Network 3437.edges has 534 nodes, 4813 edges, and an average degree of 18.026217228464418.
Network 3980.edges has 52 nodes, 146 edges, and an average degree of 5.615384615384615.
Network 1912.edges has 747 nodes, 30025 edges, and an average degree of 80.38821954484605.
Network 1684.edges has 786 nodes, 14024 edges, and an average degree of 35.68447837150127.
Network 686.edges has 168 nodes, 1656 edges, and an average degree of 19.714285714285715.


#### Twitter

In [7]:
twitter_path = FA2_ROOT + "twitter_use/*.edges"
twitter_files = glob.glob(twitter_path)
twitter_networks = []
for file in twitter_files:
    twitter_networks.append(load_network_from_adjacency(filepath=file, is_directed=True))

In [8]:
for file, network in zip(twitter_files, twitter_networks):
    n, e, avg_d = extract_info(network)
    print(f"Network {os.path.basename(file)} has {n} nodes, {e} edges, and an average degree of {avg_d}.")
    nx.write_gexf(network, path="gephi/" + os.path.basename(file).replace('.edges', ".gexf"))

Network 21420959.edges has 91 nodes, 1787 edges, and an average degree of 39.27472527472528.
Network 24117694.edges has 246 nodes, 9630 edges, and an average degree of 78.29268292682927.
Network 15053535.edges has 18 nodes, 26 edges, and an average degree of 2.888888888888889.
Network 215328630.edges has 10 nodes, 33 edges, and an average degree of 6.6.
Network 248883350.edges has 184 nodes, 9042 edges, and an average degree of 98.28260869565217.
Network 15924858.edges has 10 nodes, 39 edges, and an average degree of 7.8.
Network 430313102.edges has 51 nodes, 1646 edges, and an average degree of 64.54901960784314.
Network 23503181.edges has 101 nodes, 1824 edges, and an average degree of 36.118811881188115.
Network 163374693.edges has 164 nodes, 1749 edges, and an average degree of 21.329268292682926.
Network 314316607.edges has 235 nodes, 15957 edges, and an average degree of 135.80425531914895.
Network 256497288.edges has 213 nodes, 17930 edges, and an average degree of 168.356807511

#### Oregon-2

In [9]:
oregon_path = FA2_ROOT + "oregon/*.txt"
oregon_files = glob.glob(oregon_path)
oregon_networks = []
for file in oregon_files:
    oregon_networks.append(load_network_from_adjacency(file, is_directed=False))

In [10]:
for file, network in zip(oregon_files, oregon_networks):
    n, e, avg_d = extract_info(network)
    print(f"Network {os.path.basename(file)} has {n} nodes, {e} edges, and an average degree of {avg_d}.")
    nx.write_gexf(network, path="gephi/" + os.path.basename(file).replace('.edges', ".gexf"))

Network oregon2_010331.txt has 10900 nodes, 31180 edges, and an average degree of 5.721100917431193.
Network oregon2_010407.txt has 10981 nodes, 30855 edges, and an average degree of 5.619706766232584.
Network oregon2_010428.txt has 11113 nodes, 31434 edges, and an average degree of 5.657158283091874.
Network oregon2_010505.txt has 11157 nodes, 30943 edges, and an average degree of 5.546831585551671.
Network oregon2_010512.txt has 11260 nodes, 31303 edges, and an average degree of 5.560035523978685.
Network oregon2_010519.txt has 11375 nodes, 32287 edges, and an average degree of 5.676835164835165.
Network oregon2_010526.txt has 11461 nodes, 32730 edges, and an average degree of 5.711543495331996.
Network oregon2_010414.txt has 11019 nodes, 31761 edges, and an average degree of 5.764769942826028.
Network oregon2_010421.txt has 11080 nodes, 31538 edges, and an average degree of 5.692779783393502.


#### COND-MAT

In [11]:
cond_mat_file = FA2_ROOT + "COND-MAT/ca-CondMat.txt"
cond_mat_network = load_network_from_adjacency(filepath=cond_mat_file, is_directed=False)

In [12]:
n, e, avg_d = extract_info(cond_mat_network)
print(f"Network {os.path.basename(cond_mat_file)} has {n} nodes, {e} edges, and an average degree of {avg_d}.")
nx.write_gexf(cond_mat_network, path="gephi/" + os.path.basename(cond_mat_file).replace('.edges', ".gexf"))

Network ca-CondMat.txt has 23133 nodes, 93497 edges, and an average degree of 8.083430596982666.


#### GR-QC

In [13]:
gr_qc_file = FA2_ROOT + "GR-QC/ca-GrQc.txt"
gr_qc_network = load_network_from_adjacency(filepath=gr_qc_file, is_directed=False)

In [14]:
n, e, avg_d = extract_info(gr_qc_network)
print(f"Network {os.path.basename(gr_qc_file)} has {n} nodes, {e} edges, and an average degree of {avg_d}.")
nx.write_gexf(gr_qc_network, path="gephi/" + os.path.basename(gr_qc_file).replace('.edges', ".gexf"))

Network ca-GrQc.txt has 5242 nodes, 14496 edges, and an average degree of 5.530713468141931.


#### HEP-PH

In [15]:
hep_ph_file = FA2_ROOT + "HEP-PH/ca-HepPh.txt"
hep_ph_network = load_network_from_adjacency(filepath=hep_ph_file, is_directed=False)

In [16]:
n, e, avg_d = extract_info(hep_ph_network)
print(f"Network {os.path.basename(hep_ph_file)} has {n} nodes, {e} edges, and an average degree of {avg_d}.")
nx.write_gexf(hep_ph_network, path="gephi/" + os.path.basename(hep_ph_file).replace('.edges', ".gexf"))

Network ca-HepPh.txt has 12008 nodes, 118521 edges, and an average degree of 19.740339773484344.


#### Yeast

In [17]:
def load_yeast(filepath: str) -> nx.DiGraph:
    with open(file=filepath) as file:
        G = nx.DiGraph()
        edges = False
        for line in file:
            if line.startswith("*edges"):
                edges = True
                continue
            if edges:
                line = line.rstrip('\n')
                n1, n2 = line.split()
                G.add_edge(n1, n2)
        return G

In [18]:
yeast_file = FA2_ROOT + "yeast/YeastS.net"
yeast_network = load_yeast(filepath=yeast_file)

In [19]:
n, e, avg_d = extract_info(yeast_network)
print(f"Network {os.path.basename(yeast_file)} has {n} nodes, {e} edges, and an average degree of {avg_d}.")
nx.write_gexf(yeast_network, path="gephi/yeast.gexf")

Network YeastS.net has 2361 nodes, 7182 edges, and an average degree of 6.083862770012707.


### New datasets

#### Imports

In [3]:
from graph_tool.all import collection as gt_coll
import graph_tool as gt
from graph_tool.stats import vertex_average

objc[29875]: Class GNotificationCenterDelegate is implemented in both /opt/miniconda3/envs/snacs_fa2/lib/libgio-2.0.0.dylib (0x16f671c30) and /usr/local/Cellar/glib/2.74.0/lib/libgio-2.0.0.dylib (0x17cf6c6b0). One of the two will be used. Which one is undefined.


In [4]:
def extract_info_gt(G: gt.Graph):
    nodes = len(G.get_vertices())    
    edges = len(G.get_edges())
    avg_degree = vertex_average(G, deg='total')
    return nodes, edges, avg_d

In [25]:
def load_network_from_csv(filename: str, is_directed=True):
    with open(filename) as file:
        G = nx.DiGraph() if is_directed else nx.Graph()
        count = 10
        for line in file:
            if count == 0:
                break
            line = line.rstrip('\n')
            n1, n2, *_ = line.split(',')
            G.add_edge(n1, n2)
        return G

In [6]:
NEW_DB_ROOT = "/Volumes/ERNESTV/SNACS datasets/new datasets/"

#### academia_edu

In [14]:
network_name = 'academia_edu'
academia_edu_file = NEW_DB_ROOT + network_name + '/edges.csv'
academia_edu_network = load_network_from_csv(academia_edu_file)
n, e, avg_d = extract_info(academia_edu_network)
print(f"Network {network_name} has {n} nodes, {e} edges, and an average degree of {avg_d}.")
nx.write_gexf(academia_edu_network, path="gephi/" + network_name + ".gexf")

Network academia_edu has 200171 nodes, 1398064 edges, and an average degree of 13.968696764266552.


#### dbpedia_corporation

In [16]:
network_name = 'dbpedia_corporation'
dbpedia_corporation_file = NEW_DB_ROOT + network_name + '/edges.csv'
dbpedia_corporation_network = load_network_from_csv(dbpedia_corporation_file, is_directed=False)
n, e, avg_d = extract_info(dbpedia_corporation_network)
print(f"Network {network_name} has {n} nodes, {e} edges, and an average degree of {avg_d}.")
nx.write_gexf(dbpedia_corporation_network, path="gephi/" + network_name + ".gexf")

Network dbpedia_corporation has 266554 nodes, 463498 edges, and an average degree of 3.4777043300794586.


#### dbpedia_genre

In [18]:
network_name = 'dbpedia_genre'
dbpedia_genre_file = NEW_DB_ROOT + network_name + '/edges.csv'
dbpedia_genre_network = load_network_from_csv(dbpedia_genre_file, is_directed=False)
n, e, avg_d = extract_info(dbpedia_genre_network)
print(f"Network {network_name} has {n} nodes, {e} edges, and an average degree of {avg_d}.")
nx.write_gexf(dbpedia_genre_network, path="gephi/" + network_name + ".gexf")

Network dbpedia_genre has 266554 nodes, 463498 edges, and an average degree of 3.4777043300794586.


#### notre_dame_web

In [19]:
network_name = 'notre_dame_web'
notre_dame_web_file = NEW_DB_ROOT + network_name + '/edges.csv'
notre_dame_web_network = load_network_from_csv(notre_dame_web_file)
n, e, avg_d = extract_info(notre_dame_web_network)
print(f"Network {network_name} has {n} nodes, {e} edges, and an average degree of {avg_d}.")
nx.write_gexf(notre_dame_web_network, path="gephi/" + network_name + ".gexf")

Network notre_dame_web has 325731 nodes, 1497135 edges, and an average degree of 9.192462492056329.


#### corporate_directors

In [20]:
network_name = 'corporate_directors'
corporate_directors_file = NEW_DB_ROOT + network_name + '/edges.csv'
corporate_directors_network = load_network_from_csv(corporate_directors_file, is_directed=False)
n, e, avg_d = extract_info(corporate_directors_network)
print(f"Network {network_name} has {n} nodes, {e} edges, and an average degree of {avg_d}.")
nx.write_gexf(corporate_directors_network, path="gephi/" + network_name + ".gexf")

Network corporate_directors has 356640 nodes, 376919 edges, and an average degree of 2.1137225213100046.


#### lkml_thread

In [29]:
network_name = 'lkml_thread'
lkml_thread_file = NEW_DB_ROOT + network_name + '/edges.csv'
lkml_thread_network = load_network_from_csv(lkml_thread_file, is_directed=True)
n, e, avg_d = extract_info(lkml_thread_network)
print(f"Network {network_name} has {n} nodes, {e} edges, and an average degree of {avg_d}.")
nx.write_gexf(lkml_thread_network, path="gephi/" + network_name + ".gexf")

#### bookcrossing

In [27]:
network_name = 'bookcrossing'
bookcrossing_file = NEW_DB_ROOT + network_name + '/edges.csv'
bookcrossing_network = load_network_from_csv(bookcrossing_file, is_directed=False)
n, e, avg_d = extract_info(bookcrossing_network)
print(f"Network {network_name} has {n} nodes, {e} edges, and an average degree of {avg_d}.")
nx.write_gexf(bookcrossing_network, path="gephi/" + network_name + ".gexf")

Network bookcrossing has 445803 nodes, 1149740 edges, and an average degree of 5.158063090647663.


#### petster

In [None]:
network_name = 'petster'
petster_file = NEW_DB_ROOT + network_name + '/edges.csv'
petster_network = load_network_from_csv(petster_file, is_directed=False)
n, e, avg_d = extract_info(petster_network)
print(f"Network {network_name} has {n} nodes, {e} edges, and an average degree of {avg_d}.")
nx.write_gexf(petster_network, path="gephi/" + network_name + ".gexf")

#### berkstan_web

In [None]:
network_name = 'berkstan_web'
berkstan_web_file = NEW_DB_ROOT + network_name + '/edges.csv'
berkstan_web_network = load_network_from_csv(berkstan_web_file, is_directed=True)
n, e, avg_d = extract_info(berkstan_web_network)
print(f"Network {network_name} has {n} nodes, {e} edges, and an average degree of {avg_d}.")
nx.write_gexf(berkstan_web_network, path="gephi/" + network_name + ".gexf")