In [1]:
import os
import sys
import networkx as nx
import multiprocessing
from joblib import delayed
from joblib import Parallel
from collections import Counter

In [2]:
# Load IPython's autoreload extension and set it to mode 2.
%load_ext autoreload
%autoreload 2

# Extend the module search path with the relative code directory before
# importing the project helper `printf` from it.
sys.path.append('../../code')
from org.gesis.libs.utils import printf

In [3]:
# Base directory of the data; each dataset is expected in its own sub-folder.
ROOT = '../../data/'

# Names of the datasets to process ('twitter' is currently disabled).
DATASETS = [
    'aps',
    'github',
    'pokec',
    'wikipedia',
]  # 'twitter'

In [4]:
# For every dataset: load the attributed network, relabel its nodes with
# sequential integers, copy its edges into a fresh DiGraph that carries only a
# binary 'minority' node attribute, and write the result next to the input
# file with an '_anon' suffix.
# NOTE: nx.read_gpickle / nx.write_gpickle exist only in NetworkX < 3.0.
for dataset in DATASETS:
    printf(dataset)
    fn = os.path.join(ROOT, dataset, '{}_attributed_network.gpickle'.format(dataset))

    # converting node names to seq. numbers (starting at 1)
    # NOTE(review): gpickle files are pickles -- only load trusted files.
    G = nx.read_gpickle(fn)
    G = nx.convert_node_labels_to_integers(G, first_label=1, ordering='default')
    class_attr = G.graph['class']

    # copy
    H = nx.DiGraph()
    H.add_edges_from(G.edges())  # singletons (degree 0) are ignored.

    # setting graph metadata (inferring minority label)
    # G.nodes[n] is used instead of G.node[n], which was removed in NetworkX 2.4.
    # The counts are taken over all nodes of G, singletons included.
    class_counts = Counter(G.nodes[n][class_attr] for n in G.nodes())
    print(dataset, class_counts)
    ranked = class_counts.most_common()
    if len(ranked) < 2:
        # The original code failed here with an opaque IndexError.
        raise ValueError(
            "dataset '{}': attribute '{}' needs at least two distinct values "
            "to infer a minority, found {}".format(dataset, class_attr, len(ranked)))
    majority_label, minority_label = ranked[0][0], ranked[1][0]

    H.graph['name'] = G.graph['name']
    H.graph['class'] = class_attr
    H.graph['labels'] = [majority_label, minority_label]
    H.graph['groups'] = ['M', 'm']
    H.graph['label'] = 'minority'

    # setting node attribute: m (is minority?)
    # Only nodes that made it into H (i.e. edge endpoints) receive the attribute.
    attrs = {n: {'minority': int(G.nodes[n][class_attr] == minority_label)}
             for n in H.nodes()}
    nx.set_node_attributes(H, attrs)

    # writing new version
    nx.write_gpickle(H, fn.replace(".gpickle", '_anon.gpickle'))
    print(H.graph)

    # Sanity check on one node: prefer node 1 (as before), but fall back to any
    # node of H because node 1 is absent from H when it is a singleton in G.
    sample = 1 if 1 in H else next(iter(H.nodes()), None)
    if sample is not None:
        print(H.nodes[sample]['minority'], G.nodes[sample][class_attr])
    print()
    del G, H

2020-02-07 19:55:12	aps
aps Counter({'05.30.-d': 1157, '05.20.-y': 696})
{'groups': ['M', 'm'], 'class': 'pacs', 'name': 'APS', 'labels': ['05.30.-d', '05.20.-y'], 'label': 'minority'}
0 05.30.-d

2020-02-07 19:55:12	github
github Counter({'male': 293268, 'female': 18487})
{'groups': ['M', 'm'], 'class': 'gender', 'name': 'Github', 'labels': ['male', 'female'], 'label': 'minority'}
0 male

2020-02-07 19:55:43	pokec
pokec Counter({0: 828304, 1: 804336})
{'groups': ['M', 'm'], 'class': 'gender', 'name': 'Pokec', 'labels': [0, 1], 'label': 'minority'}
1 1

2020-02-07 20:06:05	wikipedia
wikipedia Counter({'male': 2678, 'female': 481})
{'groups': ['M', 'm'], 'class': 'gender', 'name': 'Wikipedia', 'labels': ['male', 'female'], 'label': 'minority'}
0 male

