In [1]:
%load_ext autoreload
%autoreload 2

import networkx as nx
import os
from joblib import Parallel, delayed
import multiprocessing
from collections import Counter
from org.gesis.libs.utils import printf

In [2]:
# Datasets to process; each name is also the sub-directory of ROOT that holds
# the dataset's network file ('twitter' is currently disabled).
DATASETS = [
    'aps',
    'github',
    'pokec',
    'wikipedia',
    # 'twitter',
]
# Base directory containing one sub-directory per dataset.
ROOT = '../data/'

In [3]:
# For every dataset: load the attributed network, relabel its nodes with
# sequential integers, and write a stripped-down directed copy that keeps only
# the structure plus a binary 'minority' node attribute.
for dataset in DATASETS:
    printf(dataset)
    fn = os.path.join(ROOT, dataset, '{}_attributed_network.gpickle'.format(dataset))

    # converting node names to seq. numbers
    # NOTE(review): gpickle is pickle-based, so only load files from a trusted
    # source. read_gpickle/write_gpickle were removed in NetworkX 3.0; this
    # cell therefore needs a 2.x release.
    G = nx.read_gpickle(fn)
    G = nx.convert_node_labels_to_integers(G, first_label=1, ordering='default')
    class_attr = G.graph['class']  # name of the node attribute holding the class

    # copy (structure only). Nodes are added explicitly: building H from edges
    # alone would drop isolated nodes, and set_node_attributes silently skips
    # nodes that are missing from the target graph.
    H = nx.DiGraph()
    H.add_nodes_from(G.nodes())
    H.add_edges_from(G.edges())

    # setting graph metadata (inferring minority label from class frequencies)
    # G.nodes[...] replaces the deprecated G.node[...] (removed in NetworkX 2.4).
    class_counts = Counter(G.nodes[n][class_attr] for n in G.nodes())
    print(dataset, class_counts)
    ranked = class_counts.most_common()
    if len(ranked) < 2:
        # Without a second class there is no minority group to encode.
        raise ValueError(
            '{}: expected at least two classes in attribute "{}", found {}'.format(
                dataset, class_attr, len(ranked)))
    majority_label, minority_label = ranked[0][0], ranked[1][0]

    H.graph['name'] = G.graph['name']
    H.graph['class'] = class_attr
    H.graph['labels'] = [majority_label, minority_label]  # [majority, minority]
    H.graph['groups'] = ['M', 'm']
    H.graph['label'] = 'minority'

    # setting node attribute: minority (1 if the node carries the minority label, else 0)
    attrs = {n: {'minority': int(G.nodes[n][class_attr] == minority_label)}
             for n in G.nodes()}
    nx.set_node_attributes(H, attrs)

    # writing new version next to the original file
    nx.write_gpickle(H, fn.replace(".gpickle", '_anon.gpickle'))
    print(H.graph)
    # sanity check: encoded flag vs. original class value of the first node
    print(H.nodes[1]['minority'], G.nodes[1][class_attr])
    print()

    # release both graphs before loading the next dataset
    del G, H

2020-01-29 21:45:21	aps
aps Counter({'05.30.-d': 1157, '05.20.-y': 696})
{'name': 'APS', 'class': 'pacs', 'label': ['minority'], 'labels': ['05.30.-d', '05.20.-y'], 'groups': ['M', 'm']}
0 05.30.-d

2020-01-29 21:45:21	github
github Counter({'male': 293268, 'female': 18487})
{'name': 'Github', 'class': 'gender', 'label': ['minority'], 'labels': ['male', 'female'], 'groups': ['M', 'm']}
0 male

2020-01-29 21:45:57	pokec
pokec Counter({0: 828304, 1: 804336})
{'name': 'Pokec', 'class': 'gender', 'label': ['minority'], 'labels': [0, 1], 'groups': ['M', 'm']}
0 0

2020-01-29 21:55:52	wikipedia
wikipedia Counter({'male': 2678, 'female': 481})
{'name': 'Wikipedia', 'class': 'gender', 'label': ['minority'], 'labels': ['male', 'female'], 'groups': ['M', 'm']}
0 male

