In [1]:
# Notebook metadata: authorship, credits, license, version and maintainer
# contact. These dunder names are plain module-level strings/lists; nothing
# else in this notebook reads them.
__author__ = "Lisette Espin"
__copyright__ = "Copyright 2019, GESIS"
__credits__ = ["Lisette Espin-Noboa", 
               "Fariba Karimi",                
               "Claudia Wagner",
               "Markus Strohmaier"]
__license__ = "GPL"
__version__ = "1.0.0"
__maintainer__ = "Lisette Espin"
__email__ = "lisette.espin@gesis.org"
__status__ = "Development"

<div style="text-align:center;"><h1>Anonymizing Real-World Networks</h1></div>

<h2>Dependencies</h2>

In [2]:
import os
import sys
import networkx as nx
import multiprocessing
from joblib import delayed
from joblib import Parallel
from collections import Counter

In [3]:
%load_ext autoreload
%autoreload 2

sys.path.append('../../code')
from org.gesis.libs.utils import printf

<h2>Constants and Functions</h2>

In [4]:
# Names of the datasets to anonymize. Each name is used both as the
# sub-directory under ROOT and as the prefix of the network file name.
# ('twitter' is intentionally left out of this run.)
DATASETS = [
    'aps',
    'apsgender3',
    'apsgender8',
    'github',
    'pokec',
    'wikipedia',
]

# Directory (relative to this notebook) that holds one folder per dataset.
ROOT = '../../data/'

In [5]:
def anon(dataset, root):
    """Create (or refresh) the anonymized version of a dataset's network.

    Reads ``<root>/<dataset>/<dataset>_attributed_network.gpickle`` and writes
    ``<root>/<dataset>/<dataset>_attributed_network_anon.gpickle``.

    If the anonymized file already exists it is only loaded and printed; when
    its ``graph['label']`` entry is a list, that entry is replaced by the
    list's first element and the file is rewritten.

    Otherwise the original network is loaded, its nodes are relabeled to
    consecutive integers starting at 1, and a new ``nx.DiGraph`` is built from
    its edges only (nodes without edges are therefore dropped). The two most
    frequent values of the class attribute (named by ``G.graph['class']``) are
    stored in ``graph['labels']`` as ``[most common, second most common]``,
    and every node gets an integer attribute ``'minority'`` that is 1 when its
    class value equals the second most common value and 0 otherwise.

    Parameters
    ----------
    dataset : str
        Dataset name; used as sub-directory and as file-name prefix.
    root : str
        Directory containing one sub-directory per dataset.

    Returns
    -------
    None

    Raises
    ------
    IndexError
        If the class attribute has fewer than two distinct values.

    Notes
    -----
    gpickle files are Python pickles: only load files from a trusted source.
    """
    printf(dataset)

    # Use the `root` argument (not the notebook-level ROOT constant) so the
    # function really works on the directory the caller asked for.
    fn = os.path.join(root, dataset, '{}_attributed_network.gpickle'.format(dataset))

    # Insert the suffix before the extension only; a plain str.replace would
    # also rewrite any other '.gpickle' occurrence inside the path.
    base, ext = os.path.splitext(fn)
    fnanon = '{}_anon{}'.format(base, ext)

    if os.path.exists(fnanon):
        print("{} already done.".format(dataset))
        H = nx.read_gpickle(fnanon)

        # Some existing files store 'label' as a list (presumably written by
        # an earlier version - TODO confirm); keep only its first element.
        if isinstance(H.graph['label'], list):
            H.graph['label'] = H.graph['label'][0]
            nx.write_gpickle(H, fnanon)
            printf('{} updated!'.format(fnanon))

        print(H.graph)
        print()
        return

    # converting node names to seq. numbers
    G = nx.read_gpickle(fn)
    G = nx.convert_node_labels_to_integers(G, first_label=1, ordering='default')

    # copy of the structure only: singletons (degree 0) are ignored because
    # nodes are added implicitly through their edges.
    H = nx.DiGraph()
    H.add_edges_from(G.edges())

    # setting graph metadata (inferring the minority label from class counts
    # over ALL nodes of G, including the singletons dropped from H)
    class_attr = G.graph['class']
    class_counts = Counter(G.nodes[n][class_attr] for n in G.nodes())
    print(dataset, class_counts)
    ranked = class_counts.most_common()
    majority_value, minority_value = ranked[0][0], ranked[1][0]

    H.graph['name'] = G.graph['name']
    H.graph['class'] = class_attr
    H.graph['labels'] = [majority_value, minority_value]
    H.graph['groups'] = ['M', 'm']
    H.graph['label'] = 'minority'

    # setting node attribute: minority (1 if the node belongs to the minority)
    # NOTE(review): attrs also contains the singleton nodes that are not in H;
    # set_node_attributes is expected to skip unknown nodes - confirm for the
    # installed networkx version.
    attrs = {n: {'minority': int(G.nodes[n][class_attr] == minority_value)}
             for n in G.nodes()}
    nx.set_node_attributes(H, attrs)

    # writing new version
    nx.write_gpickle(H, fnanon)
    print(H.graph)

    # sanity check on one node: anonymized flag vs. original class value
    # (skipped when the network has no edges, hence H has no nodes)
    sample = next(iter(H.nodes()), None)
    if sample is not None:
        printf('{}: {}, {}'.format(sample,
                                   H.nodes[sample]['minority'],
                                   G.nodes[sample][class_attr]))
    print()

<h2>Main</h2>

In [6]:
# Anonymize every configured dataset, one after the other.
for dataset in DATASETS:
    anon(dataset, root=ROOT)

2020-03-06 02:08:04	aps
aps already done.
{'class': 'pacs', 'label': 'minority', 'name': 'APS', 'labels': ['05.30.-d', '05.20.-y'], 'groups': ['M', 'm']}

2020-03-06 02:08:04	apsgender3
apsgender3 already done.
{'class': 'gender', 'label': 'minority', 'groups': ['M', 'm'], 'labels': ['male', 'female'], 'name': 'APSgender3'}

2020-03-06 02:08:04	apsgender8
apsgender8 already done.
{'class': 'gender', 'label': 'minority', 'groups': ['M', 'm'], 'labels': ['male', 'female'], 'name': 'APSgender8'}

2020-03-06 02:08:05	github
github already done.
{'class': 'gender', 'label': 'minority', 'name': 'Github', 'labels': ['male', 'female'], 'groups': ['M', 'm']}

2020-03-06 02:08:11	pokec
pokec already done.
{'class': 'gender', 'label': 'minority', 'name': 'Pokec', 'labels': [0, 1], 'groups': ['M', 'm']}

2020-03-06 02:09:32	wikipedia
wikipedia already done.
{'class': 'gender', 'label': 'minority', 'name': 'Wikipedia', 'labels': ['male', 'female'], 'groups': ['M', 'm']}

