#### This notebook demonstrates the use of InFoRM algorithms to mitigate bias for PageRank
InFoRM includes 3 algorithms, namely debiasing the input graph, debiasing the mining model and debiasing the mining result. We will show how to run all 3 algorithms for PageRank in this notebook.

### First, get the vanilla PageRank vector

In [1]:
# load necessary packages
import pickle
import load_graph
import utils

import numpy as np
import networkx as nx

from scipy.sparse import csc_matrix

In [2]:
def vanilla(name):
    """Compute the vanilla PageRank vector of a dataset and cache it on disk.

    The vector is stored under the key ``name`` in the dictionary pickled at
    ``result/pagerank/vanilla.pickle``; entries already present in that file
    for other keys are preserved.

    Args:
        name: dataset name. ``'ppi'`` is loaded with ``load_graph.read_mat``;
            any other name is loaded with ``load_graph.read_graph``.

    Returns:
        None. The result is only written to the pickle file.
    """
    # Reuse previously cached vectors when the cache file is available.
    try:
        with open('result/pagerank/vanilla.pickle', 'rb') as f:
            rdict = pickle.load(f)
    except (FileNotFoundError, EOFError):
        # Missing or empty cache file: start from an empty dictionary.
        # Any other failure (permission error, corrupt pickle, Ctrl-C, ...)
        # is surfaced instead of silently discarding the existing cache.
        rdict = dict()

    if name == 'ppi':
        data = load_graph.read_mat(name)
        adj = data['adjacency']
    else:
        graph = load_graph.read_graph(name)
        adj = nx.to_scipy_sparse_matrix(graph, dtype='float', format='csc')

    adj = utils.symmetric_normalize(adj)
    graph = nx.from_scipy_sparse_matrix(adj, create_using=nx.Graph())

    r = utils.power_method(graph, c=0.85)
    # 1 x n row of scores, transposed into an n x 1 sparse column
    r = csc_matrix(np.array([list(r.values())]).transpose())
    rdict[name] = r

    with open('result/pagerank/vanilla.pickle', 'wb') as f:
        pickle.dump(rdict, f, protocol=pickle.HIGHEST_PROTOCOL)

In [3]:
# compute the vanilla PageRank vector for the 'ppi' dataset and store it in the pickle cache
vanilla('ppi')

### Let's debias the input graph

In [4]:
# load debias model
from method.debias_graph import DebiasGraph

In [5]:
def debias_input_graph(name, c=0.85, alpha=0., lr=0., metric=None):
    """Debias the input graph of a dataset, then rank nodes on the debiased graph.

    Args:
        name: dataset name. ``'ppi'`` is loaded with ``load_graph.read_mat``;
            any other name is loaded with ``load_graph.read_graph``.
        c: forwarded to both ``DebiasGraph.pagerank`` and ``utils.power_method``.
        alpha: forwarded to ``DebiasGraph.pagerank``.
        lr: forwarded to ``DebiasGraph.pagerank``.
        metric: forwarded to ``utils.get_similarity_matrix``.

    Returns:
        The scores returned by ``utils.power_method`` as an n x 1 ``csc_matrix``.
    """
    # load and normalize the adjacency matrix
    if name != 'ppi':
        adjacency = nx.to_scipy_sparse_matrix(
            load_graph.read_graph(name), dtype='float', format='csc'
        )
    else:
        adjacency = load_graph.read_mat(name)['adjacency']
    adjacency = utils.symmetric_normalize(adjacency)

    # similarity matrix: compute, filter, then normalize
    raw_similarity = utils.get_similarity_matrix(adjacency, metric=metric)
    filtered_similarity = utils.filter_similarity_matrix(raw_similarity, sigma=0.75)
    similarity = utils.symmetric_normalize(filtered_similarity)

    # debias the graph, then run the power method on the debiased graph
    debiaser = DebiasGraph()
    fair_graph = debiaser.pagerank(
        adjacency, similarity, alpha, lr=lr, c=c, tol=1e-6, maxiter=100
    )
    scores = utils.power_method(fair_graph, c=c)

    # 1 x n row of scores, transposed into an n x 1 sparse column
    score_row = np.array([list(scores.values())])
    score_column = csc_matrix(score_row.T)

    print('dataset: {}\t metric: {} similarity'.format(name, metric))
    print('Finished!')

    return score_column

In [6]:
# debias the input graph once per similarity measure (jaccard index, cosine similarity)
# and pickle each result to its own file
for sim_metric, out_path in (
    ('jaccard', 'result/pagerank/graph/jaccard.pickle'),
    ('cosine', 'result/pagerank/graph/cosine.pickle'),
):
    result = dict()
    result['ppi'] = debias_input_graph(name='ppi', c=0.85, alpha=1e6, lr=5e-4, metric=sim_metric)
    with open(out_path, 'wb') as f:
        pickle.dump(result, f, protocol=pickle.HIGHEST_PROTOCOL)

dataset: ppi	 metric: jaccard similarity
Finished!
dataset: ppi	 metric: cosine similarity
Finished!


### Let's debias the mining model

In [7]:
# load debias model
from method.debias_model import DebiasModel

In [8]:
def debias_mining_model(name, alpha=0., metric=None, c=0.85):
    """Run the model-debiasing variant of PageRank on a dataset.

    Args:
        name: dataset name. ``'ppi'`` is loaded with ``load_graph.read_mat``;
            any other name is loaded with ``load_graph.read_graph``.
        alpha: forwarded to ``DebiasModel.pagerank``.
        metric: forwarded to ``utils.get_similarity_matrix``.
        c: forwarded to ``DebiasModel.pagerank``. Defaults to 0.85, the value
            that used to be hard-coded here, so existing calls are unaffected;
            exposing it mirrors the ``c`` parameter of ``debias_input_graph``.

    Returns:
        The scores returned by ``DebiasModel.pagerank`` as an n x 1 ``csc_matrix``.
    """
    # load dataset
    if name == 'ppi':
        data = load_graph.read_mat(name)
        adj = data['adjacency']
    else:
        graph = load_graph.read_graph(name)
        adj = nx.to_scipy_sparse_matrix(graph, dtype='float', format='csc')
    adj = utils.symmetric_normalize(adj)

    # build similarity matrix
    sim = utils.filter_similarity_matrix(utils.get_similarity_matrix(adj, metric=metric), sigma=0.75)
    sim = utils.symmetric_normalize(sim)

    # debias pagerank
    FairModel = DebiasModel()
    r = FairModel.pagerank(adj, sim, alpha, c=c)
    # 1 x n row of scores, transposed into an n x 1 sparse column
    r = csc_matrix(np.array([list(r.values())]).transpose())

    print('dataset: {}\t metric: {} similarity'.format(name, metric))
    print('Finished!')

    return r

In [9]:
alpha = 0.5

# debias the mining model once per similarity measure (jaccard index, cosine similarity)
# and pickle each result to its own file
for sim_metric, out_path in (
    ('jaccard', 'result/pagerank/model/jaccard.pickle'),
    ('cosine', 'result/pagerank/model/cosine.pickle'),
):
    result = dict()
    result['ppi'] = debias_mining_model(name='ppi', alpha=alpha, metric=sim_metric)
    with open(out_path, 'wb') as f:
        pickle.dump(result, f, protocol=pickle.HIGHEST_PROTOCOL)

dataset: ppi	 metric: jaccard similarity
Finished!
dataset: ppi	 metric: cosine similarity
Finished!


### Let's debias the mining result

In [10]:
# load debias model
from method.debias_result import DebiasResult

In [11]:
def debias_mining_result(name, vanilla, alpha=0., metric=None):
    """Debias an already-computed vanilla PageRank result for a dataset.

    Args:
        name: dataset name. Used both as the key into ``vanilla`` and to pick
            the loader (``load_graph.read_mat`` for ``'ppi'``,
            ``load_graph.read_graph`` otherwise).
        vanilla: mapping from dataset name to its vanilla PageRank result.
        alpha: forwarded to ``DebiasResult.fit``.
        metric: forwarded to ``utils.get_similarity_matrix``.

    Returns:
        Whatever ``DebiasResult.fit`` returns for the vanilla result.
    """
    # look up the vanilla result first, so a missing entry fails before any loading
    vanilla_scores = vanilla[name]

    # load and normalize the adjacency matrix
    if name != 'ppi':
        adjacency = nx.to_scipy_sparse_matrix(
            load_graph.read_graph(name), dtype='float', format='csc'
        )
    else:
        adjacency = load_graph.read_mat(name)['adjacency']
    adjacency = utils.symmetric_normalize(adjacency)

    # similarity matrix: compute, filter, then normalize
    raw_similarity = utils.get_similarity_matrix(adjacency, metric=metric)
    filtered_similarity = utils.filter_similarity_matrix(raw_similarity, sigma=0.75)
    similarity = utils.symmetric_normalize(filtered_similarity)

    # debias the vanilla result
    debiaser = DebiasResult()
    fair_scores = debiaser.fit(vanilla_scores, similarity, alpha)

    print('dataset: {}\tmetric: {} similarity'.format(name, metric))
    print('Finished!')

    return fair_scores

In [12]:
alpha = 0.5

# Load the cached vanilla PageRank vectors. They are bound to `vanilla_results`
# rather than `vanilla` so that the vanilla() function defined earlier in this
# notebook is not overwritten (re-running the cell that calls it would otherwise
# fail with "'dict' object is not callable").
with open('result/pagerank/vanilla.pickle', 'rb') as f:
    vanilla_results = pickle.load(f)

# debias the mining result once per similarity measure (jaccard index, cosine similarity)
# and pickle each result to its own file
for sim_metric, out_path in (
    ('jaccard', 'result/pagerank/result/jaccard.pickle'),
    ('cosine', 'result/pagerank/result/cosine.pickle'),
):
    result = dict()
    result['ppi'] = debias_mining_result(name='ppi', vanilla=vanilla_results, alpha=alpha, metric=sim_metric)
    with open(out_path, 'wb') as f:
        pickle.dump(result, f, protocol=pickle.HIGHEST_PROTOCOL)

dataset: ppi	metric: jaccard similarity
Finished!
dataset: ppi	metric: cosine similarity
Finished!


### Now, let's see how much bias we removed and how good the debiased results are

In [13]:
# load evaluation functions
from evaluate.pagerank import *

In [14]:
# Evaluate the 'ppi' dataset for every debiasing task ('graph', 'model', 'result')
# under both similarity metrics ('jaccard', 'cosine').
# NOTE(review): `evaluate` comes from the star import of evaluate.pagerank; it presumably
# reads the pickles written by the cells above -- confirm against that module.
evaluate(name='ppi', metric='jaccard', task='graph')
evaluate(name='ppi', metric='cosine', task='graph')
evaluate(name='ppi', metric='jaccard', task='model')
evaluate(name='ppi', metric='cosine', task='model')
evaluate(name='ppi', metric='jaccard', task='result')
evaluate(name='ppi', metric='cosine', task='result')

{'dataset': 'ppi', 'metric': 'jaccard similarity', 'task': 'debias the input graph', 'diff': 0.18492063442801046, 'kl': 0.0018969515975970268, 'precision': {50: 0.92}, 'ndcg': {50: 0.9442572100682158}, 'bias': 0.43385507693395464}
{'dataset': 'ppi', 'metric': 'cosine similarity', 'task': 'debias the input graph', 'diff': 0.3280772974802958, 'kl': 0.008071437274667047, 'precision': {50: 0.78}, 'ndcg': {50: 0.8381436001251663}, 'bias': 0.686652500412535}
{'dataset': 'ppi', 'metric': 'jaccard similarity', 'task': 'debias the mining model', 'diff': 0.21119486360528836, 'kl': 0.004778336149265419, 'precision': {50: 0.92}, 'ndcg': {50: 0.9418922547520626}, 'bias': 0.5082799234158019}
{'dataset': 'ppi', 'metric': 'cosine similarity', 'task': 'debias the mining model', 'diff': 0.2799128177329693, 'kl': 0.00956437934504764, 'precision': {50: 0.9}, 'ndcg': {50: 0.9283320973565875}, 'bias': 0.674944673637444}
{'dataset': 'ppi', 'metric': 'jaccard similarity', 'task': 'debias the mining result', '