In [1]:
# Helper function. Computes the weighted sum ca * vector_a + cb * vector_b
def add_vectors(vector_a, vector_b, ca, cb):
    """Return the weighted sum of two sparse vectors stored as dictionaries.

    Args:
        vector_a: dictionary {key: number}.
        vector_b: dictionary {key: number}.
        ca: coefficient applied to the entries of vector_a.
        cb: coefficient applied to the entries of vector_b.

    Returns:
        A new dictionary with one entry per key found in either input. A key
        that is missing from a vector (or mapped to None) counts as 0.0 for
        that vector.
    """
    def entry(vector, key):
        # Missing (or None) entries contribute nothing to the sum.
        value = vector.get(key)
        return 0.0 if value is None else value

    combined = {}
    for key in set(vector_a) | set(vector_b):
        combined[key] = ca * entry(vector_a, key) + cb * entry(vector_b, key)
    return combined

# test
def test_add_vectors():
    """Print the result of add_vectors on a small example and verify it."""
    ta = {"a": 0.6, "b": 0.4}
    tb = {"b": 1.0}
    result = add_vectors(ta, tb, 0.8, 0.2)
    # Single-argument print(...) gives the same output on Python 2 and 3.
    print(result)
    # 0.8 * ta + 0.2 * tb; compared with a tolerance because these are floats.
    expected = {"a": 0.48, "b": 0.52}
    assert set(result) == set(expected)
    for key in expected:
        assert abs(result[key] - expected[key]) < 1e-9

test_add_vectors()

{'a': 0.48, 'b': 0.52}


In [2]:
from math import fabs

# Helper function. Computes the element-wise absolute difference |vector_a - vector_b|
def diff_vectors(vector_a, vector_b):
    """Return the per-key absolute difference between two dictionary vectors.

    Args:
        vector_a: dictionary {key: number}.
        vector_b: dictionary {key: number}.

    Returns:
        A new dictionary {key: fabs(a - b)} covering every key found in either
        input. A key that is missing from a vector (or mapped to None) counts
        as 0.0 for that vector.
    """
    def entry(vector, key):
        # Missing (or None) entries are treated as zero.
        value = vector.get(key)
        return 0.0 if value is None else value

    all_keys = set(vector_a) | set(vector_b)
    return dict(
        (key, fabs(entry(vector_a, key) - entry(vector_b, key)))
        for key in all_keys
    )

# test
def test_diff_vectors():
    """Print the result of diff_vectors on a small example and verify it."""
    ta = {"a": 0.6, "b": 0.4}
    tb = {"b": 1.0}
    result = diff_vectors(ta, tb)
    # Single-argument print(...) gives the same output on Python 2 and 3.
    print(result)
    # |0.6 - 0| for "a" and |0.4 - 1.0| for "b"; compared with a tolerance.
    expected = {"a": 0.6, "b": 0.6}
    assert set(result) == set(expected)
    for key in expected:
        assert abs(result[key] - expected[key]) < 1e-9

test_diff_vectors()

{'a': 0.6, 'b': 0.6}


In [3]:
# Helper function. Takes as input a vector/dictionary of nodes and non-normalized weights
# and rescales the weights so that they add up to 1.0
def normalize_vector(vector):
    """Return a copy of ``vector`` whose values are divided by their sum.

    Args:
        vector: dictionary {node: weight}.

    Returns:
        A new dictionary {node: weight / sum(all weights)} with float values.
        An empty input gives an empty dictionary.

    Raises:
        ZeroDivisionError: if the vector is non-empty and its weights sum to 0.
    """
    total = sum(vector.values())
    normalized = {}
    for node, weight in vector.items():
        # Multiplying by 1.0 forces float division for integer weights.
        normalized[node] = 1.0 * weight / total
    return normalized

# test
def test_normalize_vector():
    """Print the result of normalize_vector on a small example and verify it."""
    ta = {"a": 6, "b": 4}
    result = normalize_vector(ta)
    # Single-argument print(...) gives the same output on Python 2 and 3.
    print(result)
    expected = {"a": 0.6, "b": 0.4}
    assert set(result) == set(expected)
    for key in expected:
        assert abs(result[key] - expected[key]) < 1e-9

test_normalize_vector()

{'a': 0.6, 'b': 0.4}


In [4]:
# Helper function. Takes as input a vector/dictionary, where we have nodes and probabilities
# (Implicit assumption that the probabilities add up to 1.0)
# Then removes the specified node, and renormalizes the remaining probabilities
# Warning: Will not work if the removed node has a probability of 1.0
def remove_normalize(vector, node_to_remove):
    """Drop one node from a probability vector and renormalize the rest.

    Args:
        vector: dictionary {node: probability}; the probabilities are assumed
            to add up to 1.0 (this is not checked).
        node_to_remove: node whose entry is dropped. If it is not in
            ``vector`` it is treated as having probability 0.0, so the other
            entries are returned unchanged (as floats).

    Returns:
        A new dictionary without ``node_to_remove`` in which every remaining
        probability is divided by (1.0 - probability of the removed node).

    Raises:
        ZeroDivisionError: if the removed node has probability 1.0 and other
            nodes remain in the vector.
    """
    prob = vector.get(node_to_remove)
    if prob is None:
        # An absent node carries no mass; previously this raised a TypeError.
        prob = 0.0
    remaining_mass = 1.0 - prob
    return {
        node: 1.0 * p / remaining_mass
        for node, p in vector.items()
        if node != node_to_remove
    }

# test
def test_normalize():
    """Print the result of remove_normalize on a small example and verify it."""
    ta = {"a": 0.5, "b": 0.4, "c": 0.1}
    result = remove_normalize(ta, "a")
    # Single-argument print(...) gives the same output on Python 2 and 3.
    print(result)
    # Removing "a" (0.5) rescales the remaining entries by 1 / (1 - 0.5).
    expected = {"b": 0.8, "c": 0.2}
    assert set(result) == set(expected)
    for key in expected:
        assert abs(result[key] - expected[key]) < 1e-9

test_normalize()

{'c': 0.2, 'b': 0.8}


In [40]:
# Helper function. Computes the logodds between the probabilities in two vectors
from math import log

# Computes the base-2 log-odds of two (optionally smoothed) probabilities.
# s is the smoothing factor: 0 means no smoothing (note: a 0 probability then causes an error)
# n is the total number of nodes
def logodds(pa, pb, s, n):
    """Return log2(a) - log2(b) for the smoothed probabilities a and b.

    Each probability p is smoothed as (p + s) / (1.0 + s * n).

    Args:
        pa: first probability; None is treated as 0.0.
        pb: second probability; None is treated as 0.0.
        s: smoothing factor added to each probability.
        n: total number of nodes, used in the smoothing denominator.

    Returns:
        The difference of the base-2 logarithms of the smoothed values.

    Raises:
        ValueError: from math.log when a smoothed probability is 0
            (for example s == 0 together with a zero or missing probability).
    """
    if pa is None:
        pa = 0.0
    if pb is None:
        pb = 0.0
    denominator = 1.0 + s * n
    smoothed_a = (pa + s) / denominator
    smoothed_b = (pb + s) / denominator
    return log(smoothed_a, 2) - log(smoothed_b, 2)

def logodds_vector(vector_a, vector_b, s):
    """Return the per-key log-odds between two probability vectors.

    Args:
        vector_a: dictionary {key: probability} (numerator side).
        vector_b: dictionary {key: probability} (denominator side).
        s: smoothing factor forwarded to logodds.

    Returns:
        A dictionary {key: logodds(vector_a[key], vector_b[key], s, n)} over
        the union of both key sets, where n is the size of that union. A key
        missing from one vector is passed to logodds as None.
    """
    all_keys = set(vector_a.keys()).union(vector_b.keys())
    node_count = len(all_keys)
    scores = {}
    for key in all_keys:
        scores[key] = logodds(vector_a.get(key), vector_b.get(key), s, node_count)
    return scores

# test
def test_logodds():
    """Print and verify logodds_vector on an unsmoothed and a smoothed example."""
    def check(actual, expected):
        # Float results are compared with a tolerance rather than exactly.
        assert set(actual) == set(expected)
        for key in expected:
            assert abs(actual[key] - expected[key]) < 1e-9

    # No smoothing: log2(0.25 / 0.75) and its negation.
    ta = {"a": 0.25, "b": 0.75}
    tb = {"a": 0.75, "b": 0.25}
    result = logodds_vector(ta, tb, 0)
    # Single-argument print(...) gives the same output on Python 2 and 3.
    print(result)
    check(result, {"a": -1.584962500721156, "b": 1.584962500721156})

    # Smoothing of 1 over 4 nodes; "c" and "d" are missing from ta.
    ta = {"a": 0.5, "b": 0.5}
    tb = {"a": 0.25, "b": 0.25, "c": 0.25, "d": 0.25}
    result = logodds_vector(ta, tb, 1)
    print(result)
    check(result, {
        "a": 0.26303440583379367,
        "b": 0.26303440583379367,
        "c": -0.3219280948873622,
        "d": -0.3219280948873622,
    })


test_logodds()


{'a': -1.584962500721156, 'b': 1.584962500721156}
{'a': 0.26303440583379367, 'c': -0.3219280948873622, 'b': 0.26303440583379367, 'd': -0.3219280948873622}


In [41]:
# The Good Vibes model defines the probability of a vibe being noticed as
# i->j = count(i,j)/sum(count(i,*)) * count(i,j)/sum(count(*,j))
# We represent the adjacency matrix as a dictionary of dictionaries
# i: {j: count(i,j)/sum(count(i,*)) * count(i,j)/sum(count(*,j)) }
def get_good_vibes_adjacency_matrix(msg_counts):
    """Build the Good Vibes adjacency matrix from message counts.

    Args:
        msg_counts: iterable of (i, j, count) triples, where count is the
            number of messages from node i to node j. Triples with i == j
            (self-sent messages) are ignored entirely. Several triples for
            the same (i, j) pair are added together.

    Returns:
        A dictionary {i: {j: weight}} with
        weight = count(i,j)/sum(count(i,*)) * count(i,j)/sum(count(*,j)),
        plus a self-loop entry {i: 1 - sum of i's outgoing weights} that holds
        the unallocated probability. Only nodes with at least one outgoing
        (non-self) message get a row.

    Raises:
        ZeroDivisionError: if all counts sent by, or received by, a node are 0.
    """
    # Aggregate per-pair counts together with the per-sender and per-receiver
    # totals. Self-messages are skipped here, so they can neither reach the
    # edge computation below (where they used to cause a KeyError or a bogus
    # self-edge) nor distort the totals.
    pair_counts = dict()
    sent_totals = dict()
    received_totals = dict()
    for sender, receiver, count in msg_counts:
        if sender == receiver:
            continue
        pair = (sender, receiver)
        pair_counts[pair] = pair_counts.get(pair, 0.0) + count
        sent_totals[sender] = sent_totals.get(sender, 0.0) + count
        received_totals[receiver] = received_totals.get(receiver, 0.0) + count

    # We compute the weights using the Good Vibes formula
    adjacency_matrix = dict()
    for (sender, receiver), count in pair_counts.items():
        sent_share = 1.0 * count / sent_totals[sender]
        received_share = 1.0 * count / received_totals[receiver]
        adjacency_matrix.setdefault(sender, dict())[receiver] = sent_share * received_share

    # We assign the unallocated probability to a self-loop. Only the inner
    # dictionaries are modified, so iterating over the outer one is safe.
    for node, edges in adjacency_matrix.items():
        edges[node] = 1 - sum(edges.values())

    return adjacency_matrix

# Sample input for get_good_vibes_adjacency_matrix: one (i, j, count) tuple per
# directed pair, i.e. count messages were sent from node i to node j.
# NOTE(review): no visible cell passes this list to the function yet; the
# adjacency matrices used further down are written out by hand.
message_counts = [
    ("a", "b", 50),
    ("a", "c", 2500),
    ("b", "c", 2500),
    ("b", "d", 250),
    ("c", "a", 250),
    ("c", "b", 250),
    ("c", "d", 250),
    ("d", "c", 500),
    ("d", "a", 20),
]



In [42]:
# This is our adjacency matrix, written out by hand as
# node: {neighbor: probability that a vibe propagates from node to neighbor}.
# Every row adds up to 1.0; the self-loop entry (e.g. "a": {"a": 0.25}) holds
# the probability that is not propagated to another node.
# TODO: Construct the adjacency matrix for good-vibes based on message counts
# i: {j: count(i,j)/sum(count(i,*)) * count(i,j)/sum(count(*,j)) }
adjacency_matrix = {"a": {"b": 0.5, "c":0.25, "a":0.25}, 
              "b": {"c": 0.25, "d":0.25, "b": 0.5}, 
              "c": {"a": 0.25, "b": 0.25, "d": 0.25, "c": 0.25},
              "d": {"c":0.25, "a":0.1, "d": 0.65}
             }

In [43]:
# node_probs is a dictionary { "node": probability, ...} with the probabilities on round k
# revised_probs is a dictionary { "node": probability, ... } with the probabilities on round k+1
def propagate(node_probs, adjacency, smoothing_probs, d):
    """Run one propagation round (round k -> round k+1) and apply smoothing.

    Args:
        node_probs: dictionary {node: probability} for round k.
        adjacency: dictionary {n: {m: probability that a vibe is propagated
            from n to m}}. Every key of node_probs must also be a key of
            adjacency; a missing node raises AttributeError.
        smoothing_probs: dictionary {node: probability} blended into the
            propagated probabilities.
        d: weight of smoothing_probs; the propagated vector gets weight 1-d.

    Returns:
        A new dictionary: add_vectors(propagated, smoothing_probs, 1-d, d).
    """
    revised_probs = dict()
    # dict.items() is used instead of the Python-2-only dict.iteritems() so the
    # function runs unchanged on both Python 2 and Python 3.
    for n, p_n in node_probs.items():
        # Probabilities that a vibe is propagated from n to each neighbor m.
        propagation_probs_from_n = adjacency.get(n)
        for m, p_n_m in propagation_probs_from_n.items():
            # Accumulate the mass arriving at m from every source n.
            revised_probs[m] = revised_probs.get(m, 0) + p_n * p_n_m

    return add_vectors(revised_probs, smoothing_probs, 1-d, d)

In [44]:
def computePagerank(adjacency, d, max_iterations=30, tolerance=0.00001):
    """Compute PageRank scores by repeated propagation with uniform smoothing.

    The starting vector and the smoothing vector are both uniform over the
    keys of ``adjacency``. Iteration stops as soon as the summed absolute
    change between two consecutive rounds drops below ``tolerance``, or after
    ``max_iterations`` rounds.

    Args:
        adjacency: dictionary {n: {m: propagation probability from n to m}}.
        d: weight of the uniform smoothing vector in every round (the
            propagated probabilities get weight 1-d).
        max_iterations: upper bound on the number of propagation rounds.
        tolerance: convergence threshold on the summed absolute difference.

    Returns:
        Dictionary {node: score} from the last round that was computed.
    """
    nodes = set(adjacency.keys())
    smoothing = dict()
    for n in nodes:
        smoothing[n] = 1.0/len(nodes)
    # Start from a copy so the returned vector is never the smoothing dict itself.
    pagerank = dict(smoothing)

    for i in range(0, max_iterations):
        pagerank_new = propagate(pagerank, adjacency, smoothing, d)
        # check for convergence
        diff = sum(diff_vectors(pagerank_new, pagerank).values())
        pagerank = pagerank_new
        if diff < tolerance:
            break

    return pagerank

In [45]:
def computePersonalizedPagerank(adjacency, start, d, iterations=30):
    """Compute personalized PageRank scores relative to one start node.

    Both the starting vector and the smoothing vector put all of their
    probability on ``start``. Exactly ``iterations`` propagation rounds are
    run; there is no convergence check.

    Args:
        adjacency: dictionary {n: {m: propagation probability from n to m}}.
        start: node that holds the whole starting and smoothing probability.
        d: weight of the smoothing vector in every round (the propagated
            probabilities get weight 1-d).
        iterations: number of propagation rounds to run.

    Returns:
        Dictionary {node: score} after the last round.
    """
    ppr = {start: 1.0}
    smoothing_probs = {start: 1.0}
    for i in range(0, iterations):
        ppr = propagate(ppr, adjacency, smoothing_probs, d)

    return ppr

In [46]:
# d is the weight given to the smoothing vector in every propagation round;
# the propagated probabilities get weight 1-d.
d = 0.2
# PageRank over the hand-written adjacency_matrix.
pr = computePagerank(adjacency_matrix, d)
print "PAGERANK\n", pr  

# Personalized PageRank: all starting and smoothing probability sits on start_node.
start_node = "a"
ppr = computePersonalizedPagerank(adjacency_matrix, start_node, d)
print "PERSONALIZED PAGERANK for ", start_node, "\n", ppr

PAGERANK
{'a': 0.157142476429525, 'c': 0.25000000000000006, 'b': 0.2714270853523047, 'd': 0.3214304382181704}
PERSONALIZED PAGERANK for  a 
{'a': 0.32000000000000006, 'c': 0.2000000000000001, 'b': 0.2800000000000001, 'd': 0.2000000000000001}


In [47]:
# Drop start_node from both score vectors and renormalize the remaining nodes,
# so the two distributions are compared over the same set of nodes.
norm_pr = remove_normalize(pr, start_node)
norm_ppr = remove_normalize(ppr, start_node)
# Unsmoothed (s = 0) base-2 log-odds of the personalized score against the
# global score: positive values mean a node scores higher for start_node than overall.
log_odds = logodds_vector(norm_ppr, norm_pr, 0)

print "RENORMALIZED PAGERANK after removing ", start_node, "\n", norm_pr
print "RENORMALIZED PERSONALIZED PAGERANK for ", start_node, "\n", norm_ppr
print "LOG-ODDS score for ", start_node, "\n",log_odds

RENORMALIZED PAGERANK after removing  a 
{'c': 0.29661003551461623, 'b': 0.3220319897039034, 'd': 0.38135797478148065}
RENORMALIZED PERSONALIZED PAGERANK for  a 
{'c': 0.2941176470588237, 'b': 0.4117647058823531, 'd': 0.2941176470588237}
LOG-ODDS score for  a 
{'c': -0.012174062289866727, 'b': 0.35461616718444344, 'd': -0.37475252051987806}


In [51]:
# node_probs is a dictionary { "node": probability, ...} with the probabilities on round k
# revised_probs is a dictionary { "node": probability, ... } with the probabilities on round k+1
def good_vibes(node_probs, adjacency, smoothing_probs, d):
    """Run one Good Vibes round: first-arrival propagation followed by smoothing.

    Unlike an accumulating propagation, a target node m receives probability
    only from the first source n that reaches it in this round; later
    contributions to the same m are ignored.

    NOTE(review): "first" is decided by the iteration order of ``node_probs``
    and of the inner adjacency dictionaries. When several sources point at the
    same target, the result therefore depends on dictionary ordering; confirm
    that this is intended.

    Args:
        node_probs: dictionary {node: probability} for round k.
        adjacency: dictionary {n: {m: probability that a vibe is propagated
            from n to m}}. Every key of node_probs must also be a key of
            adjacency; a missing node raises AttributeError.
        smoothing_probs: dictionary {node: probability} blended into the
            propagated probabilities.
        d: weight of smoothing_probs; the propagated vector gets weight 1-d.

    Returns:
        A new dictionary: add_vectors(propagated, smoothing_probs, 1-d, d).
        The result is not normalized.
    """
    revised_probs = dict()
    # dict.items() is used instead of the Python-2-only dict.iteritems() so the
    # function runs unchanged on both Python 2 and Python 3.
    for n, p_n in node_probs.items():
        # Probabilities that a vibe is propagated from n to each neighbor m.
        propagation_probs_from_n = adjacency.get(n)
        for m, p_n_m in propagation_probs_from_n.items():
            # We propagate through a DAG, so if there is already assigned
            # probability for m, we are done with it.
            if revised_probs.get(m) is not None:
                continue
            revised_probs[m] = 0.0 + p_n * p_n_m

    result = add_vectors(revised_probs, smoothing_probs, 1-d, d)
    return result

In [52]:
def computeGoodVibes(adjacency, start, d, iterations=30):
    """Compute Good Vibes scores relative to one start node.

    The score vector and the smoothing vector both start with all of their
    probability on ``start``. In every round the scores are replaced by the
    normalized output of good_vibes, and the smoothing vector is replaced by
    the normalized blend (1-d) * new scores + d * previous scores.

    Args:
        adjacency: dictionary {n: {m: propagation probability from n to m}}.
        start: node that holds the whole starting probability.
        d: smoothing weight passed to good_vibes and used for the blend above.
        iterations: number of rounds to run; there is no convergence check.

    Returns:
        Dictionary {node: score} after the last round, normalized to sum to 1.0
        when at least one round was run.
    """
    gv = {start: 1.0}
    smoothing_probs = {start: 1.0}
    for i in range(0, iterations):
        gv_new = normalize_vector(good_vibes(gv, adjacency, smoothing_probs, d))
        # The next round is smoothed towards a mix of the new and the previous scores.
        smoothing_probs = normalize_vector(add_vectors(gv_new, gv, 1-d, d))
        gv = gv_new
    return gv

In [53]:
# This is our adjacency matrix for the Good Vibes run, written out by hand as
# node: {neighbor: probability that a vibe propagates from node to neighbor}.
# It has no self-loop entries, so the rows do not add up to 1.0.
# TODO: Construct the adjacency matrix for good-vibes based on message counts
# i: {j: count(i,j)/sum(count(i,*)) * count(i,j)/sum(count(*,j)) }
adjacency_gv = {"a": {"b": 0.5, "c":0.25}, 
              "b": {"c": 0.25, "d":0.25}, 
              "c": {"a": 0.25, "b": 0.25, "d": 0.25},
              "d": {"c":0.25, "a":0.1}
             }

In [55]:
# d is the smoothing weight handed to computeGoodVibes.
d = 0.2
# Node that holds all of the starting probability.
start_gv = "a"
good_vibes_vector = computeGoodVibes(adjacency_gv, start_gv, d)
# The start node is dropped and the remaining scores renormalized before printing.
print "GOOD VIBES\n", remove_normalize(good_vibes_vector, start_gv)

GOOD VIBES
{'c': 0.2500000080250903, 'b': 0.5000000160501806, 'd': 0.24999997592472903}
