In [None]:
import copy
import math
import pickle
import random
from collections import defaultdict

import numpy as np

# 1. Load input

In [None]:
# Size tag embedded in the names of the pickle files this notebook reads and writes
# (presumably the number of input lines processed upstream — TODO confirm).
lines = 500000

In [None]:
# Load the nested mapping Eij[i][j] of pairwise retweet counts.
# The context manager closes the file handle as soon as the pickle has been read.
# NOTE: pickle.load can execute arbitrary code; only load files produced by a trusted pipeline.
with open("./extracted/E_newman" + str(lines) + ".p", "rb") as fh:
    Eij = pickle.load(fh)

In [None]:
# Load the collection of users and materialise it as a list so that len(U) is available.
# The context manager closes the file handle as soon as the pickle has been read.
# NOTE: pickle.load can execute arbitrary code; only load files produced by a trusted pipeline.
with open("./extracted/U" + str(lines) + ".p", "rb") as fh:
    U = list(pickle.load(fh))

In [None]:
# Largest retweet count recorded for any (i, j) pair.
Eij_max = max(
    count
    for targets in Eij.values()
    for count in targets.values()
)
print('Max number of retweets:', Eij_max)

# B. Apply Newman's algorithm

Network shape:

- We have **n** nodes/users.
- We count how many times a user retweeted another user (directly available from `Eij`).
- From this information, we want to infer the friendship structure of the users.
- The number of possible pairs (directed edges) is **n*(n-1)**.
- Each pair is treated as having **N** measurements, where N is the largest retweet count observed for any pair.

In [None]:
# Basic dimensions of the inference problem.
n = len(U)              # number of users (nodes)
N = Eij_max             # measurements per pair, taken as the largest observed count
pairs = n * (n - 1)     # ordered pairs of distinct users

# Pairs that appear in Eij versus pairs with no recorded interaction.
active_pairs = sum(map(len, Eij.values()))
dead_pairs = pairs - active_pairs

print('Number of nodes:', n)
print('Number of measurements for each pair:', N)
print('Number of possible pairs:', pairs)

print('Number of active pairs:', active_pairs)
print('Number of pairs without interactions:', dead_pairs)

In [None]:
# Convergence criterion: newman() stops iterating once the L2 norm of the change
# in the edge posteriors between two consecutive iterations drops below eps.
eps = 10**(-3) # convergence criterion 
# NOTE(review): the newman() call in this notebook passes a literal 1 instead of this value.
repetitions = 100 # number of times we repeat the algorithm

In [None]:
def flatten(obj):
    """Flatten one level of nesting into a flat list.

    Parameters
    ----------
    obj : list or dict
        * list of iterables -> the inner items, concatenated in order;
        * dict of dicts (including ``defaultdict``) -> the inner values,
          in iteration order of the outer and inner mappings.

    Returns
    -------
    list
        The flattened items.

    Raises
    ------
    TypeError
        If ``obj`` is neither a list nor a dict. Failing loudly here avoids
        silently handing ``None`` to the caller.
    """
    # isinstance covers dict subclasses such as defaultdict, so one branch suffices.
    if isinstance(obj, dict):
        return [value for inner in obj.values() for value in inner.values()]
    if isinstance(obj, list):
        return [item for inner in obj for item in inner]
    raise TypeError(
        "flatten() expects a list of lists or a dict of dicts, got "
        + type(obj).__name__
    )

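Initialize Newman's parameters α, β, ρ within the ranges below (the implementation starts from fixed values inside these ranges rather than random draws)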

- α in [0.5,1], the true positive rate
- β in [0, 0.5], the false positive rate 
- ρ in [0, 1], the prior probability of any edge existing on the network


In [None]:
def _edge_posterior(e, N, a, b, r):
    """Posterior probability of an edge given ``e`` positive observations out of ``N``.

    Equivalent to
    ``r*a**e*(1-a)**(N-e) / (r*a**e*(1-a)**(N-e) + (1-r)*b**e*(1-b)**(N-e))``
    but evaluated through the log-odds, so that large ``N`` cannot underflow both
    terms to zero and turn the ratio into 0/0.
    """
    lower = 1e-300            # keeps log() finite when an estimate is exactly 0
    upper = 1.0 - 2.0 ** -53  # largest float below 1, keeps log1p() finite at exactly 1
    a = min(max(a, lower), upper)
    b = min(max(b, lower), upper)
    r = min(max(r, lower), upper)

    log_odds = (
        math.log(r) - math.log1p(-r)
        + e * (math.log(a) - math.log(b))
        + (N - e) * (math.log1p(-a) - math.log1p(-b))
    )
    # Numerically stable logistic function: never exponentiates a positive number.
    if log_odds >= 0:
        return 1.0 / (1.0 + math.exp(-log_odds))
    odds = math.exp(log_odds)
    return odds / (1.0 + odds)


def newman(Eij, dead_pairs, pairs, repetitions, eps, N=None, max_iter=200):
    """Infer edge probabilities from repeated noisy observations, following Newman [1].

    Runs an expectation-maximisation loop that alternates between estimating the
    posterior edge probability of every observed pair (E-step) and re-estimating
    the true-positive rate, false-positive rate and edge prior (M-step).

    [1] M. E. J. Newman, ``Network structure from rich but noisy data'', Nature Physics,
    vol. 14, 2018, pp. 67-75.

    Parameters
    ----------
    Eij : dict of dict
        ``Eij[i][j]`` is the number of positive observations for the ordered pair (i, j).
    dead_pairs : int
        Number of ordered pairs with no observation at all (absent from ``Eij``).
    pairs : int
        Total number of ordered pairs.
    repetitions : int
        Unused. Kept so existing calls keep working; the starting values are
        fixed, so a single run is deterministic.
    eps : float
        Iteration stops once the L2 norm of the change in the posteriors of the
        observed pairs between two consecutive iterations is below this value.
    N : int, optional
        Number of measurements per pair. Defaults to the largest count in ``Eij``.
    max_iter : int, optional
        Upper bound on the number of EM iterations (default 200).

    Returns
    -------
    tuple
        ``(a, b, r, Qij)``: the estimated true-positive rate, false-positive rate,
        edge prior, and a ``defaultdict(dict)`` with ``Qij[i][j]`` the posterior
        edge probability for every pair present in ``Eij``.

    Raises
    ------
    ValueError
        If ``N`` cannot be derived (empty ``Eij``), is not positive, or is smaller
        than an observed count.
    ZeroDivisionError
        If the posteriors collapse so that an M-step denominator becomes zero.
    """
    counts = [e for row in Eij.values() for e in row.values()]
    if N is None:
        if not counts:
            raise ValueError("Eij contains no observations; pass N explicitly")
        N = max(counts)
    if N <= 0:
        raise ValueError("N must be positive")
    if counts and max(counts) > N:
        raise ValueError("N is smaller than the largest observed count")

    # The posterior depends on the pair only through its count, so it is
    # evaluated once per distinct count instead of once per pair.
    distinct_counts = set(counts)

    # Fixed starting point (same init values as in constrained-em).
    a, b, r = 0.9999, 0.0001, 0.003
    Qij = defaultdict(dict)
    Q_dead = 0.0

    for it in range(max_iter):
        if it > 0:
            # M-step: re-estimate the parameters from the previous posteriors.
            edge_mass = (
                sum(q for row in Qij.values() for q in row.values())
                + dead_pairs * Q_dead
            )
            non_edge_mass = (
                sum(1 - q for row in Qij.values() for q in row.values())
                + dead_pairs * (1 - Q_dead)
            )
            a = sum(Eij[i][j] * Qij[i][j] for i in Eij for j in Eij[i]) / (N * edge_mass)
            b = sum(Eij[i][j] * (1 - Qij[i][j]) for i in Eij for j in Eij[i]) / (N * non_edge_mass)
            r = edge_mass / pairs

        # E-step: refresh the posteriors and accumulate the squared change on the
        # fly, which avoids deep-copying the whole table on every iteration.
        posterior = {e: _edge_posterior(e, N, a, b, r) for e in distinct_counts}
        squared_change = 0.0
        for i, row in Eij.items():
            for j, e in row.items():
                q_new = posterior[e]
                if it > 0:
                    squared_change += (q_new - Qij[i][j]) ** 2
                Qij[i][j] = q_new
        # Pairs without any observation all share the posterior for e = 0.
        Q_dead = _edge_posterior(0, N, a, b, r)

        if it > 0 and math.sqrt(squared_change) < eps:
            break

    return a, b, r, Qij

In [None]:
# Pin the global RNG state, then run Newman's inference with a single repetition.
random.seed(10)
a, b, r, Qij = newman(
    Eij=Eij,
    dead_pairs=dead_pairs,
    pairs=pairs,
    repetitions=1,
    eps=eps,
)

In [None]:
# Persist the inferred edge posteriors next to the input pickles.
# The context manager closes the file even if pickling fails. Only I/O and
# pickling errors are reported here, so interrupts and programming errors
# still surface instead of being silently swallowed.
try:
    with open("./extracted/Q_newman_" + str(lines) + ".p", "wb") as fh:
        pickle.dump(Qij, fh)
except (OSError, pickle.PicklingError) as err:
    print("Unable to write to file", err)