In [123]:
import numpy as np
import math
import random
from scipy.stats import bernoulli


def MV(Psi):
    """
    The majority voting method.
    """
    return [np.bincount([y[1] for y in x])/(0.0+len(x)) for x in Psi]


def EM(N, M, Psi, inv_Psi):
    """
    The expectation maximization method (EM) from Dong et al., 2013.
    """
    # to compute exp of each cell in a matrix
    exp = np.vectorize(math.exp)

    # convergence eps
    eps = 0.01

    # init accuracies
    A = [0.8]*N
    while True:
        # accuracy scores
        AS = [math.log(x/(1-x)) for x in A]

        # confidence of values
        C_v = [[0, 0]]*M

        # summing accuracy scores of value providers
        for obj_id in range(M):
            for (source_id, value_id) in Psi[obj_id]:
                C_v[obj_id][value_id] += AS[source_id]

        # compute probs        
        p = [x/sum(x) for x in exp(C_v)]

        # update accuracies
        A_new = [np.average([p[y[0]][y[1]] for y in x]) for x in inv_Psi]

        # convergence check
        if sum(np.subtract(A,A_new)) < eps:
            break
        else:
            A = A_new
    return A, p


def log_likelihood(Psi, A, p):
    """
    Computes the log likelihood of the Psi using A and p.
    """
    res = 0
    for obj_id in range(M):
        for source_id, value_id in Psi[obj_id]:
            if value_id == 1:
                res += math.log(A[source_id]*p[obj_id][value_id])
            else:
                res += math.log((1-A[source_id])*(1-p[obj_id][value_id]))
    return res


def random_log_likelihood(N, M, Psi):
    """
    Searches for the max log likelihood at random.
    """
    # number of attempts
    N_iter = 10000

    max_log_likelihood = -100
    bf_A = []
    bf_p = []
    for i in range(N_iter):
        A = np.random.uniform(0.8, 1.0, N)
        p = [[1-x, x] for x in np.random.uniform(0, 1, M)]
        cur_ll = log_likelihood(Psi, A, p)
        if cur_ll > max_log_likelihood:
            max_log_likelihood = cur_ll
            bf_A = A
            bf_p = p

    return bf_A, bf_p
    

def mcmc():
    """
    MCMC for log-likelihood maximum search.
    """
    # random init
    A = np.random.uniform(0.8, 1.0, N)
    p = [[1-x, x] for x in np.random.uniform(0, 1, M)]
    
    # MCMC sampling
    # update sources (this is MAP!)
    A = [np.average([p[obj_id][value_id] for obj_id, value_id in inv_Psi[source_id]]) for source_id in range(N)]
    # update objects


# number of sources
N = 3
# number of objects
M = 5
# observations
Psi = [[(0, 1), (1, 1), (2, 0)],
       [(0, 1), (1, 0), (2, 1)],
       [(0, 0), (1, 1), (2, 0)],
       [(0, 1), (1, 0), (2, 1)],
       [(0, 1), (1, 1), (2, 1)]]

# inverted observations
inv_Psi = [[(0, 1), (1, 1), (2, 0), (3, 1), (4, 1)],
          [(0, 1), (1, 0), (2, 1), (3, 0), (4, 1)],
          [(0, 0), (1, 1), (2, 0), (3, 1), (4, 1)]]

mv_p = MV(Psi)
em_A, em_p = EM(N, M, Psi, inv_Psi)
bf_A, bf_p = random_log_likelihood(N, M, Psi)

print('MV Pr: {}'.format(np.average([x[1] for x in mv_p])))
print('EM Pr: {}, log-likelihood: {}'.format(np.average([x[1] for x in em_p]), log_likelihood(Psi, em_A, em_p)))
print('Random Loglikehood Pr: {}, log-likelihood: {}'.format(np.average([x[1] for x in bf_p]), log_likelihood(Psi, bf_A, bf_p)))


MV Pr: 0.666666666667
EM Pr: 0.993016685642, log-likelihood: -9.33725210422
Random Loglikehood Pr: 0.88864298872, log-likelihood: -13.0462257192
