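This notebook estimates the model parameters (the class proportions `rho`, the per-class confusion-matrix rows, and the latent class of each item) from expert responses collected under the YN question framework.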

In [1]:
import numpy as np
import pymc as pm
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
# Load the reference class labels and the YN response array from disk.
# The final expression displays the response array's shape as the cell output.
trueCls = np.load('../data/preset_MC_100_5_4_reference_classes.npy')
data = np.load('../data/preset_YN_100_5_4.npz.npy')
data.shape

(100, 5, 4, 2)

In [3]:
# Dataset dimensions come from the first three axes of the response array:
# (number of items, number of experts, number of classes).
numData, numExperts, numClasses = data.shape[:3]

In [4]:
# Symmetric Dirichlet(1, ..., 1) prior over the class proportions.
# Sized from numClasses so it stays consistent with the rest of the model
# when the data has a number of classes other than four.
alpha = np.ones(numClasses)
rho = pm.Dirichlet('rho', theta=alpha, plot=False)
# CompletedDirichlet appends the implied last component so the vector sums to 1.
rho_completedD = pm.CompletedDirichlet('rho_completed', rho, plot=False)

# One latent one-hot class indicator per data item (a single multinomial draw).
z = pm.Container([
    pm.Multinomial('z_%i' % i, n=1, p=rho, plot=False)
    for i in range(numData)
])

# Prior concentration for the confusion matrix: 1 everywhere plus an extra
# `lam` on the diagonal, so each row's prior mass favours its own class.
lam = 10
lambda_mat = lam * np.eye(numClasses) + np.ones((numClasses, numClasses))

# One Dirichlet-distributed row per class.
confMat = pm.Container([
    pm.Dirichlet(name='theta_row_' + str(i), theta=lambda_mat[i, :], plot=False)
    for i in range(numClasses)
])
# Completed (full-length) version of each confusion-matrix row.
confMatHat = pm.Container([
    pm.CompletedDirichlet('theta_row_hat' + str(i), confMat[i], plot=False)
    for i in range(numClasses)
])

In [5]:
@pm.observed
def mc_likelihood(value=data, z=z, confMatHat=confMatHat, rho=rho_completedD):
    """
    Log-likelihood of the YN responses, using only each item's own-class column.

    value      - response array indexed as [item, expert, class, answer];
                 answer column 1 is weighted by log(p) and column 0 by
                 log(1 - p) (presumably "yes"/"no" counts - TODO confirm).
    z          - latent one-hot class indicator for every item.
    confMatHat - completed confusion-matrix rows, one per class; each row is
                 indexed as row[0][k] here.
    rho        - completed class proportions; not used in the computation.

    Returns the summed log-likelihood over all items.

    NOTE(review): for an item whose indicator selects class c, only the
    responses in column c contribute. mc_likelihood2 sums over every column
    instead, and it is mc_likelihood2 that is passed to pm.Model.
    """
    log_like = 0
    # Use the `value` argument rather than the notebook-global `data`, so the
    # likelihood does not silently depend on later rebinding of that global.
    numData = value.shape[0]
    for data_idx in range(numData):
        observed_cls = z[data_idx]
        # Index of the class selected by the one-hot indicator.
        class_num = np.flatnonzero(observed_cls == 1)[0]
        # Aggregate the responses of all experts for this item.
        data_val = np.sum(value[data_idx], axis=0)
        # Take the logs once per item instead of once per inner-loop step.
        row = confMatHat[class_num]
        log_p = np.log(row)[0]
        log_not_p = np.log(1 - row)[0]
        # The one-hot weight zeroes every column except class_num, so add that
        # term directly. This also avoids 0 * (-inf) = nan from the discarded
        # columns when a probability underflows to 0 or reaches 1.
        log_like += (data_val[class_num, 1] * log_p[class_num] +
                     data_val[class_num, 0] * log_not_p[class_num])
    return log_like

In [5]:
@pm.observed
def mc_likelihood2(value=data, z=z, confMatHat=confMatHat, rho=rho_completedD):
    """
    Log-likelihood of the YN responses, summed over every class column.

    value      - response array indexed as [item, expert, class, answer];
                 answer column 1 is weighted by log(p) and column 0 by
                 log(1 - p) (presumably "yes"/"no" counts - TODO confirm).
    z          - latent one-hot class indicator for every item.
    confMatHat - completed confusion-matrix rows, one per class.
    rho        - completed class proportions; not used in the computation.

    Returns the summed log-likelihood over all items.
    """
    log_like = 0
    # Use the `value` argument rather than the notebook-global `data`, so the
    # likelihood does not silently depend on later rebinding of that global.
    numData = value.shape[0]
    for data_idx in range(numData):
        observed_cls = z[data_idx]
        # Index of the class selected by the one-hot indicator.
        class_num = np.flatnonzero(observed_cls == 1)[0]
        # Aggregate the responses of all experts for this item.
        data_val = np.sum(value[data_idx], axis=0)
        row = confMatHat[class_num]
        # z[data_idx] is one-hot, so weighting this scalar by the indicator
        # and summing (as the earlier version did) returns the scalar itself.
        # Adding it directly also avoids 0 * (-inf) = nan when the term is -inf.
        log_like += (np.sum(np.multiply(data_val[:, 1], np.log(row))) +
                     np.sum(np.multiply(data_val[:, 0], np.log(1 - row))))

    return log_like

In [6]:
# Nodes handed to PyMC: completed class proportions, latent class indicators,
# completed confusion-matrix rows, and the observed likelihood.
model_nodes = [rho_completedD, z, confMatHat, mc_likelihood2]
mod = pm.Model(model_nodes)

In [7]:
# Sampler settings: total iterations, burn-in length, and thinning interval.
N_ITER, N_BURN, N_THIN = 10000, 1000, 5

# Run the sampler on the assembled model, then draw PyMC's diagnostic plots.
mcmc = pm.MCMC(mod)
mcmc.sample(iter=N_ITER, burn=N_BURN, thin=N_THIN)
pm.Matplot.plot(mcmc)

 [-----------------100%-----------------] 10000 of 10000 complete in 862.1 sec

#### For the data file preset_MC_200_5_4.npz.npy

In [None]:
# Posterior mean of every completed confusion-matrix row.
# Parenthesised single-argument print works on both Python 2 and Python 3.
print("Confusion Matrix")
for idx in range(numClasses):
    print(np.mean(confMatHat[idx].trace()[:], axis=0))

In [None]:
# Point estimate of each item's class: average the one-hot indicator over the
# last 1000 retained samples and pick the class with the highest frequency.
final_z = [
    np.argmax(np.mean(z[i].trace()[-1000:], axis=0))
    for i in range(numData)
]

In [None]:
# Number of items whose estimated class matches the reference label.
np.sum(np.asarray(final_z) == trueCls)

In [None]:
# Posterior mean of the completed class-proportion vector.
rho_trace = rho_completedD.trace()
np.mean(rho_trace, axis=0)

#### For the data file preset_MC_100_5_4.npz.npy

In [8]:
# Posterior mean of every completed confusion-matrix row.
# Parenthesised single-argument print works on both Python 2 and Python 3.
print("Confusion Matrix")
for idx in range(numClasses):
    print(np.mean(confMatHat[idx].trace()[:], axis=0))

Confusion Matrix
[[ 0.79862261  0.0713451   0.06464208  0.0653902 ]]
[[ 0.13356432  0.69175226  0.1012729   0.07341052]]
[[ 0.14487684  0.07301555  0.72523597  0.05687164]]
[[ 0.08823133  0.09162233  0.08660616  0.73354018]]


In [12]:
# Point estimate of each item's class: average the one-hot indicator over all
# retained samples and pick the class with the highest frequency.
final_z = [
    np.argmax(np.mean(z[i].trace()[:], axis=0))
    for i in range(numData)
]

In [13]:
# Number of items whose estimated class matches the reference label.
np.sum(np.asarray(final_z) == trueCls)

97

In [14]:
# Posterior mean of the completed class-proportion vector.
rho_trace = rho_completedD.trace()
np.mean(rho_trace, axis=0)

array([[ 0.22791403,  0.24588491,  0.26319636,  0.2630047 ]])

#### For the data file preset_MC_40_5_4.npz.npy

In [8]:
# Posterior mean of every completed confusion-matrix row.
# Parenthesised single-argument print works on both Python 2 and Python 3.
print("Confusion Matrix")
for idx in range(numClasses):
    print(np.mean(confMatHat[idx].trace()[:], axis=0))

Confusion Matrix
[[ 0.84907127  0.02285125  0.06902741  0.05905007]]
[[ 0.03317905  0.82001513  0.07106986  0.07573595]]
[[ 0.04217676  0.08334965  0.76888768  0.1055859 ]]
[[ 0.01488788  0.05694666  0.07228057  0.85588489]]


In [9]:
# Point estimate of each item's class: average the one-hot indicator over the
# last 1000 retained samples and pick the class with the highest frequency.
final_z = [
    np.argmax(np.mean(z[i].trace()[-1000:], axis=0))
    for i in range(numData)
]

In [12]:
# Number of items whose estimated class matches the reference label.
np.sum(np.asarray(final_z) == trueCls)

40

In [10]:
# Show the estimated classes next to the reference labels for inspection.
(np.asarray(final_z), trueCls)

(array([0, 1, 1, 3, 1, 2, 0, 1, 2, 0, 2, 0, 1, 3, 0, 2, 3, 3, 1, 3, 2, 3, 3,
        0, 2, 2, 2, 1, 0, 3, 0, 2, 1, 1, 1, 0, 0, 2, 3, 3]),
 array([0, 1, 1, 3, 1, 2, 0, 1, 2, 0, 2, 0, 1, 3, 0, 2, 3, 3, 1, 3, 2, 3, 3,
        0, 2, 2, 2, 1, 0, 3, 0, 2, 1, 1, 1, 0, 0, 2, 3, 3]))

In [11]:
# Posterior mean of the completed class-proportion vector.
rho_trace = rho_completedD.trace()
np.mean(rho_trace, axis=0)

array([[ 0.24865265,  0.25481833,  0.24264308,  0.25388593]])