In [1]:
import numpy as np
import struct

In [2]:
def idx3_decode(file):
    data=open(file, 'rb').read()
    # > for Big-endian, iiii for 4 integers, each size=4
    fmt='>iiii'
    offset=0
    magic_number, image_numbers, height, width=struct.unpack_from(fmt,data,offset)
    image_size=height*width
    offset+=struct.calcsize(fmt)
    # B for unsigned byte, size=1
    fmt='>'+str(image_size)+'B'
    images=np.empty((image_numbers,height*width))
    for i in range(image_numbers):
        images[i]=np.array(struct.unpack_from(fmt,data,offset)).reshape((height*width))
        offset+=struct.calcsize(fmt)
    return images,image_numbers

In [3]:
def idx1_decode(file):
    data=open(file, 'rb').read()
    # > for Big-endian, ii for 2 integers, each size=4
    fmt='>ii'
    offset=0
    magic_number, label_numbers=struct.unpack_from(fmt,data,offset)
    offset+=struct.calcsize(fmt)
    # B for unsigned byte, size=1
    fmt='>B'
    labels=np.empty(label_numbers)
    for i in range(label_numbers):
        labels[i]=struct.unpack_from(fmt,data,offset)[0]
        offset+=struct.calcsize(fmt)
    return labels,label_numbers

In [4]:
def E_step():
    for n in range(60000):
        temp = lamda.copy()
        for k in range(10):
            for d in range(784):
                if (X[n,d]==1):
                    if(P[d,k]==0):
                        temp[k] *= 0.0001
                    else: 
                        temp[k] *= P[d,k]
                else:
                    if(P[d,k]==1):
                        temp[k] *= 0.0001
                    else:
                        temp[k] *= 1-P[d,k]
        for k in range(10):
            if(np.sum(temp)==0):
                W[n,k] = temp[k]/0.0001
            else:
                W[n,k] = temp[k]/np.sum(temp)

In [5]:
def M_step():
    sigma_w = np.sum(W,axis=0)
    lamda = sigma_w/60000
    for k in range(10):
        for d in range(784):
            P[d][k] = np.dot(np.transpose(X)[d],np.transpose(W)[k])
            if(sigma_w[k]==0):
                P[d][k] /= 0.0001
            else:
                P[d][k] /= sigma_w[k]

In [6]:
def print_imagination():
    for k in range(10):
        print('\nclass {}:'.format(k))
        for d in range(784):
            if d%28==0 and d!=0:
                print('')
            if P[d,k]>0.5:
                print('1',end='')
            else:
                print('0',end='')

In [7]:
def print_labeled_imagination(r):
    for i,k in enumerate(r):
        print('labeled class {}:'.format(i))
        for d in range(784):
            if d%28==0 and d!=0:
                print('')
            if P[d,int(k)]>0.5:
                print('1',end='')
            else:
                print('0',end='')
        print('\n')
    print("\n----------------------------------------------------")

In [14]:
def confusion(r):
    confusion_matrix = np.zeros((10,3))
    error = 0
    for n in range(60000):
        temp = lamda.copy()
        for k in range(10):
            for d in range(784):
                if (X[n,d]==1):
                    if(P[d,k]==0):
                        temp[k] *= 0.0001
                    else: 
                        temp[k] *= P[d,k]
                else:
                    if(P[d,k]==1):
                        temp[k] *= 0.0001
                    else:
                        temp[k] *= 1-P[d,k]
        if(int(train_label[n])==np.argmax(temp)):
            confusion_matrix[int(train_label[n]),0]+=1
        else:
            confusion_matrix[int(train_label[n]),1]+=1
            confusion_matrix[int(np.argmax(temp)),2]+=1
    for k in range(10):
        print("\nConfusion Matrix:") 
        print("                Predict number {} Predict not number {}".format(k,k))
        print("Is number {}           {}                {}".format(k,confusion_matrix[k,0],confusion_matrix[k,1]))
        print("Isn\'t number {}       {}                {}".format(k,confusion_matrix[k,2],60000-np.sum(confusion_matrix[k])))
        print("\nSensitivity (Successfully predict cluster 1): {:.5f}".format(confusion_matrix[k,0]/(confusion_matrix[k,0]+confusion_matrix[k,1])))
        print("Specificity (Successfully predict cluster 2): {:.5f}".format((60000-np.sum(confusion_matrix[k]))/(confusion_matrix[k,2]+60000-np.sum(confusion_matrix[k]))))
        print("\n----------------------------------------------------")
        error+=confusion_matrix[k,1]+confusion_matrix[k,2]
    return error

In [15]:
def clustering():
    table = np.zeros((10,10))
    label_class_relation = np.zeros(10)
    for n in range(60000):
        temp = lamda.copy()
        for k in range(10):
            for d in range(784):
                if (X[n,d]==1):
                    if(P[d,k]==0):
                        temp[k] *= 0.0001
                    else: 
                        temp[k] *= P[d,k]
                else:
                    if(P[d,k]==1):
                        temp[k] *= 0.0001
                    else:
                        temp[k] *= 1-P[d,k]
        table[int(train_label[n]),np.argmax(temp)]+=1
    print(table)
    for k in range(10):
        label_class_relation[k] = np.argmax(table[k])
    print_labeled_imagination(label_class_relation)
    return confusion(label_class_relation)

In [16]:
train_image_path='train-images.idx3-ubyte'
train_label_path='train-labels.idx1-ubyte'
train_image,train_image_number=idx3_decode(train_image_path)
train_label,train_label_number=idx1_decode(train_label_path)
train_image = train_image//128

In [17]:
train_image.shape

(60000, 784)

In [18]:
X = train_image.copy()
lamda = np.full((10,1),0.1,dtype=np.float64) # init prob for every class
P = np.random.rand(28*28,10) # init prob for every pixel of every class
P_prev = P.copy()
W = np.zeros((60000,10)) # init w for every pic for every class 

In [19]:
iteration = 0
while(1):
    iteration += 1
    E_step()
    M_step()
    print_imagination()
    diff = np.linalg.norm(P-P_prev)
    print("\nNo. of Iteration: {}, Difference: {}".format(iteration, diff))
    print("\n----------------------------------------------------")
    if diff < 0.001:
        break
    P_prev = P
print("\n----------------------------------------------------")
print("----------------------------------------------------")
error = clustering()
print('Total iteration to coverage: {}'.format(iteration))
print('Total error rate: {}'.format(error))


class 0:
0000000000000000000000000000
0000000000000000000000000000
0000000000000000000000000000
0000000001010000000000000000
0000000001110000000000000000
0000000001110000000000000000
0000000000111000000000000000
0000000000111100000000000000
0000000000111111111000000000
0000000000111111111111000000
0000000000111111111111100000
0000000000111111111111100000
0000000000111111011111100000
0000000001111110000011110000
0000000011111100000011110000
0000000011111100000011110000
0000000011111000000111110000
0000000011110000001111100000
0000000011111111111111100000
0000000011111111111111000000
0000000011111111111110000000
0000000001111111110000000000
0000000000011100000000000000
0000000000000000000000000000
0000000000000000000000000000
0000000000000000000000000000
0000000000000000000000000000
0000000000000000000000000000
class 1:
0000000000000000000000000000
0000000000000000000000000000
0000000000000000000000000000
0000000000000000000000000000
0000000000000000000000000000
000000000000000000000000


class 0:
0000000000000000000000000000
0000000000000000000000000000
0000000000000000000000000000
0000000001010000000000000000
0000000001110000000000000000
0000000001110000000000000000
0000000000111000000000000000
0000000000111100000000000000
0000000000111111111000000000
0000000000111111111111000000
0000000000111111111111100000
0000000000111111111111100000
0000000000111111011111100000
0000000001111110000011110000
0000000011111100000011110000
0000000011111100000011110000
0000000011111000000111110000
0000000011110000001111100000
0000000011111111111111100000
0000000011111111111111000000
0000000011111111111110000000
0000000001111111110000000000
0000000000011100000000000000
0000000000000000000000000000
0000000000000000000000000000
0000000000000000000000000000
0000000000000000000000000000
0000000000000000000000000000
class 1:
0000000000000000000000000000
0000000000000000000000000000
0000000000000000000000000000
0000000000000000000000000000
0000000000000000000000000000
000000000000000000000000

[[1.000e+00 2.580e+02 1.700e+01 3.260e+02 5.840e+02 0.000e+00 6.000e+00
  0.000e+00 4.670e+03 6.100e+01]
 [0.000e+00 2.300e+01 5.200e+01 0.000e+00 1.207e+03 1.000e+00 5.458e+03
  1.000e+00 0.000e+00 0.000e+00]
 [0.000e+00 2.900e+02 2.612e+03 3.190e+02 1.804e+03 0.000e+00 7.400e+02
  1.000e+00 1.580e+02 3.400e+01]
 [0.000e+00 5.210e+02 2.360e+02 1.300e+01 1.542e+03 0.000e+00 2.592e+03
  2.300e+01 1.188e+03 1.600e+01]
 [0.000e+00 2.350e+03 5.000e+00 2.383e+03 8.570e+02 0.000e+00 2.450e+02
  0.000e+00 2.000e+00 0.000e+00]
 [0.000e+00 2.918e+03 9.000e+00 3.900e+01 1.123e+03 0.000e+00 3.980e+02
  2.000e+00 8.950e+02 3.700e+01]
 [0.000e+00 2.070e+02 3.000e+00 1.047e+03 4.472e+03 0.000e+00 7.500e+01
  0.000e+00 1.070e+02 7.000e+00]
 [0.000e+00 5.185e+03 1.000e+01 1.210e+02 4.190e+02 0.000e+00 5.200e+02
  0.000e+00 1.000e+01 0.000e+00]
 [0.000e+00 1.327e+03 6.700e+01 1.220e+02 9.770e+02 0.000e+00 1.604e+03
  8.810e+02 1.690e+02 7.040e+02]
 [0.000e+00 3.345e+03 0.000e+00 1.155e+03 1.066e+03 0.0


Confusion Matrix:
                Predict number 0 Predict not number 0
Is number 0           1.0                5922.0
Isn't number 0       0.0                54077.0

Sensitivity (Successfully predict cluster 1): 0.00017
Specificity (Successfully predict cluster 2): 1.00000

----------------------------------------------------

Confusion Matrix:
                Predict number 1 Predict not number 1
Is number 1           23.0                6719.0
Isn't number 1       16401.0                36857.0

Sensitivity (Successfully predict cluster 1): 0.00341
Specificity (Successfully predict cluster 2): 0.69205

----------------------------------------------------

Confusion Matrix:
                Predict number 2 Predict not number 2
Is number 2           2612.0                3346.0
Isn't number 2       399.0                53643.0

Sensitivity (Successfully predict cluster 1): 0.43840
Specificity (Successfully predict cluster 2): 0.99262

-----------------------------------------------