In [1]:
from mnist_load import MNIST
import numpy as np
import matplotlib.pyplot as plt
from util import *

In [2]:
# Feature model used by the naive Bayes cell further down:
#   0 -> discrete   (gray levels grouped into bins, per-bin frequencies)
#   1 -> continuous (per-pixel Gaussian likelihood)
mode = 0

In [3]:
# MNIST loader pointed at ../data, with its `gz` flag switched on
# (presumably so the gzip-compressed files are read directly -- confirm
# against mnist_load).
mndata = MNIST('../data')
mndata.gz = True

# Training split: the pixel data is turned into a NumPy array right away so
# later cells can use array indexing on it.
images, labels = mndata.load_training()
images = np.array(images)

# Test split: kept exactly as the loader returns it.
test_images, test_labels = mndata.load_testing()

In [4]:
# Optional sanity check, currently disabled by being wrapped in a string
# literal: it would draw the first 100 training images as 28x28 grayscale
# pictures and print each one's label. Because the string is the cell's last
# expression, the notebook echoes it back as the cell output.
"""
for i in range(100):
    p = np.array(images[i], dtype='uint8') 
    p = p.reshape((28, 28))
    #print(p)
    plt.imshow(p, cmap='gray')
    plt.show()
    print("label" + str(i) + ":" + str(labels[i]))
"""

'\nfor i in range(100):\n    p = np.array(images[i], dtype=\'uint8\') \n    p = p.reshape((28, 28))\n    #print(p)\n    plt.imshow(p, cmap=\'gray\')\n    plt.show()\n    print("label" + str(i) + ":" + str(labels[i]))\n'

In [5]:
# Class prior for the classifier.
# The labels are converted to a NumPy array first; the continuous branch of
# the training cell relies on this for its boolean mask `labels == c`.
labels = np.array(labels)
# get_prior comes from util (star import); the training cell indexes its
# result by class id as prior[c]. Presumably P(class = c) estimated from the
# label frequencies -- confirm in util.
prior = get_prior(labels)

[5 0 4 ... 5 6 8]


In [6]:
def _evaluate_naive_bayes(log_likelihood, test_images, test_labels, prior,
                          levels_per_bin=1):
    """Classify every test image, print its posterior, and count the errors.

    Shared by the discrete and the continuous mode, which differ only in the
    lookup table they pass in.

    Parameters
    ----------
    log_likelihood : numpy.ndarray, shape (n_classes, n_pixels, n_values)
        log P(pixel d has value-index v | class c), already floored so that
        it contains no log(0).
    test_images : sequence
        One sequence of integer gray levels per image; only the first
        n_pixels entries of each image are used.
    test_labels : sequence
        True label of each test image, compared against the predicted class.
    prior : indexable
        prior[c] is the prior probability of class c.
    levels_per_bin : int, optional
        A gray level g is looked up at index g // levels_per_bin on the last
        axis of log_likelihood (8 for the 32-bin table, 1 for the 256-value
        table).

    Returns
    -------
    int
        Number of misclassified test images.
    """
    n_classes, n_pixels = log_likelihood.shape[:2]
    pixel_ids = np.arange(n_pixels)
    # Indexed one class at a time so any container supporting prior[c] works.
    log_prior = np.array([np.log(prior[c]) for c in range(n_classes)])

    error_cnt = 0
    for img in range(len(test_images)):
        value_ids = np.asarray(test_images[img][:n_pixels], dtype=np.intp) // levels_per_bin
        # Multiplying probabilities == adding their logs. The fancy index
        # picks, for every class, the log-likelihood of each pixel's observed
        # value; the result has shape (n_classes, n_pixels).
        probs = log_likelihood[:, pixel_ids, value_ids].sum(axis=1) + log_prior

        # Normalize so the printed values sum to 1. The divisor is a sum of
        # negative logs, so the division flips the ordering ...
        probs /= np.sum(probs)
        print('Posterior (in log scale):')
        for c in range(n_classes):
            print('{}: {}'.format(c, probs[c]))
        # ... which is why the most probable class is the *smallest* entry.
        predict = np.argmin(probs)
        print('Prediction: {}, Ans: {}'.format(predict, test_labels[img]))
        print()
        if predict != test_labels[img]:
            error_cnt += 1
    return error_cnt


n_classes = 10
n_pixels = 28 * 28

if mode == 0:
    # ---- train: per-class, per-pixel histogram over gray-level bins ----
    gray_per_bin = 8                  # every 8 gray levels share one bin
    n_bins = 256 // gray_per_bin      # -> 32 bins
    discrete_bins = np.zeros((n_classes, n_pixels, n_bins))
    for c in range(n_classes):
        # Bin index of every pixel of every training image of class c.
        class_bin_ids = images[labels == c][:, :n_pixels] // gray_per_bin
        for b in range(n_bins):
            discrete_bins[c, :, b] = (class_bin_ids == b).sum(axis=0)
    # Counts -> conditional probability of each bin given (class, pixel).
    discrete_bins /= discrete_bins.sum(axis=2, keepdims=True)

    # ---- test ----
    # Floor at 1e-4 so bins never seen in training do not produce log(0).
    # The log table is built once instead of once per test pixel.
    discrete_log = np.log(np.maximum(1e-4, discrete_bins))
    error_cnt = _evaluate_naive_bayes(discrete_log, test_images, test_labels,
                                      prior, levels_per_bin=gray_per_bin)

    # Imagination of numbers in Bayesian classifier:
    print_imagination_number(discrete_bins, 16)
    print('Error rate: {:.4f}'.format(error_cnt / len(test_images)))


elif mode == 1:
    continuous_prob = np.zeros((n_classes, n_pixels, 256))
    # ---- train: one Gaussian per (class, pixel), tabulated for all 256 levels ----
    epsilon_var = 1000  # empirically, larger values worked better

    for c in range(n_classes):
        A = images[labels == c]
        for d in range(n_pixels):
            mu = np.mean(A[:, d])
            # epsilon_var is passed so the variance cannot end up being 0
            # (per the original author's note; get_variance lives in util).
            var = get_variance(A[:, d], epsilon_var)
            for j in range(256):
                continuous_prob[c, d, j] = gaussain_prob(j, mu, var)

    # ---- test ----
    # Floor at 1e-30 so extremely small densities do not break the log;
    # empirically, a smaller floor worked better.
    continuous_log = np.log(np.maximum(1e-30, continuous_prob))
    error_cnt = _evaluate_naive_bayes(continuous_log, test_images, test_labels,
                                      prior, levels_per_bin=1)

    print_imagination_number(continuous_prob, 128)
    print('Error rate: {:.4f}'.format(error_cnt / len(test_images)))
    print()

else:
    # Previously an unsupported mode made this cell silently do nothing.
    raise ValueError('mode must be 0 (discrete) or 1 (continuous), got {!r}'.format(mode))
    
            
            
        


Posterior (in log scale):
0: 0.1110584212873467
1: 0.11929719805351668
2: 0.10525595784545189
3: 0.09983411223879049
4: 0.09361420155207421
5: 0.09722267631166763
6: 0.11526582213409427
7: 0.07376552288489126
8: 0.09924652479526214
9: 0.08543956289690467
Prediction: 7, Ans: 7

Posterior (in log scale):
0: 0.09988996532166798
1: 0.10792179037615664
2: 0.08243998312252636
3: 0.09002300610224136
4: 0.10882780253980255
5: 0.09224200977015605
6: 0.08926034360405265
7: 0.11975803611946034
8: 0.09623471147551221
9: 0.11340235156842386
Prediction: 2, Ans: 2

Posterior (in log scale):
0: 0.12889085354292049
1: 0.06352669088735036
2: 0.09849136260738496
3: 0.10048811529680136
4: 0.1033613335207858
5: 0.09645494085909011
6: 0.1033266618190807
7: 0.10204913879392363
8: 0.10019127545579212
9: 0.10321962721687047
Prediction: 1, Ans: 1

Posterior (in log scale):
0: 0.07721987241226218
1: 0.1434729126442954
2: 0.09661180896171435
3: 0.09609972378963001
4: 0.10124855915672577
5: 0.0922192353903342
6: 0