In [1]:
import numpy as np
import numba as nb
import math

In [2]:
def _load_idx_pair(label_path, image_path):
    '''
    Read one label file / image file pair stored in the IDX format.

    Parameters
    ----------
    label_path : str
        Path of the label file (8-byte header followed by one byte per label).
    image_path : str
        Path of the image file (16-byte header followed by one byte per pixel).

    Returns
    -------
    labels : np.ndarray of int, shape (number,)
    images : np.ndarray of int, shape (number, row, col)

    Raises
    ------
    ValueError
        If either file holds fewer bytes than its header announces.
    '''
    # `with` guarantees both handles are closed even when a read fails.
    with open(label_path, "rb") as label_file, open(image_path, "rb") as image_file:
        label_file.read(8)  # skip the two 4-byte header fields of the label file
        image_file.read(4)  # skip the first 4-byte header field of the image file
        number = int.from_bytes(image_file.read(4), byteorder='big')
        row = int.from_bytes(image_file.read(4), byteorder='big')
        col = int.from_bytes(image_file.read(4), byteorder='big')

        # Read each payload in one call instead of one byte at a time.
        label_bytes = label_file.read(number)
        image_bytes = image_file.read(number * row * col)

    if len(label_bytes) != number:
        raise ValueError(
            f"{label_path}: expected {number} labels, found {len(label_bytes)}")
    if len(image_bytes) != number * row * col:
        raise ValueError(
            f"{image_path}: expected {number * row * col} pixels, found {len(image_bytes)}")

    labels = np.frombuffer(label_bytes, dtype=np.uint8).astype(int)
    images = np.frombuffer(image_bytes, dtype=np.uint8).astype(int).reshape(number, row, col)
    return labels, images


def Load_data():
    '''
    Load the training and testing sets from the four IDX files that are
    expected in the current working directory.

    Returns
    -------
    training_label, training_data, testing_label, testing_data : np.ndarray
        Label arrays have shape (number,), image arrays (number, row, col);
        all four use dtype int.

    Raises
    ------
    OSError
        If one of the files cannot be opened.
    ValueError
        If a file is shorter than its header announces.
    '''
    # Training Data
    training_label, training_data = _load_idx_pair(
        "train-labels.idx1-ubyte_", "train-images.idx3-ubyte_")

    # Testing Data
    testing_label, testing_data = _load_idx_pair(
        't10k-labels.idx1-ubyte_', 't10k-images.idx3-ubyte_')

    return training_label, training_data, testing_label, testing_data



In [10]:
# Load both data sets once and report the dimensions of every returned array.
training_label, training_data, testing_label, testing_data = Load_data()

for caption, array in (
    ('Shape of training_label:', training_label),
    ('Shape of training_data:', training_data),
    ('Shape of testing_label:', testing_label),
    ('Shape of testing_data:', testing_data),
):
    print(caption, array.shape)

Shape of training_label: (60000,)
Shape of training_data: (60000, 28, 28)
Shape of testing_label: (10000,)
Shape of testing_data: (10000, 28, 28)


$$P(X|C) = \frac{P(X, C)}{P(C)}$$

## Discrete

In [24]:
def Discrete(label, data):
    '''
    For each digit (0-9),
    for every pixel in the image,
    calculate the conditional probability of that pixel's grayscale value
    falling into one of the 32 bins (bin index = value // 8).

    Every bin starts with a pseudo-count of 1 (Laplace smoothing), so no
    probability is ever zero. The smoothed counts are divided by
    (class count + 32), which makes the 32 bin probabilities of each pixel
    sum to exactly 1.

    Parameters
    ----------
    label : array-like of int, shape (N,)
        Digit of each image, each in 0..9.
    data : array-like, shape (N, rows, cols)
        Grayscale images with values in 0..255.

    Returns
    -------
    prior : np.ndarray, shape (10,)
        Smoothed class probabilities (count + 1) / (N + 10); sums to 1.
    likelihood : np.ndarray, shape (10, rows, cols, 32)
        likelihood[c, j, k, b] = P(pixel (j, k) falls in bin b | digit c).

    Raises
    ------
    ValueError
        If the shapes of `label` and `data` disagree, a label is outside
        0..9, or a pixel value maps outside the 32 bins.
    '''
    n_classes, n_bins, bin_width = 10, 32, 8

    label = np.asarray(label, dtype=np.intp)
    data = np.asarray(data)
    if data.ndim != 3 or data.shape[0] != label.shape[0]:
        raise ValueError("data must have shape (len(label), rows, cols)")
    if label.size and (label.min() < 0 or label.max() >= n_classes):
        raise ValueError("labels must lie in 0..9")

    rows, cols = data.shape[1:]
    n_pixels = rows * cols
    class_counts = np.bincount(label, minlength=n_classes)

    # Offset that sends (pixel p, bin b) to the flat index p * n_bins + b, so
    # a single bincount call builds the histograms of all pixels at once.
    pixel_offset = np.arange(n_pixels) * n_bins

    likelihood = np.empty((n_classes, rows, cols, n_bins))
    for digit in range(n_classes):
        bins = (data[label == digit] // bin_width).astype(np.intp).reshape(-1, n_pixels)
        if bins.size and (bins.min() < 0 or bins.max() >= n_bins):
            raise ValueError("pixel values must lie in 0..255")
        histogram = np.bincount((bins + pixel_offset).ravel(),
                                minlength=n_pixels * n_bins)
        # calculate P(X|C): +1 is the pseudo-count, +n_bins keeps the sum at 1
        likelihood[digit] = ((histogram.reshape(rows, cols, n_bins) + 1)
                             / (class_counts[digit] + n_bins))

    prior = (class_counts + 1) / (len(label) + n_classes)
    return prior, likelihood

## Continuous

$$\hat{\mu} = \frac{1}{N}\sum_{i=1}^{N}x_i$$

$$\hat{\sigma^2} = \frac{1}{N}\sum_{i=1}^{N}x_i^2 - (\frac{1}{N}\sum_{i=1}^{N}x_i)^2$$

In [25]:
def Continuous(labels, data):
    '''
    For each digit (0-9),
    for every pixel in the image,
    use MLE to fit a Gaussian distribution for the value of each pixel.

    The estimates are the sample mean and Var(X) = E(X^2) - [E(X)]^2,
    computed over all training images of the digit.

    Parameters
    ----------
    labels : array-like of int, shape (N,)
        Digit of each image, each in 0..9.
    data : array-like, shape (N, rows, cols)
        Grayscale images.

    Returns
    -------
    prior : np.ndarray, shape (10,)
        Smoothed class probabilities (count + 1) / (N + 10); sums to 1 and is
        never zero, so its logarithm is always finite.
    likelihood : np.ndarray, shape (10, rows, cols, 2)
        likelihood[c, j, k, 0] is the mean and likelihood[c, j, k, 1] the
        variance of pixel (j, k) for digit c. A digit without any training
        image keeps mean 0 and variance 0.

    Raises
    ------
    ValueError
        If the shapes of `labels` and `data` disagree or a label is outside 0..9.
    '''
    n_classes = 10

    labels = np.asarray(labels, dtype=np.intp)
    data = np.asarray(data)
    if data.ndim != 3 or data.shape[0] != labels.shape[0]:
        raise ValueError("data must have shape (len(labels), rows, cols)")
    if labels.size and (labels.min() < 0 or labels.max() >= n_classes):
        raise ValueError("labels must lie in 0..9")

    class_counts = np.bincount(labels, minlength=n_classes)
    likelihood = np.zeros((n_classes,) + data.shape[1:] + (2,))

    for digit in range(n_classes):
        if class_counts[digit] == 0:
            continue
        # Convert one class at a time to keep the float copy small.
        images = data[labels == digit].astype(float)
        mean = images.mean(axis=0)
        mean_of_square = (images ** 2).mean(axis=0)
        likelihood[digit, ..., 0] = mean
        # Var(X) = E(X^2) - [E(X)]^2; rounding can leave a tiny negative
        # value for constant pixels, so clamp at zero.
        likelihood[digit, ..., 1] = np.maximum(mean_of_square - mean ** 2, 0.0)

    prior = (class_counts + 1) / (len(labels) + n_classes)
    return prior, likelihood

# Test

## Discrete:
$$\log P(C|X) = \log P(C) + \sum_{i=1}^{28} \sum_{j=1}^{28} \log P(x_{ij}|C) - \log P(X)$$

## Continuous:
$$\log P(C|X) = \log P(C) + \sum_{i, j}( -\frac{1}{2} \log 2 \pi \sigma^2 - \frac{(x-\mu)^2}{2 \sigma^2}) - \log P(X)$$

In [30]:
def Test(mode, labels, data, prior=None, likelihood=None):
    '''
    Classify every image with the trained Naive Bayes model and measure the
    error rate.

    For each image and each digit the log posterior (up to the shared
    evidence term) is log P(C) plus the sum of the per-pixel log likelihoods.

    Parameters
    ----------
    mode : int
        0 for the discrete (32-bin) model, 1 for the continuous (Gaussian) model.
    labels : array-like of int, shape (N,)
        True digit of each image.
    data : array-like, shape (N, rows, cols)
        Grayscale images.
    prior : array-like, shape (10,), optional
        Class probabilities. When omitted, the module-level variable `prior`
        is used.
    likelihood : array-like, optional
        Model parameters: shape (10, rows, cols, 32) in mode 0 (bin
        probabilities) or (10, rows, cols, 2) in mode 1 (mean, variance).
        When omitted, the module-level variable `likelihood` is used.

    Returns
    -------
    posterior : np.ndarray, shape (N, 10)
        Unnormalised log posterior of each digit for each image.
    predictions : np.ndarray of float, shape (N,)
        Digit with the highest log posterior.
    answers : np.ndarray of float, shape (N,)
        Copy of `labels`.
    error_rate : float
        Fraction of images whose prediction differs from the answer
        (0.0 when there are no images).

    Raises
    ------
    ValueError
        If `mode` is neither 0 nor 1.
    '''
    if mode not in (0, 1):
        raise ValueError("mode must be 0 (discrete) or 1 (continuous)")

    # Fall back to the notebook-level model when it is not passed explicitly.
    if prior is None:
        prior = globals()['prior']
    if likelihood is None:
        likelihood = globals()['likelihood']

    prior = np.asarray(prior, dtype=float)
    likelihood = np.asarray(likelihood, dtype=float)
    data = np.asarray(data)

    n_images = len(labels)
    posterior = np.zeros((n_images, 10))
    predictions = np.zeros(n_images)
    answers = np.zeros(n_images)

    log_prior = np.log(prior)

    # Everything that does not depend on the image is computed once.
    if mode == 0:
        log_likelihood = np.log(likelihood)
        row_idx, col_idx = np.indices(likelihood.shape[1:3])
    else:
        mean = likelihood[..., 0]
        var = likelihood[..., 1]
        # Pixels without a positive variance have no usable Gaussian and
        # contribute nothing to the sum.
        usable = var > 0
        safe_var = np.where(usable, var, 1.0)
        log_norm = -0.5 * np.log(2 * np.pi * safe_var)

    err = 0
    for label_idx in range(n_images):
        image = data[label_idx]
        if mode == 0:
            bins = (image // 8).astype(np.intp)
            # Result has shape (10, rows, cols): one log probability per digit and pixel.
            per_pixel = log_likelihood[:, row_idx, col_idx, bins]
        else:
            squared_distance = (image[None] - mean) ** 2 / (2 * safe_var)
            per_pixel = np.where(usable, log_norm - squared_distance, 0.0)

        posterior[label_idx] = log_prior + per_pixel.sum(axis=(1, 2))

        predictions[label_idx] = np.argmax(posterior[label_idx])
        answers[label_idx] = labels[label_idx]
        if predictions[label_idx] != answers[label_idx]:
            err += 1

    error_rate = err / n_images if n_images else 0.0
    return posterior, predictions, answers, error_rate

In [32]:
def printResult(likelihood, posteriors, predictions, answers, error, mode=None):
    '''
    Build the textual report: the normalised log posteriors and the
    prediction of every image, a binary picture of each digit as the
    classifier "imagines" it, and the error rate.

    Parameters
    ----------
    likelihood : array-like
        Model parameters: shape (10, rows, cols, 32) in mode 0 (bin
        probabilities) or (10, rows, cols, 2) in mode 1 (mean, variance).
    posteriors : array-like, shape (N, 10)
        Log posteriors of each image; each row is divided by its own sum
        before being printed.
    predictions, answers : array-like, shape (N,)
        Predicted and true digit of each image.
    error : float
        Error rate to print on the last line.
    mode : int, optional
        0 for the discrete model, 1 for the continuous model. When omitted,
        the module-level variable `mode` is used.

    Returns
    -------
    str
        The complete report.

    Raises
    ------
    ValueError
        If the mode is neither 0 nor 1.
    '''
    # Fall back to the notebook-level setting when it is not passed explicitly.
    if mode is None:
        mode = globals().get('mode')
    if mode not in (0, 1):
        raise ValueError("mode must be 0 (discrete) or 1 (continuous)")

    likelihood = np.asarray(likelihood)

    # Collect the pieces in a list and join once instead of growing a string.
    parts = []

    for img_idx in range(len(predictions)):
        parts.append('Postirior (in log scale):\n')
        image_posterior = np.asarray(posteriors[img_idx])
        normalised = image_posterior / np.sum(image_posterior)
        for label in range(10):
            parts.append(f'{label}: {normalised[label]}\n')
        parts.append(f'Prediction: {predictions[img_idx]}, Ans: {answers[img_idx]} \n')

    # One 0/1 value per digit and pixel.
    if mode == 0:
        # Most likely bin (0..31); bins 16..31 hold grayscale values >= 128.
        pixel_bits = np.argmax(likelihood, axis=-1) // 16
    else:
        # The Gaussian mean is the expected grayscale value; 128 and above prints 1.
        pixel_bits = (likelihood[..., 0] / 128).astype(int)

    parts.append('Imagination of numbers in Bayesian classifier: \n')
    for label in range(10):
        parts.append(f'\n{label}:\n')
        for row_bits in pixel_bits[label]:
            parts.append(''.join(str(bit) for bit in row_bits))
            parts.append("\n\n")

    parts.append(f'Error rate: {error}')

    return ''.join(parts)
    

In [33]:
if __name__ == "__main__":
    # The option is parsed as an integer, so the prompt spells out the two codes.
    mode = int(input("Toggle option (0: discrete, 1: continuous): "))
    if mode not in (0, 1):
        # Fail before the slow loading step instead of with a NameError later.
        raise ValueError("mode must be 0 (discrete) or 1 (continuous)")

    # loading
    print("Loading...")
    trainingLabel, trainingData, testLabel, testData = Load_data()

    # training
    print("Training...")
    if mode == 0:
        prior, likelihood = Discrete(trainingLabel, trainingData)
    else:
        prior, likelihood = Continuous(trainingLabel, trainingData)

    # testing
    print("Testing...")
    posterior, predections, answers, err = Test(mode, testLabel, testData)

    # `with` closes the file, so the report is flushed to disk.
    with open("result.txt", 'w') as resultFile:
        resultFile.write(printResult(likelihood, posterior, predections, answers, err))

Loading...
Training...
Testing...
