In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import struct
from array import array

## Naive Bayes Classifier

In [390]:
def unpack_file(train , label , size , col):
    """Read `size` images of `col` bytes each and `size` one-byte labels.

    The first 16 bytes of the image file and the first 8 bytes of the label
    file are skipped as header; every following byte is one unsigned value.

    Parameters
    ----------
    train : path of the image file.
    label : path of the label file.
    size  : number of samples to read.
    col   : number of bytes (pixels) per image.

    Returns
    -------
    (images, labels) : float arrays of shape (size, col) and (size,).
        If a file holds fewer bytes than requested, the missing entries stay 0
        (same result as the previous byte-by-byte reader).
    """
    # `with` guarantees both handles are closed, even if a read fails.
    with open(train, "rb") as train_file, open(label, "rb") as label_file:
        train_file.read(16)
        label_file.read(8)
        image_bytes = train_file.read(size * col)
        label_bytes = label_file.read(size)

    images = np.zeros((size, col))
    labels = np.zeros(size)

    # Decode all bytes at once instead of one read() call per byte.
    pixels = np.frombuffer(image_bytes, dtype=np.uint8)
    images.flat[:pixels.size] = pixels
    digits = np.frombuffer(label_bytes, dtype=np.uint8)
    labels[:digits.size] = digits

    return images , labels

In [391]:
# Load the 60000 training samples and the 10000 test samples; every image is
# read as a flat row of 28*28 values.
x_train , y_train = unpack_file(
    "/Users/cindychen/Documents/ML_hw02/train-images.idx3-ubyte",
    "/Users/cindychen/Documents/ML_hw02/train-labels.idx1-ubyte",
    60000,
    28*28,
)
x_test , y_test = unpack_file(
    "/Users/cindychen/Documents/ML_hw02/t10k-images.idx3-ubyte",
    "/Users/cindychen/Documents/ML_hw02/t10k-labels.idx1-ubyte",
    10000,
    28*28,
)

In [584]:
# re[c, d, b] = fraction of training images of digit c whose pixel d falls into
# intensity bin b (32 bins, each 8 grey levels wide).
# Vectorised per (digit, bin) so the cell no longer runs a Python loop over
# every pixel of every image, and no longer rebinds the builtin `slice`.
re = np.zeros((10, 28*28, 32))
train_digits = y_train.astype(int)
for digit in range(10):
    digit_bins = (x_train[train_digits == digit] // 8).astype(int)
    for bin_id in range(32):
        re[digit, :, bin_id] = (digit_bins == bin_id).sum(axis=0)

# Turn the counts into per-pixel distributions over the 32 bins.
re /= re.sum(axis=2, keepdims=True)

In [626]:
def show(log_scale , pred , ans):
    """Print the ten per-digit scores, then the predicted and the true digit.

    log_scale : indexable holding one score for each digit 0..9.
    pred      : predicted digit.
    ans       : true label; converted with int() before printing.
    """
    header = 'Postirior (in log scale):'
    template = '0:{}\n1: {}\n2: {}\n3: {}\n4: {}\n5: {}\n6: {}\n7: {}\n8: {}\n9: {}'
    per_digit = template.format(*(log_scale[digit] for digit in range(10)))
    verdict = 'Prediction: {} , Ans: {}\n'.format(pred , int(ans))

    print(header)
    print(per_digit)
    print(verdict)

def plot_image(mat , divide):
    """Print each of the ten rows of `mat` as a 28x28 grid of '1'/'0' characters.

    mat    : indexable of ten flat images with 28*28 entries each.
    divide : threshold; an entry strictly greater than it prints as '1',
             anything else as '0'.
    """
    for digit in range(10):
        print('{}:'.format(digit))
        pixels = mat[digit]
        for row_start in range(0, 28*28, 28):
            cells = ['1' if pixels[row_start + col] > divide else '0'
                     for col in range(28)]
            # Every cell, including the last one, is followed by a single space.
            print(' '.join(cells), end=' \n')
        print()

In [629]:
class Naive_Bayes:
    """Naive Bayes classifier for 10 classes over flattened 28x28 images.

    `option == 0` selects the continuous model (one Gaussian per class and
    pixel); any other value selects the discrete model (each pixel value is
    mapped to one of 32 bins of width 8).

    `train()` fits the model on the training set, scores every test sample,
    prints each sample's normalised scores through the notebook-level `show`
    function, prints the error rate and draws the learned digits through the
    notebook-level `plot_image` function.  The error rate is also stored in
    `self.error_rate`.
    """

    def __init__(self , option , train_data , train_label, test_data , test_label):
        self.option = option
        self.train_data = train_data
        self.train_label = train_label
        self.test_data = test_data
        self.test_label = test_label
        self.class_num = [i for i in range(10)]
        self.mu = []    # per-class pixel means, filled by train_continue()
        self.std = []   # per-class pixel VARIANCES (see count_std), filled by train_continue()
        self.prior = self.count_prior()
        self.count = []

    def count_prior(self):
        """Return a list with the fraction of training labels equal to each digit 0..9."""
        labels = np.asarray(self.train_label)
        total = len(labels)
        return [int(np.count_nonzero(labels == val)) / total for val in range(10)]

    def _log_prior(self):
        """Log of the class priors; a zero prior is floored at 1e-30 to avoid log(0)."""
        return np.log(np.maximum(np.asarray(self.prior, dtype=float), 1e-30))

    def count_mean(self , idx):
        """Per-pixel mean of the training rows selected by `idx`."""
        return  np.mean(self.train_data[idx] , axis = 0)

    def count_std(self, idx):
        """Per-pixel variance of the training rows selected by `idx`.

        Despite the name this returns the variance, not the standard deviation.
        A variance of exactly 0 is replaced by 1000 so the Gaussian stays defined.
        """
        var = np.var(self.train_data[idx] , axis = 0)
        var[var == 0] = 1000
        return var

    def train_continue(self):
        """Fit one Gaussian per class and pixel, then classify every test sample."""
        labels = np.asarray(self.train_label)

        # Start from empty lists so calling train() twice does not append duplicates.
        self.mu = []
        self.std = []
        for val in range(10):
            idx = np.flatnonzero(labels == val)
            self.mu.append(self.count_mean(idx))
            self.std.append(self.count_std(idx))

        mu = np.asarray(self.mu)
        var = np.asarray(self.std)
        # Log of the Gaussian normalisation term, identical for every test sample.
        log_norm = -0.5 * np.log(2 * np.pi * var)
        # Same floor as clamping the density itself at 1e-30, but computed in
        # log space so tiny densities do not underflow first.
        log_floor = np.log(1e-30)
        log_prior = self._log_prior()

        error = 0
        for n in range(len(self.test_label)):
            sample = np.asarray(self.test_data[n])
            log_pdf = log_norm - np.square(sample - mu) / (2 * var)
            # Negative log posterior (up to a constant): likelihood term plus prior term.
            log_scale = -np.maximum(log_pdf, log_floor).sum(axis=1) - log_prior

            log_scale /= np.sum(log_scale)
            # Scores are negative logs, so the smallest one is the most probable class.
            pred = np.argmin(log_scale)
            if pred != self.test_label[n]:
                error += 1
            show(log_scale , pred , self.test_label[n])

        self.error_rate = error/len(self.test_label)
        print(self.error_rate)
        plot_image(self.mu , 128)


    def train_discrete(self):
        """Fit per-class, per-pixel histograms over 32 bins, then classify every test sample."""
        labels = np.asarray(self.train_label).astype(int)
        train_data = np.asarray(self.train_data)

        # The histogram is local to this call; the method does not read or
        # modify any notebook-level array.
        count_map = np.zeros((10,28*28,32))
        for val in range(10):
            class_bins = (train_data[labels == val] // 8).astype(int)
            for bin_id in range(32):
                count_map[val, :, bin_id] = (class_bins == bin_id).sum(axis=0)
        totals = count_map.sum(axis=2, keepdims=True)
        # A class without training samples keeps all-zero rows instead of NaN.
        np.divide(count_map, totals, out=count_map, where=totals > 0)

        pixel_idx = np.arange(28*28)
        log_prior = self._log_prior()

        error = 0
        for i in range(len(self.test_label)):
            sample_bins = (np.asarray(self.test_data[i]) // 8).astype(int)
            # bin_prob[c, d] = P(pixel d falls in the observed bin | class c)
            bin_prob = count_map[:, pixel_idx, sample_bins]
            log_scale = np.log(np.maximum(1e-6 , bin_prob)).sum(axis=1) + log_prior

            # Every score is negative, so dividing by their (negative) sum flips
            # the order: the most probable class ends up with the smallest value.
            log_scale /= np.sum(log_scale)
            pred = np.argmin(log_scale)
            if pred != self.test_label[i]:
                error += 1
            show(log_scale , pred , self.test_label[i])

        # Most frequent bin per class and pixel; bins above 16 are drawn as '1'.
        mat = np.argmax(count_map, axis=2).astype(float)

        plot_image(mat , 16)
        self.error_rate = error/len(self.test_label)
        print(self.error_rate)


    def train(self):
        """Run the continuous model when option == 0, otherwise the discrete model."""
        if self.option == 0:
            self.train_continue()
        else:
            self.train_discrete()




In [None]:
# option 0 -> continuous model
c_model = Naive_Bayes(
    option=0,
    train_data=x_train,
    train_label=y_train,
    test_data=x_test,
    test_label=y_test,
)
c_model.train()

In [None]:
# option 1 -> discrete model
d_model = Naive_Bayes(
    option=1,
    train_data=x_train,
    train_label=y_train,
    test_data=x_test,
    test_label=y_test,
)
d_model.train()

## Online Learning

In [17]:
class online_learning:
    """Sequential Beta-Binomial update over a list of outcome strings.

    a, b : initial Beta prior parameters.
    data : list of strings; in each string a '1' counts towards `a` and every
           other character counts towards `b`.

    `train()` walks through `data` in order, prints the binomial likelihood of
    each string together with the prior and posterior parameters, and then
    uses the posterior as the prior for the next string.
    """

    def __init__(self , a , b , data):
        self.pri_a = a
        self.pri_b = b
        self.data = data
        self.post_a = self.pri_a
        self.post_b = self.pri_b
        self.likelihood = 0

    ## The theta MLE of the Binomial distribution is a/N, where a is the number of head results.
    def compute_likelihood(self, a, b):
        """Binomial likelihood of `a` ones and `b` others, evaluated at p = a / (a + b).

        Returns 1.0 when a or b is 0 (this covers an empty string, which used to
        raise ZeroDivisionError).  The value is computed in log space so long
        strings neither overflow the coefficient nor underflow the product.
        """
        import math

        times = a+b
        if a == 0 or b == 0:
            # p is 0 or 1 here: the only possible outcome is the observed one.
            return 1.0
        p = a / times
        # math.log accepts arbitrarily large ints, so the exact coefficient is safe.
        log_likelihood = (math.log(self.factorial(times, a))
                          + a * math.log(p)
                          + b * math.log(1 - p))
        return math.exp(log_likelihood)

    ## Computing the binomial coefficient C(n, k)
    def factorial(self, n , k):
        """Return the binomial coefficient C(n, k) as an exact integer.

        Uses integer arithmetic only, so large n no longer raises OverflowError
        from dividing a huge product as a float.  Returns 1 when k > n.
        """
        steps = min(k, n - k)   # C(n, k) == C(n, n - k); take the shorter loop
        out = 1
        for i in range(1, steps + 1):
            # After this step out == C(n - steps + i, i), so // is always exact.
            out = out * (n - steps + i) // i
        return out

    ## Counting the outcomes of one string
    def compute_posterior(self , n):
        """Return (number of '1' characters, number of other characters) in data[n]."""
        ones = sum(1 for identity in self.data[n] if identity == '1')
        return ones , len(self.data[n]) - ones

    def train(self):
        """Process every string in order, updating and printing the Beta parameters."""
        count = len(self.data)
        for i in range(count):
            a , b = self.compute_posterior(i)
            self.likelihood = self.compute_likelihood(a,b)
            self.post_a += a
            self.post_b += b
            self.show(i)
            # The posterior becomes the prior of the next case.
            self.pri_a , self.pri_b = self.post_a , self.post_b


    def show(self , n):
        """Print case number, raw string, likelihood, and prior/posterior parameters."""
        # The line continuation inside the literal makes the next line's leading
        # spaces part of the printed text.
        print('case {}: {}\nLikelihood:{}\nBeta prior:     a = {} b = {} \
        \nBeta posterior: a = {} b = {}\n'.format(n+1, self.data[n],\
        self.likelihood , self.pri_a , self.pri_b , self.post_a , self.post_b))



In [12]:
def open_file(dir):
    """Read the text file at path `dir` and return its lines without the trailing newline.

    Empty lines are kept as empty strings.  The file is closed before returning.
    """
    with open( dir , "r") as f:
        return [line.rstrip('\n') for line in f]

In [18]:
if __name__ == '__main__':
    # One outcome string per line of the input file.
    data = open_file('/Users/cindychen/Documents/ML_hw02/testfile.txt')

    # Initial Beta prior parameters.
    a , b = 10 , 1

    model = online_learning(a , b , data)
    model.train()

case 1: 0101010101001011010101
Likelihood:0.16818809509277344
Beta prior:     a = 10 b = 1         
Beta posterior: a = 21 b = 12

case 2: 0110101
Likelihood:0.29375515303997485
Beta prior:     a = 21 b = 12         
Beta posterior: a = 25 b = 15

case 3: 010110101101
Likelihood:0.22860542417943355
Beta prior:     a = 25 b = 15         
Beta posterior: a = 32 b = 20

case 4: 0101101011101011010
Likelihood:0.18286870706509092
Beta prior:     a = 32 b = 20         
Beta posterior: a = 43 b = 28

case 5: 111101100011110
Likelihood:0.21430705488578333
Beta prior:     a = 43 b = 28         
Beta posterior: a = 53 b = 33

case 6: 101110111000110
Likelihood:0.20659760529408003
Beta prior:     a = 53 b = 33         
Beta posterior: a = 62 b = 39

case 7: 1010010111
Likelihood:0.25082265600000003
Beta prior:     a = 62 b = 39         
Beta posterior: a = 68 b = 43

case 8: 11101110110
Likelihood:0.2619678932864457
Beta prior:     a = 68 b = 43         
Beta posterior: a = 76 b = 46

case 9: 010