In [1]:
import numpy as np
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.decomposition import PCA

In [2]:
def get_data(path='mnist.npz', n_train=1000, n_test=10, n_components=100):
    """Load MNIST, build multi-label targets and PCA-reduce the features.

    Each sample gets a one-hot digit indicator plus two derived labels
    appended as the last two columns:

    * second-to-last column: 1 when the digit is >= 7;
    * last column: 1 when the digit is odd.

    The first ``n_train`` samples of ``x_train`` form the training split and
    the following ``n_test`` samples form the evaluation split. Pixels are
    flattened, scaled to [0, 1] by dividing by 255, and projected with a PCA
    fitted on the training split only.

    Parameters
    ----------
    path : str
        ``.npz`` archive containing the arrays ``x_train`` and ``y_train``.
    n_train : int
        Number of leading samples used for training.
    n_test : int
        Number of samples (taken right after the training ones) used for
        evaluation.
    n_components : int
        Number of PCA components to keep.

    Returns
    -------
    x, y, test_x, test_y
        PCA-projected training features, int32 training labels,
        PCA-projected evaluation features and int32 evaluation labels.

    Side effects
    ------------
    Prints the raw digits and the multi-label matrix of the evaluation split.
    """
    # Use the archive as a context manager so the underlying file handle is
    # released as soon as both arrays have been read into memory.
    with np.load(path) as data:
        train_x = data['x_train']
        train_y = data['y_train']

    # One-hot encode the digit by treating every sample as a 1-element label set.
    mlb = MultiLabelBinarizer()
    one_hot = mlb.fit_transform(np.expand_dims(train_y, -1))
    n_digits = one_hot.shape[1]

    # Two extra columns are appended after the one-hot block.
    train_y_ = np.zeros((one_hot.shape[0], n_digits + 2), dtype='uint8')
    train_y_[:, :n_digits] = one_hot
    train_y_[train_y >= 7, n_digits] = 1
    train_y_[train_y % 2 == 1, n_digits + 1] = 1

    flat_x = train_x.reshape(train_x.shape[0], -1)
    test_stop = n_train + n_test

    x = flat_x[:n_train].astype(np.float32) / 255.0
    y = train_y_[:n_train].astype(np.int32)

    test_x = flat_x[n_train:test_stop].astype(np.float32) / 255.0
    test_y = train_y_[n_train:test_stop].astype(np.int32)

    # Fit the projection on the training split only; a fixed random_state keeps
    # the result reproducible when scikit-learn picks a randomized SVD solver.
    pca = PCA(n_components=n_components, random_state=0).fit(x)
    x = pca.transform(x)
    test_x = pca.transform(test_x)

    print(train_y[n_train:test_stop])
    print(test_y)
    return x, y, test_x, test_y

In [3]:
def knn(train_x, t_index, k):
    """Return the indices of the ``k`` nearest neighbours of a training sample.

    Distances are squared Euclidean distances between ``train_x[t_index]`` and
    every other row of ``train_x``; the query row itself is never returned.
    Neighbours are ordered from nearest to farthest, and ties are resolved in
    favour of the lower row index.

    Parameters
    ----------
    train_x : array-like
        Samples, one per entry along the first axis.
    t_index : int
        Index of the query sample inside ``train_x``.
    k : int
        Number of neighbours to return.

    Returns
    -------
    numpy.ndarray
        Integer array of length ``k`` holding row indices into ``train_x``.

    Raises
    ------
    ValueError
        If ``k`` is negative or larger than the number of other samples
        (previously the result was silently padded with index 0).
    IndexError
        If ``t_index`` is out of range.
    """
    train_x = np.asarray(train_x)
    data_num = train_x.shape[0]
    if k < 0 or k > data_num - 1:
        raise ValueError(
            'k=%r neighbours requested but only %d candidate samples exist'
            % (k, max(data_num - 1, 0))
        )

    # Every row except the query itself is a candidate (handles negative
    # t_index as well, since np.delete normalises it).
    candidates = np.delete(np.arange(data_num), t_index)

    diff = train_x[candidates] - train_x[t_index]
    # Sum over all per-sample axes so samples of any dimensionality work.
    dis = (diff ** 2).sum(axis=tuple(range(1, diff.ndim)))

    # A stable sort keeps equal distances in ascending index order.
    nearest = np.argsort(dis, kind='stable')[:k]
    return candidates[nearest]

In [4]:
def knn_test(train_x, t, k):
    """Return the indices of the ``k`` training samples nearest to a query ``t``.

    Distances are squared Euclidean distances between ``t`` and every row of
    ``train_x``. Neighbours are ordered from nearest to farthest, and ties are
    resolved in favour of the lower row index.

    Parameters
    ----------
    train_x : array-like
        Samples, one per entry along the first axis.
    t : array-like
        Query sample with the same shape as one entry of ``train_x``.
    k : int
        Number of neighbours to return.

    Returns
    -------
    numpy.ndarray
        Integer array of length ``k`` holding row indices into ``train_x``.

    Raises
    ------
    ValueError
        If ``k`` is negative or larger than the number of samples
        (previously the result was silently padded with index 0).
    """
    train_x = np.asarray(train_x)
    data_num = train_x.shape[0]
    if k < 0 or k > data_num:
        raise ValueError(
            'k=%r neighbours requested but only %d samples exist' % (k, data_num)
        )

    diff = train_x - t
    # Sum over all per-sample axes so samples of any dimensionality work.
    dis = (diff ** 2).sum(axis=tuple(range(1, diff.ndim)))

    # A stable sort keeps equal distances in ascending index order.
    return np.argsort(dis, kind='stable')[:k]

In [5]:
class MLKNN(object):
    """Multi-label k-nearest-neighbour classifier with smoothed estimates.

    For every label the model stores a smoothed prior (``Ph1`` / ``Ph0``) and
    smoothed likelihood tables (``Peh1`` / ``Peh0``) giving the probability
    that exactly ``c`` of a sample's ``k`` nearest neighbours carry the label,
    conditioned on the sample itself having / not having that label.
    Prediction picks, per label, whichever of prior * likelihood is larger.

    Parameters
    ----------
    train_x : numpy.ndarray
        Training features, one sample per row.
    train_y : numpy.ndarray
        Binary label matrix of shape (n_samples, n_labels).
    k : int
        Number of neighbours.
    s : number
        Additive smoothing constant.
    """

    def __init__(self, train_x, train_y, k ,s):
        self.k = k
        self.s = s
        self.train_x = train_x
        self.train_y = np.asarray(train_y)
        self.label_num = self.train_y.shape[1]
        self.train_data_num = train_x.shape[0]
        # Priors P(label present) / P(label absent), one entry per label.
        self.Ph1 = np.zeros(self.label_num)
        self.Ph0 = np.zeros(self.label_num)
        # Likelihood tables indexed by [label, neighbours-with-label count].
        self.Peh1 = np.zeros([self.label_num, self.k + 1])
        self.Peh0 = np.zeros([self.label_num, self.k + 1])

    def _neighbor_label_counts(self, neighbors):
        """Count, per label, how many of the given neighbours carry it."""
        idx = np.asarray(neighbors).astype(int)
        return self.train_y[idx].astype(int).sum(axis=0)

    def train(self):
        """Estimate the priors and the neighbour-count likelihood tables.

        Fills ``Ph1``, ``Ph0``, ``Peh1`` and ``Peh0`` in place and prints one
        progress message per label.
        """
        #computing the prior probabilities
        positives = (self.train_y == 1).sum(axis=0)
        self.Ph1[:] = (self.s + positives) / (self.s * 2 + self.train_data_num)
        self.Ph0[:] = 1 - self.Ph1

        # The neighbours of a sample do not depend on the label, so the
        # (expensive) search runs once per sample instead of once per
        # sample *and* label.
        neighbor_counts = np.zeros(
            (self.train_data_num, self.label_num), dtype=int)
        for j in range(self.train_data_num):
            neighbors = knn(self.train_x, j, self.k)
            neighbor_counts[j] = self._neighbor_label_counts(neighbors[:self.k])

        for i in range(self.label_num):

            print('training for label\n', i + 1)
            has_label = self.train_y[:, i] == 1

            # c1[c] / c0[c]: number of samples with / without label i whose
            # neighbourhood contains exactly c samples carrying label i.
            c1 = np.bincount(neighbor_counts[has_label, i],
                             minlength=self.k + 1)
            c0 = np.bincount(neighbor_counts[~has_label, i],
                             minlength=self.k + 1)

            self.Peh1[i] = (self.s + c1) / (self.s * (self.k + 1) + c1.sum())
            self.Peh0[i] = (self.s + c0) / (self.s * (self.k + 1) + c0.sum())

    def test(self, test_x, test_y):
        """Predict the label vectors of ``test_x`` and print them.

        Parameters
        ----------
        test_x : numpy.ndarray
            Query features, one sample per row.
        test_y : numpy.ndarray
            Only its shape is used, to size the prediction matrix.

        Returns
        -------
        numpy.ndarray
            Integer 0/1 prediction matrix with the shape of ``test_y``.
        """
        predict = np.zeros(test_y.shape, dtype=int)
        test_data_num = test_x.shape[0]
        labels = np.arange(self.label_num)

        for i in range(test_data_num):
            neighbors = knn_test(self.train_x, test_x[i], self.k)
            counts = self._neighbor_label_counts(neighbors)
            # Per label: posterior score for "present" vs. "absent".
            present = self.Ph1 * self.Peh1[labels, counts]
            absent = self.Ph0 * self.Peh0[labels, counts]
            predict[i, :self.label_num] = present > absent
        print(predict)
        return predict

In [6]:
if __name__ == '__main__':
    # Hyper-parameters: neighbourhood size and additive smoothing constant.
    k, smooth = 7, 1

    # Build the PCA-reduced train / evaluation splits.
    x, y, test_x, test_y = get_data()

    # Fit the multi-label k-NN model, then print its predictions for the
    # evaluation split.
    mlknn = MLKNN(x, y, k, smooth)
    mlknn.train()
    mlknn.test(test_x, test_y)

[0 7 1 1 4 9 4 3 4 8]
[[1 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 1 0 0 1 1]
 [0 1 0 0 0 0 0 0 0 0 0 1]
 [0 1 0 0 0 0 0 0 0 0 0 1]
 [0 0 0 0 1 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 1 1 1]
 [0 0 0 0 1 0 0 0 0 0 0 0]
 [0 0 0 1 0 0 0 0 0 0 0 1]
 [0 0 0 0 1 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 1 0 1 0]]
training for label
 1
training for label
 2
training for label
 3
training for label
 4
training for label
 5
training for label
 6
training for label
 7
training for label
 8
training for label
 9
training for label
 10
training for label
 11
training for label
 12
[[1 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 1 0 0 1 1]
 [0 1 0 0 0 0 0 0 0 0 0 1]
 [0 1 0 0 0 0 0 0 0 0 0 1]
 [0 0 0 0 0 0 0 0 0 0 0 1]
 [0 0 0 0 0 0 0 0 0 1 1 1]
 [0 0 0 0 1 0 0 0 0 0 0 0]
 [0 0 0 1 0 0 0 0 0 0 0 1]
 [0 0 0 0 1 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 1 0 1 0]]
