**20164245 Hong Jin**

Build a binary classifier for each digit against all the other digits at MNIST dataset.

Let $x = (x_1, x_2, ... , x_m)$ be a vector representing an image in the dataset.

The prediction function $f_d(x; w)$ is defined by the linear combination of data (1, x) and the model parameter w for each digit d :
$f_d(x; w) = w_0 * 1 + w_1 * x_1 + w_2 * x_2 + ... + w_m * x_m$ 
where $w = (w_0, w_1, ... , w_m)$

The prediction function f_d(x; w) should have the following values:

$f_d(x; w) = +1$ if label(x) = d

$f_d(x; w) = -1$ if label(x) is not d

The optimal model parameter w is obtained by minimizing the following objective function for each digit d :
$\sum_i ( f_d(x^{(i)}); w) - y^{(i)} )^2$

and the label of input x is given by:

argmax_d $f_d(x; w)$

1. Compute an optimal model parameter using the training dataset for each classifier $f_d(x, w)$
2. Compute (1) true positive rate, (2) error rate using (1) training dataset and (2) testing dataset.

# Set up

In [1]:
import numpy as np
import matplotlib.pyplot as plt
from pandas import Series, DataFrame
import pandas as pd
from numpy import linalg as lin

file_data = "mnist_train.csv"
handle_file = open(file_data, "r")
data = handle_file.readlines()
handle_file.close()

test_file_data = "mnist_test.csv"
test_handle_file = open(test_file_data, "r")
test_data = test_handle_file.readlines()
test_handle_file.close()

size_row = 28
size_col = 28
dim = size_col * size_row

num_image = len(data)
test_num_image = len(test_data)

# Normalization

In [2]:
def normalize(data):
    data_normalized = (data-min(data)) / (max(data) - min(data))
    return (data_normalized)

# Functions

In [3]:
def distance(x,y):
    d = x - y
    s = d ** 2
    return s
    
def check(M,val):
    length = len(M)
    res = np.zeros((length))
    for i in range(length):
        if(M[i] == val):
            res[i] = 1
        else:
            res[i] = -1
    return res

def sign(x):
    if(x>=0):
        return 1
    else:
        return -1

# Make label, image array with train, test data

In [4]:
list_image = np.empty((size_row * size_col, num_image), dtype=float)
list_label = np.empty(num_image, dtype=int)
test_list_image = np.empty((size_row * size_col, test_num_image), dtype=float)
test_list_label = np.empty(test_num_image, dtype=int)
count = 0
test_count = 0

for line in data:
    line_data = line.split(',')
    label = line_data[0]
    im_vector = np.asfarray(line_data[1:])
    im_vector = normalize(im_vector)
    
    list_label[count] = label
    list_image[:,count] = im_vector
    
    count += 1
    
for test_line in test_data:
    test_line_data = test_line.split(',')
    test_label = test_line_data[0]
    test_im_vector = np.asfarray(test_line_data[1:])
    test_im_vector = normalize(test_im_vector)
    
    test_list_label[test_count] = test_label
    test_list_image[:,test_count] = test_im_vector
    
    test_count += 1


# Multi-Labeled Images

In [5]:
B = []
num_class=10
num_p=785
matrix_A = np.zeros((count, 785), dtype = float)

for i in range(num_class):
    B.append(np.matrix(np.transpose(check(list_label, i))))
print (B)

[matrix([[-1.,  1., -1., ..., -1., -1., -1.]]), matrix([[-1., -1., -1., ..., -1., -1., -1.]]), matrix([[-1., -1., -1., ..., -1., -1., -1.]]), matrix([[-1., -1., -1., ..., -1., -1., -1.]]), matrix([[-1., -1.,  1., ..., -1., -1., -1.]]), matrix([[ 1., -1., -1., ...,  1., -1., -1.]]), matrix([[-1., -1., -1., ..., -1.,  1., -1.]]), matrix([[-1., -1., -1., ..., -1., -1., -1.]]), matrix([[-1., -1., -1., ..., -1., -1.,  1.]]), matrix([[-1., -1., -1., ..., -1., -1., -1.]])]


# Define Matrix A

$f_i(x) = x^{i-1}, i = 1, \dots, p$

$\hat{f}(x) = \theta_1 + \theta_2 x + \cdots  + \theta_p x^{p-1}$

$A = \begin{bmatrix} 1 & x^{(1)} & \cdots & (x^{(1)})^{p-1} \\ 1 & x^{(2)} & \cdots & (x^{(2)})^{p-1} \\ \vdots & \vdots & & \vdots \\ 1 & x^{(N)} & \cdots & (x^{(N)})^{p-1}\end{bmatrix}$

($x^i$ means scalar $x$ to $i$th power; $x^{(i)}$ is $i$th data point)

$\theta = (A^T A)^{-1} A^T b$


In [6]:
def defMatrix():
    for i in range(0,60000):
        matrix_A[i,0] = 1
        matrix_A[i,1:] = list_image[:,i]
    return matrix_A

# Compute an optimal model parameter using the training dataset

In [48]:
scores = np.zeros(785)

theta = np.zeros((num_class, 785))

train_set = list_image[:54000]
test_set = list_image[54000:]

for j in range(784):
    p = j
    feature = list_image
    
    A = defMatrix()
    
    for i in range(10):
        theta[i] = lin.pinv(A)*A.T#.*B[i].T

#     for k in range(num_class):
#         temp_theta = (A.T * A).I*A.T*B[k].T
#         count_num = 0
    
#         for i in range(dim):
#             if i not in index:
#                 theta[j][k][i]=temp_theta[count_num]
#                 count_num +=1
            
    nums = np.zeros((num_class+1, num_class+1))
    dist = 0
    min_num = 100000000
    
    for i in range(count):
        prediction = np.argmax(theta[j].dot(list_image))
        answer = list_label[i]
        
        nums[answer][prediction] += 1
        dist += distance(prediction, answer)
        
    score[j] = dist
m = 2**np.argmin(scores)
print("best = " + str(m))

MemoryError: 

# Compute Accuracy

In [None]:
def computeAcc(image, counts, label):
    acc_num = np.zeros((10))
    nacc_num = np.zeros((10))
    total = np.zeros((10))
    tp = np.zeros((10))
    error = np.zeros((10))
    feature = defMatrix(image, m)
    
    for i in range(counts):
        pred = np.argmax(theta[m].dot(feature[:,i]))
        answ = label[i]
        total[answ] += 1
        if(pred == answ):
            acc_num[answ] += 1
        else:
            nacc_num[answ] += 1
    
    for i in range(10):
        tp[i] = acc_num[i] / total[i]
        error[i] = nacc_num[i] / total[i]
    
    return tp, error

# Compute (1) True Positive, (2) False Positive, (3) True Negative, (4) False Negative

## Training set

In [None]:
tp, error = computeAcc(list_image, count, list_label)

In [None]:
data = {'True Positive' : [tp],
            'Error' : [error]}
frame = DataFrame(data, columns = ['Ture Positive', 'Error'])
frame    

## Test set

In [None]:
tp, error = computeAcc(test_list_image, test_count, test_list_label)

In [None]:
data = {'True Positive' : [tp],
            'Error' : [error]}
frame = DataFrame(data, columns = ['Ture Positive', 'Error'])
frame   