In [1]:
import numpy as np
import matplotlib.pyplot as plt
import os
import cv2
from numpy import linalg as LA

In [2]:

def list_files(directory):
    """Return the names of the regular files directly inside ``directory``.

    Sub-directories are skipped and the listing is not recursive. Names are
    returned without the directory prefix and sorted, so the result does not
    depend on the arbitrary order produced by ``os.listdir``.

    Parameters
    ----------
    directory : str
        Path of the directory to list.

    Returns
    -------
    list of str or None
        Sorted file names, or ``None`` when ``directory`` is not an existing
        directory.
    """
    # isdir (rather than exists) also rejects a path that points at a regular
    # file, on which os.listdir would raise NotADirectoryError.
    if not os.path.isdir(directory):
        return None

    return sorted(
        name for name in os.listdir(directory)
        if os.path.isfile(os.path.join(directory, name))
    )


In [3]:
def LoadImageData(dPath, fileNames):
    """Read grayscale images from ``dPath`` and derive each label from its file name.

    The label of a file is the last character of its name before the
    extension (e.g. ``"img_7.png"`` -> ``7.0``).

    Parameters
    ----------
    dPath : str
        Directory containing the image files.
    fileNames : list of str
        File names relative to ``dPath``.

    Returns
    -------
    (list, list) or None
        ``(Images, Labels)``: the arrays returned by ``cv2.imread`` in
        grayscale mode and the matching ``np.float64`` digits, in the order of
        ``fileNames``. ``None`` when ``dPath`` does not exist or ``fileNames``
        is not a list.

    Raises
    ------
    ValueError
        If a file name does not end with a digit before its extension.
    OSError
        If ``cv2.imread`` cannot read a file (it returns ``None``).
    """
    if not os.path.exists(dPath) or not isinstance(fileNames, list):
        return None

    Images = []
    Labels = []
    for f in fileNames:
        # Look at the stem instead of a fixed offset from the end, so the
        # extension length (.png, .jpeg, none) does not matter and very short
        # names cannot raise IndexError. The [-1:] slice is '' for an empty
        # stem, which isdigit() rejects.
        digit = os.path.splitext(f)[0][-1:]
        if not digit.isdigit():
            raise ValueError('The file name does not end with digit.')

        filePath = os.path.join(dPath, f)
        image = cv2.imread(filePath, cv2.IMREAD_GRAYSCALE)
        # imread signals failure by returning None instead of raising; fail
        # here rather than later on an unrelated attribute access.
        if image is None:
            raise OSError('Could not read image: ' + filePath)

        Images.append(image)
        Labels.append(np.float64(digit))

    return Images, Labels

In [4]:
def ReconstructData(Images , Labels):
    """Pack a list of 2-D images into a row-per-image matrix.

    Parameters
    ----------
    Images : list of 2-D arrays
        All images are expected to have the shape of ``Images[0]``.
    Labels : list
        One label per image.

    Returns
    -------
    (ndarray, ndarray) or None
        ``DataMat`` of shape ``(len(Images), rows * cols)`` holding each image
        flattened in row-major order, and ``LabelVec`` of shape ``(-1, 1)``.
        ``None`` when either argument is not a plain list.
    """
    if not (type(Labels) is list and type(Images) is list):
        return None

    # The first image fixes the row width of the matrix.
    rows, cols = Images[0].shape
    DataMat = np.zeros((len(Images), rows * cols))
    LabelVec = np.array(Labels).reshape((-1, 1))

    for idx, image in enumerate(Images):
        DataMat[idx] = image.reshape(-1)

    return DataMat, LabelVec

In [5]:
def LoadMnistData():
    """Load the train and test image sets from ``./Data``.

    Images are read from ``Data/train`` and ``Data/test`` under the current
    working directory; each file's label comes from its name (see
    ``LoadImageData``).

    Returns
    -------
    tuple of ndarray
        ``(TrainDataMat, TrainLabelVec, TestDataMat, TestLabelVec)`` as
        produced by ``ReconstructData``.

    Raises
    ------
    FileNotFoundError
        If a split directory is missing or contains no files.
    """
    dataPath = os.path.join(os.getcwd(), 'Data')

    def loadSplit(name):
        splitPath = os.path.join(dataPath, name)
        fileNames = list_files(splitPath)
        # list_files returns None for a missing directory; without this check
        # the failure would surface later as an opaque unpacking TypeError
        # (missing directory) or IndexError (empty directory).
        if not fileNames:
            raise FileNotFoundError('No image files found in ' + splitPath)
        images, labels = LoadImageData(splitPath, fileNames)
        return ReconstructData(images, labels)

    TestDataMat, TestLabelVec = loadSplit('test')
    TrainDataMat, TrainLabelVec = loadSplit('train')

    return TrainDataMat, TrainLabelVec, TestDataMat, TestLabelVec

In [6]:
def _binarize_split(DataMat, LabelVec, num1, num2):
    """Select the rows labelled num1 or num2 and relabel them 0 / 1 on a copy."""
    flatLabels = np.asarray(LabelVec).reshape(-1)
    isFirst = flatLabels == num1
    isSecond = flatLabels == num2
    keep = np.flatnonzero(isFirst | isSecond)

    # Integer-array indexing returns a copy, so the caller's labels stay intact.
    binLabels = LabelVec[keep, :]
    # num1 takes precedence when num1 == num2, matching an if/elif on num1 first.
    binLabels[:, 0] = np.where(isFirst[keep], 0., 1.)
    return DataMat[keep, :], binLabels


def BinarizeData(TrainDataMat, TrainLabelVec, TestDataMat, TestLabelVec, num1, num2):
    """Reduce a multi-class data set to the two classes ``num1`` and ``num2``.

    Samples whose label is neither ``num1`` nor ``num2`` are dropped; the
    remaining labels become ``0`` (for ``num1``) and ``1`` (for ``num2``).
    The input arrays are not modified, so the function can be called again
    on the same data with a different pair of classes.

    Parameters
    ----------
    TrainDataMat, TestDataMat : ndarray, shape (k, d)
        One sample per row.
    TrainLabelVec, TestLabelVec : ndarray, shape (k, 1)
        Label of each row of the corresponding data matrix.
    num1, num2 : scalar
        Labels to keep; relabelled as 0 and 1 respectively.

    Returns
    -------
    tuple of ndarray
        ``(train data, train labels, test data, test labels)`` restricted to
        the selected rows, in their original order.
    """
    bTrainData, bTrainLabel = _binarize_split(TrainDataMat, TrainLabelVec, num1, num2)
    bTestData, bTestLabel = _binarize_split(TestDataMat, TestLabelVec, num1, num2)
    return bTrainData, bTrainLabel, bTestData, bTestLabel

In [7]:
def SigmoidF(w,x):
    """Evaluate the logistic function ``1 / (1 + exp(-x.dot(w)))``.

    Parameters
    ----------
    w : ndarray
        Weight vector (or column).
    x : ndarray
        Data; ``x.dot(w)`` must be defined.

    Returns
    -------
    ndarray or scalar
        Sigmoid of ``x.dot(w)``, with the same shape as that product.
    """
    z = x.dot(w)
    # exp is only evaluated at -|z| <= 0, so its value lies in [0, 1] and
    # cannot overflow however large |z| becomes.
    expNeg = np.exp(-np.abs(z))
    stable = np.where(z >= 0, 1 / (1 + expNeg), expNeg / (1 + expNeg))
    # [()] turns a 0-d result back into a scalar and is a no-op view otherwise.
    return stable[()]

In [8]:
def myLogisticRegression(TrainData, TrainLabel, eps=0.01, sensitivity=10 ** (-7),
                         max_iter=None, seed=None):
    """Fit logistic-regression weights by fixed-step gradient ascent.

    Starting from random weights, repeatedly applies
    ``w <- w + eps * X^T (y - sigmoid(X w))`` until the squared norm of the
    change in gradient between two successive iterates is no larger than
    ``sensitivity``.

    Parameters
    ----------
    TrainData : ndarray, shape (k, d)
        One training sample per row.
    TrainLabel : ndarray, shape (k, 1)
        Labels of the training samples.
    eps : float, optional
        Step size.
    sensitivity : float, optional
        Stopping threshold on the squared norm of the gradient change.
    max_iter : int or None, optional
        Upper bound on the number of weight updates. ``None`` (default) means
        no bound, in which case the loop runs until the stopping test is met.
        At least one update is always performed.
    seed : int or None, optional
        Seed for the weight initialization. ``None`` (default) draws from
        numpy's global random state.

    Returns
    -------
    ndarray, shape (d, 1)
        The final weights.
    """
    def gradient(weights):
        return TrainData.transpose().dot(TrainLabel - SigmoidF(weights, TrainData))

    # Initial weights are uniform in [0, max(TrainData)).
    shape = (TrainData.shape[1], 1)
    high = np.max(TrainData)
    if seed is None:
        w = np.random.uniform(0, high, shape)
    else:
        w = np.random.RandomState(seed).uniform(0, high, shape)

    grad = gradient(w)
    nUpdates = 0
    while True:
        w = w + eps * grad
        newGrad = gradient(w)
        NormVal = LA.norm(newGrad - grad, 2) ** 2
        grad = newGrad
        nUpdates += 1

        # Written as "not (a > b)" so that a NaN norm also ends the loop.
        if not (NormVal > sensitivity):
            break
        if max_iter is not None and nUpdates >= max_iter:
            break

    return w

In [9]:
def myLogisticClassification(TestData, w):
    """Threshold the sigmoid scores of ``TestData`` and return their complement.

    Scores are rounded to 1 where ``sigmoid >= 0.5`` and to 0 where
    ``sigmoid < 0.5``; the function then returns ``1 - rounded``. The result
    is therefore 0 where the score is at least 0.5 and 1 where it is below.

    NOTE(review): this is the complement of the usual "score >= 0.5 -> 1"
    convention. The accuracy cell of this notebook relies on it by counting
    disagreements with the true labels, so the two must be changed together.

    Parameters
    ----------
    TestData : ndarray
        One sample per row.
    w : ndarray
        Weights passed to ``SigmoidF``.

    Returns
    -------
    ndarray, shape (-1, 1)
        Complemented 0/1 values, one per sample.
    """
    scores = SigmoidF(w, TestData)

    # Both masks are taken from the raw scores; the two sets are disjoint, so
    # the order of the assignments below does not matter.
    atOrAboveHalf = scores >= 0.5
    belowHalf = scores < 0.5
    scores[atOrAboveHalf] = 1
    scores[belowHalf] = 0

    return np.subtract(1, scores.reshape((-1, 1)))

In [10]:
# Load the train/test data matrices and label vectors from the Data directory
# under the current working directory.
TrainDataMat, TrainLabelVec, TestDataMat, TestLabelVec = LoadMnistData()


In [11]:
# Restrict the problem to two classes: samples labelled num1 or num2 are kept,
# and BinarizeData relabels them as 0 and 1 respectively.
num1, num2 = 0., 1.
bTrainDataMat, bTrainLabelVec, bTestDataMat, bTestLabelVec = BinarizeData(TrainDataMat, TrainLabelVec, TestDataMat, TestLabelVec, num1, num2)


In [13]:
# Fit the weights on the two-class training set with the default step size
# and stopping threshold. The result has one weight per column of the data.
W = myLogisticRegression(bTrainDataMat, bTrainLabelVec)

print(W.shape)

(784, 1)


In [14]:
# Apply the fitted weights to the two-class test set.
# Note: myLogisticClassification returns 1 - (thresholded sigmoid), not the
# thresholded sigmoid itself.
ApproximateLabel = myLogisticClassification(bTestDataMat, W)

In [15]:
# myLogisticClassification returns the complement (1 - prediction) of the
# thresholded sigmoid output, so an entry of ApproximateLabel that DIFFERS from
# the true label marks a correctly classified sample. The share of such
# entries, scaled to percent, is therefore the test accuracy.
acc = 100 * np.sum(np.abs(ApproximateLabel - bTestLabelVec)) / len(bTestLabelVec)

print(acc)

99.81087470449172
