In [61]:
import numpy
import sys
import csv
from math import sqrt


In [62]:
def sigmoid(x):
    """Logistic function 1/(1 + e^-x) of a scalar, returned as a Python float.

    The value is computed in a sign-dependent form so that the exponential
    is only ever taken of a non-positive number. The direct formula calls
    exp(-x), which overflows to inf (and raises a numpy RuntimeWarning) for
    large negative x; the rewritten form gives the same limit without it.
    """
    if x >= 0:
        #-x <= 0, so exp(-x) lies in (0, 1] and cannot overflow
        return float(1.0/(1.0 + numpy.exp(-x)))
    #x < 0: multiply numerator and denominator by e^x so we only need exp(x) < 1
    expX = numpy.exp(x)
    return float(expX/(1.0 + expX))

def differentiationSigmoid(x):
    """Derivative of the logistic function at x: sigmoid(x) * (1 - sigmoid(x))."""
    activation = sigmoid(x)
    complement = 1.0 - sigmoid(x)
    return activation*complement

In [63]:
def net(W, X):
    """Net activation W^t * X.

    W holds one weight column per unit, so the result has W.shape[1] rows.
    The product is added onto a float64 zero column, which makes the
    returned array float64 whatever the input dtypes are.
    """
    unitCount = W.shape[1]
    weightedSum = numpy.dot(W.transpose(), X)
    zeroColumn = numpy.zeros((unitCount, 1), dtype=numpy.float64)
    return numpy.add(zeroColumn, weightedSum)

def sensitivityK(TkVec, ZkVec, netKVec):
    """Sensitivity at layer k (the output layer).

    For every output unit: (target - output) * f'(net), where f' is
    differentiationSigmoid.

    TkVec   -- column vector of target values
    ZkVec   -- column vector of network outputs
    netKVec -- column vector of net activations at the output units
    Returns a new column vector with one sensitivity per output unit.
    """
    delK = numpy.subtract(TkVec, ZkVec)
    outputCount = delK.shape[0]
    for k in range(outputCount):
        slope = differentiationSigmoid(netKVec[k][0])
        delK[k][0] = delK[k][0]*slope
    return delK
    
def sensitivityJ(WkjVec, sensitivityK_Vec, netJVec):
    """Sensitivity at layer j (the hidden layer).

    WkjVec           -- hidden-to-output weights; per the original notes it
                        is (nH+1 X 3) with the bias weights in the last row
    sensitivityK_Vec -- column vector of output-layer sensitivities
    netJVec          -- column vector of the nH hidden net activations

    Returns a column of len(netJVec)+1 entries. The first len(netJVec)
    entries are the back-propagated sums scaled by f'(netJ); the final
    entry (the bias row) is the raw back-propagated sum, left unscaled.
    """
    zeroColumn = numpy.zeros((len(netJVec)+1, 1), dtype=numpy.float64)
    backPropagated = numpy.dot(WkjVec, sensitivityK_Vec)
    delJ = numpy.add(zeroColumn, backPropagated)
    hiddenCount = netJVec.shape[0]
    for unit in range(hiddenCount):
        slope = differentiationSigmoid(netJVec[unit][0])
        delJ[unit][0] = delJ[unit][0]*slope
    return delJ

In [64]:
def calculateZk(WkjVec, YjVec):
    """Output-layer activations: the same sigmoid(W^t * input) computation
    as calculateYj, applied to the hidden-to-output weights."""
    ZkVec = calculateYj(WkjVec, YjVec)
    return ZkVec

def calculateYj(WjiVec, X):
    """Layer activations sigmoid(WjiVec^t * X) as a float64 column vector.

    WjiVec holds one weight column per unit, so the result has
    WjiVec.shape[1] rows; the sigmoid is applied to each entry in turn.
    """
    unitCount = WjiVec.shape[1]
    zeroColumn = numpy.zeros((unitCount, 1), dtype=numpy.float64)
    activations = numpy.add(zeroColumn, numpy.dot(WjiVec.transpose(), X))
    for unit in range(unitCount):
        activations[unit][0] = sigmoid(activations[unit][0])
    return activations

In [65]:
def neuralNet3Layer(WkjVec, WjiVec, errorThreshold, eita, TkMat, X_Mat, Xtest_Mat, targetDigitVec, maxIterations=None):
    """Train a 3 layer sigmoid network with on-line back propagation.

    The training columns of X_Mat are visited cyclically, one weight update
    per sample. After each update the squared error 0.5*||t - z||^2 of that
    same sample is re-evaluated with the new weights, and training stops as
    soon as it is no longer above errorThreshold. Note that this criterion
    looks at a single sample only, not at the whole training set.

    WkjVec         -- hidden-to-output weights, (nH+1 X c), bias row last
    WjiVec         -- input-to-hidden weights, (ni+1 X nH)
    errorThreshold -- per-sample error at which training stops
    eita           -- learning rate
    TkMat          -- matrix whose columns are the target vectors
    X_Mat          -- matrix of augmented training column vectors
                      (1 augmented in the end of each vector)
    Xtest_Mat      -- matrix of augmented test column vectors, passed to test()
    targetDigitVec -- true labels of the test columns, passed to test()
    maxIterations  -- optional cap on the number of weight updates; None
                      (the default) keeps the original unbounded behaviour

    Prints the final weights and the test accuracy, then returns the trained
    weights as the tuple (WkjVec, WjiVec), i.e. in parameter order. The
    input weight arrays are not modified; updated copies are produced.

    Raises ValueError if X_Mat has no columns.
    """
    n = X_Mat.shape[1]        #no of feature vectors
    if n == 0:
        raise ValueError("X_Mat must contain at least one training vector")
    biasUnit = numpy.array([1]).reshape((1,1))    #constant 1 appended to the hidden outputs
    error = float("inf")      #guarantees the loop body runs at least once
    i = 0
    count = 0
    while error > errorThreshold and (maxIterations is None or count < maxIterations):
        X = X_Mat[:, i]
        X = X.reshape((len(X), 1))    #real column vector
        TkVec = TkMat[:, i]
        TkVec = TkVec.reshape((len(TkVec), 1))
        #forward pass
        netJVec = net(WjiVec, X)
        YjVec = calculateYj(WjiVec, X)
        YjVecAugmented = numpy.append(YjVec, biasUnit, axis=0)
        netKVec = net(WkjVec, YjVecAugmented)
        ZkVec = calculateZk(WkjVec, YjVecAugmented)
        #backward pass - delJ must be computed before WkjVec is overwritten
        delK_Vec = sensitivityK(TkVec, ZkVec, netKVec)
        delJ_Vec = sensitivityJ(WkjVec, delK_Vec, netJVec)
        WkjVec = numpy.add(WkjVec, numpy.dot(YjVecAugmented, delK_Vec.transpose())*eita)
        tmpVec = numpy.dot(X, delJ_Vec.transpose())
        #last column of tmpVec belongs to the hidden bias unit, which has no incoming weights
        WjiVec = numpy.add(WjiVec, tmpVec[:, :-1]*eita)
        #calculate the error of this sample with the updated weights
        YjVec = calculateYj(WjiVec, X)
        YjVecAugmented = numpy.append(YjVec, biasUnit, axis=0)
        ZkVec = calculateZk(WkjVec, YjVecAugmented)
        errorVecNorm = numpy.linalg.norm(numpy.subtract(TkVec, ZkVec))
        error = 0.5*pow(errorVecNorm, 2)
        i = (i+1)%n
        count += 1
    print("Iteration=",count,"\nFinal Wji---",WjiVec)
    print("Iteration=",count,"\nFinal Wkj---",WkjVec)
    print("\nAccuracy: ", test(WjiVec, WkjVec, targetDigitVec, Xtest_Mat))
    return WkjVec, WjiVec

In [66]:
def test(WjiVec, WkjVec, targetDigitVec, X_Mat):
    """Fraction of the columns of X_Mat that the network classifies correctly.

    Each column is fed forward through both layers; the index of the largest
    output is the predicted class and is compared with targetDigitVec at the
    same position.

    WjiVec         -- input-to-hidden weights
    WkjVec         -- hidden-to-output weights (bias row last)
    targetDigitVec -- true class of every column of X_Mat
    X_Mat          -- matrix of augmented column vectors
    """
    sampleCount = X_Mat.shape[1]
    biasUnit = numpy.array([1]).reshape((1,1))
    hits = 0.0
    for col in range(sampleCount):
        sample = X_Mat[:, col]
        sample = sample.reshape((len(sample), 1))    #real column vector
        hidden = numpy.append(calculateYj(WjiVec, sample), biasUnit, axis=0)
        predicted = numpy.argmax(calculateZk(WkjVec, hidden))
        if(targetDigitVec[col]==predicted):
            hits += 1
    return hits/sampleCount

In [67]:
if __name__ == "__main__":
    eita = 0.05
    errorThreshold = 0.001
    #nH = int(input("nH: "))
    NH=[3,5,10,15]
    for nH in NH:
        print("----Number of Hidden Layers-----",nH)
        c = 3 # for class 0,1,2
        ni = 64
        #WkjVec is a 2D matrix of size (nH+1 X 3)
        #WjiVec is a 2D matrix of size (ni+1 X nH)
        #TkVec is a vector of 3 elements
        #X_Mat is a matrix of augmented column vectors - 1 augmented in the end of the vector
        wji = numpy.random.randn(ni+1, nH)
        wkj = numpy.random.randn(nH+1, c)
        for i in range(len(wji)):
            for j in range(len(wji[i])):
                wji[i][j] *= (1/sqrt(ni))
        for i in range(len(wkj)):
            for j in range(len(wkj[i])):
                wkj[i][j] *= (1/sqrt(nH))
        X_Mat = numpy.zeros((ni+1,1))
        TkMat = numpy.zeros((c,1))
        with open('optdigits.tra.csv','r') as f1:
            reader = csv.reader(f1)
            for row in reader:
                #make a column vector of first 64 features
                row = [int(x) for x in row]
                X =  numpy.append(numpy.asarray(row[:-1]), 1).reshape((ni+1, 1))
                X_Mat = numpy.hstack((X_Mat, X))
                targetDigit = row[-1]
                Tk = numpy.zeros((c,1))
                Tk[targetDigit] = 1
                TkMat = numpy.hstack((TkMat, Tk))
        X_Mat = X_Mat[:, 1:]
        TkMat = TkMat[:, 1:]
        #Testing data
        Xtest_Mat = numpy.zeros((ni+1,1))
        targetDigitVec = []
        with open('optdigits.tes.csv','r') as f2:
            reader = csv.reader(f2)
            for row in reader:
                row = [int(x) for x in row]
                X =  numpy.append(numpy.asarray(row[:-1]), 1).reshape((ni+1, 1))
                Xtest_Mat = numpy.hstack((Xtest_Mat, X))
                targetDigitVec.append(row[-1])
            Xtest_Mat = Xtest_Mat[:, 1:]
        neuralNet3Layer(wkj, wji, errorThreshold, eita, TkMat, X_Mat, Xtest_Mat, targetDigitVec)

('----Number of Hidden Layers-----', 3)
('Iteration=', 37106, '\nFinal Wji---', array([[-0.26570635,  0.07391975,  0.05153561],
       [-0.12936421,  0.40003792,  0.05204389],
       [-0.11247284,  0.44291353, -0.60271217],
       [-0.39331659,  0.05023629, -0.07303955],
       [-0.79401293,  0.3698436 , -0.47886831],
       [ 0.35625745,  0.04402892, -0.49939111],
       [-0.05084843, -0.03693933, -0.19477125],
       [ 0.0690066 ,  0.01805038, -0.21783437],
       [-0.03138705, -0.00987999,  0.0726756 ],
       [-0.58168617,  0.72915436, -0.29607846],
       [-0.86248732,  0.73313985, -0.11269327],
       [ 0.0209049 ,  0.15987557,  0.03083071],
       [-0.28675086,  0.08779907, -0.29067971],
       [-0.22544726, -0.07958326,  0.27248275],
       [-0.18142989, -0.20197677, -0.00735273],
       [-0.11586826, -0.02719544,  0.05092496],
       [ 0.11272179, -0.05931319,  0.02593992],
       [ 0.15827046,  0.16561665, -0.16365404],
       [ 0.23278955, -0.35593301,  0.03437384],
       [