In [1]:
import os
import numpy as np, pandas as pd
from utilities import discretizeMean, oneHot, normalizeDF, makeClassMat
from crossValidate import getXVFolds

In [2]:
def concateBias(x):
    ''' Append a bias column to a 2-D input matrix.

    A column of ones (same dtype as x, one entry per row) is stacked to the
    right of the input data, so the ANN bias can be trained like any other
    weight.
    '''
    biasCol = np.ones((x.shape[0], 1), dtype=x.dtype)
    return np.hstack((x, biasCol))

def sigmoid(x):
    ''' Logistic sigmoid 1 / (1 + e^-x), evaluated element-wise.
    '''
    expNeg = np.exp(-x)
    return 1.0 / (1 + expNeg)

def softMax(x):
    ''' Normalized exponential (softmax) of the input scores.

    For input with more than one dimension the normalization runs along
    axis 1 (each row of a 2-D matrix sums to 1); otherwise the whole input is
    normalized as a single vector. Returns an array of the same shape holding
    the class probabilities.

    The maximum score is subtracted before exponentiating. This leaves the
    result mathematically unchanged but keeps np.exp from overflowing to inf
    (which would turn the output into nan) when scores are large.
    '''
    x = np.asarray(x)
    if not np.issubdtype(x.dtype, np.floating):
        x = x.astype(float) # np.exp would promote to float anyway
    if x.size == 0:
        return np.exp(x) # nothing to normalize; avoid max() of empty input

    if x.ndim > 1:
        pr = np.exp(x - x.max(axis=1, keepdims=True))
        return pr / pr.sum(axis=1, keepdims=True)
    pr = np.exp(x - x.max())
    return pr / pr.sum()

def crossEntNK(yhat, y):
    ''' Cross Entropy function. Used as the error for multi-class ANN.

    yhat and y are 2-D (data points x classes): fitted probabilities and
    targets. The error -sum_k y*log(yhat) is computed per data point and the
    mean over all points is returned.

    Fitted probabilities are floored at the smallest positive normal float
    before taking the log. Without this, a probability that underflowed to
    exactly 0 produces 0*log(0) = nan even for a class whose target is 0,
    which poisons the mean. Any positive normal probability is unaffected.
    '''
    yhat = np.asarray(yhat, dtype=float)
    floor = np.finfo(float).tiny
    logFit = np.log(np.clip(yhat, floor, None))
    return np.sum(-y * logFit, axis=1).mean() # avg error over all points

def getRandomSeq(N):
    ''' Return the integers 0..N-1 in random order.

    The order is drawn from NumPy's global random state via
    np.random.shuffle, so it is repeatable after np.random.seed.
    '''
    order = np.arange(N)
    np.random.shuffle(order) # shuffles in place
    return order

In [3]:
# Column names are supplied explicitly to read_csv; the last one is the label.
irisFile = os.path.join('./data/', 'iris.data')
irisName = [
    'sepalLen', 'sepalWth',
    'petalLen', 'petalWth',
    'class',
]
raw = pd.read_csv(irisFile, names=irisName)

# Targets from the label column; presumably a one-column-per-class indicator
# matrix, since it is later used via .shape -> (nData, nK) and argmax(axis=1)
# -- confirm against utilities.makeClassMat.
irisTypes = makeClassMat(raw['class'])
# Features are every column except the trailing label column.
irisMat = normalizeDF(raw[irisName[:-1]])

In [34]:
# Hold out the first cross-validation fold for testing; train on the rest.
folds = getXVFolds(irisMat, irisTypes, categorical=True)
testIdx, trainIdx = folds[0], np.hstack(folds[1:])

trainData = irisMat[trainIdx]
trainLabel = irisTypes[trainIdx]
testData = irisMat[testIdx]
testLabel = irisTypes[testIdx]

In [61]:
################################################################################
def train_0hidd(xMat, yMat, eta, eps=1e-7, trace=False, shuffle=True):
    ''' Train a softmax classifier with no hidden layer.

    Weights are fitted by per-sample gradient descent on the cross-entropy
    error: every epoch visits each data point once and updates the weights
    immediately after each point.

    xMat    : 2-D data matrix, one row per data point (a bias column is
              appended internally).
    yMat    : (nData, nK) target matrix, one column per class.
    eta     : learning rate; divided by 5 after any epoch whose mean error
              went up.
    eps     : stop once the epoch-to-epoch change in mean error is <= eps.
    trace   : print the error every 1000 epochs and once at the end.
    shuffle : visit the points in a fresh random order each epoch, otherwise
              in index order.

    Returns (wt, epch): the (nDim, nK) weight matrix, where nDim counts the
    bias column, and the number of epochs run (hard cap of 1e6).
    '''
    inputs = concateBias(xMat) # add bias terms
    nData, nK = yMat.shape
    nDim = inputs.shape[1]

    wt = np.random.rand(nDim, nK)/50 - 0.01 # uniform in [-0.01, 0.01)
    prevErr = np.inf # guarantees the first convergence test passes
    meanErr = crossEntNK(softMax(inputs @ wt), yMat) # error of random weights

    epch = 0
    while (abs(meanErr - prevErr) > eps) and epch < 1e6:
        if trace and epch % 1000 == 0:
            print('Iter #%u, error: %f'%(epch,meanErr))

        # visiting order for this epoch
        order = getRandomSeq(nData) if shuffle else np.arange(nData)
        for n in order:
            x, y = inputs[n], yMat[n]
            resid = y - softMax(x @ wt) # target minus fitted probabilities
            wt = wt + eta * np.outer(x, resid) # step down the gradient

        prevErr = meanErr
        meanErr = crossEntNK(softMax(inputs @ wt), yMat) # err for this epoch

        if meanErr > prevErr: # error went up: slow the learning rate
            eta /= 5
        epch += 1

    if trace: # print final error
        print('Final iteration #%u, error: %f' % (epch-1,meanErr) )
    return wt,epch

def pred_0hidd(xMat, wts):
    ''' Predict class indices with a no-hidden-layer softmax network.

    xMat : 2-D data matrix, one row per data point (a bias column is appended
           internally, matching train_0hidd).
    wts  : weight matrix as returned by train_0hidd.

    Returns the index of the most probable class for every row of xMat.
    '''
    # Use the arguments, not the notebook globals `testData` / `wt`, so the
    # function works for any data set and any trained weights.
    yHat = softMax(concateBias(xMat) @ wts)
    return yHat.argmax(axis=1)

In [25]:
# Sanity check: multiplying a sample into each row of v and summing along
# axis 1 gives the same numbers as the matrix-vector product v @ sample.
v = np.random.rand(6,4)/10
print((testData[0] * v).sum(axis=1))
print(v @ testData[0])

[0.05876141 0.04282027 0.06751971 0.0436813  0.04986794 0.03387666]
[0.05876141 0.04282027 0.06751971 0.0436813  0.04986794 0.03387666]


In [71]:
################################################################################
def train_1hidd(xMat, yMat, eta, nNodes, eps=1e-7, trace=False, shuffle=True):
    ''' Train a network with one sigmoid hidden layer and a softmax output.

    Weights are fitted by per-sample gradient descent on the cross-entropy
    error: every epoch visits each data point once and updates both weight
    matrices immediately after each point.

    xMat    : 2-D data matrix, one row per data point (a bias column is
              appended internally).
    yMat    : (nData, nK) target matrix, one column per class.
    eta     : learning rate; halved after any epoch whose mean error went up.
    nNodes  : number of hidden nodes.
    eps     : stop once the epoch-to-epoch change in mean error is <= eps.
    trace   : print the error every 1000 epochs and once at the end.
    shuffle : visit the points in a fresh random order each epoch, otherwise
              in index order.

    Returns ((wtHidd, wtOut), epch): the (nDim, nNodes) hidden and
    (nNodes, nK) output weight matrices, where nDim counts the bias column,
    and the number of epochs run (hard cap of 1e6).
    '''
    def forward(inp, wOut, wHidd):
        # hidden activations, then class probabilities
        hidden = sigmoid(inp @ wHidd)
        return hidden, softMax(hidden @ wOut)

    inputs = concateBias(xMat)
    nData, nK = yMat.shape
    nDim = inputs.shape[1]

    # Both matrices start uniform in [-0.01, 0.01). Output weights are drawn
    # first; keep this order so seeded runs stay reproducible.
    wtOut = np.random.rand(nNodes, nK)/50 - 0.01
    wtHidd = np.random.rand(nDim, nNodes)/50 - 0.01

    prevErr = np.inf # guarantees the first convergence test passes
    _, fitted = forward(inputs, wtOut, wtHidd)
    meanErr = crossEntNK(fitted, yMat)

    epch = 0
    while (abs(meanErr - prevErr) > eps) and epch < 1e6:
        if trace and epch % 1000 == 0:
            print('Iter #%u, error: %f'%(epch,meanErr))

        # visiting order for this epoch
        order = getRandomSeq(nData) if shuffle else np.arange(nData)
        for n in order:
            x, y = inputs[n], yMat[n]
            z, yHat = forward(x, wtOut, wtHidd)
            resid = y - yHat # target minus fitted probabilities
            stepOut = eta * np.outer(z, resid)
            # output error pushed back through wtOut and the sigmoid slope;
            # computed from the pre-update output weights
            stepHidd = eta * np.outer(x, (wtOut @ resid) * z*(1-z))
            wtOut, wtHidd = wtOut + stepOut, wtHidd + stepHidd

        prevErr = meanErr
        _, fitted = forward(inputs, wtOut, wtHidd) # fitted Y for this epoch
        meanErr = crossEntNK(fitted, yMat) # err for this epoch

        if meanErr > prevErr: # error went up: slow the learning rate
            eta /= 2
        epch += 1

    if trace: # print final error
        print('Final iteration #%u, error: %f' % (epch-1,meanErr) )
    return (wtHidd,wtOut),epch

def pred_1hidd(xMat, wtsHidd, wtsOut):
    ''' Predict class indices with a one-hidden-layer network.

    A bias column is appended to xMat, the data is passed through the sigmoid
    hidden layer (wtsHidd) and the softmax output layer (wtsOut), and the
    index of the most probable class is returned for every row.
    '''
    hidden = sigmoid(concateBias(xMat) @ wtsHidd)
    probs = softMax(hidden @ wtsOut)
    return probs.argmax(axis=1)

In [None]:
################################################################################
def train_hidd(xMat, yMat, eta, nLayers, nNodesHidd, eps=1e-7, trace=False,
               shuffle=True):
    ''' Train a network with nLayers sigmoid hidden layers and a softmax
    output by per-sample gradient descent on the cross-entropy error.

    xMat       : 2-D data matrix, one row per data point (a bias column is
                 appended internally; hidden layers get no extra bias unit).
    yMat       : (nData, nK) target matrix, one column per class.
    eta        : learning rate; halved after any epoch whose mean error rose.
    nLayers    : number of hidden layers (>= 1).
    nNodesHidd : number of nodes in every hidden layer.
    eps        : stop once the epoch-to-epoch change in mean error is <= eps.
    trace      : print the error every 1000 epochs and once at the end.
    shuffle    : visit the points in a fresh random order each epoch,
                 otherwise in index order.

    Returns ((wtHidd, wtOut), epch). For nLayers == 1, wtHidd is the single
    (nDim, nNodesHidd) hidden matrix, so the result has the same form as a
    one-hidden-layer fit. For nLayers > 1 it is a list of hidden matrices
    ordered from the input side; to predict, apply sigmoid(act @ W) for each
    W in turn and then softMax(act @ wtOut). epch is the number of epochs run
    (hard cap of 1e6).

    Raises ValueError if nLayers < 1.
    '''
    if nLayers < 1:
        raise ValueError('nLayers must be at least 1, got %r' % (nLayers,))

    def initWeights(D, K, nHidd, layers):
        # Small uniform weights in [-0.01, 0.01). The output matrix is drawn
        # first so a one-layer net consumes the random stream in the same
        # order as the single-hidden-layer trainer.
        out = np.random.rand(nHidd, K)/50 - 0.01
        fanIn = [D] + [nHidd] * (layers - 1)
        hidd = [np.random.rand(d, nHidd)/50 - 0.01 for d in fanIn]
        return out, hidd

    def feedForward(xs, wtsOut, wtsHidd):
        # Returns the activations of every hidden layer plus the fitted
        # class probabilities.
        acts, act = [], xs
        for w in wtsHidd:
            act = sigmoid(act @ w)
            acts.append(act)
        return acts, softMax(act @ wtsOut)

    def backProp(ys, yfit, xs, acts, wtsOut, wtsHidd):
        # One gradient step for a single data point. All gradients are taken
        # with respect to the weights as they were before this step.
        resid = ys - yfit
        newOut = wtsOut + eta * np.outer(acts[-1], resid)
        newHidd = [None] * len(wtsHidd)
        back = wtsOut @ resid # error arriving at the last hidden layer
        for l in range(len(wtsHidd) - 1, -1, -1):
            z = acts[l]
            delta = back * z*(1-z) # scale by the sigmoid slope
            layerIn = xs if l == 0 else acts[l-1]
            if l > 0:
                back = wtsHidd[l] @ delta # pass error to the layer below
            newHidd[l] = wtsHidd[l] + eta * np.outer(layerIn, delta)
        return newOut, newHidd

    xMat = concateBias(xMat)
    (nData,nK),nDim = yMat.shape, xMat.shape[1]

    wtOut, wtHidd = initWeights(nDim, nK, nNodesHidd, nLayers)

    lastErr = np.inf # max error possible
    zs,yHats = feedForward(xMat, wtOut, wtHidd)
    meanErr = crossEntNK(yHats, yMat)

    epch = 0
    while (abs(meanErr-lastErr) > eps) and epch < 1e6: # while not converged
        if epch%1000==0 and trace:
            print('Iter #%u, error: %f'%(epch,meanErr))

        if shuffle:
            seq = getRandomSeq(nData) # random seq for stoch. gradient descent
        else:
            seq = np.arange(nData)
        for n in seq: # loop over data set
            x,y = xMat[n],yMat[n] # index x and y for curr data point
            z,yHat = feedForward(x, wtOut, wtHidd) # feedforward
            wtOut,wtHidd = backProp(y, yHat, x, z, wtOut, wtHidd) # update weight

        lastErr = meanErr
        zs,yHats = feedForward(xMat, wtOut, wtHidd) # fitted Y for this epoch
        meanErr = crossEntNK(yHats, yMat) # err for this epoch

        if meanErr > lastErr:  # slow learning rate if error increase
            eta /= 2
        epch += 1

    if trace: # print final error
        print('Final iteration #%u, error: %f' % (epch-1,meanErr) )
    # a single hidden layer is returned bare, matching the one-layer trainer
    hidden = wtHidd[0] if nLayers == 1 else wtHidd
    return (hidden,wtOut),epch

def pred_1hidd(xMat, wtsHidd, wtsOut):
    ''' Predict class indices with a sigmoid-hidden-layer network.

    xMat    : 2-D data matrix, one row per data point (a bias column is
              appended internally).
    wtsHidd : either a single hidden weight matrix, or a list/tuple of 2-D
              hidden weight matrices ordered from the input side, which are
              applied one after another.
    wtsOut  : weight matrix of the softmax output layer.

    Returns the index of the most probable class for every row of xMat.
    '''
    # A list/tuple whose first element is itself 2-D is a stack of layers;
    # anything else (including a nested-list matrix) is one weight matrix.
    stacked = (isinstance(wtsHidd, (list, tuple)) and len(wtsHidd) > 0
               and np.ndim(wtsHidd[0]) == 2)
    layers = wtsHidd if stacked else [wtsHidd]

    act = concateBias(xMat)
    for w in layers:
        act = sigmoid(act @ w)
    yHat = softMax(act @ wtsOut)
    return yHat.argmax(axis=1)

In [53]:
# Fit the one-hidden-layer net: learning rate 2, 8 hidden nodes.
wt, nn = train_1hidd(
    trainData, trainLabel,
    eta=2, nNodes=8,
    eps=1e-6, trace=True, shuffle=True,
)

Iter #0, error: 1.098616
Final iteration #460, error: 0.047875


In [78]:
# Element-wise hit/miss of the predicted class against the held-out labels;
# wt unpacks to (hidden weights, output weights).
np.equal(pred_1hidd(testData, *wt), testLabel.argmax(axis=1))

array([ True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True, False,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True])