In [2]:
import os
import numpy as np, pandas as pd
from utilities import discretizeMean, oneHot, normalizeDF, makeClassMat
from crossValidate import getXVFolds

In [3]:
def concateBias(x):
    ''' Append a constant bias input to every row of a 2-D data matrix.
    The result is x with one extra right-most column filled with ones (same
    dtype as x), so the bias can be learned like any other weight.
    '''
    nRows = x.shape[0]
    biasCol = np.ones((nRows, 1), dtype=x.dtype)
    return np.hstack((x, biasCol))

def sigmoid(x):
    ''' Logistic sigmoid, 1 / (1 + exp(-x)), evaluated element-wise.
    '''
    expNeg = np.exp(-x)
    return 1.0 / (1 + expNeg)

def softMax(x):
    ''' Normalized exponential (softmax) of a set of activations.

    x holds the activations; any product of inputs and weights is computed by
    the caller before this function is invoked. A 1-D x is normalized over all
    of its entries. For x with two or more dimensions the normalization runs
    along axis 1, so every row of a 2-D matrix sums to one.

    The maximum activation is subtracted before exponentiating. This does not
    change the mathematical result, but it keeps np.exp from overflowing to
    inf, which would otherwise produce inf/inf = nan for large activations.
    '''
    z = np.asarray(x)
    if z.dtype.kind in 'biu':
        # bool/integer input: go to float first so the shift below cannot
        # wrap around (unsigned) or be rejected (bool)
        z = z.astype(np.float64)

    if z.ndim > 1:
        if z.size: # max() of an empty array is undefined; nothing to shift
            z = z - z.max(axis=1, keepdims=True)
        pr = np.exp(z)
        return pr / pr.sum(axis=1, keepdims=True)

    if z.size:
        z = z - z.max()
    pr = np.exp(z)
    return pr / pr.sum()

def crossEntNK(yhat, y):
    ''' Cross Entropy function. Used as the error for multi-class ANN.

    yhat : 2-D array of predicted class probabilities, one row per data point
    y    : 2-D array of targets with the same shape as yhat

    The per-point error is sum_k( -y_k * log(yhat_k) ); the return value is
    the mean of that error over all data points.

    Entries whose target is 0 contribute exactly 0, even when the matching
    prediction is 0. Evaluating 0 * log(0) directly would give nan and turn
    the whole mean into nan. A zero prediction on an entry with a non-zero
    target still yields an infinite error.
    '''
    yhat, y = np.asarray(yhat), np.asarray(y)
    # where the target is 0, log(1) = 0 stands in for log(yhat)
    safeYhat = np.where(y == 0, 1.0, yhat)
    return np.sum(-y * np.log(safeYhat), axis=1).mean() # avg error over all points

def getRandomSeq(N):
    ''' Return the integers 0..N-1 in random order, shuffled with numpy's
    global random state.
    '''
    order = np.arange(N)
    np.random.shuffle(order)
    return order

In [4]:
# Column labels for the CSV; the last one holds the class name.
irisName = ['sepalLen', 'sepalWth', 'petalLen', 'petalWth', 'class']
irisFile = os.path.join('./data/', 'iris.data')

# names= supplies the column labels, so pandas reads the first line as data
raw = pd.read_csv(irisFile, names=irisName)

# Targets from the class column (presumably a one-hot matrix -- confirm
# against utilities.makeClassMat); features are the four measurement columns.
irisTypes = makeClassMat(raw['class'])
irisMat = normalizeDF(raw[irisName[:-1]])

In [12]:
# Split the data into cross-validation folds. Each fold is used below as a
# set of row indices (presumably one index array per fold -- confirm against
# crossValidate.getXVFolds).
folds = getXVFolds(irisMat, irisTypes, categorical=True)

# The first fold is held out for testing; the rest are merged for training.
testIdx = folds[0]
trainIdx = np.hstack(folds[1:])

trainData = irisMat[trainIdx]
trainLabel = irisTypes[trainIdx]
testData = irisMat[testIdx]
testLabel = irisTypes[testIdx]

In [61]:
################################################################################
def train_0hidd(xMat, yMat, eta, eps=1e-7, trace=False, stochastic=True,
                maxEpoch=1e6, etaDecay=5):
    ''' Train a softmax network without hidden layer by per-sample gradient
    descent on the cross-entropy error.

    A bias column is appended to xMat and the weights are initialised
    uniformly in [-0.01, 0.01). Each epoch visits every data point once and
    updates the weights after each point. The mean cross-entropy over the
    whole data set is then recomputed. Training stops when that error changes
    by no more than eps between two consecutive epochs, or after maxEpoch
    epochs.

    Parameters
    ----------
    xMat : 2-D array, one row per data point, without bias column
    yMat : 2-D array of shape (nData, nK) holding the targets of every point
           (presumably one-hot class indicators -- confirm with the caller)
    eta : initial learning rate
    eps : convergence threshold on the change of the mean error
    trace : if True, print the error every 1000 epochs and at the end
    stochastic : if True, visit the points in a new random order every epoch;
                 otherwise visit them in storage order
    maxEpoch : upper bound on the number of epochs
    etaDecay : eta is divided by this factor whenever the epoch error rises

    Returns
    -------
    (wt, epch) : the weight matrix of shape (nDim+1, nK), whose last row holds
                 the bias weights, and the number of epochs that were run
    '''
    xMat = concateBias(xMat)
    (nData,nK),nDim = yMat.shape, xMat.shape[1]

    wt = np.random.rand(nDim,nK)/50 - 0.01 # init wts to be [-0.01,0.01)
    lastErr = np.inf # guarantees the first convergence test fails
    meanErr = crossEntNK(softMax(xMat @ wt), yMat)

    epch = 0
    while abs(meanErr-lastErr) > eps and epch < maxEpoch: # while not converged
        if trace and epch%1000==0:
            print('Iter #%u, error: %f'%(epch,meanErr))

        # visiting order for this epoch
        seq = getRandomSeq(nData) if stochastic else np.arange(nData)
        for n in seq: # loop over data set
            x,y = xMat[n],yMat[n] # index x and y for curr data point
            yHat = softMax(x @ wt) # feedforward
            # step along the negative gradient of the cross entropy
            wt = wt + eta * np.outer(x, y-yHat)

        lastErr = meanErr
        meanErr = crossEntNK(softMax(xMat @ wt), yMat) # err for this epoch

        if meanErr > lastErr:  # slow learning rate if error increase
            eta /= etaDecay
        epch += 1

    if trace: # print final error
        print('Final iteration #%u, error: %f' % (epch-1,meanErr) )
    return wt,epch

def pred_0hidd(xMat, wts):
    ''' Predict class indices with a trained network without hidden layer.

    xMat : 2-D array of inputs, one row per data point, without bias column
           (the bias column is appended here)
    wts  : weight matrix as returned by train_0hidd

    Returns a 1-D array holding, for every row of xMat, the index of the class
    with the highest softmax probability.
    '''
    # use the arguments, not notebook globals, so any data/weights can be scored
    yHat = softMax(concateBias(xMat) @ wts)
    return yHat.argmax(axis=1)

In [69]:
################################################################################
def train_1hidd(xMat, yMat, eta, nHidd, eps=1e-7, trace=False, stochastic=True):
    ''' Train a network with one sigmoid hidden layer and a softmax output
    layer by per-sample gradient descent on the cross-entropy error.

    A bias column is appended to the inputs, and a bias unit is appended to
    the hidden activations. Both weight matrices are initialised uniformly in
    [-0.01, 0.01). Each epoch visits every data point once and updates both
    matrices after each point. Training stops when the mean cross-entropy
    changes by no more than eps between two consecutive epochs, or after 1e6
    epochs.

    Parameters
    ----------
    xMat : 2-D array, one row per data point, without bias column
    yMat : 2-D array of shape (nData, nK) holding the targets of every point
           (presumably one-hot class indicators -- confirm with the caller)
    eta : initial learning rate; halved whenever the epoch error rises
    nHidd : number of hidden units (bias unit not counted), must be >= 1
    eps : convergence threshold on the change of the mean error
    trace : if True, print the error every 1000 epochs and at the end
    stochastic : if True, visit the points in a new random order every epoch;
                 otherwise visit them in storage order

    Returns
    -------
    ((wtHidd, wtOut), epch) : wtHidd has shape (nDim+1, nHidd) and maps the
        biased inputs to the hidden units; wtOut has shape (nHidd+1, nK) and
        maps the biased hidden activations to the outputs; epch is the number
        of epochs that were run.

    Raises
    ------
    ValueError if nHidd is smaller than 1.
    '''
    if nHidd < 1:
        raise ValueError('nHidd must be at least 1')

    def feedForward(xs, wHidd, wOut):
        # xs is 2-D (rows = biased data points). Returns the biased hidden
        # activations together with the output probabilities.
        hidd = concateBias(sigmoid(xs @ wHidd))
        return hidd, softMax(hidd @ wOut)

    def backProp(ys, yfit, xs, hidd, wHidd, wOut):
        errOut = ys - yfit # negative gradient w.r.t. the output activations
        act = hidd[:,:-1] # hidden activations without the bias unit
        # propagate the error through the pre-update output weights;
        # act*(1-act) is the derivative of the sigmoid
        errHidd = (errOut @ wOut[:-1].T) * act * (1 - act)
        return wHidd + eta * (xs.T @ errHidd), wOut + eta * (hidd.T @ errOut)

    xMat = concateBias(xMat)
    (nData,nK),nDim = yMat.shape, xMat.shape[1]

    wtHidd = np.random.rand(nDim,nHidd)/50 - 0.01 # init wts to be [-0.01,0.01)
    wtOut = np.random.rand(nHidd+1,nK)/50 - 0.01 # +1 row for the hidden bias
    lastErr = np.inf # guarantees the first convergence test fails
    _,yHats = feedForward(xMat, wtHidd, wtOut)
    meanErr = crossEntNK(yHats, yMat)

    epch = 0
    while (abs(meanErr-lastErr) > eps) and epch < 1e6: # while not converged
        if epch%1000==0 and trace:
            print('Iter #%u, error: %f'%(epch,meanErr))

        if stochastic:
            seq = getRandomSeq(nData) # random seq for stoch. gradient descent
        else:
            seq = np.arange(nData)
        for n in seq: # loop over data set
            x,y = xMat[n:n+1],yMat[n:n+1] # keep 2-D: one-row matrices
            hidd,yHat = feedForward(x, wtHidd, wtOut) # feedforward
            wtHidd,wtOut = backProp(y, yHat, x, hidd, wtHidd, wtOut) # update

        lastErr = meanErr
        _,yHats = feedForward(xMat, wtHidd, wtOut) # fitted Y for this epoch
        meanErr = crossEntNK(yHats, yMat) # err for this epoch

        if meanErr > lastErr:  # slow learning rate if error increase
            eta /= 2
        epch += 1

    if trace: # print final error
        print('Final iteration #%u, error: %f' % (epch-1,meanErr) )
    return (wtHidd,wtOut),epch


In [73]:
# Fit the no-hidden-layer network on the training folds.
# wt: learned weights, nn: number of epochs that were run.
wt, nn = train_0hidd(
    trainData,
    trainLabel,
    eta=0.5,
    eps=1e-6,
    trace=True,
    stochastic=True,
)

Iter #0, error: 1.099841
Iter #1000, error: 0.131205
Iter #2000, error: 0.111484
Iter #3000, error: 0.099544
Iter #4000, error: 0.091456
Iter #5000, error: 0.085571
Iter #6000, error: 0.081075
Iter #7000, error: 0.077514
Iter #8000, error: 0.074614
Iter #9000, error: 0.072203
Iter #10000, error: 0.070162
Iter #11000, error: 0.068410
Iter #12000, error: 0.066888
Iter #13000, error: 0.065553
Iter #14000, error: 0.064370
Final iteration #14029, error: 0.064337


In [74]:
# Element-wise check: predicted class index vs. index of the largest target entry
np.equal(pred_0hidd(testData, wt), testLabel.argmax(axis=1))

array([ True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True])

In [51]:
# Number of array dimensions of the test targets
np.ndim(testLabel)

2