In [1]:
import os
import numpy as np, pandas as pd
from utilities import discretizeMean, oneHot, normalizeDF, makeClassMat
from crossValidate import getXVFolds
from ANNmath import concateBias, sigmoid, softMax, crossEntNK, getRandomSeq

In [4]:
from NN_0hidd import train_0hidd, pred_0hidd
from NN_1hidd import train_1hidd, pred_1hidd
from NN_2hidd import train_2hidd, pred_2hidd

In [None]:
# --- Wisconsin breast-cancer data: load, drop incomplete rows, build X and y ---
bc_WI_data = os.path.join('./data/', 'breast-cancer-wisconsin.data')
bc_WI_names = [
    'id', 'clumpThick', 'unifSize', 'unifShape', 'margAdhsn', 'epithSize',
    'bareNuclei', 'blandChrom', 'normNucleo', 'mitoses', 'class',
]
raw = pd.read_csv(bc_WI_data, names=bc_WI_names)
# Missing entries are matched as the literal string '?'; flag each row holding one.
missRow = raw.eq('?').any(axis=1).values
# Keep only the complete rows, then convert every column to a numeric dtype.
raw = raw.loc[~missRow].apply(pd.to_numeric, errors='coerce')
bcFeats = bc_WI_names[1:-1]  # every column between 'id' and 'class'
classVec = makeClassMat(raw['class'] == 4)  # boolean input: class code equals 4
dataMat = raw.loc[:, bcFeats].values

In [None]:
# --- House votes 1984 data: encode the vote columns, label rows by party ---
vote84Data = os.path.join('./data/', 'house-votes-84.data')
vote84Names = [
    'party', 'infant', 'water', 'budget', 'doctorfee', 'salvador', 'religion',
    'satellite', 'contras', 'missile', 'immigration', 'synfuels', 'education',
    'superfund', 'crime', 'exports', 'ZAF',
]
raw = pd.read_csv(vote84Data, names=vote84Names)
# These three vote columns are expanded by the project's oneHot helper.
oneHotCols = oneHot(raw, ['water', 'education', 'ZAF'])
# Every remaining vote column is reduced to one boolean "voted y" column.
# np.setdiff1d returns the remaining names sorted.
yesVars = np.setdiff1d(vote84Names[1:], ['water', 'education', 'ZAF'])
yesVote = raw.loc[:, yesVars].eq('y').add_suffix('_y')
voteData = pd.concat([yesVote, oneHotCols], axis=1)
repub = raw['party'] == 'republican'
classVec = makeClassMat(repub)  # class matrix built from the republican flag
# NOTE(review): this cell assigns voteData but never dataMat, unlike the other
# data-loading cells -- confirm which feature matrix is meant to pair with
# this classVec before training on it.

In [None]:
# --- Small soybean data: keep two-valued features, encode them as 0/1 ---
soyData = os.path.join('./data/', 'soybean-small.data')
# Columns are named by position: c01 ... c35, followed by the class column.
soyNames = ['c%02d' % n for n in range(1, 36)] + ['class']
raw = pd.read_csv(soyData, names=soyNames)
feats = np.array(soyNames)[raw.nunique() != 1]  # drop constant columns
feats = feats[raw[feats].nunique() == 2]  # keep columns with exactly 2 values

# Each kept feature becomes True where it equals its first-seen value.
tmpDF = pd.DataFrame({f: raw[f] == raw[f].unique()[0] for f in feats})
dataMat = tmpDF.values * 1  # booleans -> 1's and 0's
classVec = makeClassMat(raw['class'])  # all classes

In [None]:
# Quick look: mean of each row of whatever array is currently bound to dataMat.
dataMat.mean(axis=1)

In [None]:
# Debug print: per-row means of dataMat interpolated into a string.
print("test %s"%dataMat.mean(axis=1))

In [2]:
# --- Iris data: load, build the class matrix, normalise the four features ---
irisFile = os.path.join('./data/', 'iris.data')
irisName = ['sepalLen', 'sepalWth', 'petalLen', 'petalWth', 'class']
raw = pd.read_csv(irisFile, names=irisName)
irisTypes = makeClassMat(raw['class'])
# Every column except the trailing 'class' column goes through normalizeDF.
irisMat = normalizeDF(raw.loc[:, irisName[:-1]])

In [3]:
# Split the iris data: the first fold is held out for testing and all
# remaining folds are merged into the training set.
folds = getXVFolds(irisMat, irisTypes, categorical=True)
testIdx, trainIdx = folds[0], np.hstack(folds[1:])
trainData = irisMat[trainIdx]
trainLabel = irisTypes[trainIdx]
testData = irisMat[testIdx]
testLabel = irisTypes[testIdx]

In [None]:
################################################################################
def train_0hidd(xMat, yMat, eta, eps=1e-7, trace=False, shuffle=True,
                maxEpoch=int(1e6)):
    """Fit a softmax network with no hidden layer, one data point at a time.

    Parameters
    ----------
    xMat : 2-D array, one row per data point
        Input features. A bias term is added with ``concateBias`` before
        training starts.
    yMat : 2-D array of shape (nData, nK)
        Target matrix with one column per class (presumably one-hot rows --
        confirm against ``makeClassMat``).
    eta : float
        Initial learning rate. It is divided by 5 after every epoch whose
        error is higher than the previous epoch's error.
    eps : float
        Convergence threshold: training stops once the absolute change of the
        epoch error is no larger than ``eps``.
    trace : bool
        If true, print the error every 1000 epochs and once at the end.
    shuffle : bool
        If true, visit the data points in the order given by
        ``getRandomSeq(nData)`` each epoch; otherwise in index order.
    maxEpoch : int
        Upper bound on the number of epochs. The default equals the limit of
        1e6 that used to be hard-coded in the loop condition.

    Returns
    -------
    (wt, epch) : tuple
        ``wt`` is the weight matrix of shape (columns of the bias-augmented
        ``xMat``, nK); ``epch`` is the number of epochs that were run.
    """
    def feedForward(xs, wts):
        return softMax(xs @ wts)

    def backProp(ys, yfit, xs, wts):
        # eta is read from the enclosing scope, so later reductions apply here.
        return wts + eta * np.outer(xs, ys - yfit)

    xMat = concateBias(xMat)  # add bias terms
    (nData, nK), nDim = yMat.shape, xMat.shape[1]  # size of data and classes

    wt = np.random.rand(nDim, nK) / 50 - 0.01  # init wts within [-0.01, 0.01)
    lastErr = np.inf  # guarantees the first convergence test passes
    meanErr = crossEntNK(feedForward(xMat, wt), yMat)  # error of random wts

    epch = 0
    while abs(meanErr - lastErr) > eps and epch < maxEpoch:  # not converged
        if trace and epch % 1000 == 0:
            print('Iter #%u, error: %f'%(epch,meanErr))

        # Order in which the data points are visited during this epoch.
        seq = getRandomSeq(nData) if shuffle else np.arange(nData)
        for n in seq:
            x, y = xMat[n], yMat[n]
            wt = backProp(y, feedForward(x, wt), x, wt)  # per-sample update

        lastErr = meanErr
        meanErr = crossEntNK(feedForward(xMat, wt), yMat)  # err for this epoch

        if meanErr > lastErr:  # slow the learning rate if the error increased
            eta /= 5
        epch += 1

    if trace:  # print final error
        print('Final iteration #%u, error: %f' % (epch-1,meanErr) )
    return wt, epch

def pred_0hidd(xMat, wts):
    """Predict a class index for every row of ``xMat`` with the 0-hidden-layer net.

    Parameters
    ----------
    xMat : 2-D array, one row per data point (without bias term; the bias is
        added here with ``concateBias``).
    wts : weight matrix as returned by ``train_0hidd``.

    Returns
    -------
    1-D array holding, for each row, the column index of the largest softmax
    output.
    """
    # Use the argument itself: the previous version read the notebook-level
    # variable testData, so every call predicted on that array regardless of
    # what was passed in.
    yHat = softMax(concateBias(xMat) @ wts)
    return yHat.argmax(axis=1)

In [None]:
# Scratch cell (appears to be a shape check for a one-hidden-layer forward pass).
v = np.random.rand(4,6)/10
# The product below is neither assigned nor the cell's last expression, so its
# value is discarded; the line only confirms that the operands are compatible.
concateBias(testData[0] @ v) @ np.random.rand(6+1, 3)
print(v)
# Cell output: v with its last column dropped.
v[:, :-1]

In [None]:
# Train the one-hidden-layer network on the training fold; in the train_1hidd
# signature defined in this notebook the positional 3 and 8 are eta and nNodes.
wt,nn = train_1hidd(trainData, trainLabel, 3, 8, eps=1e-6, trace=True, shuffle=True)
# Element-wise check of predicted class index against the test labels' argmax.
pred_1hidd(testData, *wt) == testLabel.argmax(axis=1)

In [None]:
################################################################################
def train_1hidd(xMat, yMat, eta, nNodes, eps=1e-7, trace=False, shuffle=True,
                maxEpoch=int(1e6)):
    """Fit a network with one sigmoid hidden layer and a softmax output layer.

    Weights are updated after every single data point.

    Parameters
    ----------
    xMat : 2-D array, one row per data point
        Input features. A bias term is added with ``concateBias`` before
        training starts.
    yMat : 2-D array of shape (nData, nK)
        Target matrix with one column per class (presumably one-hot rows --
        confirm against ``makeClassMat``).
    eta : float
        Initial learning rate. It is halved after every epoch whose error is
        higher than the previous epoch's error.
    nNodes : int
        Number of hidden nodes (a bias node is added on top of these).
    eps : float
        Convergence threshold: training stops once the absolute change of the
        epoch error is no larger than ``eps``.
    trace : bool
        If true, print the error every 1000 epochs and once at the end.
    shuffle : bool
        If true, visit the data points in the order given by
        ``getRandomSeq(nData)`` each epoch; otherwise in index order.
    maxEpoch : int
        Upper bound on the number of epochs. The default equals the limit of
        1e6 that used to be hard-coded in the loop condition.

    Returns
    -------
    ((wtOut, wtHidd), epch) : tuple
        ``wtOut`` has shape (nNodes + 1, nK), ``wtHidd`` has shape (columns of
        the bias-augmented ``xMat``, nNodes); ``epch`` is the number of epochs
        that were run.
    """
    def feedForward(xs, wtsOut, wtsHidd):
        zs = concateBias(sigmoid(xs @ wtsHidd))  # hidden activations + bias
        return zs, softMax(zs @ wtsOut)

    def backProp(ys, yfit, xs, zs, wtsOut, wtsHidd):
        errOut = ys - yfit
        d_Out = eta * np.outer(zs, errOut)
        # Error pushed back through the output weights, times the sigmoid
        # derivative. The last column belongs to the hidden bias node, which
        # has no incoming weights, so it is dropped.
        errHidd = (wtsOut @ errOut) * (zs * (1 - zs))
        d_hidd = eta * np.outer(xs, errHidd)[:, :-1]
        return wtsOut + d_Out, wtsHidd + d_hidd

    xMat = concateBias(xMat)
    (nData, nK), nDim = yMat.shape, xMat.shape[1]

    # init wts within [-0.01, 0.01); output layer first, then hidden layer
    wtOut = np.random.rand(nNodes + 1, nK) / 50 - 0.01
    wtHidd = np.random.rand(nDim, nNodes) / 50 - 0.01

    lastErr = np.inf  # guarantees the first convergence test passes
    meanErr = crossEntNK(feedForward(xMat, wtOut, wtHidd)[1], yMat)

    epch = 0
    while abs(meanErr - lastErr) > eps and epch < maxEpoch:  # not converged
        if trace and epch % 1000 == 0:
            print('Iter #%u, error: %f'%(epch,meanErr))

        # Order in which the data points are visited during this epoch.
        seq = getRandomSeq(nData) if shuffle else np.arange(nData)
        for n in seq:
            x, y = xMat[n], yMat[n]
            z, yHat = feedForward(x, wtOut, wtHidd)
            wtOut, wtHidd = backProp(y, yHat, x, z, wtOut, wtHidd)

        lastErr = meanErr
        meanErr = crossEntNK(feedForward(xMat, wtOut, wtHidd)[1], yMat)

        if meanErr > lastErr:  # slow the learning rate if the error increased
            eta /= 2
        epch += 1

    if trace:  # print final error
        print('Final iteration #%u, error: %f' % (epch-1,meanErr) )
    return (wtOut, wtHidd), epch

def pred_1hidd(xMat, wtsOut, wtsHidd):
    """Return the predicted class index for each row of ``xMat``.

    The rows are bias-augmented, passed through the sigmoid hidden layer
    (``wtsHidd``), bias-augmented again and passed through the softmax output
    layer (``wtsOut``); the index of the largest output is reported per row.
    """
    hiddAct = sigmoid(concateBias(xMat) @ wtsHidd)
    return softMax(concateBias(hiddAct) @ wtsOut).argmax(axis=1)

In [7]:
# Train the two-hidden-layer network on the training fold, visiting the data
# in index order (shuffle=False).
# NOTE(review): three values are unpacked here, which matches whatever the
# imported NN_2hidd.train_2hidd returned when this cell was run; the
# train_2hidd defined further down in this notebook returns only two values
# ((weights), epch), so this line fails once that cell has been executed.
wt,nn,err = train_2hidd(trainData, trainLabel, 3, 6, eps=1e-7, trace=True, shuffle=False)
# Element-wise check of predicted class index against the test labels' argmax.
pred_2hidd(testData, *wt) == testLabel.argmax(axis=1)

Iter #0, error: 1.098633
Iter #1000, error: 0.049496
Iter #2000, error: 0.047250
Iter #3000, error: 0.029844
Iter #4000, error: 0.004723
Iter #5000, error: 0.001255
Iter #6000, error: 0.000737
Iter #7000, error: 0.000522
Final iteration #7783, error: 0.000426


array([ True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True, False,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True, False,  True])

In [8]:
wt

(array([[ 10.50657934,   1.26281465, -11.77279206],
        [  8.37251125,   0.88914978,  -9.25885993],
        [  6.52099472,   7.77065498, -14.28641747],
        [  9.56421464,   1.66261931, -11.2394224 ],
        [  8.24618486,   1.26049761,  -9.50810299],
        [  8.43256651,   1.81869959, -10.25579956],
        [-19.63495177,   2.41395448,  17.2069027 ]]),
 array([[ -3.69141031,  -1.79395101,  -3.56194723,  -3.67133325,
          -2.04963434,  -3.1550329 ],
        [ -2.90488223,  -1.44300358,  -3.81340717,  -2.91609953,
          -1.57952137,  -2.51493279],
        [  4.96434189,  -0.24250175, -10.28926198,   4.91256327,
           2.20025144,   4.12893296],
        [ -4.39035533,  -2.44868414,  -3.45874531,  -4.30472343,
          -2.71326405,  -3.7330607 ],
        [ -6.65454672,  -5.53869992,  -1.50132228,  -6.57788544,
          -5.47474315,  -6.24499868],
        [  4.44398362,  -0.32617749, -10.04452587,   4.39324293,
           1.90894437,   3.66804174],
        [  1.641

In [None]:
################################################################################
def train_2hidd(xMat, yMat, eta, nNodes, eps=1e-7, trace=False, shuffle=True,
                maxEpoch=int(1e6)):
    """Fit a network with two sigmoid hidden layers and a softmax output layer.

    Weights are updated after every single data point.

    Parameters
    ----------
    xMat : 2-D array, one row per data point
        Input features. A bias term is added with ``concateBias`` before
        training starts.
    yMat : 2-D array of shape (nData, nK)
        Target matrix with one column per class (presumably one-hot rows --
        confirm against ``makeClassMat``).
    eta : float
        Initial learning rate. It is halved after every epoch whose error is
        higher than the previous epoch's error.
    nNodes : int
        Number of nodes in each of the two hidden layers (a bias node is added
        on top of these in both layers).
    eps : float
        Convergence threshold: training stops once the absolute change of the
        epoch error is no larger than ``eps``.
    trace : bool
        If true, print the error every 1000 epochs and once at the end.
    shuffle : bool
        If true, visit the data points in the order given by
        ``getRandomSeq(nData)`` each epoch; otherwise in index order.
    maxEpoch : int
        Upper bound on the number of epochs. The default equals the limit of
        1e6 that used to be hard-coded in the loop condition.

    Returns
    -------
    ((wtOut, wtHidd2, wtHidd1), epch) : tuple
        ``wtOut`` has shape (nNodes + 1, nK), ``wtHidd2`` has shape
        (nNodes + 1, nNodes), ``wtHidd1`` has shape (columns of the
        bias-augmented ``xMat``, nNodes); ``epch`` is the number of epochs
        that were run.
    """
    def feedForward(xs, wtsOut, wtsHidd2, wtsHidd1):
        z1s = concateBias(sigmoid(xs @ wtsHidd1))   # 1st hidden layer + bias
        z2s = concateBias(sigmoid(z1s @ wtsHidd2))  # 2nd hidden layer + bias
        return (z1s, z2s), softMax(z2s @ wtsOut)

    def backProp(ys, yfit, xs, zs, wtsOut, wtsHidd2, wtsHidd1):
        z1s, z2s = zs
        errO = ys - yfit
        d_Out = eta * np.outer(z2s, errO)

        # The last entry/column of each hidden error belongs to that layer's
        # bias node, which has no incoming weights, so it is dropped.
        err2 = (wtsOut @ errO) * (z2s * (1 - z2s))
        d_hidd2 = eta * np.outer(z1s, err2)[:, :-1]

        err1 = (wtsHidd2 @ err2[:-1]) * (z1s * (1 - z1s))
        d_hidd1 = eta * np.outer(xs, err1)[:, :-1]
        return wtsOut + d_Out, wtsHidd2 + d_hidd2, wtsHidd1 + d_hidd1

    xMat = concateBias(xMat)
    (nData, nK), nDim = yMat.shape, xMat.shape[1]

    # init wts within [-0.01, 0.01); output layer first, then hidden 2, hidden 1
    wtOut = np.random.rand(nNodes + 1, nK) / 50 - 0.01
    wtHidd2 = np.random.rand(nNodes + 1, nNodes) / 50 - 0.01
    wtHidd1 = np.random.rand(nDim, nNodes) / 50 - 0.01

    lastErr = np.inf  # guarantees the first convergence test passes
    meanErr = crossEntNK(feedForward(xMat, wtOut, wtHidd2, wtHidd1)[1], yMat)

    epch = 0
    while abs(meanErr - lastErr) > eps and epch < maxEpoch:  # not converged
        if trace and epch % 1000 == 0:
            print('Iter #%u, error: %f'%(epch,meanErr))

        # Order in which the data points are visited during this epoch.
        seq = getRandomSeq(nData) if shuffle else np.arange(nData)
        for n in seq:
            x, y = xMat[n], yMat[n]
            z12, yHat = feedForward(x, wtOut, wtHidd2, wtHidd1)
            wtOut, wtHidd2, wtHidd1 = backProp(y, yHat, x, z12,
                                               wtOut, wtHidd2, wtHidd1)

        lastErr = meanErr
        meanErr = crossEntNK(
            feedForward(xMat, wtOut, wtHidd2, wtHidd1)[1], yMat)

        if meanErr > lastErr:  # slow the learning rate if the error increased
            eta /= 2
        epch += 1

    if trace:  # print final error
        print('Final iteration #%u, error: %f' % (epch-1,meanErr) )
    return (wtOut, wtHidd2, wtHidd1), epch

def pred_2hidd(xMat, wtsOut, wtsHidd2, wtsHidd1):
    """Return the predicted class index for each row of ``xMat``.

    The rows are bias-augmented and passed through the first sigmoid layer
    (``wtsHidd1``), the second sigmoid layer (``wtsHidd2``) and the softmax
    output layer (``wtsOut``), with a bias term added before every layer; the
    index of the largest output is reported per row.
    """
    act = concateBias(xMat)
    # Both hidden layers share the same form: sigmoid, then append a bias.
    for layerWts in (wtsHidd1, wtsHidd2):
        act = concateBias(sigmoid(act @ layerWts))
    return softMax(act @ wtsOut).argmax(axis=1)

In [None]:
# Scratch expression: a length-3 array of integer ones.
np.ones(3, int)