# Algorithm 1: 2-Layer Neural Network

In [24]:
import pandas as pd
import numpy as np

In [25]:
# Imports data
file = open('zipcombo.dat','r')
tempdata = file.read().split('\n')
del tempdata[-1]

In [26]:
# Creates X and y from the data
X = np.zeros((len(tempdata), 256))
y = np.zeros((len(tempdata)))
for i in range(0, len(tempdata)):
    individ = tempdata[i].split(" ")
    del individ[-1]
    X[i] = np.array(individ[1:]).astype(np.float)
    y[i] = np.array(individ[0]).astype(np.float)

In [27]:
# Transforms y to one hot vectors
def onehot(y):
    onehoty = np.zeros((10, len(y)))
    for i in range(0, len(y)):
        onehoty[int(y[i])][i] = 1
    return onehoty

In [28]:
# Adds an additional dimension of 1's to X to act as a bias
def biasx(X):
    ones = np.ones((len(X), 1))
    Xb = np.c_[X, ones]
    Xb = Xb.reshape(len(X), 257)
    return Xb

In [29]:
# Runs data transformations
def fixdata(X, y):
    bX = biasx(X)
    ohy = onehot(y)
    return bX, ohy

In [30]:
# Splits data into 80% train and 20% test
def train_test_split(X,y):
    randsamp = np.random.rand(X.shape[0])
    split = randsamp < np.percentile(randsamp, 80)
    X_train = X[split].astype(np.float)
    y_train = y[split].astype(np.float) 
    X_test =  X[~split].astype(np.float) 
    y_test = y[~split].astype(np.float)
    return X_train, y_train, X_test, y_test

In [31]:
# Splits data into 5-folds for cross-validation
def five_fold_split(X):
    s = int(len(X)/5)
    ind = np.arange(len(X)).astype(int)
    train_index = np.zeros((5, len(X)-s))
    test_index = np.zeros((5, s))
    for i in range(0, 5):
        test_index[i] = ind[0+i*s:0+(i+1)*s]
        train_index[i] = np.delete(ind, test_index[i])        
    return train_index, test_index

In [44]:
# Generates table of values
def df_generator(amean, astd, bmean, bstd, indexlabels, columnlabels, shape):
    str1 = np.char.add(np.char.mod('%.5f', amean), "+-")
    str1 = np.char.add(str1,np.char.mod('%.5f', astd))
    str2 = np.char.add(np.char.mod('%.5f', bmean), "+-")
    str2 = np.char.add(str2,np.char.mod('%.5f', bstd))
    df = pd.DataFrame(data = np.array([str1,str2]).reshape(shape), index = indexlabels, columns = columnlabels)
    return df

In [45]:
# Generates table of values
def df_generator2(amean, astd, bmean, bstd, indexlabels, columnlabels, shape):
    str1 = np.char.add(np.char.mod('%.5f', amean), "+-")
    str1 = np.char.add(str1,np.char.mod('%.5f', astd))
    str2 = np.char.add(np.char.mod('%.5f', bmean), "+-")
    str2 = np.char.add(str2,np.char.mod('%.5f', bstd))
    df = pd.DataFrame(data=np.array([str1,str2]).T, index = indexlabels, columns = columnlabels)
    return df

In [33]:
# Computes error
def error_calculator(prediction, label): 
    mistakes = prediction[np.where(prediction != label)]
    error = len(mistakes)/len(label)
    return error

In [34]:
# Computes sigmoid function
def sigmoid(x):
    sig = 1/(1+np.exp(-x))
    return sig

In [35]:
# Computes softmax function
def softmax(x):
    soft = np.exp(x)/np.sum(np.exp(x), axis=0)
    return soft

In [36]:
# Computes derivative of sigmoid function
def dsigmoid(x):
    f = 1/(1+np.exp(-x))
    f = f*(1-f)
    return f

In [37]:
# Computes gradient
def gradient(act1, yhat, y, W2, W1, fc1, X):
    dW2 = (1/len(y[1]))*(yhat-y)@act1.T
    dW1 = (1/len(y[1]))*(W2.T@(yhat-y)*dsigmoid(fc1))@X
    return dW2, dW1

In [38]:
# Computes forward step
def forward(W1, W2, X):
    fc1 = W1@X.T
    act1 = sigmoid(fc1)
    fc2 = W2@act1
    yhat = softmax(fc2)
    return fc1, act1, yhat

In [39]:
# Updates weights
def backward(W1, W2, dW1, dW2, learning_rate):
    W2 = W2-learning_rate*dW2
    W1 = W1-learning_rate*dW1
    return W2, W1

In [41]:
# Trains neural network
def train(X_train, y_train, epoch, hidden_size, learning_rate):
    W1 = np.random.randn(hidden_size, len(X_train[1]))
    W2 = np.random.randn(10, hidden_size)
    for e in range(0, epoch):
        fc1, act1, yhat = forward(W1, W2, X_train)      
        dW2, dW1 = gradient(act1, yhat, y_train, W2, W1, fc1, X_train)
        W2, W1 = backward(W1, W2, dW1, dW2, learning_rate)     
    error = test(X_train, y_train, W1, W2, hidden_size)
    return W1, W2, error

In [42]:
# Tests neural network
def test(Xtest, ytest, W1, W2, hidden_size):
    fc1, act1, yhat = forward(W1, W2, Xtest)
    prediction = np.argmax(yhat, axis = 0)
    label = np.argmax(ytest, axis = 0)
    error = error_calculator(prediction, label)
    return error

### Basic Results

In [43]:
# Runs training and testing for specified number of runs
def train_test(X, y, runs, epoch, hidden_size, learning_rate):
    Etrain = np.zeros((runs))
    Etest = np.zeros((runs))
    for e in range(0, runs):
        X_train, y_train, X_test, y_test = train_test_split(X, y)
        bX_train, ohy_train = fixdata(X_train, y_train)
        bX_test, ohy_test = fixdata(X_test, y_test)
        W1, W2, Etrain[e] = train(bX_train, ohy_train, epoch, hidden_size, learning_rate)
        Etest[e] = test(bX_test, ohy_test, W1, W2, hidden_size)
    return np.mean(Etrain), np.std(Etrain), np.mean(Etest), np.std(Etest)

In [46]:
hidden_sizes=[240, 220, 200, 180, 160, 140, 120]
Etrain = np.zeros((7))
stdtrain = np.zeros((7))
Etest = np.zeros((7))
stdtest = np.zeros((7))
for h in range(len(hidden_sizes)):
    Etrain[h], stdtrain[h], Etest[h], stdtest[h] = train_test(X, y, 20, 2000, hidden_sizes[h], 1)       

In [47]:
df1 = df_generator2(Etrain, stdtrain, Etest, stdtest, 
                  ["H=240", "H=220", "H=200", "H=180", "H=160", "H=140", "H=120"],
                  ["Train Error", "Test Error"], (7, 2))
df1.style

Unnamed: 0,Train Error,Test Error
H=240,0.00022+-0.00011,0.08704+-0.00620
H=220,0.00030+-0.00016,0.08849+-0.00576
H=200,0.00040+-0.00019,0.08750+-0.00532
H=180,0.00050+-0.00022,0.08505+-0.00540
H=160,0.00067+-0.00027,0.08341+-0.00581
H=140,0.00127+-0.00046,0.08446+-0.00565
H=120,0.00220+-0.00048,0.08430+-0.00468


### Cross-Validation

In [54]:
# Performs 5-fold cross validation
def cross_validation(X_train, y_train, epoch, hidden_sizes, learning_rate):
    train_in, test_in = five_fold_split(X_train)
    minE = 1000
    besths = 1
    for h in range(len(hidden_sizes)):
        for i in range(0, 5):
            train_index = train_in[i].astype(int)
            test_index = test_in[i].astype(int)
            X_trainfold = X_train[train_index[0]:train_index[len(train_index)-1]]
            y_trainfold = y_train[train_index[0]:train_index[len(train_index)-1]]
            bX_train, ohy_train = fixdata(X_trainfold,y_trainfold)
            W1,W2,_ = train(bX_train, ohy_train, epoch, hidden_sizes[h], learning_rate)
            X_testfold = X_train[test_index[0]:test_index[len(test_index)-1]]
            y_testfold = y_train[test_index[0]:test_index[len(test_index)-1]]
            bX_test, ohy_test = fixdata(X_testfold, y_testfold)
            foldE = test(bX_test, ohy_test, W1, W2, hidden_sizes[h])
        if foldE < minE:
            besths = hidden_sizes[h]
            minE = foldE
    return besths

In [55]:
# Performs cross validation on train data and uses optimal parameter h* to train
# and test full dataset
def cross_train_test(X, y, runs, epoch, hidden_sizes, learning_rate):
    Etest = np.zeros((runs))
    hsstar = np.zeros((runs))
    for e in range(0, runs):
        X_train, y_train, X_test, y_test = train_test_split(X, y)
        besths = cross_validation(X_train, y_train, epoch, hidden_sizes, learning_rate)  
        bX_train, ohy_train = fixdata(X_train, y_train)
        bX_test, ohy_test = fixdata(X_test, y_test)
        W1, W2, _ = train(bX_train, ohy_train, epoch, besths, learning_rate)
        Etest[e] = test(bX_test, ohy_test, W1, W2, besths)
        hsstar[e] = besths
        print("H* = ", besths)
        print("Test error: ", np.mean(Etest[e]), "+-", np.std(Etest[e]))
        
    return Etest, hsstar

In [None]:
E, besths = cross_train_test(X, y, 20, 2000, hidden_sizes, 1)

In [57]:
df2 = df_generator(np.mean(besths), np.std(besths), np.mean(E), np.std(E), [""], 
                   ["Mean hs*", "Mean Test Error"], (1, 2))
df2.style

Unnamed: 0,Mean hs*,Mean Test Error
,235.00000+-10.72381,0.09065+-0.00618
