In [4]:
import numpy as np
import math
import matplotlib
import matplotlib.pyplot as plt
import copy
import feedforward as ff

In [16]:
def makeBatches(patterns, targets, batchSize):
    """Shuffle a dataset and cut it into consecutive mini-batches.

    One random permutation of ``range(len(patterns))`` is drawn and batches are
    taken from it in order, so each sample lands in exactly one batch. When the
    dataset size is not a multiple of ``batchSize`` the leftover samples form a
    final, shorter batch.

    Args:
        patterns: indexable sequence of input samples.
        targets: indexable sequence of targets, aligned with ``patterns``.
        batchSize: number of samples in every full batch.

    Returns:
        (shuffIndex, allpat, alltar): the permutation that was drawn, followed
        by two parallel lists of batches. Each batch is a plain list, i.e.
        ``allpat[batchNumber][positionInBatch]`` is one pattern.
    """
    sampleCount = len(patterns)
    shuffIndex = np.random.permutation(sampleCount)

    def gather(positions):
        # Collect the pattern/target pairs for a run of shuffled indices.
        return [patterns[k] for k in positions], [targets[k] for k in positions]

    allpat, alltar = [], []

    # Full-sized batches: consecutive windows over the permutation.
    fullBatches = int(sampleCount // batchSize)
    for batchNumber in range(fullBatches):
        window = shuffIndex[batchNumber * batchSize:(batchNumber + 1) * batchSize]
        batchPat, batchTar = gather(window)
        allpat.append(batchPat)
        alltar.append(batchTar)

    # Leftover samples (the tail of the permutation) form one short batch.
    leftover = sampleCount % batchSize
    if leftover != 0:
        batchPat, batchTar = gather(shuffIndex[sampleCount - leftover:])
        allpat.append(batchPat)
        alltar.append(batchTar)

    return shuffIndex, allpat, alltar
# Load the two CSV files through ff.make_dataset, then cut the first two
# returned sequences into shuffled mini-batches of 2 samples each.
# NOTE(review): judging by the names, the four results are train inputs,
# train targets, test inputs and test targets -- confirm against ff.make_dataset.
imgtr, tartr, imgte, tarte = ff.make_dataset("mnist_train_100.csv", "mnist_test_10.csv")
# ind is the permutation used; pat / tar are the parallel lists of batches.
ind, pat, tar = makeBatches(imgtr, tartr, 2)
#print(np.shape(pat[0][0]), np.shape(tar[2][0])[0], len(tar[2]))

100 784 100 [0. 1. 0. 0. 0. 0. 0. 0. 0. 0.] 1
10 10
100 100
784 10


In [3]:
def sumOfSquareErr(output, target):
    """Return half of the summed squared difference between output and target.

    Args:
        output: network output (array or scalar supporting ``-``).
        target: desired output, broadcast-compatible with ``output``.

    Returns:
        ``0.5 * sum((output - target) ** 2)`` taken over every element.
    """
    residual = output - target
    squaredResidual = np.power(residual, 2)
    total = np.sum(squaredResidual)
    return 0.5 * total
# Sanity check: only the first component differs (by 1), so the expected
# result is 0.5 * 1**2 = 0.5.
sumOfSquareErr(np.array([2., 2.]), np.array([1., 2.]))

0.5

In [67]:
# Build one randomly initialised weight matrix per pair of consecutive layers.
weights = []
# Layer widths, first layer first.
neuronsPerLayer = [784, 625, 625, 10]
#Note: Think of multiplying a weight matrix with an activation vector.
# Each matrix therefore has shape (width of next layer, width of current layer).
for layer in range(len(neuronsPerLayer) - 1):
    weights.append(np.random.randn(neuronsPerLayer[layer + 1], neuronsPerLayer[layer]))
# Display the shape obtained by stacking the first 10 entries of imgtr into one array.
np.shape(np.array(imgtr[:10]))

(10, 784)

In [68]:
def forward(inputs, weights, verbose=True):
    """Run a batch through the network and collect each layer's activations.

    The original version tested ``l == 0`` before testing for the last layer,
    so a network with a single weight matrix never assigned ``output`` and the
    ``return`` raised UnboundLocalError. The last layer is now handled
    separately, which also makes the first-layer special case unnecessary
    (``hidden[-1]`` is ``inputs.T`` on the first step).

    Args:
        inputs: 2-D array with one sample per row (n x features, e.g. n x 784
            in this notebook). It is transposed internally so that every
            column is one sample.
        weights: non-empty sequence of weight matrices, first layer first.
            Each matrix is multiplied on the left of the previous activations.
        verbose: when True (the default, which matches the original
            behaviour) print the shape of each layer's activations.

    Returns:
        (hidden, output): ``hidden`` is ``[inputs.T, h1, h2, ...]`` -- the
        transposed input followed by ``ff.sigmoid`` of every layer except the
        last; ``output`` is ``ff.softmax`` of the last layer.

    Raises:
        ValueError: if ``weights`` is empty.
    """
    if len(weights) == 0:
        raise ValueError("weights must contain at least one layer matrix")

    hidden = [inputs.T]

    # Every layer except the last is a sigmoid layer fed by the previous activations.
    for layerWeights in weights[:-1]:
        hidden.append(ff.sigmoid(layerWeights @ hidden[-1]))
        if verbose:
            print(np.shape(hidden[-1]))

    # The last layer is the softmax read-out; it is not appended to ``hidden``.
    output = ff.softmax(weights[-1] @ hidden[-1])
    if verbose:
        print(np.shape(output))

    return hidden, output
# Push the first 12 entries of imgtr through the network and inspect the shapes.
hid, out = forward(np.array(imgtr[:12]), weights)
# Shape of the first entry of hid (the transposed input) and of the output.
print(np.shape(hid[0]), np.shape(out))
# Number of entries in hid, and the shape of the last weight matrix.
print(len(hid), np.shape(weights[-1]))

(625, 12)
(625, 12)
(10, 12)
(784, 12) (10, 12)
3 (10, 625)


In [70]:
def back(inputs, activeHs, outputs, weights):
    """Experimental backward pass that overwrites ``weights`` layer by layer.

    NOTE(review): work in progress. The recorded run of this cell ended in
    ``ValueError: operands could not be broadcast together with shapes
    (12,625) (12,784)``, so the formulas below should be treated as unfinished.

    Args:
        inputs: not referenced anywhere in the body.
        activeHs: per-layer activations, read as ``activeHs[l]``; presumably
            the ``hidden`` list returned by ``forward`` -- confirm.
        outputs: presumably the ``output`` returned by ``forward`` -- confirm.
        weights: list of weight matrices; its entries are replaced in place.

    Returns:
        The same ``weights`` list object, after the in-place assignments.
    """
    # Walk the layers from the last one down to the first.
    for l in range(len(weights) - 1, -1, -1):
        hid = activeHs[l]
        # NOTE(review): ``w`` is assigned here but never read afterwards.
        w = weights[l]
        if l == len(weights) - 1:
            # Last layer: replace the matrix with (hid @ outputs.T) transposed.
            weights[l] = np.transpose(hid @ outputs.T)
        else:
            # NOTE(review): the first loop iteration already replaced
            # weights[-1] in the branch above, so this expression reads the
            # replaced matrix rather than the original last-layer weights.
            weights[l] = (weights[-1].T @ outputs).T * ff.sigPrime(hid.T)
    return weights
# NOTE(review): the recorded run of this call raised the ValueError shown in
# this cell's output. `back` assigns into the `weights` list in place, so
# entries may already have been replaced when the error was raised; rebuild
# `weights` before reusing it.
weights = back(np.array(imgtr[:12]), hid, out, weights)

ValueError: operands could not be broadcast together with shapes (12,625) (12,784) 

In [None]:
def trainBatch(batch, learningRate, neuronsPerLayer, targets, *weights):
    """Train the network on one mini-batch and report batch statistics.

    Fixes relative to the original version:
      * ``batch(p)`` called the batch list like a function (TypeError); the
        ``xs`` / ``ins`` lists it fed were never read, so they are removed.
      * ``deltaK / len(batch)`` divided a Python list (TypeError); the
        per-pattern output deltas are now summed element-wise and then divided.
      * ``int(error) == 0`` raised on infinite / NaN errors; the equivalent
        range test ``-1 < error < 1`` is used instead.
      * an empty batch is rejected up front instead of failing later.

    Args:
        batch: sequence of input patterns.
        learningRate: step size forwarded to ``ff.updateWeights``.
        neuronsPerLayer: layer widths, first layer first; only used to create
            random weights when none are supplied.
        targets: sequence of targets aligned with ``batch``.
        *weights: optional single argument, the list of weight matrices to
            start from. When omitted (or falsy) new matrices of shape
            ``(neuronsPerLayer[i + 1], neuronsPerLayer[i])`` are drawn from
            ``np.random.randn``.

    Returns:
        (weights, avgErr, avgLoss, acc): the weights returned by
        ``ff.updateWeights``, the mean ``ff.crossEntError`` over the batch, the
        mean sum-of-squares loss over the batch, and the percentage of
        patterns whose error lies strictly between -1 and 1.

    Raises:
        ValueError: if ``batch`` is empty.
    """
    if len(batch) == 0:
        raise ValueError("batch must contain at least one pattern")

    # *weights carries at most one value: the list of layer matrices.
    weights = weights[0] if weights else None
    if not weights:
        #Note: Think of multiplying a weight matrix with an activation vector.
        weights = [
            np.random.randn(neuronsPerLayer[layer + 1], neuronsPerLayer[layer])
            for layer in range(len(neuronsPerLayer) - 1)
        ]

    errors, deltaK, batchLoss = [], [], []
    numCorrect = 0

    for p in range(len(batch)):
        # forward pass through network, find all activations
        activations = ff.forwardprop(batch[p], weights)

        # output-layer delta for this pattern
        deltaK.append(activations[-1] - targets[p])

        # sum-of-squares loss and cross-entropy error for this pattern
        batchLoss.append(sumOfSquareErr(activations[-1], targets[p]))
        error = ff.crossEntError(activations[-1], targets[p])
        errors.append(error)

        # Same criterion as int(error) == 0, but safe for inf / NaN errors.
        if -1 < error < 1:
            numCorrect += 1

    # Average the output deltas over the batch (element-wise).
    meanDeltaK = np.sum(deltaK, axis=0) / len(batch)

    # NOTE(review): ``activations`` here belongs to the LAST pattern of the
    # batch only, while the delta is a batch average -- confirm this is what
    # ff.backprop / ff.updateWeights expect.
    allDeltas = ff.backprop(activations, weights, meanDeltaK)

    # update weights
    weights = ff.updateWeights(learningRate, allDeltas, activations, weights)

    # accuracy as a percentage of the batch
    acc = (numCorrect / len(batch)) * 100

    # average error and loss over the batch
    avgErr = np.average(np.asarray(errors))
    avgLoss = np.average(np.asarray(batchLoss))

    return weights, avgErr, avgLoss, acc
# Single-pattern run of trainBatch on a 10-10-10 network, learning rate 0.001.
# NOTE(review): `inputVec` and `targets` are not defined in any of the cells
# visible in this part of the notebook -- confirm they exist before running.
test, err, loss, acc = trainBatch([inputVec], .001, [10, 10, 10], [targets])

In [None]:
def train_stochastically(allpats, alltargs, learningRate, neuronsPerLayer, *weights):
    """Train the network batch by batch and plot the per-batch error.

    Takes pre-shuffled, pre-separated batches and feeds them to ``trainBatch``
    one after another, threading the updated weights through.

    Fix relative to the original version: weights supplied by the caller were
    ignored for the first batch (``trainBatch`` was called without them), so
    every call silently restarted from fresh random weights. They are now
    used from the first batch on. An empty ``allpats`` is rejected explicitly
    instead of failing on an unassigned ``acc`` at the end.

    Args:
        allpats: sequence of batches of input patterns.
        alltargs: sequence of batches of targets, aligned with ``allpats``.
        learningRate: step size forwarded to ``trainBatch``.
        neuronsPerLayer: layer widths forwarded to ``trainBatch``.
        *weights: optional single argument, the weights to continue training
            from. When omitted, ``trainBatch`` creates the initial weights.

    Returns:
        (weights, errors, losses, acc): the weights after the last batch, the
        list of per-batch errors, the list of per-batch losses, and the
        accuracy of the LAST batch only (unchanged from the original return
        value).

    Raises:
        ValueError: if ``allpats`` is empty or ``alltargs`` has fewer batches.
    """
    if len(allpats) == 0:
        raise ValueError("allpats must contain at least one batch")
    if len(alltargs) < len(allpats):
        raise ValueError("alltargs must provide targets for every batch in allpats")

    # *weights carries at most one value: the weights to start from.
    currentWeights = weights[0] if weights else None

    errors, losses, accs = [], [], []

    for batch in range(len(allpats)):
        if currentWeights is None:
            # No starting weights: let trainBatch initialise them.
            currentWeights, err, loss, acc = trainBatch(
                allpats[batch], learningRate, neuronsPerLayer, alltargs[batch])
        else:
            currentWeights, err, loss, acc = trainBatch(
                allpats[batch], learningRate, neuronsPerLayer, alltargs[batch], currentWeights)
        errors.append(err)
        losses.append(loss)
        accs.append(acc)
        print("[Batch: ", batch, " Cross-ent Error: ", err, " Loss: ", loss, " Accuracy: ", acc, "%]")

    # plot the error, one point per batch
    fig, ax = plt.subplots()
    ax.plot(errors, marker= '.')
    ax.set(xlabel = 'Iteration', ylabel= 'Cross Entropy Error', title= 'Network Error Graph')
    plt.show()

    return currentWeights, errors, losses, accs[-1]
# Four consecutive training passes over the same pre-built batches (pat / tar
# are not reshuffled between passes). The first call passes no weights; each
# later call passes in the weights returned by the previous one.
# err / loss receive the per-batch lists and acc the fourth returned value;
# all four names are overwritten by every call.
weights, err, loss, acc = train_stochastically(pat, tar, .05, [784, 625, 625, 10])
weights, err, loss, acc = train_stochastically(pat, tar, .05, [784, 625, 625, 10], weights)
weights, err, loss, acc = train_stochastically(pat, tar, .05, [784, 625, 625, 10], weights)
weights, err, loss, acc = train_stochastically(pat, tar, .05, [784, 625, 625, 10], weights)

In [5]:
# Scratch check: slicing with [:-1] returns every element except the last.
t = [0, 1, 2, 2, 3, 4]
t[:-1]

[0, 1, 2, 2, 3]