In [1]:
import numpy as np
import matplotlib.pyplot as pyplot
import mnist

In [15]:
data = np.random.random([1000]) #coming up with arbitrary data and a predictor 
data = np.reshape(data, (100,10)) #reshape to be 100 rows of 10 cols
predict = np.zeros(100)
for i in range(len(data)):
    predict[i] = float(np.random.random()-.5)
    data[i] /= np.sum(data[i]) + predict[i]


In [None]:
#drawing : 
'''     network : dense                    matrices                                  rows correspond to input layer size + bias, columns to output layer size
                                                                                     X is a bias, it should be a constant 1. pad it onto the back of output.
        1       2       3                    1->2           2->3
        [a]             [f]                 [1 , 2         [1, 2, 3 
        [b]     [d]     [g]                  3 , 4          4, 5, 6
        [c]     [e]     [h]                  5 , 6          7, 8, 9]
        [X]     [X]     [X]                  7 , 8]
        
        forward propagation                                                         each row of input1 corresponds to a row of output. 
                            [1, 2        [1a + 3b + 5c + 7X ,2a + 4b + 6c + 8X]     each col of input2 corresponds to a col of output
                             3, 4   =                                               [1,input layer + bias] @ [input layer + bias, outputlayer] = [1,ouputlayer]
        [a, b, c, X]    x    5, 6      
                             7, 8]                                      intuitively, each neuron is represented by a column of the connecting matrix
                                                                        with each row within representing an axon from each row in input. 
        
        first make sure vals[i][-1] = 1
        intuitively, vals[i] @ layers[i] = vals[i+1]
        then pad output layer with a 1.

        back propagation
            Error : [e1, e2, e3] = [?-f , ?-g , ?-h]
            Values : [[input1, input2, input3, X] , [d, e, X] , [f, g, h, X]]
            -1 * learning_rate * [derivative_of_output] @ [error] @ transpose([value]) = adjustment
            for layer n, output is values[n+1], input is values[n]

                                   [e1                           [e1 * f, e1 * g, e1 * h
        transpose(error) @ value =  e2    x    [f, g, h]    =     e2 * f, e2 * g, e2 * h
                                    e3]                           e3 * f, e3 * g, e3 * h]

        matrix 2->3 += ^that * learningrate * -1

        for subsequent layers
            values layer backprop from : [d,e]
            values layer backprop to : [a,b,c,x]

        derivative = derivative([d,e])

        calculate new d,e from errors by transposing matrix 2->3 (reversing input->output direction) and multiplying it by error
        specifically, d is represented by the values in row 1 of matrix 2->3 , [1,2,3]. so the value we want is f*1, g*2, h*3. 
        for e we want f*4,g*5,h*6. f,g,and h are substituted for e1,e2,and e3, so the operation that gets us what we want is 

                                                                         [1,4,7  
        D,E,X = [e1*1+e2*2+e3*3 , e1*4+e2*5+e3*6, extra] = [e1,e2,e3] @   2,5,8     =  error * transverse(matrix2->3)
                                                                          3,6,9]

        D,E * derivative = delta

        adjustments = delta * transpose(matrix 1->2) = [D, E] @ [1,3,5,7  = [D*1 + E*2, D*3 + E*4, D*5 + E*6, D*7 + E*8] = 
                                                                 2,4,6,8]
        matrix 1->2 += adjustments * -1
        
'''                                     

In [268]:
#takes a numpy array of values and returns a numpy array of the same length. 
def Softmax(inputs):
    return np.exp(inputs)/np.sum(inputs)

def RelU(input):
    return np.max(input,0)

def LeakyRelU(input): #maybe dont use this? More of a demonstration of what should be done inline
    #return input * .01 if input < 0 else input
    return 1.0/(1+np.exp(-input)) #testing sigmoid out
    #return  np.nan_to_num(    np.clip(  ((input < 0) & (1)) * .99 * input + .01 * input , a_min=-10, a_max=10  ) )
    
    

def LeakyRelUDeriv(input):
    #return .01 if input < 0 else 1
    #return 1 - .99 * (input < 0) #true one
    return input * (1.0 - input) #sigmoid test

In [192]:
class Network:
    #layers is a list of ints. each int represents the number of neurons in that layer.
    #the first layer must be the number of columns in the input data
    #the last represents the number of possible outputs
    def __init__(self, layers, learningrate = .01):
        self.learningrate = learningrate
        self.vals = list()                                                  #the '"neurons". 
        self.layers = list()                                                #the "axons and dentrites".
        self.numlayers = len(layers)                                        #paradoxically , len(self.layers) = self.numlayers - 1
        self.layersizes = layers
        for i in range(len(layers)):
            self.vals.append(np.zeros((1,layers[i] +1)))                      #our list of row vectors to serve as inputs to the next layer. +1 to add bias value 
            if i > 0 :
                self.layers.append(np.random.rand(layers[i-1] + 1,layers[i] +1 ) * 1/ ((layers[i] ) * (layers[i-1])))   #our matrix to propagate forward from layer n-1 to layer n.
        return

    def predict(self,input):
        #forward propagation
                                                                           #inputs shape should be of shape [1,n]
        np.copyto(self.vals[0][:,0:self.layersizes[0]], input)           #copy values into our vals[0], leaving the last spot for bias
        self.vals[0][:,-1] = 1                                                  #store our 1 for bias
        for i in range(1,self.numlayers):
            self.vals[i] = self.vals[i-1] @ self.layers[i-1]
            self.vals[i] = LeakyRelU(self.vals[i])  if i < self.numlayers -1 else Softmax(self.vals[i])
            self.vals[i][:,-1] = 1
        return np.copy(self.vals[-1][:, : self.layersizes[-1]])                #just return a copy of the last layer without the bias value

    #each row of input should be a row that can be input into predict - each row of testvals a single value that is the correct one. 
    #specifically, testvals indicates an index in the output array that should be 1, while the rest are 0. 
    def train(self, input, testvals, epochs = 1):
        #get predictions on each row of input
        predictions = np.zeros(testvals.shape[0])
        for i in range(epochs):
            totalerror = 0
            for j in range(input.shape[0]):
                if(j%1000==0):
                    print("Progress : %d"%(float(j)/input.shape[0]))
                p = self.predict(input[j])
                #compare them to testvals to get error
                #print("Predictions shape is " , p.shape)
                err = np.zeros( (1, self.layersizes[-1]))
                #print("Error shape is " , err.shape)
                err[0,testvals[j]] = 1
                err = err-p
                err =np.square(err)*1/len(err)                                   #get mse of errors

                #feed error into backpropagate
                self.backpropagate(err)
                totalerror+= np.sum(err)/input.shape[0]
            #print mean error and 
            print(totalerror)
        return

    def test(self, input, testvals):
        return

    def backpropagate(self, error):
        #print("Error is " , error)
        #for 2nd to last layer
        #for each previous one
        #print("Cheaply debugging, its " , (self.learningrate * np.transpose(self.vals[-2][:,:self.vals[-2].shape[1]-1]) @ error  ).shape)
        #print("And layers is ", self.layers[-1].shape)
        #update transition layer to final x using errors
        self.layers[-1][:self.layersizes[-2] , :self.layersizes[-1] ] -= self.learningrate * np.transpose(self.vals[-2][:,:self.vals[-2].shape[1]-1]) @ error     
        for i in range( self.numlayers-2,0, -1 ):
            derivative = LeakyRelUDeriv(self.vals[i])
            d = error @ np.transpose(self.layers[i][:,:self.layersizes[i+1]])
            d = d[:d.shape[1],]                                                                                   #slice off bias value
            delta = d * derivative                                                                                #element wise product. 
            #print("Delta is " , delta.shape)
            #print("Layer i-1 shape is " , self.layers[i-1].shape)
            #print("Layer i shape is " , self.layers[i].shape)
            print(delta)
            self.layers[i-1] += self.learningrate * -1 *np.transpose(self.vals[i-1]) @delta  



In [None]:
#WIP. using this vid as resource : https://www.youtube.com/watch?v=9RN2Wr8xvro&list=PL-nR3Zo5zPQvaNGqElO9-N-1z-4N94qBi&index=1
#but trying to make it easier to use, more general, commented, and without retarded variable naming conventions

In [12]:
mnimg = mnist.train_images().reshape(60000,28**2)
mnlabel = mnist.train_labels()

In [193]:
network = Network([28**2,200,10])

In [97]:
print(network.predict(np.reshape(mnimg[0], (1,28**2) )))

[[326.6435912  326.73873643 326.61554108 326.62863053 326.76071857
  326.73489849 326.65712601 326.73379726 326.62713133 326.69312815]]


In [99]:
print(network.vals[2]/np.sum(network.vals[2]))

[[0.09995724 0.09998635 0.09994865 0.09995266 0.09999308 0.09998518
  0.09996138 0.09998484 0.0999522  0.0999724  0.00030601]]


In [196]:
#network.train(mnimg[:10],mnlabel[:10])

In [311]:
class NetworkV2:
    def __init__(self, layersizes, learningrate = .01):
        self.neurons = list()           #list of row vectors
        self.axons = list()             #matrix connect neurons[i] to neurons[i+1]. dimension is layersizes[i] , layersizes[i+1]
        self.layersizes = layersizes    #the count of neurons in each layer
        self.biases = list()            #bonus connection to each neuron in each layer. same shape as self.neurons
        self.learningrate = .01
        for i in range(len(layersizes)):
            self.neurons.append(np.zeros( (1,layersizes[i]) ))
            self.biases.append( np.random.rand(1,layersizes[i])/ (self.layersizes[i]) )
            if(i>0):
                self.axons.append(np.ones( (self.layersizes[i-1],self.layersizes[i]) ) / (self.layersizes[i-1]) )
        #debug ifo
        print("layersizes length %d" %(len(layersizes)))
        print("neurons length %d" %(len(self.neurons)) )
        print("axons length %d" %(len(self.axons)) )
        print("biases length %d" %(len(self.biases)) )

    def predict(self, input):
        '''
        def predict(self, input):
        input should be a numpy array of shape 1,inputsize. remember to normalize input to 0-1 range
        '''
        np.copyto(self.neurons[0], input)
        for i in range(1,len(self.layersizes)):
            self.neurons[i] = LeakyRelU(self.neurons[i-1] @ self.axons[i-1]) + self.biases[i]       #nk km nm
            
            self.neurons[i] = np.nan_to_num(self.neurons[i],posinf=10,neginf=-10)                       #clear nan
        return np.copy(self.neurons[-1])

    def backpropagate(self,errors):
        '''def backpropagate(self,errors):'''
        np.copyto(self.neurons[-1], errors)
        for i in range(len(self.layersizes)-1,0,-1 ):
            self.axons[i-1] += np.transpose(-self.learningrate * self.neurons[i-1]) @ self.neurons[i]               #nk km nm  so Trans(neurons[i-1] ) @ neurons[i] 
            self.biases[i] += -self.learningrate * self.neurons[i]                                                  #update biases
            deriv  = (LeakyRelUDeriv(self.neurons[i-1]) if i < len(self.layersizes)-1 else self.neurons[i-1])       #get derivative of this layer of neurons
            self.neurons[i-1] = deriv * (self.neurons[i] @ np.transpose(self.axons[i-1]))                           #backpropagate errors into previous layer
            
            self.axons[i-1] = np.nan_to_num(self.axons[i-1], posinf=10, neginf=-10)                                 #get rid of weird values.
            self.biases[i-1] = np.nan_to_num(self.biases[i-1], posinf=10, neginf=-10)
            self.neurons[i-1] = np.nan_to_num(self.neurons[i-1], posinf=10, neginf=-10)
        return
    
    def train(self,vals,labels,epochs = 1):
        '''
        def train(self,vals,labels):
        gradient descent styled training. labels should be indexes of the output that are "correct" 
        '''
        originallearningrate = self.learningrate
        for e in range(epochs):
            self.learningrate/=1.5
            for i in range(vals.shape[0]):
                #print(i)
                p = self.predict(vals[i])
                correct = np.zeros((1,self.layersizes[-1]))
                correct[0,labels[i]] = 1
                correct = Softmax(correct)
                err = p - correct
                err = np.nan_to_num(err, posinf=10, neginf=-10)
                self.backpropagate(err)
        self.learningrate = originallearningrate
network2 = NetworkV2([28**2,10],learningrate =.1 )

layersizes length 2
neurons length 2
axons length 1
biases length 2


In [36]:
mnimg[0,:].shape

(784,)

In [313]:
network2 = NetworkV2([28**2,10], learningrate=.01)
network2.train(mnimg[0:60000,:]/256 , mnlabel[0:60000], epochs=3)

layersizes length 2
neurons length 2
axons length 1
biases length 2


In [261]:
print(network2.axons[0])

[[0.00127551 0.00127551 0.00127551 ... 0.00127551 0.00127551 0.00127551]
 [0.00127551 0.00127551 0.00127551 ... 0.00127551 0.00127551 0.00127551]
 [0.00127551 0.00127551 0.00127551 ... 0.00127551 0.00127551 0.00127551]
 ...
 [0.00127551 0.00127551 0.00127551 ... 0.00127551 0.00127551 0.00127551]
 [0.00127551 0.00127551 0.00127551 ... 0.00127551 0.00127551 0.00127551]
 [0.00127551 0.00127551 0.00127551 ... 0.00127551 0.00127551 0.00127551]]


In [262]:
network2.predict(mnimg[5:6,:])

array([[ 1.37472654, -9.46812843, 10.22995192,  5.78471996, -9.63036773,
        -9.62670449,  3.70113074,  3.32186127,  9.94153154,  4.7583525 ]])

In [201]:
mnlabel[4]

9

In [185]:
#

In [207]:
np.argmax([1,2,3])

2

In [314]:
numcorrect= 0
total = 0
for i in range(1000,2000,1):
    if np.argmax(network2.predict(mnimg[i:i+1,:]))==mnlabel[i]:
        numcorrect+=1
    total+=1
print(numcorrect)
print(total)

565
1000


In [259]:
x =  network2.axons[0]