In [75]:
import numpy as np

def softmax(x):
    e_x = np.exp(x - np.max(x))
    return e_x / e_x.sum(axis=1).reshape(-1,1)


class NeuralNetwork(object):
    def __init__(self, layers = [2, 10, 1], activations=['sigmoid', 'sigmoid']):
        assert(len(layers) == len(activations)+1)
        self.layers = layers
        self.activations = activations
        self.weights = []
        self.biases = []
        for i in range(len(layers)-1):
            self.weights.append(np.random.randn(layers[i], layers[i+1]))
            self.biases.append(np.random.randn(1, layers[i+1]))
    

    def feedforward(self, x):
        # return the feedforward value for x
        a = np.copy(x)
        z_s = []
        a_s = [a]
        for i in range(len(self.weights)):
            activation_function = self.getActivationFunction(self.activations[i])
            #z_s.append(self.weights[i].dot(a) + self.biases[i])
            z_s.append(a.dot(self.weights[i]) + self.biases[i])
            a = activation_function(z_s[-1])
            a_s.append(softmax(a))
        return (z_s, a_s)


    def backpropagation(self,y, z_s, a_s):
            dw = []  # dC/dW
            db = []  # dC/dB
            deltas = [None] * len(self.weights)  # delta = dC/dZ  known as error for each layer
            # insert the last layer error
            # deltas[-1] = ((y-a_s[-1])*(self.getDerivitiveActivationFunction(self.activations[-1]))(z_s[-1]))
            deltas[-1] = y-a_s[-1]

            # Perform BackPropagation
            for i in reversed(range(len(deltas)-1)):
                #deltas[i] = self.weights[i+1].T.dot(deltas[i+1])*(self.getDerivitiveActivationFunction(self.activations[i])(z_s[i]))  
                deltas[i] = deltas[i+1].dot(self.weights[i+1].T)*(self.getDerivitiveActivationFunction(self.activations[i])(z_s[i]))  

            #a= [print(d.shape) for d in deltas]
            batch_size = y.shape[0]
            # db = [d.dot(np.ones((batch_size,1)))/float(batch_size) for d in deltas]
            # dw = [d.dot(a_s[i].T)/float(batch_size) for i,d in enumerate(deltas)]

            db = [np.ones((1, batch_size)).dot(d)/float(batch_size) for d in deltas]
            dw = [a_s[i].T.dot(d)/float(batch_size) for i,d in enumerate(deltas)]

            # return the derivitives respect to weight matrix and biases
            return dw, db


    def train(self, x, y, batch_size=10, epochs=100, lr = 0.01):
    # update weights and biases based on the output
        for e in range(epochs): 
            i=0
            while(i<len(y)):
                x_batch = x[i:i+batch_size]
                y_batch = y[i:i+batch_size]
                i = i+batch_size
                z_s, a_s = self.feedforward(x_batch)
                dw, db = self.backpropagation(y_batch, z_s, a_s)
                self.weights = [w+lr*dweight for w,dweight in  zip(self.weights, dw)]
                self.biases = [w+lr*dbias for w,dbias in  zip(self.biases, db)]
            
            print("Epoch {}: loss = {}".format(e, np.linalg.norm(a_s[-1]-y_batch)), end="\n")


    @staticmethod
    def getActivationFunction(name):
        if(name == 'sigmoid'):
            return lambda x : np.exp(x)/(1+np.exp(x))
        elif(name == 'linear'):
            return lambda x : x
        elif(name == 'relu'):
            def relu(x):
                y = np.copy(x)
                y[y<0] = 0
                return y
            return relu
        else:
            print('Unknown activation function. linear is used')
            return lambda x: x
    

    @staticmethod
    def getDerivitiveActivationFunction(name):
        if(name == 'sigmoid'):
            sig = lambda x : np.exp(x)/(1+np.exp(x))
            return lambda x :sig(x)*(1-sig(x)) 
        elif(name == 'linear'):
            return lambda x: 1
        elif(name == 'relu'):
            def relu_diff(x):
                y = np.copy(x)
                y[y>=0] = 1
                y[y<0] = 0
                return y
            return relu_diff
        else:
            print('Unknown activation function. linear is used')
            return lambda x: 1

In [76]:
#import matplotlib.pyplot as plt
import tensorflow as tf


In [77]:
#download fashion mnist dataset
fashion_mnist = tf.keras.datasets.fashion_mnist
(X, y), (test_images, test_labels) = fashion_mnist.load_data()

#train_set_count = len(train_labels)
#test_set_count = len(test_labels)

#normalize images
X = X / 255.0
test_images = test_images / 255.0

In [78]:
X_new = X.reshape(-1, 28*28)
y_new = tf.keras.utils.to_categorical(
    y, num_classes=10, dtype='float32'
)
y_new = y_new.reshape(-1, 10)

In [79]:
print(X_new.shape, y_new.shape)

(60000, 784) (60000, 10)


In [81]:
nn = NeuralNetwork([784, 32, 10],activations=['relu', 'linear'])

nn.train(X_new, y_new, epochs=100, batch_size=100, lr = .01)
#_, a_s = nn.feedforward(X_new)

Epoch 0: loss = 8.817486729146328
Epoch 1: loss = 8.020484532557296
Epoch 2: loss = 7.733619594034876
Epoch 3: loss = 7.536883139431679
Epoch 4: loss = 7.38507688155297
Epoch 5: loss = 7.258183536793553
Epoch 6: loss = 7.132207892702848
Epoch 7: loss = 7.025584259383275
Epoch 8: loss = 6.976699819773272
Epoch 9: loss = 6.921042195431771
Epoch 10: loss = 6.895732242476954
Epoch 11: loss = 6.861076053000031
Epoch 12: loss = 6.840520021534309
Epoch 13: loss = 6.8212698146156105
Epoch 14: loss = 6.796864719452753
Epoch 15: loss = 6.776249655514505
Epoch 16: loss = 6.7511499117818685
Epoch 17: loss = 6.724901965436472
Epoch 18: loss = 6.706519240519335
Epoch 19: loss = 6.677520518958638
Epoch 20: loss = 6.654211382449374
Epoch 21: loss = 6.623653083611586
Epoch 22: loss = 6.592127775536609
Epoch 23: loss = 6.5634339765203205
Epoch 24: loss = 6.540652352117441
Epoch 25: loss = 6.520975761762829
Epoch 26: loss = 6.501230932650896
Epoch 27: loss = 6.482764245921384
Epoch 28: loss = 6.464496686

In [43]:
x=np.array([1,2,3,4,5,6,7,8,9,0]).reshape(2,10)

e_x = np.exp(x-np.max(x)) 

e_x / e_x.sum(axis=1)

array([[2.12062451e-04, 5.76445508e-04, 1.56694135e-03, 4.25938820e-03,
        1.15782175e-02, 3.14728583e-02, 8.55520989e-02, 2.32554716e-01,
        6.32149258e-01, 7.80134161e-05]])

In [55]:
X = 2*np.pi*np.random.rand(1000).reshape(1, -1)
y = np.sin(X)

In [56]:
X.shape

(1, 1000)

In [58]:
len(y)

1