In [3]:
import numpy as np
from numpy.linalg import norm
import copy
import os

In [4]:
def relu(x):
    """Rectified linear unit: element-wise ``max(x, 0)``.

    Parameters
    ----------
    x : scalar or array_like
        Real-valued input.

    Returns
    -------
    Floating-point scalar or ndarray with the shape of ``x``; negative
    entries are replaced by 0 and NaN entries stay NaN.
    """
    # np.maximum is used instead of x*(sign(x)+1)/2 because that product
    # evaluates -inf * 0 = NaN for x = -inf and yields -0.0 for negatives.
    return np.maximum(x, 0.)

In [5]:
def sigmoid(x):
    """Logistic function ``1 / (1 + exp(-x))``, evaluated without overflow.

    Parameters
    ----------
    x : scalar or array_like
        Real-valued input.

    Returns
    -------
    Scalar (for scalar input) or ndarray with the shape of ``x``, with
    values in [0, 1].
    """
    x = np.asarray(x)
    # exp(-|x|) always lies in (0, 1], so it cannot overflow the way
    # exp(-x) does for large negative x.
    e = np.exp(-np.abs(x))
    # x >= 0: 1/(1+e^-x)   (identical to the textbook formula)
    # x <  0: e^x/(1+e^x)  (algebraically the same value)
    out = np.where(x >= 0, 1./(1.+e), e/(1.+e))
    # Indexing with () turns a 0-d result back into a scalar and is a
    # no-op for arrays with one or more dimensions.
    return out[()]

In [6]:
def softmax(x):
    """Softmax along the first axis: ``exp(x) / sum(exp(x))``.

    Parameters
    ----------
    x : array_like with at least one dimension
        Logits. For a 1-D vector the result sums to 1; for higher
        dimensions the normalisation runs over axis 0.

    Returns
    -------
    ndarray with the shape of ``x``.
    """
    x = np.asarray(x)
    if x.size == 0:
        # Nothing to normalise; also avoids taking the max of an empty axis.
        return np.exp(x)
    # Subtracting the per-column maximum leaves the result mathematically
    # unchanged but keeps exp() from overflowing to inf (inf/inf = NaN).
    e = np.exp(x - np.max(x, axis=0))
    return e / np.sum(e, axis=0)

In [7]:
def mynorm(Z):
    """Return the root-mean-square of all entries of ``Z``.

    ``Z`` is squared element-wise, averaged over every entry, and the
    square root of that average is returned.
    """
    mean_of_squares = np.mean(Z ** 2)
    return np.sqrt(mean_of_squares)

In [8]:
# Define a three-layer feed-forward network (two sigmoid hidden layers and a softmax output layer)

In [9]:
def _ann_forward(X, W1, b1, W2, b2, W3, b3):
    """Forward pass: two sigmoid hidden layers followed by a softmax output.

    Returns the two hidden activations and the output, ``(Z1, Z2, Yhat)``.
    """
    Z1 = sigmoid(W1 @ X + b1)
    Z2 = sigmoid(W2 @ Z1 + b2)
    Yhat = softmax(W3 @ Z2 + b3)
    return Z1, Z2, Yhat


def myANN(Y,Xtrain,Xpred,W01,W02,W03,b01,b02,b03,lr=1.,tol=0.001,max_iter=200):
    """Fit a three-layer network to one sample by gradient descent and print a report.

    The network is ``softmax(W3 @ sigmoid(W2 @ sigmoid(W1 @ x + b1) + b2) + b3)``
    and the loss is the cross-entropy ``-Y @ log(Yhat)``. The weight gradients
    are built with ``np.outer``, so ``Xtrain`` and ``Y`` are a single sample
    given as 1-D vectors.

    Parameters
    ----------
    Y : 1-D array
        Target vector for the training sample. The output-layer gradient
        ``Yhat - Y`` is the softmax/cross-entropy gradient when the entries
        of ``Y`` sum to 1 (e.g. a one-hot vector).
    Xtrain : 1-D array
        Training input.
    Xpred : 1-D array
        Input for which a prediction is reported after training.
    W01, W02, W03 : 2-D arrays
        Initial weight matrices, applied as ``W @ input``.
    b01, b02, b03 : 1-D arrays
        Initial bias vectors.
    lr : float, optional
        Gradient-descent step size (default 1, the previously hard-coded value).
    tol : float, optional
        Training stops once the sum of the gradient norms is at most ``tol``.
    max_iter : int, optional
        Maximum number of gradient steps.

    Returns
    -------
    None. The loss per iteration and a final summary are printed.
    """
    # Work on copies so the caller's initial parameters are left untouched.
    W1, W2, W3 = (copy.copy(W) for W in (W01, W02, W03))
    b1, b2, b3 = (copy.copy(b) for b in (b01, b02, b03))

    k = 1
    change = np.inf  # guarantees the first iteration runs for any finite tol

    while change > tol and k <= max_iter:
        print("Iteration",k)

        Z1, Z2, Yhat = _ann_forward(Xtrain, W1, b1, W2, b2, W3, b3)
        loss = -Y @ np.log(Yhat)
        print("Current Loss:",loss)

        # Output layer: gradient w.r.t. the softmax pre-activation.
        dLdb3 = Yhat - Y
        dLdW3 = np.outer(dLdb3, Z2)
        # Hidden layer 2: back-propagate through W3 and the sigmoid derivative Z*(1-Z).
        dLdb2 = (W3.T @ dLdb3) * Z2 * (1-Z2)
        dLdW2 = np.outer(dLdb2, Z1)
        # Hidden layer 1
        dLdb1 = (W2.T @ dLdb2) * Z1 * (1-Z1)
        dLdW1 = np.outer(dLdb1, Xtrain)

        # All gradients above were computed from the pre-update parameters;
        # only now are the parameters moved.
        b3 = b3 - lr*dLdb3
        W3 = W3 - lr*dLdW3
        b2 = b2 - lr*dLdb2
        W2 = W2 - lr*dLdW2
        b1 = b1 - lr*dLdb1
        W1 = W1 - lr*dLdW1

        # Convergence measure: sum of the gradient norms (Frobenius for matrices).
        change = norm(dLdb1)+norm(dLdb2)+norm(dLdb3)+norm(dLdW1)+norm(dLdW2)+norm(dLdW3)
        k = k + 1

    # Re-evaluate the fit with the final parameters so that the reported
    # "Fitted Ytrain" matches the reported weights (the value from inside the
    # loop predates the last update).
    _, _, Yhat = _ann_forward(Xtrain, W1, b1, W2, b2, W3, b3)
    _, _, Ypred = _ann_forward(Xpred, W1, b1, W2, b2, W3, b3)

    print("")
    print("Summary")
    print("Target Y \n", Y)
    print("Fitted Ytrain \n", Yhat)
    print("Xpred\n", Xpred)
    print("Fitted Ypred \n", Ypred)
    print("Weight Matrix 1 \n", W1)
    print("Bias Vector 1 \n", b1)
    print("Weight Matrix 2 \n", W2)
    print("Bias Vector 2 \n", b2)
    print("Weight Matrix 3 \n", W3)
    print("Bias Vector 3 \n", b3)

In [10]:
# Initial parameters: one weight matrix (rows = units, columns = inputs)
# and one bias vector per layer.

# Layer 1: 3 inputs -> 2 units
W0_1 = np.array([
    [0.1, 0.3, 0.7],
    [0.9, 0.4, 0.4],
])
b_1 = np.ones(2)

# Layer 2: 2 inputs -> 2 units
W0_2 = np.array([
    [0.4, 0.3],
    [0.7, 0.2],
])
b_2 = np.ones(2)

# Layer 3: 2 inputs -> 3 units
W0_3 = np.array([
    [0.5, 0.6],
    [0.6, 0.7],
    [0.3, 0.2],
])
b_3 = np.ones(3)

In [11]:
# Single training sample: three features and a one-hot target (first of three classes).
X_train = np.array((0.1, 0.7, 0.3))
YY = np.eye(3)[0]
# Predict at the training point itself; X_pred is the same array object, not a copy.
X_pred = X_train

In [12]:
# Train the hand-written network from the initial parameters and print its report.
myANN(
    Y=YY, Xtrain=X_train, Xpred=X_pred,
    W01=W0_1, W02=W0_2, W03=W0_3,
    b01=b_1, b02=b_2, b03=b_3,
)

Iteration 1
Current Loss: 1.0245882244632154
Iteration 2
Current Loss: 0.15485530792507038
Iteration 3
Current Loss: 0.09392421885330306
Iteration 4
Current Loss: 0.06800390179198364
Iteration 5
Current Loss: 0.05342702439392962
Iteration 6
Current Loss: 0.04403516135363163
Iteration 7
Current Loss: 0.037464385843672066
Iteration 8
Current Loss: 0.03260386527878382
Iteration 9
Current Loss: 0.02886032281105674
Iteration 10
Current Loss: 0.025887274175115597
Iteration 11
Current Loss: 0.023468486741688343
Iteration 12
Current Loss: 0.021461901907750262
Iteration 13
Current Loss: 0.019770281829020363
Iteration 14
Current Loss: 0.01832479491681583
Iteration 15
Current Loss: 0.017075333031958018
Iteration 16
Current Loss: 0.0159845417596331
Iteration 17
Current Loss: 0.015024000543215876
Iteration 18
Current Loss: 0.014171699633555721
Iteration 19
Current Loss: 0.013410327234309708
Iteration 20
Current Loss: 0.012726078464642231
Iteration 21
Current Loss: 0.012107809483423326
Iteration 22


In [13]:
# Cross-check: build and train the same network with Keras
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import SGD

In [14]:
# Build the same 3 -> 2 -> 2 -> 3 network in Keras.
model = Sequential()
model.add(Dense(2, input_dim=3, activation='sigmoid'))
model.add(Dense(2, activation='sigmoid'))
model.add(Dense(3, activation='softmax'))

# Start from the NumPy initial parameters. Keras Dense kernels are laid out
# as (inputs, units), hence the transposes of the (units, inputs) matrices.
# set_weights() is used instead of the legacy `weights=` constructor keyword,
# which newer Keras releases no longer accept.
model.set_weights([W0_1.T, b_1, W0_2.T, b_2, W0_3.T, b_3])

In [15]:
# Plain SGD with step size 1, the same step the hand-written gradient descent
# in myANN takes by default. `learning_rate` is the current argument name;
# `lr` is a deprecated alias.
sgd = SGD(learning_rate=1.0)
model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['categorical_crossentropy'])
# Show the starting weights to confirm they match the NumPy initial values.
model.get_weights()



[array([[0.1, 0.9],
        [0.3, 0.4],
        [0.7, 0.4]], dtype=float32),
 array([1., 1.], dtype=float32),
 array([[0.4, 0.7],
        [0.3, 0.2]], dtype=float32),
 array([1., 1.], dtype=float32),
 array([[0.5, 0.6, 0.3],
        [0.6, 0.7, 0.2]], dtype=float32),
 array([1., 1., 1.], dtype=float32)]

In [16]:
# Train on the single sample; Keras expects a leading batch axis, so both
# vectors are lifted to shape (1, 3).
model.fit(X_train[np.newaxis, :], YY[np.newaxis, :], epochs=200, batch_size=1)

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78

<tensorflow.python.keras.callbacks.History at 0x1823d9f9ac0>

In [17]:
# Class probabilities for the prediction point, passed as a batch of one row.
model.predict(X_pred[np.newaxis, :])

array([[9.9878985e-01, 6.1903609e-04, 5.9115869e-04]], dtype=float32)

In [18]:
# Trained Keras parameters, listed as [kernel 1, bias 1, kernel 2, bias 2,
# kernel 3, bias 3]. Each kernel is laid out as (inputs, units), i.e. the
# transpose of the weight-matrix layout used by myANN.
model.get_weights()

[array([[0.10459033, 0.9023976 ],
        [0.33213237, 0.41678518],
        [0.7137712 , 0.4071937 ]], dtype=float32),
 array([1.0459025, 1.0239786], dtype=float32),
 array([[0.5922062 , 0.88894045],
        [0.49371308, 0.39044544]], dtype=float32),
 array([1.2366259, 1.2326592], dtype=float32),
 array([[ 2.1052914 , -0.31430355, -0.3909867 ],
        [ 2.2433093 , -0.23618647, -0.5071224 ]], dtype=float32),
 array([ 2.8937364 , -0.08099043,  0.18725535], dtype=float32)]