In [11]:
import numpy as np

# Input Data
X = np.array(([[0.05, 0.05, 0.05], [0.1, 0.1, 0.1]]), dtype=float) # 2, 3
y = np.array([[0.01, 0.01, 0.01], [0.99, 0.99, 0.99]], dtype=float) # 2, 3



In [12]:
class Model3:

    def __init__(self, model_structure=[2, 3, 2], modelActivationFunctions=["sigmoid", "sigmoid"]):

        self.model_structure = model_structure
        self.modelActivationFunctions = modelActivationFunctions
        self.modelWidth = len(model_structure)

        # Safety Check to make sure model structure is legitimate
        if self.modelWidth<3 or self.modelWidth-1!=len(self.modelActivationFunctions):
            print("Model Structure Error!")
            exit(1)  

        # Weights (Parameters) - Randomly Assigned
        self.weights = list()
        self.weights.append(np.random.randn(self.model_structure[1],self.model_structure[0])) # Input Layer Weights
        for i in range(1, self.modelWidth-1):
            self.weights.append(np.random.randn(self.model_structure[i+1], self.model_structure[i]))

        # Biases - Randomly Assigned
        self.biases = list()
        for i in range(1, self.modelWidth):
            self.biases.append(np.random.randn(self.model_structure[i], 1))  # Length should be number of columns of X
        '''self.weights = [np.array(([[0.15, 0.3], [0.2, 0.35], [0.25, 0.4]]), dtype=float), np.array(([[0.5, 0.6, 0.7], [0.55, 0.65, 0.75]]), dtype=float)]
        self.biases = [[[0.45], [0.45], [0.45]], [[0.8], [0.8]]]'''

    def sigmoid(self, z):
            return 1/(1+np.exp(-z))
    
    def sigmoidPrime(self, z):
        # Derivative of Sigmoid Function
        return np.exp(-z)/((1+np.exp(-z))**2)
    
    def softmax(self, z):
        z -= np.max(z)
        sm = (np.exp(z).T / np.sum(np.exp(z), axis=0)).T
        return sm
        
    '''
    # https://aerinykim.medium.com/how-to-implement-the-softmax-derivative-independently-from-any-loss-function-ae6d44363a9d
    def softmax(z):
        z -= np.max(z)
        sm = (np.exp(z).T / np.sum(np.exp(z), axis=0)).T
        return sm
    '''
    def softmaxPrime(self, z):
        soft_max = self.softmax(x)
        # Reshape the 1-d softmax to 2-d so that np.dot will do the matrix multiplication
        s = soft_max.reshape(-1,1)
        return np.diagflat(s) - np.dot(s, s.T)

    def relu(self, z):
        return np.maximum(0, z)
    
    def reluPrime(self, x):
        x[x<=0] = 0
        x[x>0] = 1
        return x
    
    def applyActivationFunction(self, values, activation):
        if activation=='sigmoid':
            return self.sigmoid(values)
        elif activation=='softmax':
            return self.softmax(values)
        elif activation=='relu':
            return self.relu(values)
        else:
            print("Unknown Activation Function! Got:", activation)
            exit(1)
    
    def applyActivationFunctionPrime(self, values, activation):
        if activation=='sigmoid':
            return self.sigmoidPrime(values)
        elif activation=='softmax':
            return self.softmaxPrime(values)
        elif activation=='relu':
            return self.reluPrime(values)
        else:
            print("Unknown Activation Function! Got:", activation)
            exit(1)

    def forward(self, X):
        # Propogate inputs through networks
        self.aValues = list()
        self.zValues = list()
        self.aValues.append(X) # First a value = input values
        for i in range(0, len(self.weights)):
            self.zValues.append(np.dot(self.weights[i], self.aValues[i]) + self.biases[i])
            self.aValues.append(self.applyActivationFunction(self.zValues[i], self.modelActivationFunctions[i]))
        yHat = self.aValues[-1]
        return yHat

    def costFunction(self, X, y):
        # Compute cost using the weights already stored
        self.yHat = self.forward(X)
        J = 0.5*sum((y-self.yHat)**2)
        return J
    
    def costFunctionPrime(self, X, y):
        # Computes partial derivatives of Cost function with respect to weights & biases
        self.yHat = self.forward(X)
        
        weightDerivatives = list() # Derivative of Cost function with respect to weights
        biasDerivatives = list() # Derivative of Cost function with respect to biases

        # Last Layer derivatives
        delta = np.multiply(-(y-self.aValues[-1]), self.sigmoidPrime(self.zValues[-1]))
        weightDerivatives.insert(0, np.dot(delta, self.aValues[-2].T))
        biasDerivatives.insert(0, delta.sum(axis=1).reshape(delta.shape[0],1)) 
        
        # Derivatives for the other layers (L-1, L-2, ...)
        for i in range(self.modelWidth-2, 0, -1):
            delta = np.multiply(np.dot(self.weights[i].T, delta), self.applyActivationFunctionPrime(self.zValues[i-1], self.modelActivationFunctions[i])) 
            weightDerivatives.insert(0, np.dot(delta, self.aValues[i-1].T))
            biasDerivatives.insert(0, delta.sum(axis=1).reshape(delta.shape[0],1))

        return weightDerivatives, biasDerivatives
    
    def tuneParams(self, X, y, learning_rate=0.5):
        # Get Derivatives of Weights & Biases, and then adjust weights/biases with learning rate*derivatives
        self.weightDerivatives, self.biasDerivatives = self.costFunctionPrime(X, y)
        scalar = learning_rate/X.shape[1] # learning rate divided by number of samples
        for i in range(0, len(self.weights)):
            self.weights[i] = self.weights[i] - scalar*self.weightDerivatives[i]
            self.biases[i] = self.biases[i] - scalar*self.biasDerivatives[i]


In [17]:
#model = Model3()
model = Model3(model_structure=[2, 5, 3, 2], modelActivationFunctions=['relu', 'relu', 'sigmoid']) # Another test

In [18]:
cost1 = model.costFunction(X, y)
print("Before:", cost1)
model.tuneParams(X, y)
cost4 = model.costFunction(X, y)
print("After:", cost4)

Before: [0.35089486 0.35089486 0.35089486]
After: [0.29235721 0.29235721 0.29235721]


In [None]:
# Other Random Stuff (mainly for debugging)


In [5]:
print(np.random.randn(3, 1))

[[1.53347245]
 [1.14802192]
 [1.05946144]]


In [6]:
cost1 = model.costFunction(X, y)
print(cost1)

[0.17626944 0.17626944 0.17626944]


In [423]:
thing = model.aValues
for i in range(len(thing)):
    print(str(i)+": ")
    print(thing[i])
    print()


0: 
[[0.05 0.05 0.05]
 [0.1  0.1  0.1 ]]

1: 
[[0.45495606 0.45495606 0.45495606]
 [0.13651343 0.13651343 0.13651343]
 [0.44382312 0.44382312 0.44382312]
 [0.16402444 0.16402444 0.16402444]
 [0.21399519 0.21399519 0.21399519]]

2: 
[[0.15912218 0.15912218 0.15912218]
 [0.         0.         0.        ]
 [0.         0.         0.        ]]

3: 
[[0.5759319  0.5759319  0.5759319 ]
 [0.47727111 0.47727111 0.47727111]]



In [388]:
model.biases

[array([[-0.03573841],
        [-0.17444797],
        [-1.28903062]]), array([[ 1.06369257],
        [-0.1038854 ]])]

In [483]:
model.weights[1]

array([[ 0.85795118, -0.53548953, -0.42762427,  0.53554238,  0.2337851 ],
       [-0.17523553,  0.2969102 ,  0.79902637, -1.26389863, -1.04866783],
       [ 0.2601352 ,  1.48739739, -0.8230129 ,  0.49066952,  2.07740798]])

In [487]:
model.weightDerivatives[0]

array([[-0.00101457, -0.00202913],
       [ 0.        ,  0.        ],
       [-0.0005538 , -0.00110759],
       [ 0.00097552,  0.00195104],
       [ 0.0013559 ,  0.00271181]])

In [485]:
model.weights[0] - model.weightDerivatives[0]

array([[-0.71622685,  0.08222378],
       [ 2.30359731, -0.66182807],
       [ 0.31348833,  0.49001116],
       [-0.55052604, -0.41989909],
       [ 0.04692531,  0.06503582]])