In [1]:
import numpy as np
import nnfs
from nnfs.datasets import spiral_data

nnfs.init()

class Layer_Dense:
    """Fully connected layer: ``output = inputs . weights + biases``."""

    def __init__(self, n_inputs, n_neurons):
        """Create the layer parameters.

        ``weights`` has shape (n_inputs, n_neurons) and is drawn with
        ``np.random.randn`` scaled by 0.01; ``biases`` is a (1, n_neurons)
        row of zeros.
        """
        weight_shape = (n_inputs, n_neurons)
        self.weights = 0.01 * np.random.randn(*weight_shape)
        self.biases = np.zeros((1, n_neurons))

    def forward(self, inputs):
        """Compute the layer output for ``inputs`` and keep it in ``self.output``."""
        weighted_sum = np.dot(inputs, self.weights)
        # The single bias row is broadcast over every row of the product
        self.output = weighted_sum + self.biases
class Activation_ReLU:
    """Rectified linear activation."""

    def forward(self, inputs):
        """Store the element-wise maximum of 0 and ``inputs`` in ``self.output``."""
        rectified = np.maximum(0, inputs)
        self.output = rectified

class Activation_Softmax:
    """Row-wise softmax activation."""

    def forward(self, inputs):
        """Store the per-row softmax of ``inputs`` in ``self.output``."""
        # Subtract each row's maximum before exponentiating, so the largest
        # exponent in every row is exp(0)
        row_max = np.max(inputs, axis=1, keepdims=True)
        shifted_exp = np.exp(inputs - row_max)

        # Normalize every row by its own total
        row_totals = np.sum(shifted_exp, axis=1, keepdims=True)
        self.output = shifted_exp / row_totals

#common loss class
class loss:
    """Base class for losses; subclasses are expected to provide ``forward``."""

    def calculate(self, output, y):
        """Return the mean of the per-sample losses given by ``self.forward(output, y)``."""
        return np.mean(self.forward(output, y))

class Loss_CategoricalCrossentropy(loss):
    """Categorical cross-entropy: negative log of the confidence given to the target."""

    def forward(self, y_pred, y_true):
        """Return one negative log-likelihood per sample.

        Args:
            y_pred: predicted confidences, one row per sample.
            y_true: either a 1-D sequence of class indices (one per sample)
                or a 2-D array of per-class weights with the same shape as
                ``y_pred`` (e.g. one-hot rows).

        Returns:
            1-D array of ``-log(confidence)`` values, one per sample.

        Raises:
            ValueError: if ``y_true`` is neither 1-D nor 2-D.
        """
        samples = len(y_pred)
        # Keep confidences strictly inside (0, 1) so log() never sees 0
        y_pred_clipped = np.clip(y_pred, 1e-7, 1 - 1e-7)

        # Accept plain Python lists as well as arrays
        y_true = np.asarray(y_true)

        if y_true.ndim == 1:
            # Class indices: pick the predicted confidence of the target class
            correct_confidences = y_pred_clipped[range(samples), y_true]
        elif y_true.ndim == 2:
            # Per-class weights: multiply element-wise and sum each row
            correct_confidences = np.sum(y_pred_clipped * y_true, axis=1)
        else:
            # Previously this case fell through and failed with an
            # UnboundLocalError on correct_confidences
            raise ValueError(
                "y_true must be 1-D (class indices) or 2-D (one-hot), "
                "got %d dimension(s)" % y_true.ndim
            )

        return -np.log(correct_confidences)

In [2]:
# Dataset generated by nnfs' spiral_data helper
X, y = spiral_data(samples=100, classes=3)

# Network: dense (2 -> 3) + ReLU, then dense (3 -> 3) + softmax.
# The two dense layers are built in this order because each one draws
# its initial weights from the global numpy random state.
dense1 = Layer_Dense(n_inputs=2, n_neurons=3)
activation1 = Activation_ReLU()
dense2 = Layer_Dense(n_inputs=3, n_neurons=3)
activation2 = Activation_Softmax()

loss_function = Loss_CategoricalCrossentropy()


In [3]:
# Large starting value, intended to be beaten by the first measured loss
lowest_loss = 9999999

# Independent copies of the current parameters of both layers
best_dense1_weights, best_dense1_biases = dense1.weights.copy(), dense1.biases.copy()
best_dense2_weights, best_dense2_biases = dense2.weights.copy(), dense2.biases.copy()

In [4]:
# Random search: try parameter sets and remember the one with the lowest loss.
for iteration in range(10000):
    # Draw a completely new parameter set for both layers.
    # NOTE(review): because every iteration assigns fresh random values
    # (rather than perturbing the current ones with +=), the restore in the
    # else branch below does not influence the next draw - confirm whether
    # incremental perturbation was intended.
    dense1.weights = 0.05 * np.random.randn(2,3)
    dense1.biases = 0.05 * np.random.randn(1,3)
    dense2.weights = 0.05 * np.random.randn(3,3)
    dense2.biases = 0.05 * np.random.randn(1,3)

    # Forward pass through the whole network
    dense1.forward(X)
    activation1.forward(dense1.output)
    dense2.forward(activation1.output)
    activation2.forward(dense2.output)

    # NOTE(review): this rebinds the module-level name `loss`, which is also
    # the name of the loss base class defined in the first cell.
    loss = loss_function.calculate(activation2.output,y)

    # Fraction of samples whose highest-confidence class equals the label
    predictions = np.argmax(activation2.output, axis = 1)
    accuracy = np.mean(predictions == y)

    # If loss is smaller - print and save weights and biases aside
    if loss < lowest_loss:
        print("new set of weights found, iteration:", iteration, "loss:", loss, "acc:", accuracy)

        best_dense1_weights = dense1.weights.copy()
        best_dense1_biases = dense1.biases.copy()
        # Fix: the dense2 snapshot must come from dense2 (it used to copy
        # dense1's parameters, and the biases went to a misspelled name).
        best_dense2_weights = dense2.weights.copy()
        best_dense2_biases = dense2.biases.copy()
        lowest_loss = loss
    else:
        # Otherwise put the best known parameters back on the layers
        dense1.weights = best_dense1_weights.copy()
        dense1.biases = best_dense1_biases.copy()
        dense2.weights = best_dense2_weights.copy()
        dense2.biases = best_dense2_biases.copy()

new set of weights found, iteration: 0 loss: 1.1008568 acc: 0.3333333333333333
new set of weights found, iteration: 1 loss: 1.0990819 acc: 0.3333333333333333
new set of weights found, iteration: 3 loss: 1.098629 acc: 0.3333333333333333
new set of weights found, iteration: 11 loss: 1.0985013 acc: 0.3333333333333333
new set of weights found, iteration: 58 loss: 1.0984759 acc: 0.36666666666666664
new set of weights found, iteration: 87 loss: 1.0984341 acc: 0.3933333333333333
new set of weights found, iteration: 389 loss: 1.0983855 acc: 0.3333333333333333
new set of weights found, iteration: 602 loss: 1.0983611 acc: 0.3433333333333333
new set of weights found, iteration: 812 loss: 1.0983337 acc: 0.3333333333333333
new set of weights found, iteration: 1765 loss: 1.0982677 acc: 0.37
new set of weights found, iteration: 3766 loss: 1.0980942 acc: 0.3333333333333333
new set of weights found, iteration: 3979 loss: 1.0980837 acc: 0.3333333333333333
new set of weights found, iteration: 5582 loss: 

In [5]:
# ---- Forward pass of a single ReLU neuron ----
x = [1.0, -2.0, 3.0]   # input values
w = [-3.0, -1.0, 2.0]  # weights
b = 1.0                # bias

# One product per input/weight pair
xw0, xw1, xw2 = (x_i * w_i for x_i, w_i in zip(x, w))
# Pre-activation: weighted inputs plus the bias
z = xw0 + xw1 + xw2 + b
# ReLU activation
y = max(z, 0)

# ---- Backward pass ----
# Gradient arriving from the next layer
dvalue = 1.0
# ReLU derivative (1 where z > 0, else 0) chained with the incoming gradient
drelu_dz = dvalue * (1. if z > 0 else 0.)
print(drelu_dz)

# The derivative of a sum with respect to any one of its terms is 1;
# this covers the three weighted inputs and the bias term alike
dsum_dxw0 = dsum_dxw1 = dsum_dxw2 = dsum_db = 1
drelu_dxw0 = drelu_dz * dsum_dxw0
drelu_dxw1 = drelu_dz * dsum_dxw1
drelu_dxw2 = drelu_dz * dsum_dxw2
drelu_db = drelu_dz * dsum_db
print(drelu_dxw0, drelu_dxw1, drelu_dxw2, drelu_db)

# The derivative of a product x*w with respect to one factor is the other
# factor: d(x*w)/dx = w and d(x*w)/dw = x
dmul_dx0, dmul_dx1, dmul_dx2 = w
dmul_dw0, dmul_dw1, dmul_dw2 = x

# Chain rule down to every input and every weight
drelu_dx0 = drelu_dxw0 * dmul_dx0
drelu_dw0 = drelu_dxw0 * dmul_dw0
drelu_dx1 = drelu_dxw1 * dmul_dx1
drelu_dw1 = drelu_dxw1 * dmul_dw1
drelu_dx2 = drelu_dxw2 * dmul_dx2
drelu_dw2 = drelu_dxw2 * dmul_dw2
print(drelu_dx0, drelu_dw0, drelu_dx1, drelu_dw1, drelu_dx2, drelu_dw2)


1.0
1.0 1.0 1.0 1.0
-3.0 1.0 -1.0 -2.0 2.0 3.0


In [6]:
# Group the scalar gradients from the backward pass into per-kind containers
dx = [drelu_dx0, drelu_dx1, drelu_dx2]  # gradients on inputs
dw = [drelu_dw0, drelu_dw1, drelu_dw2]  # gradients on weights
db = drelu_db                           # gradient on the bias

# Show the current weights and bias
print(w, b)

[-3.0, -1.0, 2.0] 1.0


In [7]:
# Move every weight a small step (factor 0.001) against its gradient.
# The list is updated in place, so re-running this cell applies another step.
w[:] = [w_i - 0.001 * dw_i for w_i, dw_i in zip(w, dw)]
# Same step for the bias
b -= 0.001 * db
print(w, b)


[-3.001, -0.998, 1.997] 0.999


In [8]:
# Forward pass again, using the current weights and bias
xw0, xw1, xw2 = (x_i * w_i for x_i, w_i in zip(x, w))

# Weighted inputs plus the bias
z = xw0 + xw1 + xw2 + b
# ReLU activation
y = max(z, 0)
print(y)

5.985


In [9]:
import numpy as np
# Gradient passed in from the next layer: a single row of three ones
dvalues = np.array([[1., 1., 1.]])

# Written as three rows of four values, then transposed to shape (4, 3)
weights = np.array([
    [0.2, 0.8, -0.5, 1],
    [0.5, -0.91, 0.26, -0.5],
    [-0.26, -0.27, 0.17, 0.87],
]).T
weights

array([[ 0.2 ,  0.5 , -0.26],
       [ 0.8 , -0.91, -0.27],
       [-0.5 ,  0.26,  0.17],
       [ 1.  , -0.5 ,  0.87]])

In [10]:
# Shapes of the full arrays followed by the shapes of their first rows,
# one per line
print(
    dvalues.shape,
    weights.shape,
    weights[0].shape,
    dvalues[0].shape,
    sep="\n",
)

(1, 3)
(4, 3)
(3,)
(3,)


In [11]:
# For each row of weights: add up the row, then scale the first gradient
# row by that total
dx0, dx1, dx2, dx3 = (sum(weight_row) * dvalues[0] for weight_row in weights)
dinputs = np.array([dx0, dx1, dx2, dx3])
print(dinputs)

[[ 0.44  0.44  0.44]
 [-0.38 -0.38 -0.38]
 [-0.07 -0.07 -0.07]
 [ 1.37  1.37  1.37]]


In [16]:
import numpy as np
# Gradient passed in from the next layer; for this example it is a single
# row of ones
dvalues = np.array([[1., 1., 1.]])

# Three sets of weights with four values each (one set per neuron, one
# value per input), stored transposed so the array has shape (4, 3)
weights = np.array([
    [0.2, 0.8, -0.5, 1],
    [0.5, -0.91, 0.26, -0.5],
    [-0.26, -0.27, 0.17, 0.87],
]).T

# For each row of weights: add up the row, then scale the first gradient
# row by that total
dx0, dx1, dx2, dx3 = (sum(weight_row) * dvalues[0] for weight_row in weights)
dinputs = np.array([dx0, dx1, dx2, dx3])
print(dinputs)
dx0


[[ 0.44  0.44  0.44]
 [-0.38 -0.38 -0.38]
 [-0.07 -0.07 -0.07]
 [ 1.37  1.37  1.37]]


array([0.44, 0.44, 0.44])

In [28]:
import numpy as np
# Gradients passed in from the next layer: three rows this time.
# Only the first row, dvalues[0], is used below.
dvalues = np.array([
    [1., 1., 1.],
    [2., 2., 2.],
    [3., 3., 3.],
])

# Three sets of weights with four values each (one set per neuron, one
# value per input), stored transposed so the array has shape (4, 3)
weights = np.array([
    [0.2, 0.8, -0.5, 1],
    [0.5, -0.91, 0.26, -0.5],
    [-0.26, -0.27, 0.17, 0.87],
]).T

# For each row of weights: multiply element-wise by the first gradient row,
# then add up the products
dx0, dx1, dx2, dx3 = (sum(weight_row * dvalues[0]) for weight_row in weights)
dinputs = np.array([dx0, dx1, dx2, dx3])
print(dinputs)
dx0

[ 0.44 -0.38 -0.07  1.37]


0.43999999999999995

In [30]:
# Whole batch at once: every row of dvalues against the transposed weights
dinputs2 = np.dot(dvalues, weights.T)
# Same product for the first gradient row only
dinputs = np.dot(dvalues[0], weights.T)
print(dinputs2)


[[ 0.44 -0.38 -0.07  1.37]
 [ 0.88 -0.76 -0.14  2.74]
 [ 1.32 -1.14 -0.21  4.11]]
