In [24]:
import numpy as np
import nnfs
from nnfs.datasets import spiral_data

class Layer_Dense:

    #layer initialization 
    def __init__(self, n_inputs, n_neurons):
        self.weights = 0.01 * np.random.randn(n_inputs, n_neurons)
        self.biases = np.zeros((1,n_neurons))

    #forward pass
    def forward(self, inputs):
        self.inputs = inputs
        self.output = np.dot(inputs, self.weights) + self.biases

    def backward(self, dvalues):
        #derivative wrt of weights is inputs. Dimension adjustment is needed
        self.dweights = np.dot(self.inputs.T, dvalues)
        #derivative of bias is column sums
        self.dbiases = np.sum(dvalues, axis = 0, keepdims = True)
        #derivative wrt of inputs is weights
        self.dinputs = np.dot(dvalues, self.weights.T)

class Activation_ReLU:
    def forward(self, inputs):
        self.inputs = inputs
        self.output = np.maximum(0,inputs) #if the inputs is lower than 0, we make it 0, uf not, then we pass on

    def backward(self, dvalues):
        self.dinputs = dvalues.copy() #copy the gradient matrix
        self.dinputs[self.inputs<= 0] = 0 #if it is lower than 0 then we'll make it zero

class Activation_Softmax:
    def forward(self, inputs):
        self.inputs = inputs
        exp_values = np.exp(inputs - np.max(inputs, axis = 1, keepdims = True)) #make the max = 1 and the min is -inf
        probabilities = exp_values / np.sum(exp_values, axis = 1, keepdims = True)# normalize

        self.output = probabilities

    def backward(self, dvalues):
        self.dinputs = np.empty_like(dvalues)

        for index, (single_output, single_dvalues)  in enumerate(zip(self.output, dvalues)):
            #single output is self.output[0]. it has the dimension 1xn, then the next line, we'll shift if to n,1
            single_output = single_output.reshape(-1,1) #reshape the output to become (,1). n row with 1 column
            
            #diagflat is to craete the matrix where diagonal is a value and the rest is 0. then based on the formula 
            #diagflat will have n,n matrix with diagonal is single_output and the rest is 0
            #then we have single.output where dim = n,1 and single.output.T dim = 1,n. this will result n,n
            jacobian_matrix  = np.diagflat(single_output) - np.dot(single_output, single_output.T)

            #jacobian matrix is n,n and single_values is n,1. Why Dvalues has n,n dimension and single is only the first row
            #this will result n,1 dimension
            self.dinputs[index] = np.dot(jacobian_matrix, single_dvalues)
class Loss:
    def calculate(self, output, y):
        sample_losses = self.forward(output,y) #output in here is the preddiction
        data_loss = np.mean(sample_losses)
        return data_loss

class Loss_CategoricalCrossentropy(Loss):
    def forward(self, y_pred, y_true):
        samples = len(y_pred)

        #clip data to avoid division by 0
        y_pred_clipped = np.clip(y_pred, 1e-7, 1-1e-7)
        
        if len(y_true.shape) == 1 : 
            #softmax_outputs1 = np.array([[ 0.7 , 0.1 , 0.2 ],
            #[ 0.1 , 0.5 , 0.4 ],
            #[ 0.02 , 0.9 , 0.08 ]])
            #class_targets1 = np.array([ 0 , 1 , 1 ])
            #for example [0,1,1] just go to observation where it is y true. 
            #y_pred has a dimension nxn so for the first row, take index 0 y_pred_clipped[0,0]
            #y_pred_clipped[1,1]
            #y_pred_clipped[2,1]

            correct_cofidences = y_pred_clipped[ range(samples), y_true]   

        elif len(y_true.shape) == 2:
            #softmax_outputs2 = np.array([[ 0.7 , 0.1 , 0.2 ],
            #[ 0.1 , 0.5 , 0.4 ],
            #[ 0.02 , 0.9 , 0.08 ]])
            #class_targets2 = np.array([[ 1 , 0 , 0 ],
            #[ 0 , 1 , 0 ],
            #[ 0 , 1 , 0 ]])
            #since this is one hot encoding. only 1 value is 1 and the rest is zero. so when multiplying, only the given 1 will yield a result
            correct_confidences1 = np.sum(y_pred_clipped * y_true, axis = 1) 
        
        negative_log_likelihoods = -np.log(correct_cofidences)
        return negative_log_likelihoods
    
    def backward(self, dvalues, y_true):
        #number of samples
        samples = len(dvalues)

        labels = len(dvalues[0])

        if len(y_true.shape) == 1:
            y_true = np.eye(labels)[y_true]

        self.dinputs = -y_true / dvalues
        self.dinputs = self.dinputs / samples

        



In [None]:
#Loss_CategoricalCrossentropy(Loss): explanation
softmax_outputs1 = np.array([[ 0.7 , 0.1 , 0.2 ],
[ 0.1 , 0.5 , 0.4 ],
[ 0.02 , 0.9 , 0.08 ]])
class_targets1 = np.array([ 0 , 1 , 1 ])

softmax_outputs2 = np.array([[ 0.7 , 0.1 , 0.2 ],
[ 0.1 , 0.5 , 0.4 ],
[ 0.02 , 0.9 , 0.08 ]])
class_targets2 = np.array([[ 1 , 0 , 0 ],
[ 0 , 1 , 0 ],
[ 0 , 1 , 0 ]])

print(class_targets1)
print(class_targets1.shape)
print(len(class_targets1.shape))
print(range ( len (softmax_outputs1)))
correct_confidences1 = softmax_outputs1[range ( len (softmax_outputs1)),class_targets1]
print(correct_confidences1)


In [25]:
import numpy as np
softmax_output = np.array([[ 1,2,3,4 ], [5,6,7,8],    [9,10,11,12 ]] )
dvalues = np.array([[ 1 , 2 , 3 , 4 ],[ 5 , 6 , 7 , 8 ],[ 9 , 10 , 11 , 12 ]])
dinputs = np.empty_like(dvalues)
#softmax_output = np.array(softmax_output).reshape( - 1 , 1 )
print(softmax_output)
print(dvalues)



[[ 1  2  3  4]
 [ 5  6  7  8]
 [ 9 10 11 12]]
[[ 1  2  3  4]
 [ 5  6  7  8]
 [ 9 10 11 12]]


In [3]:
single_output = softmax_output[0]
single_output = np.array(single_output).reshape( - 1 , 1 )
squared = np.dot(single_output, single_output.T)
diagflat = np.diagflat(single_output)
jacobian_matrix = np.diagflat(single_output) - squared
firstdinputs =np.dot(jacobian_matrix,dvalues[0])
print(single_output.shape)
print(diagflat)
print(single_output.T)
print(single_output)
print(squared)
print(jacobian_matrix)
print(dvalues[0])
print(firstdinputs)

(4, 1)
[[1 0 0 0]
 [0 2 0 0]
 [0 0 3 0]
 [0 0 0 4]]
[[1 2 3 4]]
[[1]
 [2]
 [3]
 [4]]
[[ 1  2  3  4]
 [ 2  4  6  8]
 [ 3  6  9 12]
 [ 4  8 12 16]]
[[  0  -2  -3  -4]
 [ -2  -2  -6  -8]
 [ -3  -6  -6 -12]
 [ -4  -8 -12 -12]]
[1 2 3 4]
[ -29  -56  -81 -104]


In [4]:
#dari buku


In [5]:
for index,(single_output, single_dvalues) in enumerate(zip(softmax_output, dvalues)):
    single_output = single_output.reshape(-1,1)
    jacobian_matrix = np.diagflat(single_output) - np.dot(single_output, single_output.T)
    dinputs[index] =np.dot(jacobian_matrix,single_dvalues)

In [6]:
import numpy as np
import nnfs
nnfs.init()



softmax_outputs = np.array([[ 0.7 , 0.1 , 0.2 ],
[ 0.1 , 0.5 , 0.4 ],
[ 0.02 , 0.9 , 0.08 ]])

class_targets = np.array([0,1,1])

softmax_loss = Activation_Softmax_Loss_CategoricalCrossentropy()
softmax_loss.backward(softmax_outputs, class_targets)
dvalues1 =softmax_loss.dinputs

activation = Activation_Softmax()
activation.output = softmax_outputs
loss = Loss_CategoricalCrossentropy()
loss.backward(softmax_outputs, class_targets)
activation.backward(loss.dinputs)
dvalues2 = activation.dinputs







NameError: name 'Activation_Softmax_Loss_CategoricalCrossentropy' is not defined

In [9]:
softmax_outputs1 = np.array([[ 0.7 , 0.1 , 0.2 ],
[ 0.1 , 0.5 , 0.4 ],
[ 0.02 , 0.9 , 0.08 ]])
class_targets1 = np.array([ 0 , 1 , 1 ])

softmax_outputs2 = np.array([[ 0.7 , 0.1 , 0.2 ],
[ 0.1 , 0.5 , 0.4 ],
[ 0.02 , 0.9 , 0.08 ]])
class_targets2 = np.array([[ 1 , 0 , 0 ],
[ 0 , 1 , 0 ],
[ 0 , 1 , 0 ]])

In [23]:
print(class_targets1)
print(class_targets1.shape)
print(len(class_targets1.shape))
print(range ( len (softmax_outputs)))
correct_confidences1 = softmax_outputs[range ( len (softmax_outputs)),class_targets1]
print(correct_confidences1)


[0 1 1]
(3,)
1
range(0, 3)
[0.7 0.5 0.9]


In [22]:
print(class_targets2)
print(class_targets2.shape)
print (len(class_targets2.shape))
correct_confidences2 = np.sum(softmax_outputs * class_targets2,axis = 1)
print(correct_confidences2)

[[1 0 0]
 [0 1 0]
 [0 1 0]]
(3, 3)
2
[0.7 0.5 0.9]
