In [1]:
import numpy as np
import nnfs
from nnfs.datasets import spiral_data

class Layer_Dense:

    #layer initialization 
    def __init__(self, n_inputs, n_neurons):
        self.weights = 0.01 * np.random.randn(n_inputs, n_neurons)
        self.biases = np.zeros((1,n_neurons))

    #forward pass
    def forward(self, inputs):
        self.inputs = inputs
        self.output = np.dot(inputs, self.weights) + self.biases

    def backward(self, dvalues):
        #derivative wrt of weights is inputs. Dimension adjustment is needed
        self.dweights = np.dot(self.inputs.T, dvalues)
        #derivative of bias is column sums
        self.dbiases = np.sum(dvalues, axis = 0, keepdims = True)
        #derivative wrt of inputs is weights
        self.dinputs = np.dot(dvalues, self.weights.T)

class Activation_ReLU:
    def forward(self, inputs):
        self.inputs = inputs
        self.output = np.maximum(0,inputs) #if the inputs is lower than 0, we make it 0, uf not, then we pass on

    def backward(self, dvalues):
        self.dinputs = dvalues.copy() #copy the gradient matrix
        self.dinputs[self.inputs<= 0] = 0 #self.inputs is the partial derivative of relu with respect to the input. meaning that if input < 0, there is no derivative or equal to 0
    

class Activation_Softmax:
    def forward(self, inputs):
        self.inputs = inputs
        exp_values = np.exp(inputs - np.max(inputs, axis = 1, keepdims = True)) #make the max = 1 and the min is -inf
        probabilities = exp_values / np.sum(exp_values, axis = 1, keepdims = True)# normalize

        self.output = probabilities

    def backward(self, dvalues):
        self.dinputs = np.empty_like(dvalues)

        for index, (single_output, single_dvalues)  in enumerate(zip(self.output, dvalues)):
            #single output is self.output[0]. it has the dimension 1xn, then the next line, we'll shift if to n,1
            single_output = single_output.reshape(-1,1) #reshape the output to become (,1). n row with 1 column
            
            #diagflat is to craete the matrix where diagonal is a value and the rest is 0. then based on the formula 
            #diagflat will have n,n matrix with diagonal is single_output and the rest is 0
            #then we have single.output where dim = n,1 and single.output.T dim = 1,n. this will result n,n
            jacobian_matrix  = np.diagflat(single_output) - np.dot(single_output, single_output.T)

            #jacobian matrix is n,n and single_values is n,1. Why Dvalues has n,n dimension and single is only the first row
            #this will result n,1 dimension
            self.dinputs[index] = np.dot(jacobian_matrix, single_dvalues)
class Loss:
    def calculate(self, output, y):
        sample_losses = self.forward(output,y) #output in here is the preddiction
        data_loss = np.mean(sample_losses)
        return data_loss

class Loss_CategoricalCrossentropy(Loss):
    def forward(self, y_pred, y_true):
        samples = len(y_pred)

        #clip data to avoid division by 0
        y_pred_clipped = np.clip(y_pred, 1e-7, 1-1e-7)
        
        if len(y_true.shape) == 1 : 
            #softmax_outputs1 = np.array([[ 0.7 , 0.1 , 0.2 ],
            #[ 0.1 , 0.5 , 0.4 ],
            #[ 0.02 , 0.9 , 0.08 ]])
            #class_targets1 = np.array([ 0 , 1 , 1 ])
            #for example [0,1,1] just go to observation where it is y true. 
            #y_pred has a dimension nxn so for the first row, take index 0 y_pred_clipped[0,0]
            #y_pred_clipped[1,1]
            #y_pred_clipped[2,1]

            correct_cofidences = y_pred_clipped[ range(samples), y_true]   

        elif len(y_true.shape) == 2:
            #softmax_outputs2 = np.array([[ 0.7 , 0.1 , 0.2 ],
            #[ 0.1 , 0.5 , 0.4 ],
            #[ 0.02 , 0.9 , 0.08 ]])
            #class_targets2 = np.array([[ 1 , 0 , 0 ],
            #[ 0 , 1 , 0 ],
            #[ 0 , 1 , 0 ]])
            #since this is one hot encoding. only 1 value is 1 and the rest is zero. so when multiplying, only the given 1 will yield a result
            correct_confidences1 = np.sum(y_pred_clipped * y_true, axis = 1) 
        
        negative_log_likelihoods = -np.log(correct_cofidences)
        return negative_log_likelihoods
    
    def backward(self, dvalues, y_true):
        #number of samples
        samples = len(dvalues) #np.array([[ 1 , 2 , 3 ],[ 5 , 6 , 7 ],[ 9 , 10 , 11  ]])

        labels = len(dvalues[0]) #array([1,2,3])

        #if y_true is [0,1,1]
        #then np.eye will make it 
        #array([[1., 0., 0.],
        #       [0., 1., 0.],
        #       [0., 1., 0.]], dtype=float32)

        if len(y_true.shape) == 1:
            y_true = np.eye(labels)[y_true] 
    
        self.dinputs = -y_true / dvalues #partial derivatives with respect tp inputs = matrix 3x3 - 3x3
        #the derivative of this loss fucntion with respect ot is input = ground truth vector / vector of predicted values

        self.dinputs = self.dinputs / samples
        #normalize to make the sum magnitude invariant to the number of samples. 

        
class Activation_Softmax_Loss_CategoricalCrossentropy():
    def __init__(self):
        self.activation = Activation_Softmax()
        self.loss = Loss_CategoricalCrossentropy()

    def forward(self, inputs, y_true):
        self.activation.forward(inputs) #use softmax activation
        self.output = self.activation_output #the output is a probability
        return self.loss.caclulate(self.output, y_true) #calculate loss between predicted (self.output) and y_true
    
    def backward(self, dvalues, y_true):
        samples = len(dvalues)

        if len(y_true.shape) == 2:
            y_true = np.argmax(y_true, axis = 1) #convert from one hot encoder to the discrete true labels
        
        self.dinputs = dvalues.copy()
        self.dinputs [range(samples) ,y_true] -= 1 #only at the given ytue, the value is minus by one. why?
        #becayse the partial derivative of loss wrt of softmax function inputs. 

    


In [2]:
#main codes
# 
# import numpy as np
import nnfs
nnfs.init()



softmax_outputs = np.array([[ 0.7 , 0.1 , 0.2 ],
[ 0.1 , 0.5 , 0.4 ],
[ 0.02 , 0.9 , 0.08 ]])

class_targets = np.array([0,1,1])

softmax_loss = Activation_Softmax_Loss_CategoricalCrossentropy()
softmax_loss.backward(softmax_outputs, class_targets)
dvalues1 =softmax_loss.dinputs

activation = Activation_Softmax()
activation.output = softmax_outputs
loss = Loss_CategoricalCrossentropy()
loss.backward(softmax_outputs, class_targets)
activation.backward(loss.dinputs)
dvalues2 = activation.dinputs

activation = Activation_Softmax()
activation.output = softmax_outputs # di assign daria tas
loss = Loss_CategoricalCrossentropy()
loss.backward(softmax_outputs, class_targets)
#make the matrix of the d values and then -class_targets / softmax_outputs then normalize using samples 







In [3]:
dvalues1.shape

(3, 3)

In [4]:
var1= np.eye(len(softmax_outputs[0]))[class_targets]
var2 = -var1 / softmax_outputs
var3 = var2/ len(softmax_outputs)
print (var3)
print(loss.dinputs)

[[-0.47619048 -0.         -0.        ]
 [-0.         -0.66666667 -0.        ]
 [-0.         -0.37037037 -0.        ]]
[[-0.47619048 -0.         -0.        ]
 [-0.         -0.66666667 -0.        ]
 [-0.         -0.37037037 -0.        ]]


In [5]:
#test the code
import numpy as np
import nnfs

softmax_outputs = np.array([[ 0.7 , 0.1 , 0.2 ],
                            [ 0.1 , 0.5 , 0.4 ],
                            [ 0.02 , 0.9 , 0.08 ]])
class_targets = np.array([ 0 , 1 , 1 ])

softmax_loss = Activation_Softmax_Loss_CategoricalCrossentropy()
softmax_loss.backward(softmax_outputs, class_targets)
#what happen in here
samples = len(softmax_outputs)
if len(class_targets.shape) == 2:#conver from one hot encoded to discrete true label
            y_true = np.argmax(class_targets, axis = 1)


dvalues1 = softmax_loss.dinputs

print(dvalues1)

[[-0.3   0.1   0.2 ]
 [ 0.1  -0.5   0.4 ]
 [ 0.02 -0.1   0.08]]


In [6]:
X, y = spiral_data(samples = 69, classes = 3)

dense1 = Layer_Dense(2,3)

activation1 = Activation_ReLU()

dense2 = Layer_Dense(3,3)

loss_activation = Activation_Softmax_Loss_CategoricalCrossentropy()

dense1.forward(X)
activation1.forward(dense1.output)
dense2.forward(activation1.output)
loss = loss_activation.forward(dense2.output,y)

print(loss_activation.output[:5])
print('loss:', loss)

predictions = np.argamax(loss_activation.output, axis =1 )
if len(y.shape) == 2:
    y = np.argmax(y, axis = 1) #convert from one hot encoder to layer dense
accuracy= np.mean(predictions == y)

AttributeError: 'Activation_Softmax_Loss_CategoricalCrossentropy' object has no attribute 'activation_output'

In [None]:
#Loss_CategoricalCrossentropy(Loss): explanation
softmax_outputs1 = np.array([[ 0.7 , 0.1 , 0.2 ],
[ 0.1 , 0.5 , 0.4 ],
[ 0.02 , 0.9 , 0.08 ]])
class_targets1 = np.array([ 0 , 1 , 1 ])

softmax_outputs2 = np.array([[ 0.7 , 0.1 , 0.2 ],
[ 0.1 , 0.5 , 0.4 ],
[ 0.02 , 0.9 , 0.08 ]])
class_targets2 = np.array([[ 1 , 0 , 0 ],
[ 0 , 1 , 0 ],
[ 0 , 1 , 0 ]])

print(class_targets1)
print(class_targets1.shape)
print(len(class_targets1.shape))
print(range ( len (softmax_outputs1)))
correct_confidences1 = softmax_outputs1[range ( len (softmax_outputs1)),class_targets1]
print(correct_confidences1)


In [None]:
import numpy as np
softmax_output = np.array([[ 1,2,3,4 ], [5,6,7,8],    [9,10,11,12 ]] )
dvalues = np.array([[ 1 , 2 , 3 ],[ 5 , 6 , 7 ],[ 9 , 10 , 11  ]])
dinputs = np.empty_like(dvalues)
class_targets1 = np.array([ 0 , 1 , 1 ])
class_targets2 = np.array([[ 1 , 0 , 0 ],            [ 0 , 1 , 0 ],            [ 0 , 1 , 0 ]])
#softmax_output = np.array(softmax_output).reshape( - 1 , 1 )
np.eye(3)[class_targets1]
dvc = dvalues.copy()
dvc[range(3), class_targets1] -= 1
print(dvc)

In [None]:
single_output = softmax_output[0]
single_output = np.array(single_output).reshape( - 1 , 1 )
squared = np.dot(single_output, single_output.T)
diagflat = np.diagflat(single_output)
jacobian_matrix = np.diagflat(single_output) - squared
firstdinputs =np.dot(jacobian_matrix,dvalues[0])
print(single_output.shape)
print(diagflat)
print(single_output.T)
print(single_output)
print(squared)
print(jacobian_matrix)
print(dvalues[0])
print(firstdinputs)

In [None]:
#dari buku


In [None]:
for index,(single_output, single_dvalues) in enumerate(zip(softmax_output, dvalues)):
    single_output = single_output.reshape(-1,1)
    jacobian_matrix = np.diagflat(single_output) - np.dot(single_output, single_output.T)
    dinputs[index] =np.dot(jacobian_matrix,single_dvalues)

In [None]:
print(softmax_output)

In [None]:
softmax_outputs1 = np.array([[ 0.7 , 0.1 , 0.2 ],
[ 0.1 , 0.5 , 0.4 ],
[ 0.02 , 0.9 , 0.08 ]])
class_targets1 = np.array([ 0 , 1 , 1 ])

softmax_outputs2 = np.array([[ 0.7 , 0.1 , 0.2 ],
[ 0.1 , 0.5 , 0.4 ],
[ 0.02 , 0.9 , 0.08 ]])
class_targets2 = np.array([[ 1 , 0 , 0 ],
[ 0 , 1 , 0 ],
[ 0 , 1 , 0 ]])

In [None]:
print(class_targets1)
print(class_targets1.shape)
print(len(class_targets1.shape))
print(range ( len (softmax_outputs)))
correct_confidences1 = softmax_outputs[range ( len (softmax_outputs)),class_targets1]
print(correct_confidences1)


In [None]:
print(class_targets2)
print(class_targets2.shape)
print (len(class_targets2.shape))
correct_confidences2 = np.sum(softmax_outputs * class_targets2,axis = 1)
print(correct_confidences2)