In [1]:
import numpy as np
import nnfs
from nnfs.datasets import spiral_data, sine_data
import pandas as pd

class Layer_Dense:

    #layer initialization 
    def __init__(self, n_inputs, n_neurons, weight_regularizer_l1 = 0,
    bias_regularizer_l1 =0, weight_regularizer_l2 = 0, bias_regularizer_l2 = 0):
        
        self.weights = 0.01 * np.random.randn(n_inputs, n_neurons)
        self.biases = np.zeros((1,n_neurons))
        self.weight_regularizer_l1 = weight_regularizer_l1
        self.weight_regularizer_l2 = weight_regularizer_l2
        self.bias_regularizer_l1 = bias_regularizer_l1
        self.bias_regularizer_l2 = bias_regularizer_l2
    
    def get_parameters(self):
        return self.weights, self.biases

    #forward pass
    def forward(self, inputs,training):
        self.inputs = inputs
        self.output = np.dot(inputs, self.weights) + self.biases

    def backward(self, dvalues):
        #derivative wrt of weights is inputs. Dimension adjustment is needed
        self.dweights = np.dot(self.inputs.T, dvalues)
        #derivative of bias is column sums
        self.dbiases = np.sum(dvalues, axis = 0, keepdims = True)

        if self.weight_regularizer_l1>0:
            dl1 = np.ones_like(self.weights)
            dl1[self.weights<0] = -1
            self.dweights += self.weight_regularizer_l1 * dl1
        
        if self.weight_regularizer_l2 > 0 :
            self.dweights += 2 * self.weight_regularizer_l2 * self.weights

        if self.bias_regularizer_l1 > 0 :
            dl1 = np.ones_like(self.biases)
            dl1[self.biases<0] = -1
            self.dbiases += self.bias_regularizer_l1 * dl1
        
        if self.bias_regularizer_l2 > 0 :
            self.dbiases += 2 * self.bias_regularizer_l2 * self.biases
        #derivative wrt of inputs is weights
        self.dinputs = np.dot(dvalues, self.weights.T)

class Activation_ReLU:
    def forward(self, inputs,training):
        self.inputs = inputs
        self.output = np.maximum(0,inputs) #if the inputs is lower than 0, we make it 0, uf not, then we pass on

    def backward(self, dvalues):
        self.dinputs = dvalues.copy() #copy the gradient matrix
        self.dinputs[self.inputs<= 0] = 0 #if it is lower than 0 then we'll make it zero

    def predictions(self,outputs):
        return outputs

class Activation_Softmax:
    def forward(self, inputs,training):
        self.inputs = inputs
        exp_values = np.exp(inputs - np.max(inputs, axis = 1, keepdims = True)) #make the max = 1 and the min is -inf
        probabilities = exp_values / np.sum(exp_values, axis = 1, keepdims = True)# normalize

        self.output = probabilities

    def backward(self, dvalues):
        self.dinputs = np.empty_like(dvalues)

        for index, (single_output, single_dvalues)  in enumerate(zip(self.output, dvalues)):
            #single output is self.output[0]. it has the dimension 1xn, then the next line, we'll shift if to n,1
            single_output = single_output.reshape(-1,1) #reshape the output to become (,1). n row with 1 column
            
            #diagflat is to craete the matrix where diagonal is a value and the rest is 0. then based on the formula 
            #diagflat will have n,n matrix with diagonal is single_output and the rest is 0
            #then we have single.output where dim = n,1 and single.output.T dim = 1,n. this will result n,n
            jacobian_matrix  = np.diagflat(single_output) - np.dot(single_output, single_output.T)

            #jacobian matrix is n,n and single_values is n,1. Why Dvalues has n,n dimension and single is only the first row
            #this will result n,1 dimension
            self.dinputs[index] = np.dot(jacobian_matrix, single_dvalues)

    def predictions(self, outputs):
        return np.argmax(outputs, axis = 1)

class Loss:
    def remember_trainable_layers(self, trainable_layers):
        self.trainable_layers = trainable_layers

    def regularization_loss(self):
        regularization_loss = 0

        for layer in self.trainable_layers:

            if layer.weight_regularizer_l1 > 0:
                regularization_loss += layer.weight_regularizer_l1 * np.sum(np.abs(layer.weights))

            if layer.weight_regularizer_l2 > 0:
                regularization_loss += layer.weight_regularizer_l2 * np.sum(layer.weights * layer.weights)

            if layer.bias_regularizer_l1 > 0:
                regularization_loss += layer.bias_regularizer_l1 * np.sum(layer.abs(layer.biases))

            if layer.bias_regularizer_l2 > 0 :
                regularization_loss += layer.bias_regularizer_l2 * np.sum(layer.biases * layer.biases)

        return regularization_loss

    def calculate(self, output, y, *, include_regularization = False):
        sample_losses = self.forward(output,y) #output in here is the preddiction
        data_loss = np.mean(sample_losses)

        #Add accumulated sum of losses and sample count
        self.accumulated_sum += np.sum(sample_losses)
        self.accumulated_count += len(sample_losses)

        if not include_regularization:
            return data_loss
        return data_loss, self.regularization_loss()

    def calculate_accumulated(self, *, include_regularization = False):
        data_loss = self.accumulated_sum / self.accumulated_count

        if not include_regularization:
            return data_loss
        
        return data_loss, self.regularization_loss()

    def new_pass(self):
        self.accumulated_sum = 0
        self.accumulated_count = 0
class Loss_CategoricalCrossentropy(Loss):
    def forward(self, y_pred, y_true):
        samples = len(y_pred)

        #clip data to avoid division by 0
        y_pred_clipped = np.clip(y_pred, 1e-7, 1-1e-7)
        
        if len(y_true.shape) == 1 : 
            #softmax_outputs1 = np.array([[ 0.7 , 0.1 , 0.2 ],
            #[ 0.1 , 0.5 , 0.4 ],
            #[ 0.02 , 0.9 , 0.08 ]])
            #class_targets1 = np.array([ 0 , 1 , 1 ])
            #for example [0,1,1] just go to observation where it is y true. 
            #y_pred has a dimension nxn so for the first row, take index 0 y_pred_clipped[0,0]
            #y_pred_clipped[1,1]
            #y_pred_clipped[2,1]

            correct_cofidences = y_pred_clipped[ range(samples), y_true]   

        elif len(y_true.shape) == 2:
            #softmax_outputs2 = np.array([[ 0.7 , 0.1 , 0.2 ],
            #[ 0.1 , 0.5 , 0.4 ],
            #[ 0.02 , 0.9 , 0.08 ]])
            #class_targets2 = np.array([[ 1 , 0 , 0 ],
            #[ 0 , 1 , 0 ],
            #[ 0 , 1 , 0 ]])
            #since this is one hot encoding. only 1 value is 1 and the rest is zero. so when multiplying, only the given 1 will yield a result
            correct_confidences1 = np.sum(y_pred_clipped * y_true, axis = 1) 
        
        negative_log_likelihoods = -np.log(correct_cofidences)
        return negative_log_likelihoods
    
    def backward(self, dvalues, y_true):
        #number of samples
        samples = len(dvalues) #np.array([[ 1 , 2 , 3 ],[ 5 , 6 , 7 ],[ 9 , 10 , 11  ]])

        labels = len(dvalues[0]) #array([1,2,3])

        #if y_true is [0,1,1]
        #then np.eye will make it 
        #array([[1., 0., 0.],
        #       [0., 1., 0.],
        #       [0., 1., 0.]], dtype=float32)

        if len(y_true.shape) == 1:
            y_true = np.eye(labels)[y_true] 
    
        self.dinputs = -y_true / dvalues #partial derivatives with respect tp inputs = matrix 3x3 - 3x3
        #the derivative of this loss fucntion with respect ot is input = ground truth vector / vector of predicted values

        self.dinputs = self.dinputs / samples
        #normalize to make the sum magnitude invariant to the number of samples. 
      
class Activation_Softmax_Loss_CategoricalCrossentropy():
    def backward(self, dvalues, y_true):
        samples = len(dvalues)

        if len(y_true.shape) == 2:
            y_true = np.argmax(y_true, axis = 1) #convert from one hot encoder to the discrete true labels
        
        self.dinputs = dvalues.copy()
        self.dinputs [range(samples) ,y_true] -= 1 #only at the given ytue, the value is minus by one. why?
        #becayse the partial derivative of loss wrt of softmax function inputs. 
        self.dinputs = self.dinputs/samples

class Optimizer_SGD:
    def __init__(self, learning_rate = 1., decay  = 0., momentum  = 0):
        self.learning_rate = learning_rate
        self.current_learning_rate = learning_rate
        self.iterations = 0
        self.decay = decay
        self.current_decay = decay
        self.momentum = momentum
        self.name = f'OptimizerSGD lr:{self.learning_rate}, decay ={self.decay}, momentum = {self.momentum}'
        

    def pre_udpate_params(self):
        if self.decay:
            self.current_decay = 1./ (1.+ self.decay * self.iterations)
            self.current_learning_rate = self.learning_rate * (1./ (1.+ self.decay * self.iterations))

    def update_params(self, layer):
        if self.momentum:
        #if we use momentum
            if not hasattr(layer, 'weight_momentums'):
                layer.weight_momentums = np.zeros_like(layer.weights)
                layer.bias_momentums = np.zeros_like(layer.biases)

            weight_updates = self.momentum * layer.weight_momentums - self.current_learning_rate * layer.dweights
            layer.weight_momentums = weight_updates

            bias_updates = self.momentum * layer.bias_momentums - self.current_learning_rate * layer.dbiases
            layer.bias_momentums = bias_updates

        else:
            weight_updates = -self.learning_rate * layer.dweights
            bias_updates = -self.learning_rate * layer.dbiases  
        
        layer.weights += weight_updates
        layer.biases += bias_updates

    def post_update_params(self):
        self.iterations += 1

class Optimizer_Adagrad:
    def __init__(self, learning_rate = 1, decay  = 0.,  epsilon = 1e-7):
        self.learning_rate = learning_rate
        self.current_learning_rate = learning_rate
        self.iterations = 0
        self.decay = decay
        self.current_decay = decay
        self.epsilon = epsilon #is only to prevent division by 0
        self.name = f'Adagrad decay =  {self.decay}'
        

    def pre_udpate_params(self):
        if self.decay:
            self.current_decay = 1./ (1.+ self.decay * self.iterations)
            self.current_learning_rate = self.learning_rate * (1./ (1.+ self.decay * self.iterations))

    def update_params(self, layer):
        #if we use momentum
        if not hasattr(layer, 'weight_cache'):
            layer.weight_cache = np.zeros_like(layer.weights)
            layer.bias_cache = np.zeros_like(layer.biases)

        layer.weight_cache += layer.dweights ** 2
        layer.bias_cache += layer.dbiases ** 2


        layer.weights += -self.current_learning_rate * layer.dweights / (np.sqrt(layer.weight_cache) + self.epsilon)
        layer.biases += -self.current_learning_rate * layer.dbiases  / (np.sqrt(layer.bias_cache) + self.epsilon)

    def post_update_params(self):
        self.iterations += 1
        
class Optimizer_RMSprop:
    def __init__(self, learning_rate = 0.001, decay  = 0., rho = 0.9, epsilon = 1e-7):
        self.learning_rate = learning_rate
        self.current_learning_rate = learning_rate
        self.iterations = 0
        self.decay = decay
        self.current_decay = decay
        self.epsilon = epsilon #is only to prevent division by 0
        self.name = f'RMSProp =  {self.decay}'
        self.rho = rho
        

    def pre_udpate_params(self):
        if self.decay:
            self.current_decay = 1./ (1.+ self.decay * self.iterations)
            self.current_learning_rate = self.learning_rate * (1./ (1.+ self.decay * self.iterations))

    def update_params(self, layer):
        #if we use momentum
        if not hasattr(layer, 'weight_cache'):
            layer.weight_cache = np.zeros_like(layer.weights)
            layer.bias_cache = np.zeros_like(layer.biases)

        layer.weight_cache = self.rho * layer.weight_cache + ( 1- self.rho) * layer.dweights **2
        layer.bias_cache = self.rho * layer.bias_cache + ( 1- self.rho) * layer.dbiases **2


        layer.weights += -self.learning_rate * layer.dweights / (np.sqrt(layer.weight_cache) + self.epsilon)
        layer.biases += -self.learning_rate * layer.dbiases  / (np.sqrt(layer.bias_cache) + self.epsilon)

    def post_update_params(self):
        self.iterations += 1


class Optimizer_Adam:
    def __init__(self, learning_rate = 0.001, decay  = 0., epsilon = 1e-7, beta_1 = 0.9, beta_2 = 0.999):
        self.learning_rate = learning_rate
        self.current_learning_rate = learning_rate
        self.iterations = 0
        self.decay = decay
        self.current_decay = decay
        self.epsilon = epsilon #is only to prevent division by 0
        self.name = f'OptimizerAdam lr =  {self.learning_rate}, decay = {self.current_decay}'
        self.beta_1 = beta_1
        self.beta_2 = beta_2

        

    def pre_update_params(self):
        if self.decay:
            self.current_decay = 1./ (1.+ self.decay * self.iterations)
            self.current_learning_rate = self.learning_rate * (1./ (1.+ self.decay * self.iterations))

    def update_params(self, layer):
        #if we use momentum
        if not hasattr(layer, 'weight_cache'):
            layer.weight_momentums = np.zeros_like(layer.weights)
            layer.weight_cache = np.zeros_like(layer.weights)
            layer.bias_momentums = np.zeros_like(layer.biases)
            layer.bias_cache = np.zeros_like(layer.biases)

        layer.weight_momentums = self.beta_1 * layer.weight_momentums + (1-self.beta_1) * layer.dweights
        layer.bias_momentums = self.beta_1 * layer.bias_momentums + (1- self.beta_1) * layer.dbiases

        weight_momentums_corrected = layer.weight_momentums / (1- self.beta_1 ** (self.iterations + 1))
        bias_momentums_corrected = layer.bias_momentums / (1- self.beta_1 ** (self.iterations + 1))

        layer.weight_cache = self.beta_2 * layer.weight_cache + (1 - self.beta_2) * layer.dweights ** 2
        layer.bias_cache = self.beta_2 * layer.bias_cache + (1- self.beta_2) * layer.dbiases ** 2

        #get corrected cache
        weight_cache_corrected = layer.weight_cache / (1- self.beta_2 ** (self.iterations +1))
        bias_cache_corrected = layer.bias_cache / (1- self.beta_2 ** (self.iterations + 1))

        layer.weights += -self.current_learning_rate * weight_momentums_corrected / (np.sqrt(weight_cache_corrected) + self.epsilon)
        layer.biases += -self.current_learning_rate * bias_momentums_corrected / (np.sqrt(bias_cache_corrected) + self.epsilon)
            

    def post_update_params(self):
        self.iterations += 1

class Layer_Dropout:
    def __init__(self, droprate):
        self.rate = 1- droprate

    def forward(self, inputs,training):
        self.inputs = inputs
        if not training:
            self.output = inputs.copy()
        self.binary_mask = np.random.binomial(1, self.rate, size=inputs.shape) / self.rate
        #1 is onnly zero and one, self.rate is the probability of success. which is (1- rate )
    
        self.output = inputs*self.binary_mask
    
    def backward(self, dvalues):
        self.dinputs = dvalues * self.binary_mask

class Activation_Sigmoid:
    def forward(self, inputs):
        self.inputs = inputs
        self.output = 1/ (1+ np.exp(-inputs))

    def backward(self, dvalues):
        self.dinputs = dvalues * (1- self.output) * self.output

    def predictions(self, outputs):
        return (outputs >0.5) * 1 #just to convert the boolean to the number

class Activation_Linear:
    def forward(self, inputs):
        self.inputs = inputs
        self.output = inputs
    
    def backward(self, dvalues):
        self.dinputs = dvalues.copy()
    
    def predictions(self, outputs):
        return outputs

class Loss_MeanSquaredError(Loss):
    def forward(self, y_pred, y_true):
        sample_losses = np.mean((y_true - y_pred)**2,axis = -  1)
        return sample_losses

    def backward(self, dvalues, y_true):
        samples = len(dvalues)
        outputs = len(dvalues[0])
        self.dinputs = -2*(y_true - dvalues) / outputs
        self.dinputs = self.dinputs / samples

class Loss_MeanAbsoluteError(Loss):
    def forward(self, y_pred, y_true ):
        sample_losses = np.mean(np.abs(y_true - y_pred),axis = -1)
        return sample_losses

    def backward(self, dvalues, y_true):
        samples = len(dvalues)
        outputs = len(dvalues[0])
        self.dinputs = np.sign(y_true-dvalues) / outputs
        self.dinputs = self.dinputs / samples

class Layer_Input:
    def forward(self, inputs,training):
        self.output = inputs

class Accuracy:
    def calculate (self, predictions, y):
        comparisons = self.compare(predictions,y)
        accuracy = np.mean(comparisons)

        #add accumulated sum of matching values and sample count
        self.accumulated_sum += np.sum(comparisons)
        self.accumulated_count += len(comparisons)

        return accuracy

    def calculate_accumulated(self):
        accuracy = self.accumulated_sum / self.accumulated_count
        return accuracy

    def new_pass(self):
        self.accumulated_sum  = 0
        self.accumulated_count = 0


class Accuracy_Regression(Accuracy):
    def __init__(self):
        self.precision = None
    
    def init(self, y, reinit = False):
        if self.precision is None or reinit:
            self.precision = np.std(y) / 250

    def compare(self, predictions, y):
        return np.absolute(predictions - y) < self.precision

class Loss_BinaryCrossentropy(Loss):
    def forward(self, y_pred, y_true):
        y_pred_clipped = np.clip(y_pred, 1e-7, 1e-7)

        sample_losses = -(y_true * np.log(y_pred_clipped) + (1 - y_true) * np.log(1 -y_pred_clipped))
        sample_losses = np.mean(sample_losses, axis = 1)

        return sample_losses
    
    def backward(self, dvalues, y_true):
        samples = len(dvalues)
        outputs = len(dvalues[0])
        clipped_dvalues = np.clip(dvalues, 1e-7 , 1 - 1e-7 )
        self.dinputs = -(y_true / clipped_dvalues - (1 - y_true) / (1- clipped_dvalues)) / outputs
        self.dinputs = self.dinputs / samples

class Model:
    def __init__(self):
        self.layers = []
        self.softmax_classifier_output = None #we combine to make the calculation faster

    def add (self, layer):
        self.layers.append(layer)

    def set ( self , * , loss = None , optimizer = None , accuracy = None ):
        if loss is not None :
            self.loss = loss
        if optimizer is not None :
            self.optimizer = optimizer
        if accuracy is not None :
            self.accuracy = accuracy
    
    def train(self, X,y,* , epochs = 1, print_every =1,
                validation_data = None,
                batch_size = None):
        
        self.accuracy.init(y)
        train_steps = 1

        if validation_data is not None: #validation for epoch
            validation_steps = 1

            X_val, y_val = validation_data

        if batch_size is not None:
            train_steps = len(X)//batch_size
            #dividing is round down. we need to add 1

            if train_steps * batch_size < len(X):
                train_steps += 1

            if validation_data is not None:
               validation_steps = len(X_val) // batch_size

               if validation_steps * batch_size < len(X_val):
                   validation_steps += 1



        for epoch in range(1,epochs+1):
            print(f'epoch:{epoch}')

            self.loss.new_pass()
            self.accuracy.new_pass()

            for step in range(train_steps):

                #if batch size is not set
                #train using one step and full dataset

                if batch_size is None:
                    batch_X = X
                    batch_y = y
                else:
                    batch_X = X[step * batch_size:(step + 1 ) * batch_size]
                    batch_y = y[step * batch_size:(step + 1 ) * batch_size]


                output = self.forward(batch_X,training=True)# look at the layer the first one is input and the last is loss

                data_loss, regularization_loss = self.loss.calculate(output,batch_y, include_regularization = True)
                loss = data_loss + regularization_loss

                predictions = self.output_layer_activation.predictions(output) #in every activaitn there is predictions
                accuracy = self.accuracy.calculate(predictions,batch_y)

                self.backward(output,batch_y)

                self.optimizer.pre_update_params()
                for layer in self.trainable_layers:
                    self.optimizer.update_params(layer)
                self.optimizer.post_update_params()
                # Print a summary
                if not step % print_every or step == train_steps - 1 :
                    print (f'step: {step} , ' +
                        f'acc: {accuracy :.3f} , ' +
                        f'loss: {loss :.3f} (' +
                        f'data_loss: {data_loss :.3f} , ' +
                        f'reg_loss: {regularization_loss :.3f} ), ' +
                        f'lr: {self.optimizer.current_learning_rate} ' )

            epoch_data_loss, epoch_regularization_loss = self.loss.calculate_accumulated(include_regularization = True)
            epoch_loss = epoch_data_loss + epoch_regularization_loss
            epoch_accuracy = self.accuracy.calculate_accumulated()


            print (f'training, ' +
                f'acc: {epoch_accuracy :.3f} , ' +
                f'loss: {epoch_loss :.3f} (' +
                f'data_loss: {epoch_data_loss :.3f} , ' +
                f'reg_loss: {epoch_regularization_loss :.3f} ), ' +
                f'lr: {self.optimizer.current_learning_rate} ' )

            if validation_data is not None:
                self.evaluate(*validation_data, batch_size = batch_size)
   
            
    
    def finalize(self):
        self.input_layer = Layer_Input()

        layer_count = len(self.layers)

        self.trainable_layers = []

        for i in range(layer_count):
            if i == 0:
                self.layers[i].prev = self.input_layer
                self.layers[i].next = self.layers[i+1]

            elif i < layer_count -1:
                self.layers[i].prev = self.layers[i -1]
                self.layers[i].next = self.layers[i+1]

            else:
                self.layers[i].prev = self.layers[i-1]
                self.layers[i].next = self.loss
                self.output_layer_activation = self.layers[i]

            if hasattr(self.layers[i],'weights'):
                self.trainable_layers.append(self.layers[i])
         # Update loss object with trainable layers
        if self.loss is not None:
            self.loss.remember_trainable_layers(self.trainable_layers)

        #self.layers[-1] means the lass
        if isinstance(self.layers[-1] ,Activation_Softmax)\
             and isinstance(self.loss, Loss_CategoricalCrossentropy):
             self.softmax_classifier_output = Activation_Softmax_Loss_CategoricalCrossentropy()



    def forward(self, X,training):
        self.input_layer.forward(X,training) #make input the output

        for layer in self.layers:
            layer.forward(layer.prev.output,training)
        #okeh. we take input data pass theis data to input_layer. which create output attribut of this object. then we iterate over self.layers. 
        #the first hidden layer. we perform a forward pass on the layer.prev.output. the output data of the previous layer. 
        #for the first iteration, the layer.prev is self.input-layer. the output attribute is created for each alyer when we call the forward method. 
        
        return layer.output
    
    def backward(self, output, y):
        self.loss.backward(output,y) #tis is the first backward

        for layer in reversed(self.layers):
            layer.backward(layer.next.dinputs) # we are using the next one to pass it backward

    def evaluate ( self , X_val , y_val , * , batch_size = None ):
        validation_steps = 1

        if batch_size is not None:
            validation_steps = len(X_val) // batch_size
            if validation_steps * batch_size < len(X_val):
                validation_steps += 1 

        self.loss.new_pass()
        self.accuracy.new_pass()

        for step in range(validation_steps):
            if batch_size is None:
                batch_X = X_val
                batch_y = y_val
            
            else:
                batch_X = X_val[step*batch_size:(step+1)*batch_size]
                batch_y = y_val[step*batch_size:(step+1)*batch_size]

            output = self.forward(batch_X,training = False)
            loss = self.loss.calculate(output, batch_y)
            predictions = self.output_layer_activation.predictions(output)
            accuracy = self.accuracy.calculate(predictions,batch_y)

        validation_loss = self.loss.calculate_accumulated()
        validation_accuracy = self.accuracy.calculate_accumulated()
        print (f'validation, ' +
                f'acc: {accuracy :.3f} , ' +
                f'loss: {loss :.3f} ' )
    
    def get_parameters(self):
        parameters = []

        for layer in self.trainable_layers:
            parameters.append(layer.get_parameters())

        return parameters

    def set_parameters(self, parameters):
        for parameter_set, layer in zip(parameters, self.trainable_layers):
            layer.set_parameters(*parameter_set)
        

class Accuracy_Categorical(Accuracy):
    def init(self, y):
        pass
    
    def compare(self, predictions, y):
        if len(y.shape) == 2: # if this is one hot encoding, we take the largest value and conver it into vector of n x1
            y = np.argmax(y, axis = 1)
        return predictions == y

In [2]:
'''URL = 'https://nnfs.io/datasets/fashion_mnist_images.zip'
FILE = '/Users/apit/PythonTraining/RawData/FashionMNIST/fashion_mnist_images.zip'
FOLDER = '/Users/apit/PythonTraining/RawData/FashionMNIST/fashion_mnist_images'
'''




"URL = 'https://nnfs.io/datasets/fashion_mnist_images.zip'\nFILE = '/Users/apit/PythonTraining/RawData/FashionMNIST/fashion_mnist_images.zip'\nFOLDER = '/Users/apit/PythonTraining/RawData/FashionMNIST/fashion_mnist_images'\n"

In [3]:
import os
import matplotlib as plt
import cv2

pathloc = '/Users/apit/PythonTraining/RawData/FashionMNIST/fashion_mnist_images'
def load_mnist_dataset(dataset,path):
    tfloc = '/Users/apit/PythonTraining/RawData/FashionMNIST/fashion_mnist_images/train'
    labels = os.listdir(tfloc)
    X = []
    y = []

    for label in labels:
        for file in os.listdir(os.path.join('/Users/apit/PythonTraining/RawData/FashionMNIST/fashion_mnist_images','train',label)):
            image = cv2.imread(os.path.join(tfloc,label,file),cv2.IMREAD_UNCHANGED)
            X.append(image)
            y.append(label)
    return np.array(X), np.array(y).astype('uint8')

def create_data_mnist(path):
    X,y = load_mnist_dataset('train',path)
    X_test, y_test = load_mnist_dataset('test',path)

    return X,y,X_test, y_test
    

In [4]:
X,y,X_test, y_test = create_data_mnist(pathloc)
keys = np.array(range(X.shape[0]))
np.random.shuffle(keys)
X = X[keys]
y = y[keys]

X = (X.reshape(X.shape[ 0 ], - 1 ).astype(np.float32) - 127.5 ) / 127.5
X_test = (X_test.reshape(X_test.shape[ 0 ], - 1 ).astype(np.float32) -
127.5 ) / 127.5

model = Model()
model.add(Layer_Dense(X.shape[1],128))
model.add(Activation_ReLU())
model.add(Layer_Dense(128,128))
model.add(Activation_ReLU())
model.add(Layer_Dense(128,10))
model.add(Activation_Softmax())

model.set(
    loss = Loss_CategoricalCrossentropy(),
    optimizer = Optimizer_Adam(decay = 1e-3),
    accuracy = Accuracy_Categorical()

)




In [5]:
model.finalize()
model.train(X, y, validation_data = (X_test, y_test),
epochs = 10 , batch_size = 128 , print_every = 100 )
model.evaluate(X_test, y_test)
parameters = model.get_parameters()
print(parameters)

epoch:1
step: 0 , acc: 0.164 , loss: 2.302 (data_loss: 2.302 , reg_loss: 0.000 ), lr: 0.001 
step: 100 , acc: 0.664 , loss: 0.733 (data_loss: 0.733 , reg_loss: 0.000 ), lr: 0.0009090909090909091 
step: 200 , acc: 0.852 , loss: 0.488 (data_loss: 0.488 , reg_loss: 0.000 ), lr: 0.0008333333333333334 
step: 300 , acc: 0.805 , loss: 0.473 (data_loss: 0.473 , reg_loss: 0.000 ), lr: 0.0007692307692307692 
step: 400 , acc: 0.859 , loss: 0.359 (data_loss: 0.359 , reg_loss: 0.000 ), lr: 0.0007142857142857143 
step: 468 , acc: 0.865 , loss: 0.384 (data_loss: 0.384 , reg_loss: 0.000 ), lr: 0.000681198910081744 
training, acc: 0.757 , loss: 0.656 (data_loss: 0.656 , reg_loss: 0.000 ), lr: 0.000681198910081744 
validation, acc: 0.927 , loss: 0.251 
epoch:2
step: 0 , acc: 0.820 , loss: 0.471 (data_loss: 0.471 , reg_loss: 0.000 ), lr: 0.0006807351940095304 
step: 100 , acc: 0.805 , loss: 0.472 (data_loss: 0.472 , reg_loss: 0.000 ), lr: 0.0006373486297004461 
step: 200 , acc: 0.852 , loss: 0.405 (data_

In [6]:
a = np.array([[1,2,3],[4,5,6]])
b = np.array([[1,2,3],[4,5,6]])
c = np.sum(a, axis = 0, keepdims = True)
print(b)
print(c)
print(b.shape)
print(c.shape)

[[1 2 3]
 [4 5 6]]
[[5 7 9]]
(2, 3)
(1, 3)


In [7]:
print(list(reversed(model.layers)))

[<__main__.Activation_Softmax object at 0x7fbc306d2700>, <__main__.Layer_Dense object at 0x7fbc306d2b20>, <__main__.Activation_ReLU object at 0x7fbc306d25e0>, <__main__.Layer_Dense object at 0x7fbc306d7100>, <__main__.Activation_ReLU object at 0x7fbc306d2970>, <__main__.Layer_Dense object at 0x7fbc306d70d0>]
