<h1>Libraries</h1>

In [19]:
import numpy as np
import matplotlib.pyplot as plt
import h5py

%matplotlib inline

<h1>Mounting the Drive</h1>

In [20]:
#mount Google Drive into the runtime at /content/drive (this cell only works inside Google Colab, where the google.colab package is available)
#NOTE(review): presumably the dataset file read later in the notebook lives on the mounted drive - confirm
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


<h1>Dataset Operations</h1>

In [21]:
def build_mini_batches(X,Y,size):
    '''
    Description: flattens an image dataset into columns, shuffles the examples, and cuts the result into mini batches while keeping every example paired with its label

    Inputs:
    - X: the image dataset; it is reshaped to (examples, height, width) and then flattened, so each example ends up as one column
    - Y: the labels corresponding to X, indexed by example along axis 1
    - size: the number of examples in each mini batch (must not exceed the number of examples)

    Returns:
    - a list of tuples (Xt,Yt), where Xt holds the flattened examples of one mini batch as columns and Yt holds the matching label columns; the last tuple may be smaller than 'size'
    '''

    #flatten every image into a single column: (examples,h,w[,1]) -> (h*w, examples)
    n_examples,height,width=X.shape[0],X.shape[1],X.shape[2]
    columns=X.reshape(n_examples,height,width).reshape(n_examples,height*width).T
    m=columns.shape[1]
    assert size<=m

    #draw one random ordering and apply it to both the examples and the labels
    order=np.random.permutation(m)
    columns=columns[:,order]
    labels=Y[:,order]

    #work out the column ranges of the full-sized batches, plus a trailing partial batch if one is left over
    full_count,leftover=divmod(m,size)
    bounds=[(k*size,(k+1)*size) for k in range(full_count)]
    if leftover!=0:
        bounds.append((full_count*size,m))

    return [(columns[:,lo:hi],labels[:,lo:hi]) for (lo,hi) in bounds]

def split_into_strips(Xt,strips):
    '''
    Description: cuts the rows of the mini batch Xt into 'strips' equally sized, consecutive groups

    Inputs:
    - Xt: the mini batch to cut up (its rows are the ones being partitioned; columns are left untouched)
    - strips: the number of groups to produce

    Returns:
    - a tuple (Xt1,Xt2,...,Xtstrips) of consecutive row slices of Xt, each Xt.shape[0]//strips rows tall

    Raises:
    - ValueError: if the integer square root of the number of rows of Xt is not divisible by strips
    '''

    #the validity check is performed on the integer square root of the row count
    side=int(np.sqrt(Xt.shape[0]))
    if side%strips!=0:
        raise ValueError('you have elected a number of strips that results in strips of unequal width. please re-enter the number of strips you\'d like, and try again.')

    #number of rows of Xt that go into each strip
    rows_per_strip=Xt.shape[0]//strips

    return tuple(Xt[k*rows_per_strip:(k+1)*rows_per_strip,:] for k in range(strips))

def reshuffle_split_mini_batches(X,Y,hp):
    '''
    Description: builds freshly shuffled mini batches from X and Y (via build_mini_batches) and cuts the examples of every mini batch into strips (via split_into_strips)

    Inputs:
    - X: the dataset to be batched and split
    - Y: the labels corresponding to X (the pairing between examples and labels is kept intact)
    - hp: the hyperparameter dictionary; the keys read here are 'mini_batch_size' and 'strips'

    Returns:
    - a list of tuples ((Xt1,Xt2,...,Xtstrips),Yt), one per mini batch, where the first entry is the tuple returned by split_into_strips and Yt are the labels of that mini batch
    '''

    batch_size=hp['mini_batch_size']

    #every mini batch keeps its labels as-is; only the examples are cut into strips
    return [(split_into_strips(Xb,hp['strips']),Yb) for (Xb,Yb) in build_mini_batches(X,Y,batch_size)]

<h1>Initialization Operations</h1>

In [22]:
def initialize_parameters(layer_dims,initialization):
    '''
    Description: initializes the parameters of a fully connected, batch-normalized network whose architecture is specified by layer_dims

    Inputs:
    - layer_dims: a list of the number of nodes in each of the layers of the network (input layer first)
    - initialization: the name of the weight-scaling scheme, one of
        'None'   -> weights scaled by 0.01
        'He'     -> weights scaled by sqrt(2/fan_in)
        'Xavier' -> weights scaled by sqrt(1/fan_in)
        'Other'  -> weights scaled by sqrt(2/(fan_in+fan_out))

    Returns:
    - parameters: a dictionary with, for every layer l>=1, 'W'+l of shape (layer_dims[l],layer_dims[l-1]), 'gamma'+l (ones) and 'beta'+l (zeros), both of shape (layer_dims[l],1)

    Raises:
    - ValueError: if initialization is not one of the names listed above (previously an empty dictionary was returned silently, which only failed much later in training)
    '''

    #one scaling rule per supported scheme, expressed in terms of the fan-in/fan-out of a layer
    scale_rules={
        'None':lambda fan_in,fan_out:0.01,
        'He':lambda fan_in,fan_out:np.sqrt(2/fan_in),
        'Xavier':lambda fan_in,fan_out:np.sqrt(1/fan_in),
        'Other':lambda fan_in,fan_out:np.sqrt(2/(fan_out+fan_in)),
    }

    scale_rule=scale_rules.get(initialization) if isinstance(initialization,str) else None
    if scale_rule is None:
        raise ValueError('unknown initialization '+repr(initialization)+'; expected one of '+str(sorted(scale_rules)))

    #establish the number of layers in the network
    L=len(layer_dims)

    #instantiate our parameter dictionary
    parameters={}

    for l in range(1,L):
        fan_in,fan_out=layer_dims[l-1],layer_dims[l]

        #the scaling factors above assume zero-mean, unit-variance draws, so sample from a standard normal
        #(np.random.rand would give all-positive weights with mean 0.5)
        parameters['W'+str(l)]=scale_rule(fan_in,fan_out)*np.random.randn(fan_out,fan_in)

        #batch normalization starts out as the identity transform
        parameters['gamma'+str(l)]=np.ones((fan_out,1))
        parameters['beta'+str(l)]=np.zeros((fan_out,1))

    return parameters

def build_nets(split_mini_batch,hp):
    '''
    Description: creates the initial parameters of every subnet and of the supernet

    Inputs:
    - split_mini_batch: one element of the list returned by reshuffle_split_mini_batches, i.e. ((Xt1,...,Xtstrips),Yt); only the shapes of Xt1 and Yt are used, to size the input and output layers
    - hp: the hyperparameter dictionary; the keys read here are 'subnet_hidden', 'supnet_hidden', 'strips' and 'initialization'

    Returns:
    - subnet_parameters: a dictionary mapping 'subnet1',...,'subnet<strips>' to the parameter dictionary of that subnet
    - supnet_parameters: the parameter dictionary of the supernet
    '''

    def _layer_dims(n_in,hidden,n_out):
        #no hidden layers means the input feeds the output layer directly
        return [n_in,n_out] if hidden==[] else [n_in]+hidden+[n_out]

    #the first strip fixes the subnet input size; the labels fix the number of output nodes
    first_strip=split_mini_batch[0][0]
    labels=split_mini_batch[1]
    n_classes=labels.shape[0]

    #every subnet shares the same architecture but gets its own, independently drawn parameters
    subnet_dims=_layer_dims(first_strip.shape[0],hp['subnet_hidden'],n_classes)
    subnet_parameters={}
    for index in range(1,hp['strips']+1):
        subnet_parameters['subnet'+str(index)]=initialize_parameters(subnet_dims,hp['initialization'])

    #the supernet takes the stacked outputs of all subnets as its input
    supnet_dims=_layer_dims(hp['strips']*n_classes,hp['supnet_hidden'],n_classes)
    supnet_parameters=initialize_parameters(supnet_dims,hp['initialization'])

    return subnet_parameters,supnet_parameters

def normalize_inputs(X):
    '''
    Description: rescales the dataset X by dividing every element by 255, the largest value of the [0,255] pixel intensity range

    Inputs:
    - X: the dataset to be normalized

    Returns:
    - X divided by 255
    '''

    max_intensity=255
    return X/max_intensity

<h1>Network Operations</h1>

In [23]:
def sigmoid(z,backward=False):
    '''
    Description: the sigmoid activation function, or its derivative

    Inputs:
    - z: the np.array() object to which the function is applied element-wise
    - backward: when False (forward pass) the sigmoid of z is returned; when True (backward pass) the derivative of the sigmoid evaluated at z is returned

    Returns:
    - sigmoid(z), or sigmoid(z)*(1-sigmoid(z)) when backward is set
    '''

    s=1/(1+np.exp(-z))
    if not backward:
        return s
    return s*(1-s)

def relu(z,backward=False):
    '''
    Description: the ReLU activation function, or its derivative

    Inputs:
    - z: the np.array() object to which the function is applied element-wise
    - backward: when False (forward pass) ReLU(z) is returned; when True (backward pass) the derivative is returned (0 where z<0, 1 elsewhere, including at z==0)

    Returns:
    - the np.array() object resulting from applying ReLU, or its derivative, to z
    '''

    negative=z<0

    #entries that are not negative pass through unchanged (forward) or contribute a slope of 1 (backward)
    kept=1 if backward else z
    return np.where(negative,0,kept)

def softmax(z):
    '''
    Description: the softmax activation function, applied independently to every column of z (this activation function may only be, and is necessarily, applied to the final layer of both the subnet and the supernet)

    Inputs:
    - z: the np.array() object to which we wish to apply the softmax activation function (classes along axis 0)

    Returns:
    - the np.array() object resulting from the softmax activation function being applied to z; every column sums to 1
    '''

    #subtract the column-wise maximum before exponentiating: softmax is invariant to this shift, and it keeps
    #np.exp from overflowing to inf (which would otherwise produce inf/inf = nan for large inputs)
    shifted=z-np.max(z,axis=0,keepdims=True)
    exps=np.exp(shifted)
    return exps/np.sum(exps,axis=0,keepdims=True)

def concat(z,classes,m=None):
    '''
    Description: stacks the activations produced by the subnets into a single input for the supernet, or (when m is omitted) undoes that stacking

    Inputs:
    - z: when m is given, a list of subnet activations, each of shape (classes,m); when m is None, a single array whose rows are to be cut back into blocks of 'classes' rows
    - classes: the number of output nodes in a given subnet
    - m: the size of the mini batch; None selects the splitting mode

    Returns:
    - when m is given: an array of shape (len(z)*classes,m) holding the elements of z stacked on top of one another, in order
    - when m is None: a list of z.shape[0]//classes consecutive row blocks of z

    Notes: the splitting mode stems from a previous variation of the project in which this function was also used during the backward pass; it is not used in that way in this iteration
    '''

    #compare against None by identity rather than with ==
    if m is None:
        return [z[i*classes:(i+1)*classes,:] for i in range(z.shape[0]//classes)]

    #instantiate our return variable
    new_z=np.zeros((len(z)*classes,m))

    #copy every subnet activation into its own block of rows
    for i in range(len(z)):
        new_z[i*classes:(i+1)*classes,:]=z[i]

    return new_z

def propagate_forward(X,parameters,activations,epsilon=1e-8):
    '''
    Description: runs the mini batch X through a batch-normalized, fully connected network defined by 'parameters'

    Inputs:
    - X: the mini batch to be propagated forward (examples as columns)
    - parameters: the parameter dictionary of the network ('W', 'gamma' and 'beta' entries per layer, hence three entries per layer)
    - activations: a list with the activation function of each layer, in layer order
    - epsilon: added to the batch variance before taking the square root, to prevent division by 0

    Returns:
    - cache_list: one tuple per layer, (Ztilde,Zhat,mu,Z,var,A_prev), holding the values the backward pass needs; var is the per-row batch variance of Z
    - the activation produced by the final layer of the network
    '''

    layer_input=X
    cache_list=[]

    #layers are numbered from 1 in the parameter dictionary and from 0 in the activations list
    for idx in range(1,len(parameters)//3+1):
        key=str(idx)

        #linear step
        Z=np.dot(parameters['W'+key],layer_input)

        #batch normalization over the examples of the mini batch (axis 1)
        mu=np.mean(Z,axis=1,keepdims=True)
        var=np.var(Z,axis=1,keepdims=True)
        Zhat=(Z-mu)/np.sqrt(var+epsilon)
        Ztilde=parameters['gamma'+key]*Zhat+parameters['beta'+key]

        #remember everything the backward pass will ask for, including the input this layer received
        cache_list.append((Ztilde,Zhat,mu,Z,var,layer_input))

        #the output of this layer is the input of the next one
        layer_input=activations[idx-1](Ztilde)

    return cache_list,layer_input

def compute_cost(AL,Y):
    '''
    Description: computes the (softmax) cross-entropy cost of a network output, averaged over the mini batch

    Inputs:
    - AL: the output produced by the network in question (classes along axis 0, examples along axis 1)
    - Y: the true labels to which the outputs correspond, same shape as AL

    Returns:
    - the aforementioned cost, as a zero-dimensional numpy value
    '''

    #retrieve the size of the mini batch for which the cost is being computed
    m=Y.shape[1]

    #clamp the probabilities to the smallest positive normal float before taking the logarithm: a softmax output that
    #underflows to exactly 0 would otherwise give log(0)=-inf, and 0*-inf=nan would poison the whole cost
    AL=np.asarray(AL)
    if np.issubdtype(AL.dtype,np.floating):
        floor=np.finfo(AL.dtype).tiny
    else:
        floor=np.finfo(float).tiny
    log_AL=np.log(np.maximum(AL,floor))

    #compute the loss of each element
    loss=-np.sum(Y*log_AL,axis=0,keepdims=True)

    #average the element-wise loss to obtain the cost
    return np.squeeze(np.sum(loss,axis=1,keepdims=True))/m

def propagate_backward(Y,AL,cache_list,parameters,activations,epsilon=1e-8):
    '''
    Description: performs the backward pass along a batch-normalized network with parameters 'parameters', starting from a softmax/cross-entropy output layer

    Inputs:
    - Y: the true labels corresponding to the mini batch on which the network is currently being trained (examples along axis 1)
    - AL: the activation produced by the network in question, corresponding to Y
    - cache_list: one of the outputs of propagate_forward; one tuple (Ztilde,Zhat,mu,Z,std,A_prev) per layer
    - parameters: the parameters of the network in question ('W', 'gamma' and 'beta' per layer, hence the division by 3 below)
    - activations: the activation functions of each of the layers in the network; for every layer but the last, the function is called with backward=True to obtain its derivative
    - epsilon: avoids division by zero; should match the value used in the forward pass

    Returns:
    - grads: a dictionary with the entries 'dW'+l, 'dgamma'+l and 'dbeta'+l for every layer l

    Notes: the cache entry unpacked as 'std' is the batch variance (propagate_forward stores np.var there), not the standard deviation
    '''

    #instantiate our gradients dictionary
    grads={}

    #retrieve the size of the mini batch on which the network is currently being trained
    m=Y.shape[1]

    #retrieve the number of layers in the network
    L=len(parameters)//3

    #loop through the entire network, from the last layer to the first
    for l in reversed(range(L)):

        #retrieve the relevant values from the cache corresponding to the current layer
        (Ztilde,Zhat,mu,Z,std,A_prev)=cache_list[l]

        #compute the gradient of the variable to which we apply the activation function of the current layer (during the forward pass)
        if l==L-1:
            #output layer: softmax combined with cross-entropy gives AL-Y, divided by m because the cost is a mini batch average
            dZtilde_loss=AL-Y
            dZtilde=dZtilde_loss/m
        else:
            #hidden layer: dA was set at the end of the previous iteration (i.e. by layer l+1)
            dZtilde=dA*activations[l](Ztilde,backward=True)

        #propagate backwards through the batch normalization portion of the layer
        dZhat=dZtilde*parameters['gamma'+str(l+1)]
        dgamma=np.sum(dZtilde*Zhat,axis=1,keepdims=True)
        dbeta=np.sum(dZtilde,axis=1,keepdims=True)
        #gradient with respect to the batch variance ('std' holds the variance, see Notes)
        dstd=np.sum(dZhat*((mu-Z)/(2*(std+epsilon)**(3/2))),axis=1,keepdims=True)
        #gradient with respect to the batch mean
        dmu=-np.sum(dZhat*(1/np.sqrt(std+epsilon)),axis=1,keepdims=True)
        #combine the direct path with the paths through the variance and the mean
        dZ=dZhat*(1/np.sqrt(std+epsilon))+(2*dstd*(Z-mu))/m+dmu/m

        #propagate backward through the weighted protion of the layer
        #NOTE(review): dZ is multiplied by m here, whereas dgamma and dbeta above are not; this makes dW m times the
        #gradient of the averaged cost - confirm whether this rescaling is intentional before changing it
        dW=np.dot(m*dZ,A_prev.T)

        #prepare for back propagation through the preceding layer (if there is a preceding layer)
        dA=np.dot(parameters['W'+str(l+1)].T,dZ)

        #amend our gradient dictionary
        grads['dgamma'+str(l+1)]=dgamma
        grads['dbeta'+str(l+1)]=dbeta
        grads['dW'+str(l+1)]=dW
    
    return grads

def update_parameters(parameters,grads,learning_rate,epoch_num):
    '''
    Description: takes one gradient descent step on every parameter of a network, using a learning rate that decays with the epoch number

    Inputs:
    - parameters: the parameters of the network we wish to update ('W', 'gamma' and 'beta' per layer)
    - grads: the gradients corresponding to each of the elements of parameters, keyed 'd'+<parameter name>
    - learning_rate: the initial learning rate
    - epoch_num: the 'how many-th' epoch we're currently executing; the effective rate is learning_rate/(1+0.1*epoch_num)

    Returns:
    - new_parameters: a new dictionary holding the updated parameters (the input dictionary is left untouched)
    '''

    #decayed learning rate for the current epoch
    lrate=learning_rate/(1+(1e-1)*epoch_num)

    new_parameters={}

    #three parameters per layer, each moved against its own gradient
    for idx in range(1,len(parameters)//3+1):
        for name in ('W','gamma','beta'):
            key=name+str(idx)
            new_parameters[key]=parameters[key]-lrate*grads['d'+key]

    return new_parameters

<h1>Compilation Operation(s)</h1>

In [24]:
def train(X,Y,hp):
    '''
    Description: trains the subnets and the supernet on X and Y

    Inputs:
    - X: the training set
    - Y: the true labels corresponding to X
    - hp: the hyperparameters in accordance with which our training takes place; the keys read here are 'epochs', 'learning_rate', 'subnet_activations' and 'supnet_activations' (further keys are read by the helpers this function calls)

    Returns:
    - subnet_parameters: the dictionary containing the trained parameters of each of the subnets
    - supnet_parameters: the dictionary containing the trained parameters corresponding to the supernet
    - all_caches: the cache lists of the last mini batch processed, keyed 'subnet1',...,'subnet<strips>' and 'supnet', returned for the purpose of converting the parameters into inference-ready parameters; an empty dictionary if hp['epochs'] is 0

    Notes: every subnet is trained directly against the labels, independently of the supernet; the supernet is trained on the stacked subnet outputs and no gradient flows from the supernet back into the subnets
    '''

    #normalize the dataset
    normalized_X=normalize_inputs(X)

    #split the dataset into appropriate mini batches
    split_mini_batches=reshuffle_split_mini_batches(normalized_X,Y,hp)

    #instantiate the subnet parameters, as well as the supernet parameters
    subnet_parameters,supnet_parameters=build_nets(split_mini_batches[0],hp)

    #define the cache dictionary up front so that the return statement is valid even when no epoch is run
    all_caches={}

    #loop across each epoch
    for e in range(hp['epochs']):

        #loop across each mini batch
        for (Xt,Yt) in split_mini_batches:

            #start afresh: only the caches of the most recent mini batch are kept
            all_subnet_AL=[]
            all_caches={}

            #propagate each strip forward along its own subnet, then immediately run that subnet's backward pass and update
            for i in range(len(Xt)):
                name='subnet'+str(i+1)
                subnet_cache,subnet_AL=propagate_forward(Xt[i],subnet_parameters[name],hp['subnet_activations'])
                all_subnet_AL.append(subnet_AL)
                all_caches[name]=subnet_cache
                subnet_grads=propagate_backward(Yt,subnet_AL,subnet_cache,subnet_parameters[name],hp['subnet_activations'])
                subnet_parameters[name]=update_parameters(subnet_parameters[name],subnet_grads,hp['learning_rate'],e)

            #concatenate the subnet activations (computed before the updates above)
            Xt_concat=concat(all_subnet_AL,Yt.shape[0],m=Yt.shape[1])

            #propagate the above concatenation forward along the supernet, keeping track of the resulting cache list
            supnet_cache,supnet_AL=propagate_forward(Xt_concat,supnet_parameters,hp['supnet_activations'])
            all_caches['supnet']=supnet_cache

            #propagate backward along the supernet, and update the parameters of the supernet accordingly
            supnet_grads=propagate_backward(Yt,supnet_AL,supnet_cache,supnet_parameters,hp['supnet_activations'])
            supnet_parameters=update_parameters(supnet_parameters,supnet_grads,hp['learning_rate'],e)

        #form predictions using the activations of the final mini batch of this epoch
        Y_hat=np.where(supnet_AL==np.amax(supnet_AL,axis=0,keepdims=True),1,0)

        #percentage of examples whose predicted one-hot column differs from the true one
        #(the norm-based expression used previously evaluated to 100*sqrt(fraction wrong) rather than a percentage)
        error=100*np.mean(np.any(Yt!=Y_hat,axis=0))

        #print the results obtained from the above epoch to the terminal
        print('--------------------------------------------------')
        print('Cost after epoch '+str(e+1)+': '+str(compute_cost(supnet_AL,Yt)))
        print('Error on final mini batch in this epoch: '+str(error))
        print('--------------------------------------------------')

        #reshuffle the mini batches
        split_mini_batches=reshuffle_split_mini_batches(normalized_X,Y,hp)

    return subnet_parameters,supnet_parameters,all_caches

<h1>Inference Operations</h1>

In [25]:
def to_inf_params(parameters,cache_list,epsilon=1e-8):
    '''
    Description: folds the batch statistics stored in cache_list into the batch normalization parameters, so that the network can be evaluated without recomputing those statistics

    Inputs:
    - parameters: the trained parameters we wish to convert ('W', 'gamma' and 'beta' per layer)
    - cache_list: a list with one cache tuple per layer, as produced by propagate_forward; only the mean (third entry) and the variance (fifth entry) are used
    - epsilon: prevents division by zero

    Returns:
    - inf_params: the inference-ready parameters: W is passed through unchanged, gamma becomes gamma/sqrt(var+epsilon) and beta becomes beta-mu*gamma/sqrt(var+epsilon)
    '''

    inf_params={}

    for idx in range(1,len(parameters)//3+1):
        key=str(idx)

        #only the batch mean and the batch variance are needed from the cache
        (_,_,mu,_,var,_)=cache_list[idx-1]

        #the weights are reused as they are
        inf_params['W'+key]=parameters['W'+key]

        #gamma*(Z-mu)/sqrt(var+eps)+beta  ==  scale*Z+(beta-mu*scale)
        scale=parameters['gamma'+key]/np.sqrt(var+epsilon)
        inf_params['gamma'+key]=scale
        inf_params['beta'+key]=parameters['beta'+key]-mu*scale

    return inf_params

def inference_forward(X,parameters,activations,epsilon=1e-8):
    '''
    Description: propagates the set X forward along a network whose parameters have been made inference-ready

    Inputs:
    - X: the set to evaluate (examples as columns)
    - parameters: inference-ready parameters corresponding to the network in question ('W', 'gamma' and 'beta' per layer)
    - activations: the activation functions of each of the layers in our network, in layer order
    - epsilon: accepted for symmetry with propagate_forward; not used by this function

    Returns:
    - the activations produced by the final layer of the inference-ready network
    '''

    signal=X

    for idx in range(len(parameters)//3):
        key=str(idx+1)

        #linear step, followed by the (already folded) batch normalization scale and shift
        pre_norm=np.dot(parameters['W'+key],signal)
        normalized=parameters['gamma'+key]*pre_norm+parameters['beta'+key]

        #the activation of this layer feeds the next one
        signal=activations[idx](normalized)

    return signal

def perc_error(X,Y,subnet_parameters,supnet_parameters,hp):
    '''
    Description: compute the percentage error of our model on the test set

    Inputs:
    - X: the test set
    - Y: the one-hot labels corresponding to X (classes along axis 0, examples along axis 1)
    - subnet_parameters: the inference-ready subnet parameters, keyed 'subnet1',...,'subnet<strips>'
    - supnet_parameters: the inference-ready supernet parameters
    - hp: the hyperparameters in accordance with which the error is computed; the keys read here are 'strips', 'subnet_activations' and 'supnet_activations'

    Returns:
    - the percentage (between 0 and 100) of test examples whose predicted one-hot column differs from the true one
    '''

    #normalize the test set (this process needs to necessarily coincide with the normalization of the training set)
    normalized_X=normalize_inputs(X)

    #arrange the whole test set as a single (shuffled) mini batch, split into strips
    [(Xt,Yt)]=reshuffle_split_mini_batches(normalized_X,Y,{'mini_batch_size':Y.shape[1],'strips':hp['strips']})

    #propagate every strip forward along its own subnet
    all_subnet_AL=[inference_forward(Xt[i],subnet_parameters['subnet'+str(i+1)],hp['subnet_activations']) for i in range(len(Xt))]

    #concatenate the above activations
    Xt_concat=concat(all_subnet_AL,Yt.shape[0],m=Yt.shape[1])

    #feed the above concatenation into the supernet
    supnet_AL=inference_forward(Xt_concat,supnet_parameters,hp['supnet_activations'])

    #convert the activations produced by the supernet into a prediction
    Y_hat=np.where(supnet_AL==np.amax(supnet_AL,axis=0,keepdims=True),1,0)
    assert Y_hat.shape==Y.shape

    #an example is misclassified when its predicted column differs from its label column anywhere
    #(the norm-based expression used previously evaluated to 100*sqrt(fraction wrong), e.g. 10 for a 1% error rate)
    misclassified=np.any(Yt!=Y_hat,axis=0)

    return 100*np.mean(misclassified)

<h1>FruitNet API</h1>

In [26]:
class FruitNet():
    '''
    Description: a small API around the strip-based model. hyperparameters are accumulated through addLayer and the adjust* methods,
    data is loaded with loadData, and compile trains the model and reports its error on the held-out test set
    '''

    def __init__(self):
        '''
        Description: initializes the hyperparameters of our model

        Inputs: none

        Returns: none
        '''

        #instantiate hyperparameters (all of them unset; they are filled in through addLayer and the adjust* methods)
        self.__hp={'learning_rate':0,
                   'epochs':0,
                   'mini_batch_size':0,
                   'initialization':None,
                   'strips':0,
                   'subnet_names':[],
                   'subnet_hidden':[],
                   'subnet_activations':[],
                   'supnet_names':[],
                   'supnet_hidden':[],
                   'supnet_activations':[]}
    
    def __raw_shuffle__(self,X,Y):
        '''
        Description: shuffles the dataset X, preserving the conventional image dataset shape

        Inputs:
        - X: the image dataset X, (m,w,w,1) (where w is the width of the square grayscale images in the dataset X)
        - Y: the labels corresponding to X, with one column per example

        Returns:
        - shuffled_X: the dataset X, shuffled along its first axis
        - shuffled_Y: the labels Y, shuffled along their second axis, so that shuffled_X and shuffled_Y maintain the correspondence present in X and Y
        '''

        #permute [0,1,...,m-1]
        permutation=np.random.permutation(X.shape[0])

        #shuffle X and Y in accordance with the above permutation
        shuffled_X=X[permutation,:,:,:]
        shuffled_Y=Y[:,permutation]

        return shuffled_X,shuffled_Y

    def loadData(self,filename,features,labels,split=0.05):
        '''
        Description: load data to our model, shuffle it, and divide it into a training set and a test set

        Inputs:
        - filename: the h5py file from which our data comes
        - features: the name of the features dataset (of shape (m,w,w,1), as above)
        - labels: the corresponding labels dataset (of shape (classes,m))
        - split: the fraction of the examples held out as the test set (as a value between 0 and 1, exclusive); int(split*m) examples are used

        Returns: none

        Raises:
        - AssertionError: if split is not strictly between 0 and 1, or if the datasets do not have the shapes described above
        - NameError: if the file, or either of the datasets, could not be read
        '''

        #confirm the validity of split
        assert 0<split<1

        #retrieve data (the context manager closes the file even when a dataset name turns out to be wrong)
        try:
            with h5py.File(filename,'r') as data_file:
                X=data_file[features][:]
                Y=data_file[labels][:]
        except Exception as error:
            #Exception (rather than a bare except) lets KeyboardInterrupt through; the original cause stays attached to the traceback
            raise NameError('you\'ve entered an incorrect h5py filename, feature dataset name, or label dataset name, when loading in your data.') from error

        #confirm validity of X dataset shape
        assert len(X.shape)==4
        assert X.shape[3]==1

        #confirm validity of Y dataset shape
        assert len(Y.shape)==2
        assert X.shape[0]==Y.shape[1]

        #shuffle datasets (maintaining correspondence)
        shuffled_X,shuffled_Y=self.__raw_shuffle__(X,Y)

        #establish cross-validation test set (and, therefore, training set) sizes
        m=X.shape[0]
        self.__test_quant=int(split*m)
        self.__train_quant=m-self.__test_quant

        #the first test_quant shuffled examples form the test set, and the remaining train_quant examples form the training set
        self.__X_train=shuffled_X[self.__test_quant:,:,:,:]
        self.__Y_train=shuffled_Y[:,self.__test_quant:]
        self.__X_test=shuffled_X[:self.__test_quant,:,:,:]
        self.__Y_test=shuffled_Y[:,:self.__test_quant]

    def __retrieve_single_split_example__(self,strips,view_data=True):
        '''
        Description: as the method name suggests, retrieves a single example, split into appropriate subsections (as above), from the test set

        Inputs:
        - strips: the number of strips into which the aforementioned single test set element is split
        - view_data: dictates whether or not the function is being called from viewData or __decoy_parameters__

        Returns:
        if view_data:
            - X: the entire example taken from the test set
            - Xt: a tuple containing the subsets into which X has been split
            - Y: the label corresponding to X
        else:
            - a tuple containing the subsets into which X (as above) has been split, as well as the label corresponding to the aforementioned test set element X (denoted Yt in this case, instead of Y, as above)

        Raises:
        - UnboundLocalError: if no data has been loaded yet
        - ValueError: if data has been loaded, but the test set holds no examples
        '''

        #only a missing attribute means that no data was loaded; any other problem must not be reported as such
        try:
            test_quant=self.__test_quant
        except AttributeError as error:
            raise UnboundLocalError('self.__test_quant has referenced before assignment, meaning you have not yet loaded in any data. please load in data and try again.') from error

        #an empty test set (possible when int(split*m) evaluates to 0) has nothing to sample from
        if test_quant<1:
            raise ValueError('the test set is empty, so no example can be retrieved from it. please load in more data, or use a larger split.')

        #select a random sample number
        sample=np.random.randint(0,test_quant)

        #select the sample from the test set and split into regions
        X=np.zeros((1,self.__X_test.shape[1],self.__X_test.shape[2],1))
        Y=np.zeros((self.__Y_test.shape[0],1))
        X[0,:,:,0]=self.__X_test[sample,:,:,0]
        Y[:,0]=self.__Y_test[:,sample]
        [(Xt,Yt)]=reshuffle_split_mini_batches(X,Y,{'mini_batch_size':1,'strips':strips})

        #check why the function has been called and return accordingly
        if view_data:
            return X,Xt,Y
        else:
            return (Xt,Yt)


    def viewData(self,strips):
        '''
        Description: used to visualize the splitting of a training example (technically, though, we use an example from the test set to this end)

        Inputs:
        - strips: the number of strips (an int greater than 1) into which we wish to split a single test set element

        Returns: none
        '''

        #confirm the validity of the number of strips
        assert type(strips)==int
        assert strips>1

        #retrieve a random (split) item of data from the test set
        X,Xt,Y=self.__retrieve_single_split_example__(strips)

        #plot the sample, as well as the strips into which we've split it, and the corresponding label
        fig=plt.figure()
        axes=fig.subplots(strips+1,1)
        axes[0].imshow(X[0,:,:,0])
        for i in range(strips):
            #NOTE(review): this reshape assumes the image height is divisible by strips - confirm against reshuffle_split_mini_batches
            axes[i+1].imshow(Xt[i].reshape(X.shape[1]//strips,X.shape[2]))
        print('The label corresponding to the above item of data (read from left to right) is given by:\n'+str(Y.reshape(Y.shape[0],)))
    
    def addLayer(self,name,n_H,activation,net='sub'):
        '''
        Description: adds a hidden layer to the network of choice (bearing in mind that each subnet has the exact same architecture)

        Inputs:
        - name: the name of the layer we're adding to our model (must be unique within the chosen network)
        - n_H: the number of hidden nodes in this layer
        - activation: the activation function of this layer (either relu or sigmoid)
        - net: dictates whether the layer is being added to the subnet architecture ('sub'), or the supernet architecture ('sup')

        Returns: none

        Raises:
        - AssertionError: if any of the arguments is invalid, or if the name is already in use within the chosen network
        - ValueError: if net is neither 'sub' nor 'sup'
        '''
        
        #confirm the validity of name, n_H, activation, and net
        assert type(name)==str
        assert type(n_H)==int
        assert n_H>0
        assert activation in [relu,sigmoid]
        assert type(net)==str

        #append the hyperparameters in accordance with the function parameters
        if net=='sup':
            assert name not in self.__hp['supnet_names']
            self.__hp['supnet_names'].append(name)
            self.__hp['supnet_hidden'].append(n_H)
            self.__hp['supnet_activations'].append(activation)
        elif net=='sub':
            assert name not in self.__hp['subnet_names']
            self.__hp['subnet_names'].append(name)
            self.__hp['subnet_hidden'].append(n_H)
            self.__hp['subnet_activations'].append(activation)
        else:
            raise ValueError('you have not selected a valid network to add a layer to. please re-enter the net string and try again.')
    
    def __decoy_parameters__(self):
        '''
        Description: generates a set of substitute parameters for the purpose of producing a model summary

        Inputs: none

        Returns:
        - the aforementioned set of substitute parameters (whatever build_nets returns; modelSummary unpacks it as (subnets,supnet))
        '''

        #extract a single example from the test set to preserve computational efficiency
        single_example_mini_batch=self.__retrieve_single_split_example__(self.__hp['strips'],view_data=False)

        #generate and return relevant parameters (NOT as class attributes, though)
        return build_nets(single_example_mini_batch,self.__hp)

    @staticmethod
    def _print_net_summary(title,net,names,activations):
        '''
        Description: prints the layer-by-layer summary of a single network (shared by the subnet and the supnet summaries)

        Inputs:
        - title: the heading printed above the summary
        - net: the parameters of the network, holding three entries per layer, of which 'W1', 'W2', ... are read
        - names: the names of the hidden layers of the network
        - activations: the activation functions of the hidden layers of the network

        Returns: none
        '''

        separator='--------------------------------------------------'

        #every layer contributes three parameters, so this is the number of (hidden and output) layers
        layers=len(net)//3

        print(title)
        print(separator)

        #the input size is the number of columns of the first weight matrix
        print('Input Layer:')
        print('- Name: N/A')
        print('- No. of Input Nodes: '+str(net['W1'].shape[1]))
        print('- Activation Function: N/A')
        print(separator)

        #every layer but the last is a hidden layer
        for i in range(1,layers):
            print('Hidden Layer '+str(i)+':')
            print('- Name: '+str(names[i-1]))
            print('- No. of Hidden Nodes: '+str(net['W'+str(i)].shape[0]))
            if activations[i-1]==relu:
                print('- Activation Function: relu')
            else:
                print('- Activation Function: sigmoid')
            print(separator)

        #the last layer is the output layer
        if layers>0:
            print('Output Layer:')
            print('- Name: N/A')
            print('- No. of Output Nodes: '+str(net['W'+str(layers)].shape[0]))
            print('Activation Function: softmax')
            print(separator)
        print('\n')

    def modelSummary(self):
        '''
        Description: generates a model summary, and prints said summary to the terminal

        Inputs: none

        Returns: none

        Raises:
        - AssertionError: if the number of strips has not been set yet
        '''

        #confirm that enough hyperparameters have been specified in order for a summary to be generated
        assert self.__hp['strips']>0

        #retrieve the parameters whose summary we wish to produce (trained parameters if they exist, substitute parameters otherwise)
        try:
            subnet=self.__subnet_parameters['subnet1']
            supnet=self.__supnet_parameters
        except AttributeError:
            #the model has not been trained yet
            subnets,supnet=self.__decoy_parameters__()
            subnet=subnets['subnet1']
        
        #produce model summary header
        print('==================================================')
        print('NETWORK ARCHITECTURE SUMMARY')
        print('==================================================\n\n')

        #produce subnet summary, followed by supnet summary
        self._print_net_summary('SUBNET SUMMARY',subnet,self.__hp['subnet_names'],self.__hp['subnet_activations'])
        self._print_net_summary('SUPNET SUMMARY',supnet,self.__hp['supnet_names'],self.__hp['supnet_activations'])

    def adjustLearningRate(self,learning_rate):
        '''
        Description: adjusts the learning rate hyperparameter

        Inputs:
        - learning_rate: the proposed learning rate (a positive real number)

        Returns: none
        '''

        #confirm the validity of the proposed learning_rate (any real number type is accepted, but booleans are not)
        assert isinstance(learning_rate,(int,float,np.integer,np.floating)) and not isinstance(learning_rate,bool)
        assert learning_rate>0

        #inform the user of the current learning rate, as well as the updated learning rate
        print('The current learning rate is '+str(self.__hp['learning_rate'])+'.')
        self.__hp['learning_rate']=learning_rate
        print('And the new learning rate is '+str(self.__hp['learning_rate'])+'.')

    def adjustEpochs(self,epochs):
        '''
        Description: adjusts the epochs hyperparameter

        Inputs:
        - epochs: the proposed number of epochs (a positive int)

        Returns: none
        '''

        #confirm the validity of the proposed number of epochs
        assert type(epochs)==int
        assert epochs>0

        #inform the user of the current number of epochs, as well as the updated number of epochs
        print('The current number of epochs is '+str(self.__hp['epochs'])+'.')
        self.__hp['epochs']=epochs
        print('And the new number of epochs is '+str(self.__hp['epochs'])+'.')

    def adjustBatchSize(self,size):
        '''
        Description: adjusts the mini batch size hyperparameter

        Inputs:
        - size: the proposed mini batch size (a positive int)

        Returns: none
        '''

        #confirm the validity of the proposed mini batch size
        assert type(size)==int
        assert size>0

        #inform the user of the current mini batch size, as well as the updated mini batch size
        print('The current mini batch size is '+str(self.__hp['mini_batch_size'])+'.')
        self.__hp['mini_batch_size']=size
        print('And the new mini batch size is '+str(self.__hp['mini_batch_size'])+'.')
    
    def adjustInitialization(self,init):
        '''
        Description: adjusts the initialization method hyperparameter

        Inputs:
        - init: the proposed initialization method (one of the strings 'None', 'He', 'Xavier', or 'Other')

        Returns: none
        '''

        #confirm the validity of the proposed initialization technique
        assert init in ['None','He','Xavier','Other']

        #inform the user of the current initialization technique, as well as the updated initialization technique
        print('The current intialization tehcnique is '+str(self.__hp['initialization'])+'.')
        self.__hp['initialization']=init
        print('And the new initialization technique is '+str(self.__hp['initialization'])+'.')

    def adjustStrips(self,strips):
        '''
        Description: adjusts the strips hyperparameter

        Inputs:
        - strips: the proposed number of strips (an int greater than 1)

        Returns: none
        '''

        #confirm the validity of the proposed number of strips
        assert type(strips)==int
        assert strips>1

        #inform the user of the current number of strips, as well as the updated number of strips
        print('The current number of strips into which training examples are split is '+str(self.__hp['strips'])+'.')
        self.__hp['strips']=strips
        print('And the new number of strips into which training examples are split is '+str(self.__hp['strips'])+'.')
    
    def compile(self):
        '''
        Description: compiles the model, trains it, and then assesses its performance on the test set

        Inputs: none

        Returns: none

        Raises:
        - AssertionError: if the learning rate, the number of epochs, the mini batch size, or the number of strips has not been set
        - UnboundLocalError: if no data has been loaded yet
        - any exception raised during training or evaluation is propagated unchanged
        '''

        #check the validity of the relevant hyperparameters before initiating training
        assert self.__hp['learning_rate']>0
        assert self.__hp['epochs']>0
        assert self.__hp['mini_batch_size']>0
        assert self.__hp['strips']>1

        #confirm that data has been loaded (only a missing attribute means that it has not; genuine training errors are not relabelled)
        try:
            X_train,Y_train=self.__X_train,self.__Y_train
            X_test,Y_test=self.__X_test,self.__Y_test
        except AttributeError as error:
            raise UnboundLocalError('self.__X_train and self.__Y_train referenced before assignment, meaning you have not loaded in any data. please load in a dataset and try again.') from error

        #expand the activation function hyperparameters for both the sub- and the supnet
        self.__hp['subnet_activations'].append(softmax)
        self.__hp['supnet_activations'].append(softmax)

        try:
            #execute the relevant training
            self.__subnet_parameters,self.__supnet_parameters,inf_caches=train(X_train,Y_train,self.__hp)

            #instantiate the networks inference parameters
            self.__subnet_inf_parameters={}
            for i in range(len(self.__subnet_parameters)):
                self.__subnet_inf_parameters['subnet'+str(i+1)]=to_inf_params(self.__subnet_parameters['subnet'+str(i+1)],inf_caches['subnet'+str(i+1)])
            self.__supnet_inf_parameters=to_inf_params(self.__supnet_parameters,inf_caches['supnet'])

            #check the network's performance on the test set
            print('The network\'s performance on the test set yields an error of roughly '+str(perc_error(X_test,Y_test,self.__subnet_inf_parameters,self.__supnet_inf_parameters,self.__hp)))
        finally:
            #rectify the activation function hyperparameters for both the sub- and the supnet, even if training or evaluation failed
            #(otherwise every failed attempt would leave one more softmax in the hidden layer activations)
            self.__hp['subnet_activations'].pop()
            self.__hp['supnet_activations'].pop()

<h1>FruitNet_v1.0</h1>

In [29]:
#create a fresh FruitNet model
fruitnet_v1=FruitNet()

#read the features ('inputs') and the labels ('labels') from the h5py file on (my own) Google Drive
fruitnet_v1.loadData(filename='/content/drive/My Drive/training_set.h5',features='inputs',labels='labels')

#give the subnet architecture (the default target of addLayer) one hidden layer: 128 nodes with ReLU activation
fruitnet_v1.addLayer(name='sub_hidden_1',n_H=128,activation=relu)

#set the training hyperparameters, one at a time
fruitnet_v1.adjustLearningRate(learning_rate=0.05)
fruitnet_v1.adjustEpochs(epochs=256)
fruitnet_v1.adjustBatchSize(size=128)
fruitnet_v1.adjustInitialization(init='Other')
fruitnet_v1.adjustStrips(strips=4)

#print the architecture of the model (the number of strips must be set before this point)
fruitnet_v1.modelSummary()

#train the model and report its error on the test set
fruitnet_v1.compile()

The current learning rate is 0.
And the new learning rate is 0.05.
The current number of epochs is 0.
And the new number of epochs is 256.
The current mini batch size is 0.
And the new mini batch size is 128.
The current intialization tehcnique is None.
And the new initialization technique is Other.
The current number of strips into which training examples are split is 0.
And the new number of strips into which training examples are split is 4.
NETWORK ARCHITECTURE SUMMARY


SUBNET SUMMARY
--------------------------------------------------
Input Layer:
- Name: N/A
- No. of Input Nodes: 196
- Activation Function: N/A
--------------------------------------------------
Hidden Layer 1:
- Name: sub_hidden_1
- No. of Hidden Nodes: 128
- Activation Function: relu
--------------------------------------------------
Output Layer:
- Name: N/A
- No. of Output Nodes: 10
Activation Function: softmax
--------------------------------------------------


SUPNET SUMMARY
---------------------------------