In [13]:
import numpy as np
import random as n

In [14]:
# Random initialization of ANN weights and biases

def create_ANN(n_inputs = 2 , n_hidden = 3 , n_outputs = 2 , seed = 42 , weight_scale = 0.01):
    '''
    Initialize a single-hidden-layer feedforward ANN:
    n_inputs -> n_hidden neurons -> n_outputs
    (defaults reproduce the original 2 -> 3 -> 2 network).

    Parameters
    ----------
    n_inputs, n_hidden, n_outputs : int
        Layer sizes; each must be >= 1.
    seed : int or None
        When not None, BOTH global generators (numpy's legacy `np.random`
        and the stdlib `random` module, imported as `n`) are reseeded with
        this value. This is a global side effect: every later draw from
        those generators (e.g. shuffling, dropout masks) becomes
        deterministic. Pass None to leave the global state untouched.
    weight_scale : float
        Factor applied to the standard-normal weight samples.

    Returns
    -------
    dict with keys
        'W1' : (n_hidden, n_inputs)   small random weights
        'b1' : (n_hidden, 1)          zeros
        'W2' : (n_outputs, n_hidden)  small random weights
        'b2' : (n_outputs, 1)         zeros

    Raises
    ------
    ValueError
        If any layer size is smaller than 1.
    '''
    for label , size in (('n_inputs' , n_inputs) , ('n_hidden' , n_hidden) , ('n_outputs' , n_outputs)):
        if size < 1:
            raise ValueError('%s must be >= 1, got %r' % (label , size))

    if seed is not None:
        np.random.seed(seed)
        n.seed(seed)

    # NOTE: W1 must be drawn before W2 so that a given seed keeps producing
    # the same weights as before.

    # Hidden layer: n_inputs -> n_hidden
    # W1 shape: (n_hidden, n_inputs) ; b1 shape: (n_hidden, 1)
    W1 = np.random.randn(n_hidden , n_inputs) * weight_scale # small random weights
    b1 = np.zeros((n_hidden , 1)) # zero biases

    # Output layer: n_hidden -> n_outputs
    # W2 shape: (n_outputs, n_hidden) ; b2 shape: (n_outputs, 1)
    W2 = np.random.randn(n_outputs , n_hidden) * weight_scale
    b2 = np.zeros((n_outputs , 1))

    # store the parameters
    return {
        'W1' : W1,
        'b1' : b1,
        'W2' : W2,
        'b2' : b2
    }

In [15]:
# A mini-batch is a small subset of the training data used for one gradient update.
# This function splits the dataset into such batches. It:
#    1- shuffles the dataset
#    2- yields mini-batches of (X_batch , Y_batch)

def mini_batch(X , Y , batch_size):
    '''
    Generator yielding shuffled mini-batches of a dataset.

    The sample order is shuffled once per call with the stdlib `random`
    module (imported as `n`), so the result is reproducible after
    `n.seed(...)`. X and Y are indexed with the same permutation, which
    keeps every sample paired with its label. The last batch is smaller
    when the sample count is not a multiple of `batch_size`.

    Because this is a generator, the argument checks below run on the
    first iteration, not when `mini_batch(...)` is called.

    Parameters
    ----------
    X , Y : array-like
        Inputs and targets; the first axis of each indexes the samples.
    batch_size : int
        Maximum number of samples per batch; must be >= 1.

    Yields
    ------
    (X_batch , Y_batch) : tuple of numpy.ndarray

    Raises
    ------
    AssertionError
        If X and Y do not have the same number of samples.
    ValueError
        If batch_size is smaller than 1 (a negative step would otherwise
        silently yield no batches at all).
    '''
    # ensure inputs are numpy arrays (fancy indexing below returns copies,
    # so the caller's data is never aliased by a batch)
    X = np.asarray(X)
    Y = np.asarray(Y)

    N = X.shape[0] # number of samples
    assert N == Y.shape[0], 'X and Y must have the same number of samples'

    if batch_size < 1:
        raise ValueError('batch_size must be a positive integer, got %r' % (batch_size,))

    # 1- Create shuffled indices shared by X and Y
    indicies = list(range(N))
    n.shuffle(indicies)
    indicies = np.array(indicies , dtype = int)

    # 2- Yield batches one at a time
    for start in range(0 , N , batch_size):
        batch_indicies = indicies[start : start + batch_size]
        yield (X[batch_indicies] , Y[batch_indicies])

In [None]:
#------(Activation Function for continuous labels [Regression])------
def sigmoid(z):
    '''
    Element-wise logistic sigmoid: 1 / (1 + exp(-z)).

    Parameters
    ----------
    z : scalar or array-like of numbers

    Returns
    -------
    numpy.ndarray of floats with the same shape as `z` (0-d for a scalar),
    every value in [0, 1].
    '''
    z = np.asarray(z , dtype = float)
    # exp(-|z|) is always <= 1, so neither branch can overflow:
    #   z >= 0 : 1 / (1 + exp(-z))
    #   z <  0 : exp(z) / (1 + exp(z))
    decay = np.exp(-np.abs(z))
    return np.where(z >= 0 , 1.0 / (1.0 + decay) , decay / (1.0 + decay))

def derivative_sigmoid(z_prime):
    '''
    Placeholder for the derivative of the sigmoid activation.

    Not implemented yet: `z_prime` is ignored and None is returned.
    TODO: decide whether `z_prime` is the pre-activation or the sigmoid
    output before implementing.
    '''
    return None
#-----------------------------------------------------

#------(Activation Function for one-hot encoded labels [Classification])------
def softmax(z , axis = 0):
    '''
    Numerically stable softmax along `axis`.

    The default axis is 0 because the forward pass in this notebook lays
    activations out as (units, samples), so classes run down axis 0 and
    each column is normalized to sum to 1.

    Parameters
    ----------
    z : array-like of numbers (at least 1-D)
    axis : int
        Axis over which the probabilities are normalized.

    Returns
    -------
    numpy.ndarray of floats with the same shape as `z`.
    '''
    z = np.asarray(z , dtype = float)
    # Subtracting the per-slice maximum leaves the result unchanged but
    # keeps exp() from overflowing on large logits.
    shifted = z - np.max(z , axis = axis , keepdims = True)
    exp_z = np.exp(shifted)
    return exp_z / np.sum(exp_z , axis = axis , keepdims = True)

def derivative_softmax(z_prime):
    '''
    Placeholder for the derivative of the softmax activation.

    Not implemented yet: `z_prime` is ignored and None is returned.
    '''
    return None
#--------------------------------------------------------

In [None]:
def feed_forward(X_batchh , param , keep_prob = 0.5 , training = True , smoothing = False):
    '''
    Forward pass: input -> ReLU hidden layer (with optional inverted
    dropout) -> ReLU output layer.

    Parameters
    ----------
    X_batchh : numpy.ndarray
        Batch of inputs. It is transposed before being multiplied by W1,
        so it is expected to be laid out as (samples, features).
    param : dict
        Must contain 'W1', 'b1', 'W2', 'b2'.
    keep_prob : float
        Probability of KEEPING each hidden activation during training;
        must satisfy 0 < keep_prob <= 1 (1.0 disables dropout). Ignored
        when `training` is False.
    training : bool
        Dropout is applied only when True.
    smoothing : bool
        When truthy, the output activations are blended as
        a2 * (1 - 0.1) + 0.1 / num_classes.
        NOTE(review): label smoothing is conventionally applied to the
        target labels, not to the network outputs; the original behavior
        is kept here. Confirm the intent.

    Returns
    -------
    (a2 , cache)
        a2 has shape (outputs, samples). cache holds the intermediates
        'X_T', 'z1', 'a1_before_dropout', 'a1', 'dropout_mask' (None
        outside training), 'z2', 'a2', 'keep_prob' and 'training'.

    Raises
    ------
    ValueError
        If training is True and keep_prob is outside (0, 1].
    '''
    W1 , b1 = param['W1'] , param['b1']
    W2 , b2 = param['W2'] , param['b2']

    if training and not 0.0 < keep_prob <= 1.0:
        raise ValueError('keep_prob must be in (0, 1], got %r' % (keep_prob,))

    X_T = X_batchh.T

    #layer 1: input -> hidden
    z1 = np.dot(W1 , X_T) + b1
    a1_before_dropout = np.maximum(0 , z1) # ReLU activation

    #-----------(Inverted Dropout)-------------#
    # inverted dropout is applied only during training
    if training:
        # dropout mask: 1 -> keep (probability keep_prob) , 0 -> drop
        dropout_mask = (np.random.rand(*a1_before_dropout.shape) < keep_prob).astype(float)
        # scale the survivors by 1/keep_prob so the expected activation is
        # unchanged and no rescaling is needed at inference time
        a1 = a1_before_dropout * dropout_mask / keep_prob
    else:
        dropout_mask = None # not used in inference (prediction phase)
        a1 = a1_before_dropout
    #-------------------------------------------#

    #layer 2: hidden -> output
    z2 = np.dot(W2 , a1) + b2
    a2 = np.maximum(0 , z2) # ReLU activation

    #-----------(Label Smoothing)-------------#
    if smoothing:
        epsilon = 0.1
        num_classes = a2.shape[0]
        a2 = a2 * (1 - epsilon) + (epsilon / num_classes)
    #-----------------------------------------#

    cache = {
        'X_T' : X_T,
        'z1': z1,
        'a1_before_dropout': a1_before_dropout, # unscaled ReLU output, needed for backprop
        'a1' : a1,
        'dropout_mask' : dropout_mask,
        'z2' : z2,
        'a2' : a2,
        'keep_prob' : keep_prob,
        'training' : training
    }

    return a2 , cache

    

In [None]:
def cross_entropy_loss(a2 , Y_batch):
    '''
    Placeholder for the cross-entropy loss (intended for one-hot encoded
    outputs).

    Not implemented yet: both arguments are ignored and None is returned.
    '''
    return None

def compute_loss(a2 , Y_batch):
    '''
    Placeholder for the MSE (Mean Squared Error) loss.

    Not implemented yet: both arguments are ignored and None is returned.
    '''
    return None

In [None]:
def backpropagation(a2 , Y_batch , cache , param):
    '''
    Placeholder for the backward pass.

    Not implemented yet: all arguments are ignored and None is returned.
    '''
    return None

Inputs

In [None]:
# Random training data (synthetic, only meant to exercise the pipeline).
# A dedicated generator keeps the data reproducible on Restart & Run All
# without touching the global RNG state. Its seed deliberately differs from
# the one create_ANN uses, so the inputs are not correlated with the weights.
data_rng = np.random.RandomState(0)

# inputs
X_ = data_rng.randn(100 , 2) # 100 samples , 2 features

# outputs
# Draw ONE class index per sample and one-hot encode it, so every row
# contains exactly one 1 (independent random bits would also produce the
# invalid rows [0,0] and [1,1]).
class_ids = data_rng.randint(0 , 2 , size=100)
Y_ = np.eye(2 , dtype=int)[class_ids] # 100 samples , 2 classes (one-hot encoded)

# learning rate
lr = 0.01

# Dropout keep-probability
#keep_prob = 0.5

# label smoothing parameter (epsilon)
# label_smoothing = 0.1

Training Phase

In [None]:
# Training hyperparameters, named so they can be tuned in one place
NUM_EPOCHS = 10
BATCH_SIZE = 32
KEEP_PROB = 0.5

model = create_ANN()

for epoch in range(NUM_EPOCHS):
    for X_batch , Y_batch in mini_batch(X_ , Y_ , batch_size = BATCH_SIZE):
        # Forward pass with inverted dropout enabled. Label smoothing is NOT
        # applied here: `smoothing` is left at its default (False).
        a_output , cache = feed_forward(X_batch , model , keep_prob = KEEP_PROB , training = True)
        # TODO: compute the loss and run backpropagation once they are implemented

