In [1]:
def init_network(no_of_layers, input_dim, neurons_per_layer):
    '''
    Create the parameter dictionary of a fully connected network.

    no_of_layers:      number of layers to create parameters for
    input_dim:         number of columns of the first weight matrix
    neurons_per_layer: layer sizes in order L1, L2, L3 ... Lout

    returns:
    net:    dict holding "no_of_layers" plus, for every layer k, a weight
            matrix "Wk" (standard-normal samples scaled by 0.01) and a
            zero bias column "bk" of shape (neurons_per_layer[k-1], 1)
    '''
    def fresh_layer(rows, cols):
        # Weights are sampled before the bias is allocated so the random
        # stream is consumed in layer order.
        return np.random.randn(rows, cols) * 0.01, np.zeros((rows, 1))

    net = {"no_of_layers": no_of_layers}

    # The first layer is sized against the input, every later layer against
    # the layer directly before it.
    net["W1"], net["b1"] = fresh_layer(neurons_per_layer[0], input_dim)
    for layer in range(2, no_of_layers + 1):
        weights, bias = fresh_layer(neurons_per_layer[layer - 1],
                                    neurons_per_layer[layer - 2])
        net["W%d" % layer] = weights
        net["b%d" % layer] = bias

    return net

In [2]:
def sigmoid(x):
    '''
    Logistic function, applied element-wise.

    Parameters:
    x - input value(s)

    Returns:
    answer - 1 / (1 + e^(-x)) evaluated for 'x'
    '''
    denominator = 1 + np.exp(-x)
    return 1 / denominator

In [2]:
def ReLU(x):
    '''
    Rectified linear unit, applied element-wise.

    Parameters:
    x - array of any rank (previously only 4-D arrays were accepted because
        the shape was spelled out axis by axis)

    Returns:
    float64 array with the same shape as 'x' in which every negative entry
    is replaced by 0
    '''
    # Comparing against a float64 zero array (rather than the scalar 0)
    # keeps the float64 result dtype the 4-D-only version produced.
    return np.maximum(np.zeros(np.shape(x)), x)

In [3]:
def softmax(Z):
    '''
    Column-wise softmax.

    Parameters:
    Z - 2-D array of scores; each column is normalised independently
        (the sum runs over axis 0)

    Returns:
    A - array with the same shape as 'Z' whose columns each sum to 1
    '''
    Z = np.asarray(Z)

    # Subtracting the per-column maximum leaves the result mathematically
    # unchanged but keeps np.exp from overflowing to inf (which would turn
    # the division below into inf/inf = nan) for large scores.
    shifted = Z - np.max(Z, axis=0, keepdims=True)
    exp_Z = np.exp(shifted)

    return exp_Z / np.sum(exp_Z, axis=0, keepdims=True)

In [4]:
def forwardPropagation(A_prev, W, b, activation):
    '''
    Forward step of a single layer: Z = W . A_prev + b followed by the
    requested activation.

    Parameters:
    A_prev     = input of the layer (right operand of the matrix product)
    W          = weight matrix of the layer
    b          = bias added to the product
    activation = "sigmoid" or "softmax"

    Returns:
    A = activation applied to Z
    Z = pre-activation value

    Raises:
    ValueError if 'activation' is not one of the supported names
    '''
    Z = np.dot(W, A_prev) + b

    if activation == "sigmoid":
        A = sigmoid(Z)
    elif activation == "softmax":
        A = softmax(Z)
    else:
        # Previously this branch only printed a message and then failed on
        # the return statement because A was never assigned.
        raise ValueError("wrong activation: " + repr(activation))

    return A, Z

In [5]:
def feedForward(net, X):
    '''
    Run a full forward pass and cache every intermediate result in 'net'.

    net:   parameter dict; "no_of_layers" and "W1"/"b1" ... "Wn"/"bn" are read
    X:     network input, stored under net["X"]

    return:
    net:   the same dict object, extended with "ALk" (activation) and
           "ZLk" (pre-activation) for every layer k; all layers use
           sigmoid except the last one, which uses softmax
    '''
    last_layer = net["no_of_layers"]
    net["X"] = X

    layer_input = X
    for layer in range(1, last_layer + 1):
        suffix = str(layer)
        # Only the final layer is a softmax output layer.
        kind = "softmax" if layer == last_layer else "sigmoid"
        A, Z = forwardPropagation(layer_input, net["W" + suffix], net["b" + suffix], kind)
        net["AL" + suffix] = A
        net["ZL" + suffix] = Z
        layer_input = A

    return net

In [6]:
def CostCalculation(m, net_out, Y):
    '''
    Halved, m-averaged squared error between the network output and the
    one-hot encoded labels, summed along axis 1.

    m:        divisor used for averaging (the result is scaled by 1/(2*m))
    net_out:  network output, compared element-wise with the encoded labels
    Y:        ground truth; to_categorical(Y) must come back 3-D because its
              axes 1 and 2 are used for the reshape
              (assumes Y is shaped (1, samples) - TODO confirm)

    return:
    cost:     column vector with one entry per row of net_out
    '''
    # one hot encode, then bring the labels into the same orientation as net_out
    one_hot = to_categorical(Y)
    one_hot = one_hot.reshape(one_hot.shape[1], one_hot.shape[2]).T

    squared_error = np.power(np.abs(one_hot - net_out), 2)
    row_totals = np.sum(squared_error, axis=1, keepdims=True)

    return (1 / (2 * m)) * row_totals

In [7]:
def BackProp(net, Y):
    '''
    Back-propagate the error through every layer and collect the gradients.

    parms:
    net =  dict filled by a forward pass; reads "no_of_layers", "X",
           "ALk" for every layer and "Wk" for every layer above the first
    Y =    GT labels; to_categorical(Y) must come back 3-D because its
           axes 1 and 2 are used for the reshape

    returns:
    gradients =   dict with "dWk" and "dbk" for every layer k, inserted from
                  the last layer down to the first

    A network with a single layer is supported: its weight gradient is taken
    against net["X"] (this used to fail with a KeyError on "AL0").
    '''
    gradients = {}
    no_of_layers = net["no_of_layers"]

    # one hot encode
    encoded_Y = to_categorical(Y)
    encoded_Y = encoded_Y.reshape(encoded_Y.shape[1], encoded_Y.shape[2])
    encoded_Y = encoded_Y.T
    m = encoded_Y.shape[1]

    def layer_input(layer):
        # Layer 1 is fed by the raw input; every other layer by the
        # activation of the layer below it.
        if layer == 1:
            return net["X"]
        return net["AL" + str(layer - 1)]

    # Output layer.
    # NOTE(review): feedForward produces this layer with softmax, yet the
    # derivative used here is the element-wise A*(1-A) term only - confirm
    # that this simplification is intended.
    A = net["AL" + str(no_of_layers)]
    dZ = (A - encoded_Y) * (A * (1 - A))
    gradients["dW" + str(no_of_layers)] = (1/m) * (np.dot(dZ, layer_input(no_of_layers).T))
    gradients["db" + str(no_of_layers)] = (1/m) * (np.sum(dZ, axis = 1, keepdims = True))

    # Remaining layers, walking from the one below the output down to layer 1.
    for layer in range(no_of_layers - 1, 0, -1):
        A = net["AL" + str(layer)]
        # A - A*A is the sigmoid derivative expressed through its output.
        dZ = np.dot(net["W" + str(layer + 1)].T, dZ) * (A - (A * A))
        gradients["dW" + str(layer)] = (1/m) * (np.dot(dZ, layer_input(layer).T))
        gradients["db" + str(layer)] = (1/m) * (np.sum(dZ, axis = 1, keepdims = True))

    return gradients

In [8]:
def weightUpdate(net, gradients, lr_rate):
    '''
    Apply one gradient-descent step to every weight and bias of the network.

    net:        parameter dict; "Wk"/"bk" are replaced for k = 1 ... no_of_layers
    gradients:  dict providing "dWk" and "dbk" for each of those layers
    lr_rate:    step size the gradients are multiplied with

    returns:
    net:        the same dict object with the updated parameters
    '''
    for layer in range(1, net["no_of_layers"] + 1):
        for prefix in ("W", "b"):
            key = prefix + str(layer)
            step = lr_rate * gradients["d" + key]
            # Rebind instead of subtracting in place so the previous
            # parameter object is left untouched.
            net[key] = net[key] - step

    return net

In [10]:
def train_Model(net, train_X, train_Y, test_X, test_Y, numberofEpochs = 100, lr_rate = 0.1):
    '''
    Train the network on the whole training set once per epoch and record
    the training and test loss after every epoch.

    net:             parameter dict of the network
    train_X/train_Y: training inputs and labels
    test_X/test_Y:   inputs and labels used for the reported validation loss
    numberofEpochs:  number of passes over the training set
    lr_rate:         step size handed to weightUpdate

    returns:
    out:  dict with the trained "net" and the per-epoch loss lists
          "All_Train_Loss" and "All_Test_Loss"

    Progress is printed for the first 10 epochs and for every 50th epoch.
    The recorded loss is the cost summed over axis 0 and scaled by a
    hard-coded 1/10 (presumably the number of classes - TODO confirm).
    '''
    output_key = "AL" + str(net["no_of_layers"])
    train_history = []
    test_history = []

    train_count = train_X.shape[1]

    for epoch in range(1, numberofEpochs + 1):
        # forward pass and loss on the training data
        fitted = feedForward(net, train_X)
        train_cost = CostCalculation(train_count, fitted[output_key], train_Y)
        train_history.append((1 / 10) * np.sum(train_cost, axis=0, keepdims=True))

        # gradient step
        net = weightUpdate(fitted, BackProp(fitted, train_Y), lr_rate)

        # loss of the updated network on the test data
        evaluated = feedForward(net, test_X)
        test_cost = CostCalculation(test_X.shape[1], evaluated[output_key], test_Y)
        test_history.append((1 / 10) * np.sum(test_cost, axis=0, keepdims=True))

        if epoch <= 10 or epoch % 50 == 0:
            print(f"epoch #{epoch}: \tTrain Loss = {train_history[-1]!s}\t\t Validation Loss = {test_history[-1]!s}")

    return {
        "net": net,
        "All_Train_Loss": train_history,
        "All_Test_Loss": test_history,
    }

In [None]:
def _cached_npz(cache_path, compute):
    '''
    Return the array stored under key 'out' in cache_path; when the file
    does not exist, call compute(), save its result there and return it.
    '''
    if os.path.isfile(cache_path):
        # The context manager closes the archive handle once the array is read.
        with np.load(cache_path) as npzfile:
            return npzfile['out']

    result = compute()
    np.savez(cache_path, out = result)
    return result


def Train_Model(net, train_X, train_Y, test_X, test_Y, filters, numberofEpochs = 100, lr_rate = 0.1):
    '''
    Push both data sets through convolution, pooling and ReLU, flatten the
    result and train the fully connected network on it.

    net:             parameter dict of the fully connected network
    train_X/train_Y: training inputs and labels
    test_X/test_Y:   inputs and labels used for the reported validation loss
    filters:         passed to conv_forward_all together with the inputs
    numberofEpochs:  number of passes over the training set
    lr_rate:         step size handed to weightUpdate

    returns:
    out:  dict with the trained "net" and the per-epoch loss lists
          "All_Train_Loss" and "All_Test_Loss"

    Convolution and pooling results are cached in .npz files in the working
    directory. NOTE(review): the cache is keyed by file name only, so files
    written for other inputs or filters are reused as they are - delete them
    when the data or the filters change.

    Fixes compared to the previous version:
    * the validation pass uses the flattened conv/pool features of the test
      set instead of the raw test_X;
    * the ReLU outputs are what gets flattened (they used to be computed and
      then discarded);
    * cached .npz archives are closed after reading.
    '''
    # Convolve train_X + test_X (always evaluated so both caches get written)
    train_X_conv = _cached_npz("conv_all_train_X.npz",
                               lambda: conv_forward_all(train_X, filters))
    test_X_conv = _cached_npz("conv_all_test_X.npz",
                              lambda: conv_forward_all(test_X, filters))

    # Pool train_X + test_X
    train_X_conv_pool = _cached_npz("pool_all_train_X.npz",
                                    lambda: pool_forward_all(train_X_conv))
    test_X_conv_pool = _cached_npz("pool_all_test_X.npz",
                                   lambda: pool_forward_all(test_X_conv))

    # Apply ReLU
    train_X_conv_pool_relu = ReLU(train_X_conv_pool)
    test_X_conv_pool_relu = ReLU(test_X_conv_pool)

    # Flatten everything but axis 0 and transpose, so axis 0 of the pooled
    # data becomes the column index of the network input.
    train_features = train_X_conv_pool_relu.reshape(train_X_conv_pool_relu.shape[0], -1).T
    test_features = test_X_conv_pool_relu.reshape(test_X_conv_pool_relu.shape[0], -1).T

    # after flatten
    no_of_layers = net["no_of_layers"]
    output_key = "AL" + str(no_of_layers)
    All_Train_Loss = []
    All_Test_Loss = []

    for i in range(1, numberofEpochs+1):
        trn_net = feedForward(net, train_features)
        trn_loss = CostCalculation(train_features.shape[1], trn_net[output_key], train_Y)
        # 1/10 is hard-coded (presumably the number of classes - TODO confirm)
        All_Train_Loss.append((1/10)*np.sum(trn_loss, axis = 0, keepdims = True))

        trn_grads = BackProp(trn_net, train_Y)
        net = weightUpdate(trn_net, trn_grads, lr_rate)

        # below was expected to be done in test() function
        tst_net = feedForward(net, test_features)
        tst_loss = CostCalculation(test_features.shape[1], tst_net[output_key], test_Y)
        All_Test_Loss.append((1/10)*np.sum(tst_loss, axis = 0, keepdims = True))

        if i <= 10 or i % 50 == 0:
            print("epoch #"+ str(i) + ": \tTrain Loss = "+ str(All_Train_Loss[-1]) + "\t\t Validation Loss = " + str(All_Test_Loss[-1]))

    out = {
        "net": net,
        "All_Train_Loss": All_Train_Loss,
        "All_Test_Loss": All_Test_Loss
    }
    return out