In [2]:
import numpy as np
from tqdm import tqdm

In [3]:
def _activation(z, kind):
    """Apply the activation selected by ``kind`` element-wise to ``z``.

    ``kind``: 1 = sigmoid, 2 = tanh, 3 = rectifier, 4 = linear.
    """
    if kind == 1:  # sigmoid
        return 1/(1 + np.exp(-z))
    if kind == 2:  # tanh (np.tanh avoids the overflow of exp(z)/exp(-z) ratios)
        return np.tanh(z)
    if kind == 3:  # rectifier
        return np.maximum(0, z)
    if kind == 4:  # linear
        return z.copy()
    raise ValueError(f"unknown activation type: {kind!r} (expected 1, 2, 3 or 4)")


def _activation_derivative(z, kind):
    """Return f'(z) element-wise, i.e. the diagonal of the activation Jacobian.

    ``kind`` uses the same codes as ``_activation``.
    """
    if kind == 1:  # sigmoid: f(1-f)
        f = 1/(1 + np.exp(-z))
        return f*(1 - f)
    if kind == 2:  # tanh: 1 - tanh^2 = 4/(e^z + e^-z)^2
        return 1 - np.tanh(z)**2
    if kind == 3:  # rectifier; by convention f'(0) = 0
        return (z > 0).astype(float)
    if kind == 4:  # linear
        return np.ones_like(z)
    raise ValueError(f"unknown activation type: {kind!r} (expected 1, 2, 3 or 4)")


def auto_encoder(feature_data,t_hidden,t_output,n2,mu,rho,passes_ac):
    """Train a one-hidden-layer auto-encoder by stochastic gradient descent.

    The network maps each sample ``h`` to a reconstruction of itself:
    ``y1 = f_hidden(W @ h - theta)`` followed by
    ``gamma_hat = f_output(Wx @ y1 - thetax)``. Each sample triggers one
    backpropagation step on the squared reconstruction error
    ``||gamma_hat - h||^2``. Samples are visited in a freshly drawn random
    order on every pass (random reshuffling).

    Parameters
    ----------
    feature_data : array-like, shape (N, M)
        One sample per row; M is the size of the input and output layers.
    t_hidden, t_output : int
        Activation code for the hidden / output layer:
        1 = sigmoid, 2 = tanh, 3 = rectifier, 4 = linear.
    n2 : int
        Number of hidden nodes.
    mu : float
        Step size of the gradient updates.
    rho : float
        Regularization coefficient; weights are scaled by ``(1 - 2*mu*rho)``
        at every step. The offsets (thetas) are not regularized.
    passes_ac : int
        Number of passes over the data.

    Returns
    -------
    W : ndarray, shape (n2, M)
        Trained weights of the hidden (encoder) layer.
    theta : ndarray, shape (n2,)
        Trained offsets of the hidden layer (note the ``W @ y - theta``
        sign convention).

    Raises
    ------
    ValueError
        If ``feature_data`` is not two-dimensional or an activation code is
        not one of 1, 2, 3, 4.
    """
    valid_codes = (1, 2, 3, 4)
    if t_hidden not in valid_codes or t_output not in valid_codes:
        raise ValueError(
            f"t_hidden and t_output must each be one of {valid_codes}; "
            f"got t_hidden={t_hidden!r}, t_output={t_output!r}"
        )

    feature_data = np.asarray(feature_data, dtype=float)
    if feature_data.ndim != 2:
        raise ValueError("feature_data must be a 2-D array of shape (N, M)")

    # Initialization
    N, n1 = feature_data.shape  # number of data points, size of input/output layers

    # The order of these random draws is kept fixed so that results stay
    # reproducible under np.random.seed.
    W = (1/np.sqrt(n1))*np.random.randn(n2, n1)   # hidden-layer weights
    theta = np.random.randn(n2)                   # hidden-layer offsets
    Wx = (1/np.sqrt(n2))*np.random.randn(n1, n2)  # output-layer weights
    thetax = np.random.randn(n1)                  # output-layer offsets

    shrink = 1 - 2*mu*rho  # weight-decay factor, constant across iterations

    # Training using random reshuffling
    for _ in tqdm(range(passes_ac)):
        order = np.random.permutation(N)
        for idx in order:
            h = feature_data[idx]  # 1-D input vector; it is also the target

            # FORWARD PROPAGATION
            z_hidden = W@h - theta
            y_hidden = _activation(z_hidden, t_hidden)
            z_out = Wx@y_hidden - thetax
            gamma_hat = _activation(z_out, t_output)

            # BACKPROPAGATION
            # Boundary sensitivity: 2 * f'(z_L) * (gamma_hat - gamma).
            delta_out = 2*_activation_derivative(z_out, t_output)*(gamma_hat - h)
            # Hidden sensitivity uses the output weights from BEFORE their update.
            delta_hidden = _activation_derivative(z_hidden, t_hidden)*(Wx.T@delta_out)

            # Gradient w.r.t. a weight matrix is the rank-one outer product
            # delta * y^T; with 1-D arrays this must be np.outer, not `*` or `@`.
            Wx = shrink*Wx - mu*np.outer(delta_out, y_hidden)
            thetax = thetax + mu*delta_out  # '+' because z = W y - theta

            W = shrink*W - mu*np.outer(delta_hidden, h)
            theta = theta + mu*delta_hidden

    return W, theta