In [1]:
import numpy as np
class Autoencoder_with_cross_entropy_with_weight_tying_with_Momentum_with_relu_activation:
    """One-hidden-layer autoencoder with tied weights, trained with momentum.

    The encoder is ``relu(W @ X + b1)`` and the decoder is
    ``sigmoid(W.T @ a1 + b2)``; the same matrix ``W``
    (``weights_input_hidden``) is used for both, so its gradient is the sum
    of the encoder and decoder contributions.  Reconstruction quality is
    measured with binary cross-entropy.

    Data layout: every method that takes ``X`` expects a 2-D array with one
    sample per column, i.e. shape ``(input_dim, n_samples)``.
    """

    def __init__(self, X, hidden_dim, seed=None):
        """Allocate parameters sized from ``X``.

        Args:
            X: Array whose first dimension is the number of input features.
                Only ``X.shape[0]`` is read here.
            hidden_dim: Number of hidden units.
            seed: Optional seed for the weight initialisation.  When ``None``
                (the default) the weights are drawn from NumPy's global
                legacy RNG, so a prior ``np.random.seed(...)`` still controls
                them; otherwise a dedicated ``np.random.default_rng(seed)``
                generator is used.
        """
        self.input_dim = X.shape[0]
        self.hidden_dim = hidden_dim
        self.output_dim = X.shape[0]

        # He-style scaling (sqrt(2 / fan_in)) for the ReLU hidden layer.
        init_scale = np.sqrt(2 / self.input_dim)
        weight_shape = (self.hidden_dim, self.input_dim)
        if seed is None:
            self.weights_input_hidden = np.random.randn(*weight_shape) * init_scale
        else:
            rng = np.random.default_rng(seed)
            self.weights_input_hidden = rng.standard_normal(weight_shape) * init_scale

        # Momentum (velocity) buffer for the tied weight matrix.
        self.momentum_input_hidden = np.zeros((self.hidden_dim, self.input_dim))

        self.bias_input_hidden = np.zeros((self.hidden_dim, 1))
        self.bias_hidden_output = np.zeros((self.output_dim, 1))
        self.bias_hidden_output_momentum = np.zeros((self.output_dim, 1))
        self.bias_input_hidden_momentum = np.zeros((self.hidden_dim, 1))

    def sigmoid(self, z):
        """Element-wise logistic function; ``z`` is clipped to avoid overflow in ``exp``."""
        z = np.clip(z, -500, 500)
        return 1 / (1 + np.exp(-z))

    def relu(self, z):
        """Element-wise ``max(0, z)``."""
        return np.maximum(0, z)

    def relu_derivative(self, z):
        """Return 1 where ``z > 0`` and 0 elsewhere."""
        return np.where(z > 0, 1, 0)

    def sigmoid_derivative(self, a):
        """Derivative of the sigmoid expressed through its output ``a``."""
        return a * (1 - a)

    def lossfunction(self, a, X):
        """Binary cross-entropy between reconstruction ``a`` and target ``X``.

        The value is the mean over every element of the arrays; ``eps`` keeps
        the logarithms finite when ``a`` reaches 0 or 1.
        """
        eps = 1e-10
        return -np.mean(X * np.log(a + eps) + (1 - X) * np.log(1 - a + eps))

    def forward(self, X):
        """Run the encoder and decoder.

        Returns:
            Tuple ``(z1, a1, z2, a2)``: hidden pre-activation, hidden
            activation, output pre-activation and reconstruction.
        """
        z1 = self.weights_input_hidden @ X + self.bias_input_hidden
        a1 = self.relu(z1)
        # Tied weights: the decoder uses the transpose of the encoder matrix.
        z2 = self.weights_input_hidden.T @ a1 + self.bias_hidden_output
        a2 = self.sigmoid(z2)
        return z1, a1, z2, a2

    def derivative_lossfunction(self, a, X):
        """Element-wise derivative of the cross-entropy terms with respect to ``a``.

        Uses the same ``eps`` in both denominators as ``lossfunction`` does in
        its logarithms, so the result stays finite at ``a == 0`` and
        ``a == 1``.  ``backward`` does not call this; it uses the simplified
        ``a2 - X`` form instead.
        """
        eps = 1e-10
        return -X / (a + eps) + (1 - X) / (1 - a + eps)

    def backward(self, X, a2, z2, a1, z1):
        """Back-propagate the reconstruction error.

        Args:
            X: Inputs/targets, shape ``(input_dim, n_samples)``.
            a2, z2, a1, z1: Values returned by ``forward`` for the same ``X``
                (``z2`` is accepted for interface compatibility but not
                needed).

        Returns:
            Tuple ``(dw1, db1, dw2, db2)``.  ``dw1`` is the encoder-side
            gradient with the shape of ``weights_input_hidden``; ``dw2`` is
            the decoder-side gradient with the transposed shape.

        Gradients are those of the cross-entropy summed over features and
        averaged over the samples (columns) of ``X``.  That is ``input_dim``
        times the gradient of the value reported by ``lossfunction`` - a
        constant factor that only rescales the learning rate.
        """
        n_samples = X.shape[1]

        # For a sigmoid output with cross-entropy loss, dL/dz2 reduces to
        # (a2 - X).  Using the closed form avoids multiplying a huge
        # dL/da2 by a vanishing sigmoid derivative, which would zero the
        # gradient exactly when the output is saturated and wrong.
        dz2 = a2 - X

        dw2 = dz2 @ a1.T / n_samples
        db2 = np.sum(dz2, axis=1, keepdims=True) / n_samples

        # Decoder weights are W.T, so the error flows back through W itself.
        da1 = self.weights_input_hidden @ dz2
        dz1 = self.relu_derivative(z1) * da1

        dw1 = dz1 @ X.T / n_samples
        db1 = np.sum(dz1, axis=1, keepdims=True) / n_samples

        return dw1, db1, dw2, db2

    def update_weights(self, learning_rate, dw1, db1, dw2, db2, momentum_rate):
        """Apply one momentum step to the tied weights and both biases.

        Each velocity is ``momentum_rate * velocity + learning_rate * grad``
        and is then subtracted from its parameter.  The tied matrix receives
        the combined gradient ``dw1 + dw2.T``.
        """
        self.momentum_input_hidden = (
            momentum_rate * self.momentum_input_hidden
            + learning_rate * (dw1 + dw2.T)
        )
        self.weights_input_hidden -= self.momentum_input_hidden

        self.bias_input_hidden_momentum = (
            momentum_rate * self.bias_input_hidden_momentum + learning_rate * db1
        )
        self.bias_input_hidden -= self.bias_input_hidden_momentum

        self.bias_hidden_output_momentum = (
            momentum_rate * self.bias_hidden_output_momentum + learning_rate * db2
        )
        self.bias_hidden_output -= self.bias_hidden_output_momentum

    def train(self, X, epochs, learning_rate, momentum_rate):
        """Run ``epochs`` full-batch updates on ``X``.

        Every 10th epoch the loss of the forward pass made *before* that
        epoch's update is printed.
        """
        for i in range(epochs):
            z1, a1, z2, a2 = self.forward(X)
            dw1, db1, dw2, db2 = self.backward(X, a2, z2, a1, z1)
            self.update_weights(learning_rate, dw1, db1, dw2, db2, momentum_rate)

            if i % 10 == 0:
                print(f"Epoch {i} loss: {self.lossfunction(a2,X)}")

    def predict(self, X):
        """Return the reconstruction of ``X`` (the ``a2`` output of ``forward``)."""
        _, _, _, a2 = self.forward(X)
        return a2


In [1]:
from sklearn.datasets import fetch_openml
import numpy as np

# Load MNIST as a plain NumPy array.  as_frame=False makes the return type
# explicit instead of depending on the installed scikit-learn's default
# (DataFrame vs. ndarray), so no `.values` access is needed.
mnist = fetch_openml('mnist_784', version=1, as_frame=False)
X = mnist.data / 255.0  # Normalize pixel values to [0, 1]
X = X.T  # One sample per column: shape (784, num_samples)
assert X.shape[0] == 784, "expected 784 pixel features per MNIST image"


In [None]:
# Hyperparameters for this run, named so they are easy to find and tune.
SEED = 42
HIDDEN_DIM = 32
EPOCHS = 100
LEARNING_RATE = 0.1
MOMENTUM_RATE = 0.9

# The autoencoder draws its initial weights from NumPy's global RNG, so seed
# it right before construction to make the run reproducible.
np.random.seed(SEED)

autoencoder_with_weight_tying_momentum_with_relu_activation = (
    Autoencoder_with_cross_entropy_with_weight_tying_with_Momentum_with_relu_activation(
        X, hidden_dim=HIDDEN_DIM
    )
)
autoencoder_with_weight_tying_momentum_with_relu_activation.train(
    X,
    epochs=EPOCHS,
    learning_rate=LEARNING_RATE,
    momentum_rate=MOMENTUM_RATE,
)

