# In [2]:
import numpy as np
import matplotlib.pyplot as plt
import umap

def sigmoid(Z):
    """Return the logistic sigmoid ``1 / (1 + exp(-Z))`` element-wise.

    The value is computed as ``exp(-log(1 + exp(-Z)))`` via ``np.logaddexp``
    so that large negative inputs do not overflow inside ``np.exp`` (the
    naive form emits a RuntimeWarning there even though the result is 0).

    Args:
        Z: scalar or NumPy array of pre-activations.

    Returns:
        Scalar or array with the same shape as ``Z``, values in [0, 1].
    """
    # logaddexp(0, -Z) == log(exp(0) + exp(-Z)) == log(1 + exp(-Z)),
    # evaluated without ever forming exp(-Z) for large |Z|.
    return np.exp(-np.logaddexp(0, -Z))

def sigmoid_derivative(dA, Z):
    """Back-propagate ``dA`` through a sigmoid activation.

    Args:
        dA: gradient of the loss with respect to the sigmoid output.
        Z: pre-activation values that were fed to the sigmoid.

    Returns:
        ``dA * s * (1 - s)`` where ``s = sigmoid(Z)``, i.e. the gradient
        with respect to ``Z``.
    """
    activation = sigmoid(Z)
    # Keep the left-to-right evaluation order: (dA * s) * (1 - s).
    scaled = dA * activation
    return scaled * (1 - activation)

def relu(Z):
    """Return the rectified linear unit ``max(0, Z)`` element-wise.

    Args:
        Z: scalar or NumPy array of pre-activations.

    Returns:
        ``Z`` with every negative entry replaced by zero.
    """
    floor = 0
    return np.maximum(floor, Z)

def relu_backward(dA, Z):
    """Back-propagate ``dA`` through a ReLU activation.

    Args:
        dA: gradient of the loss with respect to the ReLU output.
        Z: pre-activation values that were fed to the ReLU.

    Returns:
        A fresh array equal to ``dA`` except that entries where ``Z <= 0``
        are set to zero. ``dA`` itself is left untouched.
    """
    grad = np.array(dA, copy=True)
    inactive = Z <= 0
    grad[inactive] = 0
    return grad

def tanh(x):
    """Return the hyperbolic tangent of ``x`` element-wise.

    Note: this activation suffers from the vanishing gradient problem.

    Args:
        x: scalar or NumPy array of pre-activations.

    Returns:
        ``np.tanh(x)``.
    """
    squashed = np.tanh(x)
    return squashed

def tanh_derivative(dA, Z):
    """Back-propagate ``dA`` through a tanh activation.

    The local derivative of tanh is ``1 - tanh(Z)**2``. It is multiplied by
    the upstream gradient ``dA`` (chain rule), matching the contract of
    ``sigmoid_derivative`` and ``relu_backward``, which also return the
    gradient with respect to ``Z`` rather than the bare local slope.

    Args:
        dA: gradient of the loss with respect to the tanh output.
        Z: pre-activation values that were fed to tanh.

    Returns:
        ``dA * (1 - tanh(Z)**2)``.
    """
    t = np.tanh(Z)  # evaluate once instead of twice
    return dA * (1 - t * t)

def linear(x):
    """Identity activation: return ``x`` unchanged (same object, no copy)."""
    return x

def linear_derivative(dA, Z):
    """Back-propagate ``dA`` through the identity activation.

    The identity has slope 1 everywhere, so the upstream gradient is
    returned as-is (same object); ``Z`` is accepted only to match the
    signature of the other derivative helpers and is not used.
    """
    return dA
    
    
def mean_squared_error_loss(Y_hat, Y):
    """Return the mean squared error between predictions and targets.

    Args:
        Y_hat: predicted values.
        Y: target values, broadcast-compatible with ``Y_hat``.

    Returns:
        The mean of ``(Y - Y_hat)**2`` taken over every element.
    """
    return np.mean(np.square(np.subtract(Y, Y_hat)))


class AutoencoderModel:
    """Autoencoder assembled from a list of pre-built layers.

    The first ``hidden_layers_num`` entries of ``layers`` act as the encoder
    and the remaining entries act as the decoder. Every layer must provide
    ``forward_propagation(a)``, ``backward_propagation(dA)`` (returning a
    ``(dA_prev, dW, db)`` tuple) and the attributes ``weights``, ``bias``,
    ``dW`` and ``db``.

    Training data is passed with one sample per row; each mini-batch is
    transposed before it is handed to the layers.
    """

    def __init__(self, layers, learning_rate, epochs, batch_size, hidden_layers_num=2):
        """Store the layers and hyper-parameters and reset the history lists.

        Args:
            layers: ordered layer objects (encoder layers first).
            learning_rate: step size used by ``update``.
            epochs: number of passes over the training data in ``train``.
            batch_size: number of samples per mini-batch.
            hidden_layers_num: how many leading layers belong to the encoder
                (the default of 2 corresponds to a 4-layer network).
        """
        self.layers = layers
        self.learning_rate = learning_rate
        self.epochs = epochs
        self.batch_size = batch_size
        self.cost_history_train = []
        self.cost_history = []
        self.accuracy_history = []
        self.cost_function = mean_squared_error_loss
        self.hidden_layers_num = hidden_layers_num

    def encode(self, input_data):
        """Run ``input_data`` through the encoder layers and return the code."""
        activation = input_data
        for layer in self.layers[:self.hidden_layers_num]:
            activation = layer.forward_propagation(activation)
        return activation

    def decode(self, input_data):
        """Run a code through the decoder layers and return the reconstruction.

        Every layer after the encoder is used, so the forward pass covers
        exactly the layers that ``full_backward_propagation`` and ``update``
        iterate over. For the usual ``len(layers) == 2 * hidden_layers_num``
        configuration this is the same range as before.
        """
        activation = input_data
        for layer in self.layers[self.hidden_layers_num:]:
            activation = layer.forward_propagation(activation)
        return activation

    def get_accuracy(self, Y_hat, Y):
        """Return the negated mean absolute error between ``Y`` and ``Y_hat``."""
        return self.mean_absolute_error(Y, Y_hat)

    def mean_absolute_error(self, y_true, y_pred):
        """Return ``-mean(|y_true - y_pred|)``.

        The sign is flipped so that a larger value means a better
        reconstruction; a perfect reconstruction scores 0.
        """
        return - np.mean(np.abs(y_true - y_pred))

    def get_initial_dA(self, Y_hat, Y):
        """Return the gradient that seeds back-propagation: ``Y_hat - Y``.

        The difference is not divided by the batch size, so the effective
        step grows with the number of samples in a batch.
        """
        return Y_hat - Y

    def full_backward_propagation(self, Y_hat, Y):
        """Propagate the output error backwards through every layer.

        The ``dW``/``db`` values returned by each layer are not used here;
        ``update`` reads ``layer.dW`` and ``layer.db`` directly, so the
        layers are presumably expected to store them on themselves.
        """
        upstream = self.get_initial_dA(Y_hat, Y)
        for layer in reversed(self.layers):
            upstream, _, _ = layer.backward_propagation(upstream)

    def update(self):
        """Apply one gradient-descent step to the weights and bias of every layer."""
        for layer in self.layers:
            layer.weights = layer.weights - self.learning_rate * layer.dW
            layer.bias = layer.bias - self.learning_rate * layer.db

    def predict(self, X):
        """Return the reconstruction of ``X`` (encode followed by decode)."""
        return self.decode(self.encode(X))

    def graph_hidden_layer(self, image_shape=(28, 28)):
        """Plot every row of the first layer's weight matrix as an image.

        Args:
            image_shape: shape each weight row is reshaped to before being
                drawn; the default matches 28x28 inputs.
        """
        # Work on a copy: the normalisation below is in place and must not
        # overwrite the weights of the trained model.
        w = np.array(self.layers[0].weights, copy=True)

        print("img full shape")
        print(str(w.shape))

        num_filters = w.shape[0]

        # Rescale to the 0..255 range; skip the scaling when all weights are
        # equal so that we never divide by zero.
        w -= w.min()
        peak = w.max()
        if peak > 0:
            w *= 255 / peak

        # Smallest square grid that has room for every filter.
        size = int(np.ceil(np.sqrt(num_filters)))

        # NOTE(review): a 288x288 inch figure is extremely large - confirm
        # that this size is intended.
        plt.figure(figsize=(288, 288))
        for i in range(num_filters):
            plt.subplot(size, size, i + 1)
            plt.imshow(w[i].reshape(image_shape))
            plt.axis('off')

        plt.show()

    def umap_image(self, X_test, y_test):
        """Scatter-plot a 2-D UMAP embedding of the encoded ``X_test``.

        The encoder output is transposed before fitting (presumably so that
        each sample becomes a row - verify against the caller) and the
        points are coloured by ``y_test``.
        """
        features = self.encode(X_test)
        umap_embedding = umap.UMAP(n_neighbors=5, random_state=42).fit(features.T)

        plt.scatter(umap_embedding.embedding_[:, 0], umap_embedding.embedding_[:, 1], s=5, c=y_test, cmap='Spectral')
        plt.title('Embedding of the training set by UMAP', fontsize=24)
        plt.colorbar()
        plt.show()

    def train(self, X_train, Y_train, X_val, Y_val):
        """Train the autoencoder with mini-batch gradient descent.

        Each epoch walks over every full mini-batch of ``X_train`` (a
        trailing partial batch is dropped), then records the training cost
        of the last batch, the validation cost and the validation accuracy.

        Args:
            X_train: training samples, one per row.
            Y_train: unused - the network is trained to reproduce its input.
            X_val: validation samples, one per row.
            Y_val: unused, kept for interface compatibility.

        Returns:
            The validation accuracy (negated mean absolute error) after the
            final epoch.

        Raises:
            ValueError: if training is requested but ``X_train`` holds fewer
                samples than one batch.
        """
        batches_num = len(X_train) // self.batch_size
        if self.epochs > 0 and batches_num < 1:
            raise ValueError(
                "X_train has fewer samples than batch_size; "
                "at least one full batch is required"
            )

        val_input = X_val.T

        for _ in range(self.epochs):
            # Visit every full batch, including the last one.
            for batch in range(batches_num):
                start = batch * self.batch_size
                x = X_train[start:start + self.batch_size].T

                output = self.decode(self.encode(x))

                self.full_backward_propagation(output, x)  # output with input
                self.update()

            # One validation forward pass serves both metrics.
            y_pred = self.predict(val_input)

            self.cost_history_train.append(self.cost_function(output, x))
            self.cost_history.append(self.cost_function(y_pred, val_input))
            self.accuracy_history.append(self.get_accuracy(y_pred, val_input))

        accuracy_val = self.get_accuracy(self.predict(val_input), val_input)
        print("validation accuracy: " + str(accuracy_val))
        return accuracy_val
