# In [None]:
import numpy as np
import matplotlib.pyplot as plt
import umap
import data

def sigmoid(Z):
    """Apply the logistic function ``1 / (1 + exp(-Z))`` element-wise.

    Args:
        Z: Scalar or NumPy array of pre-activations.

    Returns:
        Value(s) in the open interval (0, 1), same shape as ``Z``.
    """
    decay = np.exp(-Z)
    return 1 / (decay + 1)

def sigmoid_derivative(dA, Z):
    """Backpropagate ``dA`` through a sigmoid activation.

    Args:
        dA: Gradient of the loss with respect to the sigmoid output.
        Z: Pre-activation values that were fed to the sigmoid.

    Returns:
        ``dA * s * (1 - s)`` where ``s = sigmoid(Z)``.
    """
    activation = sigmoid(Z)
    # Multiply in the same left-to-right order as the textbook formula.
    scaled = dA * activation
    return scaled * (1 - activation)

def relu(Z):
    """Rectified linear unit: element-wise ``max(0, Z)``.

    Args:
        Z: Scalar or NumPy array of pre-activations.

    Returns:
        ``Z`` with every negative entry replaced by zero.
    """
    floor = 0
    return np.maximum(floor, Z)

def relu_backward(dA, Z):
    """Backpropagate ``dA`` through a ReLU activation.

    Args:
        dA: Gradient of the loss with respect to the ReLU output.
        Z: Pre-activation values that were fed to the ReLU.

    Returns:
        A copy of ``dA`` with entries zeroed wherever ``Z <= 0``; ``dA``
        itself is left untouched.
    """
    gradient = np.copy(dA)
    inactive = Z <= 0
    gradient[inactive] = 0
    return gradient

# suffers from vanishing gradient problem
def tanh(x):
    """Hyperbolic tangent activation, applied element-wise.

    Args:
        x: Scalar or NumPy array of pre-activations.

    Returns:
        ``np.tanh(x)``.
    """
    squashed = np.tanh(x)
    return squashed

def tanh_derivative(dA, Z):
    """Backpropagate ``dA`` through a tanh activation.

    The local derivative of tanh is ``1 - tanh(Z)**2``. Like the other
    backward helpers in this file (sigmoid, ReLU, linear), the result is the
    upstream gradient multiplied by that local derivative.

    Args:
        dA: Gradient of the loss with respect to the tanh output.
        Z: Pre-activation values that were fed to tanh.

    Returns:
        ``dA * (1 - tanh(Z)**2)``.
    """
    # Evaluate tanh once and reuse it for the square.
    activation = np.tanh(Z)
    return dA * (1 - activation * activation)

def linear(x):
    """Identity activation: hand the input back unchanged.

    Args:
        x: Pre-activation value(s).

    Returns:
        The very same object that was passed in (no copy is made).
    """
    return x

def linear_derivative(dA, Z):
    """Backpropagate ``dA`` through the identity activation.

    The identity has a derivative of one everywhere, so the upstream
    gradient passes through unchanged.

    Args:
        dA: Gradient of the loss with respect to the activation output.
        Z: Pre-activation values; unused, kept so all backward helpers share
            the same signature.

    Returns:
        ``dA`` itself (no copy is made).
    """
    return dA
    
    
# mean squared error loss
def mean_squared_error_loss(Y_hat, Y):
    """Mean of the squared element-wise differences between ``Y`` and ``Y_hat``.

    Args:
        Y_hat: Predicted values.
        Y: Target values, broadcast-compatible with ``Y_hat``.

    Returns:
        A single scalar: the average over all elements of ``(Y - Y_hat)**2``.
    """
    residual = np.subtract(Y, Y_hat)
    return np.mean(np.square(residual))


# All of the neuron layers
class AutoencoderModel():
    """Autoencoder driven by a list of pre-built layer objects.

    Every layer is expected to provide ``forward_propagation(a)``,
    ``backward_propagation(dA)`` and ``update(learning_rate)``. Layers
    ``0 .. _ENCODER_DEPTH - 1`` are run by :meth:`encode`; layers
    ``_ENCODER_DEPTH .. _TOTAL_DEPTH - 1`` are run by :meth:`decode`.
    """

    # Number of leading layers that form the encoder.
    _ENCODER_DEPTH = 7
    # Index one past the last layer used by the decoder.
    _TOTAL_DEPTH = 9
    # Width each target sample is flattened to before it is compared with the
    # reconstruction.
    _FLAT_INPUT_SIZE = 784

    def __init__(self, layers, learning_rate, epochs, batch_size):
        """Store the layer stack and the training hyper-parameters.

        Args:
            layers: Indexable, reversible sequence of layer objects.
            learning_rate: Step size handed to every ``layer.update`` call.
            epochs: Number of passes over the training data.
            batch_size: Number of leading-axis rows per training batch.
        """
        self.layers = layers
        self.learning_rate = learning_rate
        self.epochs = epochs
        self.batch_size = batch_size
        # Per-epoch histories filled in by train().
        self.cost_history_train = []
        self.cost_history = []
        self.accuracy_history = []
        self.cost_function = mean_squared_error_loss

    def encode(self, input_data):
        """Run ``input_data`` through the encoder layers and return the code."""
        activation = input_data
        for index in range(self._ENCODER_DEPTH):
            activation = self.layers[index].forward_propagation(activation)
        return activation

    def decode(self, input_data):
        """Run a code through the decoder layers.

        The code is transposed before it enters the first decoder layer.
        """
        activation = input_data.T
        for index in range(self._ENCODER_DEPTH, self._TOTAL_DEPTH):
            activation = self.layers[index].forward_propagation(activation)
        return activation

    def get_accuracy(self, Y_hat, Y):
        """Return the negated MSE, so that larger values mean a better fit."""
        return -mean_squared_error_loss(Y_hat, Y)

    def mean_absolute_error(self, y_true, y_pred):
        """Return the negated mean absolute error between the two arrays."""
        return -np.mean(np.abs(y_true - y_pred))

    def get_initial_dA(self, Y_hat, Y):
        """Gradient seed for backpropagation: reconstruction minus target.

        ``Y`` is flattened to ``(Y.shape[0], 784)`` and subtracted from the
        transposed prediction. No ``1/m`` scaling is applied.
        """
        flat_target = Y.reshape(Y.shape[0], self._FLAT_INPUT_SIZE)
        return Y_hat.T - flat_target

    def full_backward_propagation(self, Y_hat, Y):
        """Push the reconstruction error backwards through every layer.

        Layers are visited last to first. Each layer is expected to cache the
        gradients that :meth:`update` later applies.
        """
        gradient = self.get_initial_dA(Y_hat, Y).T

        # decode() transposes its input before the first decoder layer, so the
        # gradient is transposed back once all decoder layers are done.
        transpose_at = self._TOTAL_DEPTH - self._ENCODER_DEPTH
        for position, layer in enumerate(reversed(self.layers)):
            if position == transpose_at:
                gradient = gradient.T
            gradient = layer.backward_propagation(gradient)

    def update(self):
        """Ask every layer to apply its pending update at the model's learning rate."""
        for layer in self.layers:
            layer.update(self.learning_rate)

    def predict(self, X):
        """Encode and then decode ``X``, returning the reconstruction."""
        return self.decode(self.encode(X))

    def train(self, X_train, X_val):
        """Fit the autoencoder with mini-batch gradient descent.

        After every epoch the validation score, the validation cost and the
        training cost of the *last* batch are appended to the history lists.

        Args:
            X_train: Training samples, batched along the first axis.
            X_val: Validation samples.

        Returns:
            The final validation score (negated MSE).
        """
        # Untouched reference copy used as the reconstruction target when the
        # training loss is reported.
        X_train_to_compare = np.copy(X_train)

        for epoch in range(self.epochs):
            print("epoch: " + str(epoch))

            # `batch` is already the row offset of the batch (0, bs, 2*bs, ...),
            # so it must be used directly as the slice start. Multiplying it by
            # batch_size again skipped most of the data and produced empty
            # batches.
            for batch in range(0, len(X_train), self.batch_size):
                print("batch: " + str(batch))
                stop = batch + self.batch_size
                x = X_train[batch:stop]
                x_to_compare = X_train_to_compare[batch:stop]

                code = self.encode(x)
                output = self.decode(code)

                # The input itself is the reconstruction target.
                self.full_backward_propagation(output, x)
                self.update()

            # NOTE(review): this call predicts on X_val while the calls below
            # predict on X_val.T; which orientation is right depends on the
            # layer implementations -- confirm and unify.
            y_pred = self.predict(X_val)
            accuracy_val = self.get_accuracy(y_pred, X_val.T)
            train_loss = self.cost_function(output, x_to_compare)

            print("accuracy: " + str(accuracy_val))
            print("loss: " + str(train_loss))

            self.cost_history_train.append(train_loss)
            self.cost_history.append(self.cost_function(self.predict(X_val.T), X_val.T))
            self.accuracy_history.append(accuracy_val)

        y_pred = self.predict(X_val.T)
        accuracy_val = self.get_accuracy(y_pred, X_val.T)
        print("validation accuracy (autoencoder): " + str(accuracy_val))
        return accuracy_val

    @staticmethod
    def _scale_for_display(weights):
        """Return a copy of ``weights`` shifted to start at 0 and scaled to peak at 255.

        A new array is built so that plotting never alters the weights the
        model trains and predicts with.
        """
        shifted = weights - weights.min()
        peak = shifted.max()
        if peak == 0:
            # All weights are equal; avoid dividing by zero.
            return shifted
        return shifted * (255 / peak)

    @staticmethod
    def _plot_tiles(tiles, grid, side):
        """Draw every row of ``tiles`` as a ``side x side`` image on a ``grid x grid`` figure."""
        plt.figure(figsize = (200,200))
        for i in range(tiles.shape[0]):
            plt.subplot(grid, grid, i + 1)
            plt.imshow(tiles[i].reshape((side, side)))
            plt.axis('off')

        plt.show()

    def graph_hidden_layer(self):
        """Plot each row of the first layer's transposed weight matrix as an image.

        Each image is square with side ``sqrt(layer.output_size)``. The
        layer's weights are not modified.
        """
        layer = self.layers[0]
        w = self._scale_for_display(layer.weights.T)

        num_filters = w.shape[0]
        size = num_filters//28
        side = np.sqrt(layer.output_size).astype(int) # for (784,144) it will be 12

        self._plot_tiles(w, size, side)

    def graph_hidden_layer_2(self):
        """Plot each row of the first layer's weight matrix as a 28x28 image.

        The layer's weights are not modified.
        """
        layer = self.layers[0]
        w = self._scale_for_display(layer.weights)

        num_filters = w.shape[0]
        size = num_filters//np.sqrt(layer.output_size).astype(int)
        side = 28

        self._plot_tiles(w, size, side)

    def umap_image(self, X_test, y_test):
        """Scatter-plot a 2-D UMAP embedding of the encoded test set.

        Args:
            X_test: Samples to encode; a copy is encoded, not the original.
            y_test: Per-sample values used to colour the points.
        """
        X_copy = np.copy(X_test)
        features = self.encode(X_copy).T

        print("shape features: " + str(features.shape))
        umap_embedding = umap.UMAP(n_neighbors=5).fit(features)

        plt.scatter(umap_embedding.embedding_[:, 0], umap_embedding.embedding_[:, 1], s= 5, c=y_test, cmap='Spectral')
        plt.title('Embedding of the test set by UMAP', fontsize=14)
        plt.colorbar()
        plt.show()
