# In [None]:
import numpy as np
import model
from numpy.lib.stride_tricks import sliding_window_view

def xavier_init(in_size, out_size):
    """Sample a weight matrix from a symmetric uniform distribution.

    Every entry is drawn independently from
    ``U(-1/sqrt(in_size), 1/sqrt(in_size))`` using NumPy's global random
    state.

    Args:
        in_size: Number of inputs feeding the layer (columns of the result).
        out_size: Number of outputs of the layer (rows of the result).

    Returns:
        Array of shape ``(out_size, in_size)``.
    """
    # NOTE(review): the textbook Glorot/Xavier uniform bound is
    # sqrt(6 / (in_size + out_size)); this uses 1 / sqrt(in_size) -- confirm
    # that this variant is the intended one.
    bound = 1 / np.sqrt(in_size)
    shape = (out_size, in_size)
    return np.random.uniform(-bound, bound, shape)


def he_init(kernel_num, kernel_size):
    """Create ``kernel_num`` kernels using He (Kaiming) normal initialisation.

    Each kernel is drawn from ``N(0, 2 / fan_in)`` using NumPy's global random
    state, where ``fan_in`` is the number of weights contributing to one
    output value, i.e. the product of all kernel dimensions. (Using only the
    first dimension, as before, over-estimates the variance for any kernel
    with more than one axis.)

    Args:
        kernel_num: Number of kernels to create.
        kernel_size: Shape tuple of a single kernel, e.g. ``(3, 3)``.

    Returns:
        A list of ``kernel_num`` arrays, each of shape ``kernel_size``.
    """
    fan_in = int(np.prod(kernel_size))
    std = np.sqrt(2.0 / fan_in)
    return [np.random.normal(0, std, size=kernel_size) for _ in range(kernel_num)]


# A single convolutional layer
class ConvLayer():
    """2-D cross-correlation layer holding three He-initialised kernels.

    Feature maps are handled as arrays of shape ``(maps, rows, cols)``.
    The number of input maps selects one of two layouts:

    * one input map: every kernel is applied to it, producing one output
      map per kernel;
    * several input maps: map ``i`` is paired with kernel ``i``, producing
      one output map per input map.

    The bias and the activation function are stored but not applied yet.
    """

    def __init__(self, kernel_size):
        """Create the layer.

        Args:
            kernel_size: ``(rows, cols)`` shape shared by all kernels.
        """
        self.kernel_size = kernel_size
        self.kernels = he_init(3, kernel_size)
        self.bias = []  # TODO: bias is not implemented yet
        self.activation = model.relu
        self.backward_activation = model.relu_backward
        self.a_prev = []  # input seen by the last forward pass
        self.output = []  # result of the last forward pass
        self.dK = []  # kernel gradients from the last backward pass
        self.dX = []  # input gradients from the last backward pass

    def update(self, lr):
        """Apply one gradient-descent step to every kernel using ``self.dK``.

        Args:
            lr: Learning rate.
        """
        for i, kernel in enumerate(self.kernels):
            self.kernels[i] = kernel - lr * self.dK[i]
            # TODO: bias

    def correlate(self, input_data, kernel):
        """Return the valid 2-D cross-correlation of ``input_data`` and ``kernel``.

        The window shape follows ``kernel`` itself rather than
        ``self.kernel_size``, so the method also works when the second
        operand is an error map (as needed for the kernel gradient).

        Args:
            input_data: 2-D array.
            kernel: 2-D array no larger than ``input_data`` on either axis.

        Returns:
            2-D array of shape ``(rows - k_rows + 1, cols - k_cols + 1)``.
        """
        kernel = np.asarray(kernel)
        windows = sliding_window_view(input_data, kernel.shape)
        # The two trailing axes index positions inside each window.
        return np.sum(windows * kernel, axis=(-2, -1))

    def forward_propagation(self, input_data):
        """Correlate the input maps with the kernels and cache the input.

        Args:
            input_data: Array of shape ``(maps, rows, cols)``.

        Returns:
            Array of output maps; one per kernel when there is a single
            input map, otherwise one per input map.
        """
        self.a_prev = input_data

        print("correlate input data:")
        print(input_data.shape)
        output = []

        if len(input_data) == 1:
            data = input_data[0]
            print("correlate data:")
            print(data.shape)
            output = [self.correlate(data, kernel) for kernel in self.kernels]
        else:
            for i, data in enumerate(input_data):
                print("correlate data:")
                print(data.shape)
                output.append(self.correlate(data, self.kernels[i]))

        self.output = np.asarray(output)
        return self.output

    def _full_convolve(self, errors, kernel):
        """Return the full convolution of ``errors`` with ``kernel``.

        This is the gradient of a valid cross-correlation with respect to
        its input: the error map is zero-padded by ``kernel size - 1`` on
        every side and correlated with the kernel rotated by 180 degrees.
        """
        kernel = np.asarray(kernel)
        pad_rows = kernel.shape[0] - 1
        pad_cols = kernel.shape[1] - 1
        padded = np.pad(errors, ((pad_rows, pad_rows), (pad_cols, pad_cols)),
                        mode='constant', constant_values=0)
        return self.correlate(padded, kernel[::-1, ::-1])

    def backward_propagation(self, output_errors):
        """Compute kernel and input gradients for the last forward pass.

        The branch taken mirrors ``forward_propagation``: it depends on how
        many input maps were cached, not on how many error maps arrive.

        Args:
            output_errors: Gradient of the loss with respect to the output
                of ``forward_propagation``; same shape as that output.

        Returns:
            ``self.dX``, the gradient with respect to the cached input
            (same shape as the input). ``self.dK`` receives one gradient
            per kernel; kernels that were not used get a zero gradient so
            that ``update`` can always step every kernel.

        Raises:
            ValueError: If the number of error maps does not match the
                number of maps produced by the forward pass.
        """
        output_errors = np.asarray(output_errors)
        single_input = len(self.a_prev) == 1

        expected_maps = len(self.kernels) if single_input else len(self.a_prev)
        if len(output_errors) != expected_maps:
            raise ValueError(
                f"expected {expected_maps} error maps, got {len(output_errors)}"
            )

        dK = [np.zeros_like(kernel) for kernel in self.kernels]
        dX = []
        if single_input:
            # Every kernel saw the same input map, so its gradient is the
            # sum of the contributions flowing back through each kernel.
            data = self.a_prev[0]
            total = np.zeros(np.shape(data), dtype=float)
            for k, kernel in enumerate(self.kernels):
                dK[k] = self.correlate(data, output_errors[k])
                total += self._full_convolve(output_errors[k], kernel)
            dX.append(total)
        else:
            for i, data in enumerate(self.a_prev):
                dK[i] = self.correlate(data, output_errors[i])
                dX.append(self._full_convolve(output_errors[i], self.kernels[i]))

        # TODO: bias derivative
        self.dK = np.asarray(dK)
        self.dX = np.asarray(dX)

        return self.dX