<a href="https://colab.research.google.com/github/aderibigbeolamide/Assignment_Machine_Learning/blob/main/SimpleConv1d.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [13]:
import numpy as np

### Problem 1

In [2]:
class SimpleConv1d:
    """Single-channel 1-D convolution layer without padding.

    The filter ``W`` and bias ``B`` are created by ``initializer`` via
    ``initializer.W(1, filter_size)`` and ``initializer.B(1)``.  ``backward``
    stores the parameter gradients in ``self.grads`` under the keys ``'W'``
    and ``'B'`` and then calls ``optimizer.update(self)``.
    """

    def __init__(self, filter_size, initializer, optimizer, stride=1):
        self.filter_size = filter_size
        self.stride = stride
        self.optimizer = optimizer
        self.W = initializer.W(1, filter_size)
        self.B = initializer.B(1)
        self.grads = {}
        self.X = None

    def _output_size(self, n_in):
        """Return the number of filter positions for an input of length ``n_in``."""
        return (n_in - self.filter_size) // self.stride + 1

    def forward(self, X):
        """Slide the filter over ``X`` and return the 1-D output array.

        ``X`` is remembered in ``self.X`` for use by ``backward``.
        """
        self.X = X
        output_size = self._output_size(len(X))
        out = np.zeros(output_size)

        # W may be stored as (1, filter_size); flatten it so each window
        # product is a plain 1-D operation and the result is a true scalar
        # (assigning a size-1 array to out[i] is deprecated in NumPy).
        kernel = np.reshape(self.W, -1)
        bias = np.reshape(self.B, -1)[0]

        for i in range(output_size):
            start = i * self.stride
            out[i] = np.sum(X[start:start + self.filter_size] * kernel) + bias
        return out

    def backward(self, dA):
        """Back-propagate ``dA`` (gradient w.r.t. the forward output).

        Stores ``grads['W']`` (same shape as ``W``) and ``grads['B']``, lets
        the optimizer update the layer, and returns the gradient with respect
        to the input as a float array with the same length as ``X``.
        """
        x = np.asarray(self.X)
        dA = np.asarray(dA)
        kernel = np.reshape(self.W, -1)

        # Float buffers: integer inputs must not force integer gradients,
        # otherwise the in-place float accumulation below raises.
        dW = np.zeros(np.shape(self.W), dtype=float)
        dX = np.zeros(x.shape, dtype=float)
        dB = np.sum(dA)

        for i in range(self._output_size(len(x))):
            start = i * self.stride
            stop = start + self.filter_size
            # The (filter_size,) window broadcasts into dW whether W is 1-D
            # or (1, filter_size).
            dW += dA[i] * x[start:stop]
            # Use the flattened kernel: adding a (1, F) array in place into a
            # (F,) slice is not a valid broadcast.
            dX[start:stop] += dA[i] * kernel

        self.grads['W'] = dW
        self.grads['B'] = dB
        self.optimizer.update(self)
        return dX

class XavierInitializer:
    """Create initial weights and biases for a layer."""

    def W(self, input_dim, output_dim):
        """Return an ``(input_dim, output_dim)`` array of standard-normal
        samples scaled by ``sqrt(1 / input_dim)``."""
        scale = np.sqrt(1 / input_dim)
        samples = np.random.randn(input_dim, output_dim)
        return samples * scale

    def B(self, output_dim):
        """Return a zero-filled bias array of length ``output_dim``."""
        bias = np.zeros(output_dim)
        return bias
class AdaGrad:
    """AdaGrad optimizer: per-parameter steps scaled by accumulated squared gradients.

    ``self.h`` stays ``None`` until the first ``update`` call; afterwards it is
    a dict mapping each gradient key to its running sum of squared gradients.
    """

    def __init__(self, lr=0.01):
        self.lr = lr
        self.h = None

    def update(self, layer):
        """Apply one AdaGrad step to every parameter listed in ``layer.grads``.

        Each key of ``layer.grads`` (``'W'`` and ``'B'`` for the layers in this
        file) is taken to be the name of the attribute on ``layer`` that the
        gradient belongs to.
        """
        if self.h is None:
            self.h = {}

        for key, grad in layer.grads.items():
            # Create the accumulator lazily so keys that first appear on a
            # later call do not raise KeyError; float dtype keeps integer
            # gradients from producing an integer accumulator.
            if key not in self.h:
                self.h[key] = np.zeros_like(grad, dtype=float)

            self.h[key] += grad * grad
            step = self.lr * grad / (np.sqrt(self.h[key]) + 1e-7)

            # Update the parameter that matches this gradient.  Previously W
            # was decremented for every key, so the bias gradient also leaked
            # into the weights.
            param = getattr(layer, key)
            param -= step
            setattr(layer, key, param)


### Problem 2

In [3]:
class SimpleConv1d:
    """Single-channel 1-D convolution layer with zero padding and stride.

    The filter ``W`` and bias ``B`` come from ``initializer.W(1, filter_size)``
    and ``initializer.B(1)``.  ``backward`` stores the parameter gradients in
    ``self.grads`` (keys ``'W'`` and ``'B'``) and calls
    ``optimizer.update(self)``.
    """

    def __init__(self, filter_size, initializer, optimizer, stride=1, padding=0):
        self.filter_size = filter_size
        self.stride = stride
        self.padding = padding
        self.optimizer = optimizer
        self.W = initializer.W(1, filter_size)
        self.B = initializer.B(1)
        self.grads = {}
        self.X = None

    def forward(self, X):
        """Zero-pad ``X`` on both sides, convolve it, and return the 1-D output.

        The padded input is kept in ``self.X`` for ``backward``.
        """
        self.X = np.pad(X, (self.padding, self.padding), 'constant')
        # Pass the *unpadded* length: calculate_output_size adds 2 * P itself,
        # so using len(self.X) here would count the padding twice.
        output_size = self.calculate_output_size(len(X), self.padding, self.filter_size, self.stride)
        out = np.zeros(output_size)

        # W may be stored as (1, filter_size); work with a flat view so each
        # output element is a true scalar.
        kernel = np.reshape(self.W, -1)
        bias = np.reshape(self.B, -1)[0]

        for i in range(output_size):
            start = i * self.stride
            out[i] = np.sum(self.X[start:start + self.filter_size] * kernel) + bias
        return out

    def backward(self, dA):
        """Back-propagate ``dA`` (gradient w.r.t. the forward output).

        Stores ``grads['W']`` (same shape as ``W``) and ``grads['B']``, lets
        the optimizer update the layer, and returns the gradient with respect
        to the original, unpadded input.
        """
        dA = np.asarray(dA)
        kernel = np.reshape(self.W, -1)
        padded_len = len(self.X)

        # Float buffers so integer inputs do not break in-place accumulation.
        dW = np.zeros(np.shape(self.W), dtype=float)
        dX = np.zeros(padded_len, dtype=float)
        dB = np.sum(dA)

        # self.X already contains the padding; recover the unpadded length
        # before handing it to calculate_output_size.
        output_size = self.calculate_output_size(
            padded_len - 2 * self.padding, self.padding, self.filter_size, self.stride
        )

        for i in range(output_size):
            start = i * self.stride
            stop = start + self.filter_size
            dW += dA[i] * self.X[start:stop]
            # Flattened kernel: a (1, F) array cannot be added in place into
            # a (F,) slice.
            dX[start:stop] += dA[i] * kernel

        self.grads['W'] = dW
        self.grads['B'] = dB
        self.optimizer.update(self)
        # Drop the gradient of the padded cells so the result lines up with
        # the input that was passed to forward().
        return dX[self.padding:padded_len - self.padding]

    @staticmethod
    def calculate_output_size(N_in, P, F, S):
        """
        Calculate the output size after a 1D convolution.

        Parameters:
        N_in (int): Size of the input (number of features), before padding.
        P (int): Number of paddings in a direction.
        F (int): Filter size.
        S (int): Size of stride.

        Returns:
        int: Size of the output (number of features).
        """
        return (N_in + 2 * P - F) // S + 1


### Problem 3

In [4]:
class SimpleConv1d:
    """Single-channel 1-D convolution whose parameters are assigned by the caller.

    ``W`` (the filter) and ``B`` (the bias) start as ``None`` and must be set
    on the instance before ``forward`` is called.  ``backward`` returns the
    gradients instead of applying an optimizer step.
    """

    def __init__(self, filter_size, stride=1, padding=0):
        self.filter_size = filter_size
        self.stride = stride
        self.padding = padding
        self.W = None
        self.B = None
        self.X = None

    def forward(self, X):
        """Zero-pad ``X`` on both sides, convolve it, and return the 1-D float output.

        The padded input is kept in ``self.X`` for ``backward``.
        """
        self.X = np.pad(X, (self.padding, self.padding), 'constant')
        # Pass the *unpadded* length: calculate_output_size adds 2 * P itself,
        # so using len(self.X) here would count the padding twice.
        output_size = self.calculate_output_size(len(X), self.padding, self.filter_size, self.stride)
        out = np.zeros(output_size)

        for i in range(output_size):
            start = i * self.stride
            out[i] = np.sum(self.X[start:start + self.filter_size] * self.W) + self.B
        return out

    def backward(self, dA):
        """Return ``(dW, dB, dX)`` for the upstream gradient ``dA``.

        ``dW`` has the shape of ``W``, ``dB`` is the sum of ``dA``, and ``dX``
        is the gradient with respect to the original (unpadded) input.
        """
        dA = np.asarray(dA)
        kernel = np.asarray(self.W)
        padded_len = len(self.X)

        # Use the common dtype of everything that is accumulated so mixed
        # int/float inputs work, while all-integer inputs still produce
        # integer gradients.
        grad_dtype = np.result_type(self.X, kernel, dA)
        dW = np.zeros(kernel.shape, dtype=grad_dtype)
        dX = np.zeros(padded_len, dtype=grad_dtype)
        dB = np.sum(dA)

        # self.X already contains the padding; recover the unpadded length
        # before handing it to calculate_output_size.
        output_size = self.calculate_output_size(
            padded_len - 2 * self.padding, self.padding, self.filter_size, self.stride
        )

        for i in range(output_size):
            start = i * self.stride
            stop = start + self.filter_size
            dW += dA[i] * self.X[start:stop]
            dX[start:stop] += dA[i] * kernel

        # Remove the gradient of the padded cells.
        dX = dX[self.padding:padded_len - self.padding]
        return dW, dB, dX

    @staticmethod
    def calculate_output_size(N_in, P, F, S):
        """Return the output length for unpadded input length ``N_in``,
        per-side padding ``P``, filter size ``F`` and stride ``S``."""
        return (N_in + 2 * P - F) // S + 1


# Worked example: run one forward and one backward pass through SimpleConv1d
# with a hand-picked filter and bias.
X = np.asarray([1, 2, 3, 4])
w = np.asarray([3, 5, 7])
b = 1

# The layer starts without parameters; assign the filter and bias directly.
conv1d = SimpleConv1d(filter_size=w.shape[0])
conv1d.W, conv1d.B = w, b

# Forward pass.
out = conv1d.forward(X)
print("Forward output:", out)

# Backward pass with a fixed upstream gradient.
delta_a = np.asarray([10, 20])
dW, dB, dX = conv1d.backward(delta_a)
print("Backward dB:", dB)
print("Backward dW:", dW)
print("Backward dX:", dX)

Forward output: [35. 50.]
Backward dB: 30
Backward dW: [ 50  80 110]
Backward dX: [ 30 110 170 140]


### Problem 4

In [6]:
class Conv1d:
    """Multi-channel 1-D convolution layer with zero padding and stride.

    ``W`` has shape ``(num_output_channels, num_input_channels, filter_size)``
    and ``B`` has shape ``(num_output_channels,)``.  ``backward`` returns the
    gradients; no optimizer step is applied.
    """

    def __init__(self, num_input_channels, num_output_channels, filter_size, stride=1, padding=0):
        self.num_input_channels = num_input_channels
        self.num_output_channels = num_output_channels
        self.filter_size = filter_size
        self.stride = stride
        self.padding = padding

        # Normal samples scaled by sqrt(2 / (input channels * filter size)).
        self.W = np.random.randn(num_output_channels, num_input_channels, filter_size) * np.sqrt(2 / (num_input_channels * filter_size))
        self.B = np.zeros(num_output_channels)

    def forward(self, X):
        """Convolve ``X`` of shape ``(num_input_channels, n_features)``.

        Returns a float array of shape ``(num_output_channels, output_size)``.
        The zero-padded float copy of ``X`` is kept in ``self.X`` for
        ``backward``.
        """
        X = np.asarray(X)
        self.X = np.pad(X, ((0, 0), (self.padding, self.padding)), 'constant').astype(float)
        # Pass the *unpadded* feature count: calculate_output_size adds 2 * P
        # itself, so using the padded width would count the padding twice.
        output_size = self.calculate_output_size(X.shape[1], self.padding, self.filter_size, self.stride)
        out = np.zeros((self.num_output_channels, output_size))

        for i in range(output_size):
            start = i * self.stride
            window = self.X[:, start:start + self.filter_size]
            # Contract the (in_channel, tap) axes of every filter with the
            # window in one call -> one value per output channel.
            out[:, i] = np.tensordot(self.W, window, axes=2) + self.B
        return out

    def backward(self, dA):
        """Return ``(dW, dB, dX)`` for the upstream gradient ``dA``.

        ``dA`` has shape ``(num_output_channels, output_size)``.  ``dW`` and
        ``dB`` have the shapes of ``W`` and ``B``; ``dX`` is the gradient with
        respect to the original (unpadded) input.
        """
        dA = np.asarray(dA)
        # Derive gradient dtypes from the operands rather than from W/B alone,
        # so an integer W or B cannot truncate or reject float gradients.
        dW = np.zeros(np.shape(self.W), dtype=np.result_type(self.W, self.X, dA))
        dB = np.sum(dA, axis=1).astype(np.result_type(np.asarray(self.B), dA))
        dX = np.zeros_like(self.X, dtype=float)

        output_size = dA.shape[1]

        for i in range(output_size):
            start = i * self.stride
            stop = start + self.filter_size
            grad_col = dA[:, i]  # upstream gradient of every output channel at position i
            dW += grad_col[:, None, None] * self.X[:, start:stop]
            # Sum over output channels of grad * filter -> (in_channels, taps).
            dX[:, start:stop] += np.tensordot(grad_col, self.W, axes=1)

        # Remove the gradient of the padded cells (a -0 stop index would
        # produce an empty slice, hence the guard).
        if self.padding != 0:
            dX = dX[:, self.padding:-self.padding]
        return dW, dB, dX

    @staticmethod
    def calculate_output_size(N_in, P, F, S):
        """Return the output length for unpadded input length ``N_in``,
        per-side padding ``P``, filter size ``F`` and stride ``S``."""
        return (N_in + 2 * P - F) // S + 1


# Worked example: two input channels, three output channels, all-ones filters.
X = np.asarray([[1, 2, 3, 4], [2, 3, 4, 5]])
w = np.ones(shape=(3, 2, 3))
b = np.asarray([1, 2, 3])

# Replace the randomly initialised parameters with the fixed ones above.
conv1d = Conv1d(
    num_input_channels=2,
    num_output_channels=3,
    filter_size=3,
)
conv1d.W, conv1d.B = w, b

# Forward pass.
out = conv1d.forward(X)
print("Forward output:\n", out)

# Backward pass with the same upstream gradient for every output channel.
delta_a = np.asarray([[10, 20], [10, 20], [10, 20]])
dW, dB, dX = conv1d.backward(delta_a)
print("Backward dB:\n", dB)
print("Backward dW:\n", dW)
print("Backward dX:\n", dX)

Forward output:
 [[16. 22.]
 [17. 23.]
 [18. 24.]]
Backward dB:
 [30 30 30]
Backward dW:
 [[[ 50.  80. 110.]
  [ 80. 110. 140.]]

 [[ 50.  80. 110.]
  [ 80. 110. 140.]]

 [[ 50.  80. 110.]
  [ 80. 110. 140.]]]
Backward dX:
 [[30. 90. 90. 60.]
 [30. 90. 90. 60.]]
