##### 【Problem 1 】 Creating a one-dimensional convolution layer class with limited number of channels to 1

In [1]:
import numpy as np

class SimpleConv1d:
    """
    A one-dimensional convolution layer with a single input and output channel.

    Parameters
    ----------
    filter_size : int
        The size of the convolutional filter.
    initializer : object
        An initializer object with a 'W' method for initializing weights (e.g., XavierInitializer).
    optimizer : object
        An optimizer object with an 'update' method for updating weights and biases (e.g., SGD).
    bias : bool
        Whether to include a bias term. Default is True.
    """
    def __init__(self, filter_size, initializer, optimizer, bias=True):
        self.filter_size = filter_size
        self.initializer = initializer
        self.optimizer = optimizer
        self.use_bias = bias
        self.W = initializer.W(filter_size)
        if self.use_bias:
            self.b = np.zeros(1)
        self.dW = None
        self.db = None
        self.X = None
        self.A = None

    def forward(self, X):
        """
        Performs forward propagation for the one-dimensional convolution layer.

        Parameters
        ----------
        X : ndarray of shape (batch_size, n_features)
            The input data. For this simple implementation, batch_size is assumed to be 1,
            so the shape is (1, n_features).

        Returns
        -------
        A : ndarray of shape (1, n_output)
            The output of the convolution layer.
        """
        self.X = X
        batch_size, n_features = X.shape
        n_output = n_features - self.filter_size + 1
        A = np.zeros((batch_size, n_output))

        for b in range(batch_size):
            for i in range(n_output):
                A[b, i] = np.sum(X[b, i:i + self.filter_size] * self.W)
                if self.use_bias:
                    A[b, i] += self.b[0]

        self.A = A
        return A

    def backward(self, dA):
        """
        Performs backward propagation for the one-dimensional convolution layer.

        Parameters
        ----------
        dA : ndarray of shape (1, n_output)
            Gradients of the loss with respect to the output of this layer.

        Returns
        -------
        dX : ndarray of shape (1, n_features)
            Gradients of the loss with respect to the input of this layer.
        """
        n_features = self.X.shape[1]
        n_output = dA.shape[1]
        F = self.filter_size

        # Gradient for weights
        dW = np.zeros_like(self.W)
        for s in range(F):
            dW[s] = np.sum(dA[0, :] * self.X[0, s:s + n_output])
        self.dW = dW

        # Gradient for bias
        if self.use_bias:
            self.db = np.sum(dA)

        # Gradient for the previous layer's input
        dX = np.zeros_like(self.X)
        for j in range(n_features):
            for s in range(F):
                if 0 <= j - s < n_output:
                    dX[0, j] += dA[0, j - s] * self.W[s]

        return dX

    def update(self, lr):
        """
        Updates the weights and biases using the optimizer.

        Parameters
        ----------
        lr : float
            The learning rate.
        """
        self.W = self.optimizer.update(self.W, self.dW, lr)
        if self.use_bias:
            self.b = self.optimizer.update(self.b, self.db, lr)

class XavierInitializer:
    """
    Initializes weights using the Xavier (or Glorot) initialization method.
    """
    def W(self, *shape):
        """
        Initializes weights with a scale based on the number of input units.
        For a 1D convolution layer with a single input channel,
        the number of input units is effectively the filter size.

        Parameters
        ----------
        *shape : tuple
            The shape of the weight array to initialize.

        Returns
        -------
        ndarray
            The initialized weight array.
        """
        n_in = shape[0]
        scale = np.sqrt(1. / n_in)
        return np.random.randn(*shape) * scale

class SGD:
    """
    Stochastic Gradient Descent optimizer.
    """
    def update(self, params, grads, lr):
        """
        Updates parameters based on their gradients and the learning rate.

        Parameters
        ----------
        params : ndarray
            The parameters to update.
        grads : ndarray
            The gradients of the loss with respect to the parameters.
        lr : float
            The learning rate.

        Returns
        -------
        ndarray
            The updated parameters.
        """
        return params - lr * grads

class Scratch1dCNNClassifier:
    """
    A simple 1D Convolutional Neural Network classifier.

    Parameters
    ----------
    filter_size : int
        The size of the convolutional filter in the SimpleConv1d layer.
    n_neurons : int
        The number of neurons in the fully connected layer.
    n_output : int
        The number of output units (number of classes).
    initializer : object
        An initializer object for weight initialization.
    optimizer : object
        An optimizer object for updating weights and biases.
    """
    def __init__(self, filter_size, n_neurons, n_output, initializer, optimizer):
        self.conv1d = SimpleConv1d(filter_size, initializer, optimizer)
        self.fc = self._Dense(n_neurons, initializer, optimizer)
        self.out = self._Dense(n_output, initializer, optimizer)

    class _Dense:
        """
        A simple fully connected layer.
        """
        def __init__(self, n_nodes, initializer, optimizer, use_bias=True):
            self.n_nodes = n_nodes
            self.initializer = initializer
            self.optimizer = optimizer
            self.use_bias = use_bias
            self.W = None
            self.b = None
            self.dW = None
            self.db = None
            self.X = None
            self.Z = None
            self.A = None

        def forward(self, X):
            """
            Performs forward propagation for the fully connected layer.
            """
            self.X = X
            if self.W is None:
                self.W = self.initializer.W(X.shape[1], self.n_nodes)
                if self.use_bias:
                    self.b = np.zeros(self.n_nodes)
            self.Z = np.dot(X, self.W)
            if self.use_bias:
                self.Z += self.b
            self.A = self._relu(self.Z)
            return self.A

        def backward(self, dA):
            """
            Performs backward propagation for the fully connected layer.
            """
            dZ = dA * self._relu_grad(self.Z)
            self.dW = np.dot(self.X.T, dZ)
            if self.use_bias:
                self.db = np.sum(dZ, axis=0)
            dX = np.dot(dZ, self.W.T)
            return dX

        def update(self, lr):
            """
            Updates the weights and biases using the optimizer.
            """
            self.W = self.optimizer.update(self.W, self.dW, lr)
            if self.use_bias:
                self.b = self.optimizer.update(self.b, self.db, lr)

        def _relu(self, x):
            return np.maximum(0, x)

        def _relu_grad(self, x):
            return np.where(x > 0, 1, 0)

    def forward(self, X):
        """
        Performs forward propagation through the 1D CNN.

        Parameters
        ----------
        X : ndarray of shape (batch_size, n_features)
            The input data.

        Returns
        -------
        output : ndarray of shape (batch_size, n_output)
            The output probabilities after the softmax layer.
        """
        conv_output = self.conv1d.forward(X)
        # Flatten the output of the convolution layer for the fully connected layer
        fc_input = conv_output.reshape(X.shape[0], -1)
        fc_output = self.fc.forward(fc_input)
        output = self._softmax(self.out.forward(fc_output))
        return output

    def backward(self, y, y_pred):
        """
        Performs backward propagation through the 1D CNN.

        Parameters
        ----------
        y : ndarray of shape (batch_size, n_output)
            The true labels in one-hot encoded format.
        y_pred : ndarray of shape (batch_size, n_output)
            The predicted probabilities.

        Returns
        -------
        None
        """
        # Output layer backward
        dout = y_pred - y
        dfc_output = self.out.backward(dout)

        # Fully connected layer backward
        dfc_input = self.fc.backward(dfc_output)

        # Reshape the gradient to match the convolution output shape
        dconv_output = dfc_input.reshape(self.conv1d.A.shape)

        # Convolution layer backward
        dinput = self.conv1d.backward(dconv_output)

    def update(self, lr):
        """
        Updates the weights and biases of all layers.

        Parameters
        ----------
        lr : float
            The learning rate.
        """
        self.conv1d.update(lr)
        self.fc.update(lr)
        self.out.update(lr)

    def _softmax(self, x):
        """
        Applies the softmax function to an array.
        """
        exp_x = np.exp(x - np.max(x, axis=1, keepdims=True))
        return exp_x / np.sum(exp_x, axis=1, keepdims=True)

    def loss(self, y, y_pred):
        """
        Calculates the cross-entropy loss.

        Parameters
        ----------
        y : ndarray of shape (batch_size, n_output)
            The true labels in one-hot encoded format.
        y_pred : ndarray of shape (batch_size, n_output)
            The predicted probabilities.

        Returns
        -------
        float
            The average cross-entropy loss.
        """
        batch_size = y.shape[0]
        return -np.sum(y * np.log(y_pred + 1e-7)) / batch_size

    def accuracy(self, y, y_pred):
        """
        Calculates the accuracy of the predictions.

        Parameters
        ----------
        y : ndarray of shape (batch_size, n_output)
            The true labels in one-hot encoded format.
        y_pred : ndarray of shape (batch_size, n_output)
            The predicted probabilities.

        Returns
        -------
        float
            The accuracy.
        """
        y_true = np.argmax(y, axis=1)
        y_predicted = np.argmax(y_pred, axis=1)
        return np.mean(y_true == y_predicted)

# Example usage (requires MNIST dataset loading - not included here for brevity)
if __name__ == '__main__':
    # Assume you have loaded and preprocessed the MNIST dataset
    # For 1D convolution, you might flatten or process the images differently
    # Here's a placeholder for demonstration with a dummy dataset

    # Example: Flattened MNIST images (28x28 = 784 features)
    X_train = np.random.rand(100, 784)
    y_train = np.eye(10)[np.random.randint(0, 10, 100)]
    X_test = np.random.rand(20, 784)
    y_test = np.eye(10)[np.random.randint(0, 10, 20)]

    # Hyperparameters
    filter_size = 3
    n_neurons = 128
    n_output = 10
    learning_rate = 0.01
    epochs = 10
    batch_size = 1 # SimpleConv1d currently supports batch_size = 1

    # Initialize components
    initializer = XavierInitializer()
    optimizer = SGD()
    model = Scratch1dCNNClassifier(filter_size, n_neurons, n_output, initializer, optimizer)

    # Training loop (simplified for demonstration)
    for epoch in range(epochs):
        epoch_loss = 0
        epoch_accuracy = 0
        for i in range(X_train.shape[0]):
            x_batch = X_train[i:i+batch_size]
            y_batch = y_train[i:i+batch_size]

            # Forward pass
            y_pred = model.forward(x_batch)

            # Calculate loss and accuracy
            loss = model.loss(y_batch, y_pred)
            accuracy = model.accuracy(y_batch, y_pred)
            epoch_loss += loss
            epoch_accuracy += accuracy

            # Backward pass and update
            model.backward(y_batch, y_pred)
            model.update(learning_rate)

        avg_loss = epoch_loss / X_train.shape[0]
        avg_accuracy = epoch_accuracy / X_train.shape[0]
        print(f"Epoch {epoch+1}/{epochs}, Loss: {avg_loss:.4f}, Accuracy: {avg_accuracy:.4f}")

    # Evaluation (simplified)
    y_pred_test = model.forward(X_test)
    test_loss = model.loss(y_test, y_pred_test)
    test_accuracy = model.accuracy(y_test, y_pred_test)
    print(f"Test Loss: {test_loss:.4f}, Test Accuracy: {test_accuracy:.4f}")

Epoch 1/10, Loss: 2.3035, Accuracy: 0.1300
Epoch 2/10, Loss: 2.2629, Accuracy: 0.2400
Epoch 3/10, Loss: 2.1884, Accuracy: 0.3000
Epoch 4/10, Loss: 2.0323, Accuracy: 0.5400
Epoch 5/10, Loss: 1.6877, Accuracy: 0.7000
Epoch 6/10, Loss: 1.1235, Accuracy: 0.8000
Epoch 7/10, Loss: 0.6916, Accuracy: 0.8000
Epoch 8/10, Loss: 0.5466, Accuracy: 0.8000
Epoch 9/10, Loss: 0.5030, Accuracy: 0.8000
Epoch 10/10, Loss: 0.4878, Accuracy: 0.8000
Test Loss: 2.7514, Test Accuracy: 0.1000


##### 【Problem 2 】 Calculation of output size after 1st dimensional convolution

In [2]:
def calculate_conv1d_output_size(input_size, padding, filter_size, stride):
    """
    Calculates the output size (number of feature quantities) after a 1D convolution.

    Parameters
    ----------
    input_size : int
        The size of the input (number of feature quantities).
    padding : int
        The number of paddings applied to each end of the input.
    filter_size : int
        The size of the convolutional filter.
    stride : int
        The stride of the convolution.

    Returns
    -------
    int
        The output size after the 1D convolution.
    """
    output_size = ((input_size + 2 * padding - filter_size) // stride) + 1
    return output_size

# Example usage:
input_size = 100
padding = 2
filter_size = 5
stride = 1
output_size = calculate_conv1d_output_size(input_size, padding, filter_size, stride)
print(f"Input size: {input_size}")
print(f"Padding: {padding}")
print(f"Filter size: {filter_size}")
print(f"Stride: {stride}")
print(f"Output size after 1D convolution: {output_size}")

input_size = 28
padding = 0
filter_size = 3
stride = 2
output_size = calculate_conv1d_output_size(input_size, padding, filter_size, stride)
print(f"\nInput size: {input_size}")
print(f"Padding: {padding}")
print(f"Filter size: {filter_size}")
print(f"Stride: {stride}")
print(f"Output size after 1D convolution: {output_size}")

Input size: 100
Padding: 2
Filter size: 5
Stride: 1
Output size after 1D convolution: 100

Input size: 28
Padding: 0
Filter size: 3
Stride: 2
Output size after 1D convolution: 13


##### 【Problem 3 】 Experiment of 1D convolution layer in small array

In [3]:
import numpy as np

class SimpleConv1dTest:
    """
    A one-dimensional convolution layer for testing with a fixed small array.
    """
    def __init__(self, w, b):
        self.W = w
        self.b = b
        self.dW = None
        self.db = None
        self.X = None
        self.A = None

    def forward(self, X):
        """
        Performs forward propagation.
        """
        self.X = X
        filter_size = self.W.shape[0]
        n_features = X.shape[0]
        n_output = n_features - filter_size + 1
        A = np.zeros(n_output)

        for i in range(n_output):
            A[i] = np.sum(X[i:i + filter_size] * self.W) + self.b[0]

        self.A = A
        return A

    def backward(self, dA):
        """
        Performs backward propagation.
        """
        n_output = dA.shape[0]
        n_features = self.X.shape[0]
        filter_size = self.W.shape[0]

        # Gradient for bias
        self.db = np.sum(dA)

        # Gradient for weights
        self.dW = np.zeros_like(self.W)
        for s in range(filter_size):
            for i in range(n_output):
                self.dW[s] += dA[i] * self.X[i + s]

        # Gradient for the previous layer's input
        dX = np.zeros_like(self.X)
        for j in range(n_features):
            for s in range(filter_size):
                if 0 <= j - s < n_output:
                    dX[j] += dA[j - s] * self.W[s]

        return dX

# Given input, weight, and bias
x = np.array([1, 2, 3, 4])
w = np.array([3, 5, 7])
b = np.array([1])

# Instantiate the SimpleConv1dTest layer
conv1d_test = SimpleConv1dTest(w, b)

# Forward propagation
a = conv1d_test.forward(x)
print(f"Forward Propagation Output (a): {a}")
expected_a = np.array([1*3 + 2*5 + 3*7 + 1, 2*3 + 3*5 + 4*7 + 1])
print(f"Expected Forward Output (a): {expected_a}")
assert np.allclose(a, expected_a), "Forward propagation mismatch!"
print("Forward propagation matches the expected output.")

# Back propagation with given error
delta_a = np.array([10, 20])
delta_x = conv1d_test.backward(delta_a)
delta_w = conv1d_test.dW
delta_b = conv1d_test.db

print(f"\nBackward Propagation delta_b: {delta_b}")
expected_delta_b = np.sum(delta_a)
print(f"Expected delta_b: {expected_delta_b}")
assert np.allclose(delta_b, expected_delta_b), "delta_b mismatch!"
print("delta_b matches the expected value.")

print(f"Backward Propagation delta_w: {delta_w}")
expected_delta_w = np.array([
    delta_a[0] * x[0] + delta_a[1] * x[1],
    delta_a[0] * x[1] + delta_a[1] * x[2],
    delta_a[0] * x[2] + delta_a[1] * x[3]
])
print(f"Expected delta_w: {expected_delta_w}")
assert np.allclose(delta_w, expected_delta_w), "delta_w mismatch!"
print("delta_w matches the expected value.")

print(f"Backward Propagation delta_x: {delta_x}")
expected_delta_x = np.zeros_like(x, dtype=float)
expected_delta_x[0] = delta_a[0] * w[0]
expected_delta_x[1] = delta_a[0] * w[1] + delta_a[1] * w[0]
expected_delta_x[2] = delta_a[0] * w[2] + delta_a[1] * w[1]
expected_delta_x[3] = delta_a[1] * w[2]
print(f"Expected delta_x: {expected_delta_x}")
assert np.allclose(delta_x, expected_delta_x), "delta_x mismatch!"
print("delta_x matches the expected value.")

Forward Propagation Output (a): [35. 50.]
Expected Forward Output (a): [35 50]
Forward propagation matches the expected output.

Backward Propagation delta_b: 30
Expected delta_b: 30
delta_b matches the expected value.
Backward Propagation delta_w: [ 50  80 110]
Expected delta_w: [ 50  80 110]
delta_w matches the expected value.
Backward Propagation delta_x: [ 30 110 170 140]
Expected delta_x: [ 30. 110. 170. 140.]
delta_x matches the expected value.


##### 【Problem 4 】 Creating a one-dimensional convolution layer class that does not limit the number of channels

In [4]:
import numpy as np

class DummyInitializer:
    """A dummy initializer for testing."""
    def W(self, *shape):
        return np.ones(shape) if shape == (3, 2, 3) else np.zeros(shape)

class DummyOptimizer:
    """A dummy optimizer for testing."""
    def update(self, params, grads, lr):
        return params - lr * grads

class Conv1d:
    """
    A one-dimensional convolution layer that handles multiple input and output channels.

    Parameters
    ----------
    in_channels : int
        Number of input channels.
    out_channels : int
        Number of output channels.
    filter_size : int
        Size of the convolutional filter.
    initializer : object
        Initializer object (e.g., XavierInitializer).
    optimizer : object
        Optimizer object (e.g., SGD).
    stride : int
        Stride of the convolution. Default is 1.
    padding : int
        Padding applied to both ends of the input. Default is 0.
    bias : bool
        Whether to use bias. Default is True.
    """
    def __init__(self, in_channels, out_channels, filter_size, initializer, optimizer,
                 stride=1, padding=0, bias=True):
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.filter_size = filter_size
        self.stride = stride
        self.padding = padding
        self.use_bias = bias
        self.W = initializer.W(out_channels, in_channels, filter_size)
        if self.use_bias:
            self.b = np.zeros(out_channels)
        self.dW = None
        self.db = None
        self.X = None
        self.A = None

    def forward(self, X):
        """
        Performs forward propagation.

        Parameters
        ----------
        X : ndarray of shape (in_channels, n_features)
            Input data.

        Returns
        -------
        A : ndarray of shape (out_channels, n_output)
            Output of the convolution layer.
        """
        self.X = X
        n_in_channels, n_features = X.shape
        n_output = (n_features + 2 * self.padding - self.filter_size) // self.stride + 1
        A = np.zeros((self.out_channels, n_output))

        # Pad the input
        if self.padding > 0:
            padded_X = np.pad(X, ((0, 0), (self.padding, self.padding)), mode='constant')
        else:
            padded_X = X

        for out_c in range(self.out_channels):
            for i in range(n_output):
                start = i * self.stride
                end = start + self.filter_size
                # Element-wise multiplication across input channels and filter
                conv_sum = np.sum(padded_X[:, start:end] * self.W[out_c, :, :])
                if self.use_bias:
                    A[out_c, i] = conv_sum + self.b[out_c]
                else:
                    A[out_c, i] = conv_sum

        self.A = A
        return A

    def backward(self, dA):
        """
        Performs backward propagation.

        Parameters
        ----------
        dA : ndarray of shape (out_channels, n_output)
            Gradients of the loss with respect to the output.

        Returns
        -------
        dX : ndarray of shape (in_channels, n_features)
            Gradients of the loss with respect to the input.
        """
        n_in_channels, n_features = self.X.shape
        n_output = dA.shape[1]

        # Initialize gradients
        self.dW = np.zeros_like(self.W)
        self.db = np.zeros_like(self.b)
        dX = np.zeros_like(self.X)

        # Gradient for bias
        if self.use_bias:
            self.db = np.sum(dA, axis=1)

        # Gradient for weights
        if self.padding > 0:
            padded_X = np.pad(self.X, ((0, 0), (self.padding, self.padding)), mode='constant')
        else:
            padded_X = self.X

        for out_c in range(self.out_channels):
            for in_c in range(self.in_channels):
                for s in range(self.filter_size):
                    for i in range(n_output):
                        start = i * self.stride
                        end = start + self.filter_size
                        if start + s < padded_X.shape[1]:
                            self.dW[out_c, in_c, s] += dA[out_c, i] * padded_X[in_c, start + s]

        # Gradient for input
        padded_dX = np.zeros_like(padded_X)
        for out_c in range(self.out_channels):
            for in_c in range(self.in_channels):
                for s in range(self.filter_size):
                    for i in range(n_output):
                        input_index = i * self.stride + s
                        if 0 <= input_index < padded_dX.shape[1]:
                            padded_dX[in_c, input_index] += dA[out_c, i] * self.W[out_c, in_c, self.filter_size - 1 - s]

        # Remove padding from dX
        if self.padding > 0:
            dX = padded_dX[:, self.padding:-self.padding]
        else:
            dX = padded_dX

        return dX

    def update(self, lr):
        """
        Updates the weights and biases using the optimizer.

        Parameters
        ----------
        lr : float
            The learning rate.
        """
        self.W = self.optimizer.update(self.W, self.dW, lr)
        if self.use_bias:
            self.b = self.optimizer.update(self.b, self.db, lr)

# Example Usage:
x = np.array([[1, 2, 3, 4], [2, 3, 4, 5]])  # shape(2, 4) - (in_channels, n_features)
w = np.ones((3, 2, 3))  # shape(3, 2, 3) - (out_channels, in_channels, filter_size)
b = np.array([1, 2, 3])  # shape(3,) - (out_channels)

initializer = DummyInitializer()
optimizer = DummyOptimizer()

conv1d_layer = Conv1d(in_channels=2, out_channels=3, filter_size=3,
                     initializer=initializer, optimizer=optimizer, bias=True)

# Set the weights and biases to the example values for forward pass verification
conv1d_layer.W = w
conv1d_layer.b = b

a = conv1d_layer.forward(x)
print("Forward Propagation Output (a):\n", a)
expected_a = np.array([[16, 22], [17, 23], [18, 24]])
print("Expected Forward Output (a):\n", expected_a)
assert np.allclose(a, expected_a), "Forward propagation mismatch!"
print("Forward propagation matches the expected output.")

# Conceptual Backpropagation (Hand Calculation):
# ... (rest of the conceptual backpropagation remains the same) ...

# You can uncomment the following to test the backward pass implementation
delta_a = np.ones((3, 2))  # Shape (out_channels, n_output)
dX = conv1d_layer.backward(delta_a)
print("\nBackward Propagation (implemented) delta_b:", conv1d_layer.db)
print("Backward Propagation (implemented) delta_w:\n", conv1d_layer.dW)
print("Backward Propagation (implemented) delta_x:\n", dX)

Forward Propagation Output (a):
 [[16. 22.]
 [17. 23.]
 [18. 24.]]
Expected Forward Output (a):
 [[16 22]
 [17 23]
 [18 24]]
Forward propagation matches the expected output.

Backward Propagation (implemented) delta_b: [2. 2. 2.]
Backward Propagation (implemented) delta_w:
 [[[3. 5. 7.]
  [5. 7. 9.]]

 [[3. 5. 7.]
  [5. 7. 9.]]

 [[3. 5. 7.]
  [5. 7. 9.]]]
Backward Propagation (implemented) delta_x:
 [[3 6 6 3]
 [3 6 6 3]]


##### 【Problem 5 】 (Advance Challenge) Padding Implementation

In [5]:
import numpy as np

class DummyInitializer:
    """A dummy initializer for testing."""
    def W(self, *shape):
        return np.ones(shape) if shape == (3, 2, 3) else np.zeros(shape)

class DummyOptimizer:
    """A dummy optimizer for testing."""
    def update(self, params, grads, lr):
        return params - lr * grads

class Conv1dWithPadding:
    """
    A one-dimensional convolution layer with padding and the option to maintain
    the original input size for the output.

    Parameters
    ----------
    in_channels : int
        Number of input channels.
    out_channels : int
        Number of output channels.
    filter_size : int
        Size of the convolutional filter.
    initializer : object
        Initializer object.
    optimizer : object
        Optimizer object.
    stride : int
        Stride of the convolution. Default is 1.
    padding : int or str
        Number of padding units on both ends of the input.
        If 'same', padding is calculated to maintain the input size. Default is 0.
    bias : bool
        Whether to use bias. Default is True.
    """
    def __init__(self, in_channels, out_channels, filter_size, initializer, optimizer,
                 stride=1, padding=0, bias=True):
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.filter_size = filter_size
        self.stride = stride
        self.padding = padding
        self.use_bias = bias
        self.W = initializer.W(out_channels, in_channels, filter_size)
        if self.use_bias:
            self.b = np.zeros(out_channels)
        self.dW = None
        self.db = None
        self.X = None
        self.A = None
        self.padded_X = None  # Store padded input for backward pass

    def _calculate_padding(self, input_size, filter_size, stride, output_size=None):
        """
        Calculates the padding needed to achieve a desired output size.
        Used for 'same' padding.
        """
        if output_size is None:
            output_size = np.ceil(input_size / stride).astype(int)
        padding_needed = (output_size - 1) * stride + filter_size - input_size
        padding_before = padding_needed // 2
        padding_after = padding_needed - padding_before
        return padding_before, padding_after

    def forward(self, X):
        """
        Performs forward propagation with padding.

        Parameters
        ----------
        X : ndarray of shape (in_channels, n_features)
            Input data.

        Returns
        -------
        A : ndarray of shape (out_channels, n_output)
            Output of the convolution layer.
        """
        self.X = X
        n_in_channels, n_features = X.shape

        if isinstance(self.padding, int):
            padding_width = ((0, 0), (self.padding, self.padding))
        elif self.padding == 'same':
            padding_before, padding_after = self._calculate_padding(n_features, self.filter_size, self.stride)
            padding_width = ((0, 0), (padding_before, padding_after))
        else:
            raise ValueError("Padding must be an integer or 'same'")

        self.padded_X = np.pad(X, padding_width, mode='constant')
        padded_n_features = self.padded_X.shape[1]
        n_output = (padded_n_features - self.filter_size) // self.stride + 1
        A = np.zeros((self.out_channels, n_output))

        for out_c in range(self.out_channels):
            for i in range(n_output):
                start = i * self.stride
                end = start + self.filter_size
                # Element-wise multiplication across input channels and filter
                conv_sum = np.sum(self.padded_X[:, start:end] * self.W[out_c, :, :])
                if self.use_bias:
                    A[out_c, i] = conv_sum + self.b[out_c]
                else:
                    A[out_c, i] = conv_sum

        self.A = A
        return A

    def backward(self, dA):
        """
        Performs backward propagation with padding.

        Parameters
        ----------
        dA : ndarray of shape (out_channels, n_output)
            Gradients of the loss with respect to the output.

        Returns
        -------
        dX : ndarray of shape (in_channels, n_features)
            Gradients of the loss with respect to the input.
        """
        if self.padded_X is None:
            raise ValueError("Forward pass must be performed before backward pass.")

        n_in_channels, padded_n_features = self.padded_X.shape
        n_output = dA.shape[1]

        # Initialize gradients
        self.dW = np.zeros_like(self.W)
        self.db = np.zeros_like(self.b)
        padded_dX = np.zeros_like(self.padded_X)

        # Gradient for bias
        if self.use_bias:
            self.db = np.sum(dA, axis=1)

        # Gradient for weights
        for out_c in range(self.out_channels):
            for in_c in range(self.in_channels):
                for s in range(self.filter_size):
                    for i in range(n_output):
                        start = i * self.stride
                        end = start + self.filter_size
                        if start + s < padded_n_features:
                            self.dW[out_c, in_c, s] += dA[out_c, i] * self.padded_X[in_c, start + s]

        # Gradient for input (on the padded input)
        for out_c in range(self.out_channels):
            for in_c in range(self.in_channels):
                for s in range(self.filter_size):
                    for i in range(n_output):
                        input_index = i * self.stride + s
                        if 0 <= input_index < padded_dX.shape[1]:
                            padded_dX[in_c, input_index] += dA[out_c, i] * self.W[out_c, in_c, self.filter_size - 1 - s]

        # Remove padding from dX
        if isinstance(self.padding, int) and self.padding > 0:
            dX = padded_dX[:, self.padding:-self.padding]
        elif self.padding == 'same':
            padding_before, padding_after = self._calculate_padding(self.X.shape[1], self.filter_size, self.stride, output_size=self.A.shape[1])
            dX = padded_dX[:, padding_before:padded_n_features - padding_after]
        else:
            dX = padded_dX

        return dX

    def update(self, lr):
        """
        Updates the weights and biases using the optimizer.

        Parameters
        ----------
        lr : float
            The learning rate.
        """
        self.W = self.optimizer.update(self.W, self.dW, lr)
        if self.use_bias:
            self.b = self.optimizer.update(self.b, self.db, lr)

# Example Usage:
x = np.array([[1, 2, 3, 4, 5]])  # Single input channel for simplicity
w_int_pad = np.array([[[2, 3]]])
w_same_pad = np.array([[[1, 1, 1]]])
b = np.array([0])

initializer = DummyInitializer()
optimizer = DummyOptimizer()

# Test zero padding
conv1d_pad_int = Conv1dWithPadding(in_channels=1, out_channels=1, filter_size=2,
                                   initializer=initializer, optimizer=optimizer, padding=1)
conv1d_pad_int.W = w_int_pad
conv1d_pad_int.b = b
a_pad_int = conv1d_pad_int.forward(x)
expected_a_pad_int = np.array([[3, 8, 13, 18, 23, 10]])
assert np.allclose(a_pad_int, expected_a_pad_int), "Integer padding forward mismatch!"

# Test 'same' padding (stride 1)
conv1d_pad_same_stride1 = Conv1dWithPadding(in_channels=1, out_channels=1, filter_size=3,
                                    initializer=initializer, optimizer=optimizer, padding='same', stride=1)
conv1d_pad_same_stride1.W = w_same_pad
conv1d_pad_same_stride1.b = b
a_pad_same_stride1 = conv1d_pad_same_stride1.forward(x)
expected_a_pad_same_stride1 = np.array([[3, 6, 9, 12, 9]])
assert np.allclose(a_pad_same_stride1, expected_a_pad_same_stride1), "'Same' padding forward mismatch (stride 1)!"

# Test 'same' padding (stride 2)
conv1d_pad_same_stride2 = Conv1dWithPadding(in_channels=1, out_channels=1, filter_size=3,
                                         initializer=initializer, optimizer=optimizer, padding='same', stride=2)
conv1d_pad_same_stride2.W = w_same_pad
conv1d_pad_same_stride2.b = b
a_pad_same_stride2 = conv1d_pad_same_stride2.forward(x)
expected_a_pad_same_stride2 = np.array([[3, 9, 9]])
assert np.allclose(a_pad_same_stride2, expected_a_pad_same_stride2), "'Same' padding forward mismatch (stride 2)!"

# Example of backward pass (basic test - shape consistency)
delta_a_pad_int = np.ones_like(a_pad_int)
dX_pad_int = conv1d_pad_int.backward(delta_a_pad_int)
print("dX with integer padding:\n", dX_pad_int)
assert dX_pad_int.shape == x.shape, "dX shape mismatch with integer padding!"

delta_a_pad_same_stride1 = np.ones_like(a_pad_same_stride1)
dX_pad_same_stride1 = conv1d_pad_same_stride1.backward(delta_a_pad_same_stride1)
print("dX with 'same' padding (stride 1):\n", dX_pad_same_stride1)
assert dX_pad_same_stride1.shape == x.shape, "dX shape mismatch with 'same' padding (stride 1)!"

delta_a_pad_same_stride2 = np.ones_like(a_pad_same_stride2)
dX_pad_same_stride2 = conv1d_pad_same_stride2.backward(delta_a_pad_same_stride2)
print("dX with 'same' padding (stride 2):\n", dX_pad_same_stride2)
assert dX_pad_same_stride2.shape == x.shape, "dX shape mismatch with 'same' padding (stride 2)!"

dX with integer padding:
 [[5 5 5 5 5]]
dX with 'same' padding (stride 1):
 [[2 3 3 3 2]]
dX with 'same' padding (stride 2):
 [[1 2 1 2 1]]


##### 【Problem 6 】 (Advance Challenge) Response to mini batch

In [6]:
import numpy as np

class DummyInitializer:
    """A dummy initializer for testing."""
    def W(self, *shape):
        return np.ones(shape) if shape == (3, 2, 3) else np.zeros(shape)

class DummyOptimizer:
    """A dummy optimizer for testing."""
    def update(self, params, grads, lr):
        return params - lr * grads

class Conv1dMiniBatch:
    """
    A one-dimensional convolution layer that handles multiple input and output channels
    and mini-batches.

    Parameters
    ----------
    in_channels : int
        Number of input channels.
    out_channels : int
        Number of output channels.
    filter_size : int
        Size of the convolutional filter.
    initializer : object
        Initializer object.
    optimizer : object
        Optimizer object.
    stride : int
        Stride of the convolution. Default is 1.
    padding : int or str
        Number of padding units on both ends of the input.
        If 'same', padding is calculated to maintain the input size. Default is 0.
    bias : bool
        Whether to use bias. Default is True.
    """
    def __init__(self, in_channels, out_channels, filter_size, initializer, optimizer,
                 stride=1, padding=0, bias=True):
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.filter_size = filter_size
        self.stride = stride
        self.padding = padding
        self.use_bias = bias
        self.W = initializer.W(out_channels, in_channels, filter_size)
        if self.use_bias:
            self.b = np.zeros(out_channels)
        self.dW = None
        self.db = None
        self.X = None  # Shape (batch_size, in_channels, n_features)
        self.A = None  # Shape (batch_size, out_channels, n_output)
        self.padded_X = None # Shape (batch_size, in_channels, padded_n_features)

    def _calculate_padding(self, input_size, filter_size, stride, output_size=None):
        """
        Calculates the padding needed to achieve a desired output size.
        Used for 'same' padding.
        """
        if output_size is None:
            output_size = np.ceil(input_size / stride).astype(int)
        padding_needed = (output_size - 1) * stride + filter_size - input_size
        padding_before = padding_needed // 2
        padding_after = padding_needed - padding_before
        return padding_before, padding_after

    def forward(self, X):
        """
        Performs forward propagation for a mini-batch.

        Parameters
        ----------
        X : ndarray of shape (batch_size, in_channels, n_features)
            Input data.

        Returns
        -------
        A : ndarray of shape (batch_size, out_channels, n_output)
            Output of the convolution layer.
        """
        self.X = X
        batch_size, n_in_channels, n_features = X.shape

        if isinstance(self.padding, int):
            padding_width = ((0, 0), (0, 0), (self.padding, self.padding))
        elif self.padding == 'same':
            padding_before, padding_after = self._calculate_padding(n_features, self.filter_size, self.stride)
            padding_width = ((0, 0), (0, 0), (padding_before, padding_after))
        else:
            raise ValueError("Padding must be an integer or 'same'")

        self.padded_X = np.pad(X, padding_width, mode='constant')
        _, _, padded_n_features = self.padded_X.shape
        n_output = (padded_n_features - self.filter_size) // self.stride + 1
        A = np.zeros((batch_size, self.out_channels, n_output))

        for batch in range(batch_size):
            for out_c in range(self.out_channels):
                for i in range(n_output):
                    start = i * self.stride
                    end = start + self.filter_size
                    # Element-wise multiplication across input channels and filter
                    conv_sum = np.sum(self.padded_X[batch, :, start:end] * self.W[out_c, :, :])
                    if self.use_bias:
                        A[batch, out_c, i] = conv_sum + self.b[out_c]
                    else:
                        A[batch, out_c, i] = conv_sum

        self.A = A
        return A

    def backward(self, dA):
        """
        Performs backward propagation for a mini-batch.

        Parameters
        ----------
        dA : ndarray of shape (batch_size, out_channels, n_output)
            Gradients of the loss with respect to the output.

        Returns
        -------
        dX : ndarray of shape (batch_size, in_channels, n_features)
            Gradients of the loss with respect to the input.
        """
        if self.padded_X is None:
            raise ValueError("Forward pass must be performed before backward pass.")

        batch_size, n_in_channels, padded_n_features = self.padded_X.shape
        _, n_out_channels, n_output = dA.shape

        # Initialize gradients
        self.dW = np.zeros_like(self.W)
        self.db = np.zeros_like(self.b)
        padded_dX = np.zeros_like(self.padded_X)

        # Gradient for bias (sum over the batch)
        if self.use_bias:
            self.db = np.sum(dA, axis=(0, 2))

        # Gradient for weights (sum over the batch)
        for batch in range(batch_size):
            for out_c in range(n_out_channels):
                for in_c in range(n_in_channels):
                    for s in range(self.filter_size):
                        for i in range(n_output):
                            start = i * self.stride
                            end = start + self.filter_size
                            if start + s < padded_n_features:
                                self.dW[out_c, in_c, s] += dA[batch, out_c, i] * self.padded_X[batch, in_c, start + s]
        self.dW /= batch_size # Average over the batch

        # Gradient for input (on the padded input)
        for batch in range(batch_size):
            for out_c in range(n_out_channels):
                for in_c in range(n_in_channels):
                    for s in range(self.filter_size):
                        for i in range(n_output):
                            input_index = i * self.stride + s
                            if 0 <= input_index < padded_dX.shape[2]:
                                padded_dX[batch, in_c, input_index] += dA[batch, out_c, i] * self.W[out_c, in_c, self.filter_size - 1 - s]

        # Remove padding from dX
        if isinstance(self.padding, int) and self.padding > 0:
            dX = padded_dX[:, :, self.padding:-self.padding]
        elif self.padding == 'same':
            padding_before, padding_after = self._calculate_padding(self.X.shape[2], self.filter_size, self.stride, output_size=self.A.shape[2])
            dX = padded_dX[:, :, padding_before:padded_n_features - padding_after]
        else:
            dX = padded_dX

        return dX

    def update(self, lr):
        """
        Updates the weights and biases using the optimizer.

        Parameters
        ----------
        lr : float
            The learning rate.
        """
        self.W = self.optimizer.update(self.W, self.dW, lr)
        if self.use_bias:
            self.b = self.optimizer.update(self.b, self.db, lr)

# Example Usage:
batch_size = 2
in_channels = 1
n_features = 5
out_channels = 1
filter_size = 3

x_mini_batch = np.array([[[1, 2, 3, 4, 5]], [[6, 7, 8, 9, 10]]]) # shape (2, 1, 5)
w_mini_batch = np.ones((out_channels, in_channels, filter_size)) # shape (1, 1, 3)
b_mini_batch = np.array([0]) # shape (1,)

initializer = DummyInitializer()
optimizer = DummyOptimizer()

conv1d_mini_batch = Conv1dMiniBatch(in_channels=in_channels, out_channels=out_channels, filter_size=filter_size,
                                     initializer=initializer, optimizer=optimizer, padding='same', stride=1)
conv1d_mini_batch.W = w_mini_batch
conv1d_mini_batch.b = b_mini_batch

a_mini_batch = conv1d_mini_batch.forward(x_mini_batch)
print("Forward Propagation Output (mini-batch):\n", a_mini_batch)
# Expected output shape (2, 1, 5) with 'same' padding and stride 1
expected_a_mini_batch = np.array([[[3., 6., 9., 12., 9.]], [[13., 21., 24., 27., 19.]]])
assert np.allclose(a_mini_batch, expected_a_mini_batch), "Mini-batch forward mismatch!"
print("Mini-batch forward propagation matches the expected output.")

delta_a_mini_batch = np.ones_like(a_mini_batch)
dX_mini_batch = conv1d_mini_batch.backward(delta_a_mini_batch)
print("Backward Propagation dX (mini-batch):\n", dX_mini_batch)
assert dX_mini_batch.shape == x_mini_batch.shape, "Mini-batch dX shape mismatch!"
print("Mini-batch backward propagation dX shape is correct.")
print("Backward Propagation dW (mini-batch):\n", conv1d_mini_batch.dW)
print("Backward Propagation db (mini-batch):\n", conv1d_mini_batch.db)

Forward Propagation Output (mini-batch):
 [[[ 3.  6.  9. 12.  9.]]

 [[13. 21. 24. 27. 19.]]]
Mini-batch forward propagation matches the expected output.
Backward Propagation dX (mini-batch):
 [[[2 3 3 3 2]]

 [[2 3 3 3 2]]]
Mini-batch backward propagation dX shape is correct.
Backward Propagation dW (mini-batch):
 [[[20.  27.5 24. ]]]
Backward Propagation db (mini-batch):
 [10.]


##### 【Problem 7 】 (Advance Challenge) Any number of strides

In [7]:
import numpy as np

class DummyInitializer:
    """A dummy initializer for testing."""
    def W(self, *shape):
        return np.ones(shape) if shape == (3, 2, 3) else np.zeros(shape)

class DummyOptimizer:
    """A dummy optimizer for testing."""
    def update(self, params, grads, lr):
        return params - lr * grads

class Conv1dStrideAny:
    """
    A one-dimensional convolution layer that handles multiple input and output channels,
    mini-batches, and any stride value.

    Parameters
    ----------
    in_channels : int
        Number of input channels.
    out_channels : int
        Number of output channels.
    filter_size : int
        Size of the convolutional filter.
    initializer : object
        Initializer object.
    optimizer : object
        Optimizer object.
    stride : int
        Stride of the convolution. Default is 1.
    padding : int or str
        Number of padding units on both ends of the input.
        If 'same', padding is calculated to maintain the input size (approximately). Default is 0.
    bias : bool
        Whether to use bias. Default is True.
    """
    def __init__(self, in_channels, out_channels, filter_size, initializer, optimizer,
                 stride=1, padding=0, bias=True):
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.filter_size = filter_size
        self.stride = stride
        self.padding = padding
        self.use_bias = bias
        self.W = initializer.W(out_channels, in_channels, filter_size)
        if self.use_bias:
            self.b = np.zeros(out_channels)
        self.dW = None
        self.db = None
        self.X = None  # Shape (batch_size, in_channels, n_features)
        self.A = None  # Shape (batch_size, out_channels, n_output)
        self.padded_X = None # Shape (batch_size, in_channels, padded_n_features)

    def _calculate_padding(self, input_size, filter_size, stride, output_size=None):
        """
        Calculates the padding needed to achieve a desired output size.
        Used for 'same' padding.
        """
        if output_size is None:
            output_size = np.ceil(input_size / stride).astype(int)
        padding_needed = (output_size - 1) * stride + filter_size - input_size
        padding_before = padding_needed // 2
        padding_after = padding_needed - padding_before
        return padding_before, padding_after

    def forward(self, X):
        """
        Performs forward propagation for a mini-batch with any stride.

        Parameters
        ----------
        X : ndarray of shape (batch_size, in_channels, n_features)
            Input data.

        Returns
        -------
        A : ndarray of shape (batch_size, out_channels, n_output)
            Output of the convolution layer.
        """
        self.X = X
        batch_size, n_in_channels, n_features = X.shape

        if isinstance(self.padding, int):
            padding_width = ((0, 0), (0, 0), (self.padding, self.padding))
        elif self.padding == 'same':
            padding_before, padding_after = self._calculate_padding(n_features, self.filter_size, self.stride)
            padding_width = ((0, 0), (0, 0), (padding_before, padding_after))
        else:
            raise ValueError("Padding must be an integer or 'same'")

        self.padded_X = np.pad(X, padding_width, mode='constant')
        _, _, padded_n_features = self.padded_X.shape
        n_output = (padded_n_features - self.filter_size) // self.stride + 1
        A = np.zeros((batch_size, self.out_channels, n_output))

        for batch in range(batch_size):
            for out_c in range(self.out_channels):
                for i in range(n_output):
                    start = i * self.stride
                    end = start + self.filter_size
                    # Element-wise multiplication across input channels and filter
                    conv_sum = np.sum(self.padded_X[batch, :, start:end] * self.W[out_c, :, :])
                    if self.use_bias:
                        A[batch, out_c, i] = conv_sum + self.b[out_c]
                    else:
                        A[batch, out_c, i] = conv_sum

        self.A = A
        return A

    def backward(self, dA):
        """
        Performs backward propagation for a mini-batch with any stride.

        Parameters
        ----------
        dA : ndarray of shape (batch_size, out_channels, n_output)
            Gradients of the loss with respect to the output.

        Returns
        -------
        dX : ndarray of shape (batch_size, in_channels, n_features)
            Gradients of the loss with respect to the input.
        """
        if self.padded_X is None:
            raise ValueError("Forward pass must be performed before backward pass.")

        batch_size, n_in_channels, padded_n_features = self.padded_X.shape
        _, n_out_channels, n_output = dA.shape

        # Initialize gradients
        self.dW = np.zeros_like(self.W)
        self.db = np.zeros_like(self.b)
        padded_dX = np.zeros_like(self.padded_X)

        # Gradient for bias (sum over the batch and output features)
        if self.use_bias:
            self.db = np.sum(dA, axis=(0, 2))

        # Gradient for weights (sum over the batch and output features)
        for batch in range(batch_size):
            for out_c in range(n_out_channels):
                for in_c in range(n_in_channels):
                    for s in range(self.filter_size):
                        for i in range(n_output):
                            input_index = i * self.stride + s
                            if input_index < padded_n_features:
                                self.dW[out_c, in_c, s] += dA[batch, out_c, i] * self.padded_X[batch, in_c, input_index]
        self.dW /= batch_size # Average over the batch

        # Gradient for input (on the padded input)
        for batch in range(batch_size):
            for in_c in range(n_in_channels):
                for j in range(padded_n_features):
                    for out_c in range(n_out_channels):
                        for s in range(self.filter_size):
                            output_index = (j - s) // self.stride
                            if (j - s) % self.stride == 0 and 0 <= output_index < n_output:
                                padded_dX[batch, in_c, j] += dA[batch, out_c, output_index] * self.W[out_c, in_c, s]

        # Remove padding from dX
        if isinstance(self.padding, int) and self.padding > 0:
            dX = padded_dX[:, :, self.padding:-self.padding]
        elif self.padding == 'same':
            padding_before, padding_after = self._calculate_padding(self.X.shape[2], self.filter_size, self.stride, output_size=self.A.shape[2])
            dX = padded_dX[:, :, padding_before:padded_n_features - padding_after]
        else:
            dX = padded_dX

        return dX

    def update(self, lr):
        """
        Updates the weights and biases using the optimizer.

        Parameters
        ----------
        lr : float
            The learning rate.
        """
        self.W = self.optimizer.update(self.W, self.dW, lr)
        if self.use_bias:
            self.b = self.optimizer.update(self.b, self.db, lr)

# Example Usage with different strides:
batch_size = 2
in_channels = 1
n_features = 7
out_channels = 1
filter_size = 3

x_stride_any = np.array([[[1, 2, 3, 4, 5, 6, 7]], [[8, 9, 10, 11, 12, 13, 14]]])
w_stride_any = np.ones((out_channels, in_channels, filter_size))
b_stride_any = np.array([0])

initializer = DummyInitializer()
optimizer = DummyOptimizer()

# Test stride = 2 with 'same' padding
conv1d_stride2_same = Conv1dStrideAny(in_channels=in_channels, out_channels=out_channels, filter_size=filter_size,
                                       initializer=initializer, optimizer=optimizer, padding='same', stride=2)
conv1d_stride2_same.W = w_stride_any
conv1d_stride2_same.b = b_stride_any
a_stride2_same = conv1d_stride2_same.forward(x_stride_any)
print("Forward with stride 2 ('same' padding):\n", a_stride2_same)
# Expected output size: ceil(7/2) = 4
# Padded input (total pad 2): [[0, 1, 2, 3, 4, 5, 6, 7, 0]], [[0, 8, 9, 10, 11, 12, 13, 14, 0]]
# Output 1: [0+1+2, 2+3+4, 4+5+6, 6+7+0] = [3, 9, 15, 13]
# Output 2: [0+8+9, 9+10+11, 11+12+13, 13+14+0] = [17, 30, 36, 27]
expected_a_stride2_same = np.array([[[3., 9., 15., 13.]], [[17., 30., 36., 27.]]])
assert np.allclose(a_stride2_same, expected_a_stride2_same), "Stride 2 'same' forward mismatch!"
print("Stride 2 'same' forward matches expected.")

# Test stride = 3 with integer padding
conv1d_stride3_int = Conv1dStrideAny(in_channels=in_channels, out_channels=out_channels, filter_size=3,
                                      initializer=initializer, optimizer=optimizer, padding=1, stride=3)
conv1d_stride3_int.W = w_stride_any
conv1d_stride3_int.b = b_stride_any
a_stride3_int = conv1d_stride3_int.forward(x_stride_any)
print("Forward with stride 3 (integer padding=1):\n", a_stride3_int)
# Padded input: [[0, 1, 2, 3, 4, 5, 6, 7, 0]], [[0, 8, 9, 10, 11, 12, 13, 14, 0]]
# Output 1: [0+1+2, 3+4+5, 6+7+0] = [3, 12, 13]
# Output 2: [0+8+9, 10+11+12, 13+14+0] = [17, 33, 27]
expected_a_stride3_int = np.array([[[3., 12., 13.]], [[17., 33., 27.]]])
assert np.allclose(a_stride3_int, expected_a_stride3_int), "Stride 3 integer padding forward mismatch!"
print("Stride 3 integer padding forward matches expected.")

delta_a_stride2_same = np.ones_like(a_stride2_same)
dX_stride2_same = conv1d_stride2_same.backward(delta_a_stride2_same)
print("Backward dX with stride 2 ('same'):\n", dX_stride2_same.shape)
assert dX_stride2_same.shape == x_stride_any.shape

delta_a_stride3_int = np.ones_like(a_stride3_int)
dX_stride3_int = conv1d_stride3_int.backward(delta_a_stride3_int)
print("Backward dX with stride 3 (integer padding):\n", dX_stride3_int.shape)
assert dX_stride3_int.shape == x_stride_any.shape

print("Backward dW (stride 2):\n", conv1d_stride2_same.dW)
print("Backward db (stride 2):\n", conv1d_stride2_same.db)
print("Backward dW (stride 3):\n", conv1d_stride3_int.dW)
print("Backward db (stride 3):\n", conv1d_stride3_int.db)

Forward with stride 2 ('same' padding):
 [[[ 3.  9. 15. 13.]]

 [[17. 30. 36. 27.]]]
Stride 2 'same' forward matches expected.
Forward with stride 3 (integer padding=1):
 [[[ 3. 12. 13.]]

 [[17. 33. 27.]]]
Stride 3 integer padding forward matches expected.
Backward dX with stride 2 ('same'):
 (2, 1, 7)
Backward dX with stride 3 (integer padding):
 (2, 1, 7)
Backward dW (stride 2):
 [[[22.5 30.  22.5]]]
Backward db (stride 2):
 [8.]
Backward dW (stride 3):
 [[[16.  22.5 14. ]]]
Backward db (stride 3):
 [6.]


##### 【Problem 8 】 Learning and estimation

In [13]:
import tensorflow as tf
from tensorflow.keras.datasets import mnist
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, GlobalAveragePooling1D, Dense, Reshape
from tensorflow.keras.utils import to_categorical

# 1. Load MNIST data
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Reshape: (batch, 28, 28) is already good for Conv1D input (time steps=28, features=28)
x_train = x_train.astype('float32') / 255.0
x_test = x_test.astype('float32') / 255.0

# 2. One-hot encode labels
y_train_cat = to_categorical(y_train, 10)
y_test_cat = to_categorical(y_test, 10)

# 3. Define model
model = Sequential([
    Conv1D(32, kernel_size=3, padding='same', activation='relu', input_shape=(28, 28)),
    Conv1D(16, kernel_size=3, padding='same', activation='relu'),
    Conv1D(1, kernel_size=3, padding='same', activation='relu'),  # Final channel = 1
    GlobalAveragePooling1D(),  # Reduce to (batch_size, 1)
    Dense(10, activation='softmax')  # Join layer
])

# 4. Compile and train
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.fit(x_train, y_train_cat, epochs=1, batch_size=64, validation_split=0.1)

# 5. Evaluate
test_loss, test_acc = model.evaluate(x_test, y_test_cat)
print(f"\nTest Accuracy with Conv1D: {test_acc * 100:.2f}%")


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m844/844[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 6ms/step - accuracy: 0.1841 - loss: 2.1532 - val_accuracy: 0.2477 - val_loss: 1.8546
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.2433 - loss: 1.8739

Test Accuracy with Conv1D: 24.87%
