In [1]:
import torch
import math
import torchvision
from torchvision.datasets import MNIST
import torchvision.transforms as T
from PIL import Image

# Set the device
device = "mps" if torch.backends.mps.is_available() else "cpu"

In [2]:
class ConvLayer:
    def __init__(
        self,
        filter_size: int,
        n_filters: int,
        stride=1,
        padding=1,
        input_dim=(1, 28, 28),
        activation=torch.relu,
    ) -> None:
        """Create a Convolutional Layer

        Args:
            filter_size (int): Filter Size. Eg: 5 = 5 x 5
            n_filters (int): Number of filters.
            stride (int): Stride. Defaults to 1
            padding (int): Padding. Pad of 1 adds pad all around the image. Defaults to 1
            input_dim (tuple): Channels * Height * Breadth. Defaults to (1, 28, 28)
        """
        self.stride = stride
        self.padding = padding
        self.input_dim = input_dim
        self.filter_size = filter_size
        self.n_filters = n_filters
        self.params_initialized = False
        self.filters = []
        self.filter_biases = []
        
    def initialize_params(self) -> None:
        """Initialize Params
        """
        # Create filters
        self.filters = [
            torch.randn(
                self.input_dim[0],
                self.filter_size,
                self.filter_size,
                requires_grad=True,
                dtype=torch.float32,
                device=device
            )
            for _ in range(self.n_filters)
        ]
        self.filter_biases = [
            torch.randn(1, requires_grad=True, dtype=torch.float32, device=device)
            for _ in range(self.n_filters)
        ]
        self.params_initialized = True

    @staticmethod
    def pad_img_tensor(img_tensor: torch.tensor, padding: int) -> torch.tensor:
        """Pad Image Tensor Object

        Args:
            img_tensor (torch.tensor): Image Tensor to pad with
            padding (int): Padding for the tensor

        Returns:
            torch.tensor: Padded tensor
        """
        img_tensor_shape = img_tensor.shape
        padded_tensor = torch.zeros(
            img_tensor_shape[0],
            img_tensor_shape[1] + 2 * padding,
            img_tensor_shape[2] + 2 * padding,
            device=device
        )
        for channel_idx in range(img_tensor_shape[0]):
            for row_idx in range(img_tensor_shape[1]):
                padded_tensor[channel_idx, row_idx + 1, 1:-1] = img_tensor[
                    channel_idx, row_idx
                ]
        return padded_tensor

    @staticmethod
    def tensor_to_image(img_tensor: torch.tensor) -> T.ToPILImage:
        """Conver tensor to a PIL image for visualization

        Args:
            img_tensor (torch.tensor): Image Tensor

        Returns:
            IMAGE: PIL Image
        """
        transform = T.ToPILImage()
        img = transform(img_tensor)
        return img

    def get_model_params(self) -> list:
        """Returns a list of Model Params

        Returns:
            list: List of Model params
        """
        return [self.filters, self.filter_biases]

    def forward_pass(self, input_tensor: torch.tensor) -> torch.tensor:
        """Forward Pass for the layer (Convolution over the image)

        Args:
            input_tensor (torch.tensor): Image Tensor

        Returns:
            torch.tensor: Activation map of the layer.
        """
        if not self.params_initialized:
            self.initialize_params()

        if self.padding > 0:
            input_tensor = self.pad_img_tensor(input_tensor, self.padding)

        img_channels = self.input_dim[0]
        img_size = self.input_dim[1] + 2 * self.padding # Padding is also included now
        output_size = ((img_size - self.filter_size) / self.stride) + 1
        
        # Return None if unsupported
        if output_size % 1 != 0:
            print("Kernel Size not applicable.")
            return None

        output_size = int(output_size)
        activation_map = torch.zeros(
            len(self.filters), output_size, output_size, device=device
        )

        for filter_idx in range(len(self.filters)):
            for row_idx in range(
                0, img_size - self.filter_size + 1, self.stride
            ):
                for col_idx in range(
                    0, img_size - self.filter_size + 1, self.stride
                ):
                    flatten_size = (
                        img_channels * self.filter_size * self.filter_size
                    )
                    activation_map[
                        filter_idx, row_idx, col_idx
                    ] = torch.relu(
                        input_tensor[
                            :,
                            row_idx : row_idx + self.filter_size,
                            col_idx : col_idx + self.filter_size,
                        ].reshape(1, flatten_size)
                        @ self.filters[filter_idx].reshape(flatten_size, 1)
                        + self.filter_biases[filter_idx]
                    )
        return activation_map

In [3]:
# Sequential Layer
class SequentialLayer:
    def __init__(
        self,
        neurons_per_layer: int,
        activation=torch.tanh,
        n_inputs_per_neuron=0,
    ) -> None:
        """Sequential Layer

        Args:
            neurons_per_layer (int): Neurons in the layer
            activation (torch.<ActivationFunction>, None): Activation Function for Layer.
                Defaults to torch.tanh. Set None for no activation
            n_inputs_per_neuron (int): Inputs per neuron in the layer
        """
        self.n_inputs_per_neuron = n_inputs_per_neuron
        self.neurons_per_layer = neurons_per_layer
        self.activation = activation
        self.params_initialized = False
        self.weights = None
        self.biases = None

    def initialize_params(self) -> None:
        """Initialize Model Params"""
        self.weights = torch.randn(
            self.neurons_per_layer,
            self.n_inputs_per_neuron,
            requires_grad=True,
            dtype=torch.float32,
        )
        self.biases = torch.randn(
            self.neurons_per_layer, 1, requires_grad=True, dtype=torch.float32
        )
        self.params_initialized = True

    def return_model_params(self) -> list:
        """Returns a list of model params

        Returns:
            list: List of model params
        """
        return [self.weights, self.biases]

    def forward_pass(self, x: torch.tensor) -> torch.tensor:
        """Forward Pass of the Layer

        Args:
            x (torch.tensor): Input to the layer

        Returns:
            torch.tensor: Output of the layer
        """
        if not self.params_initialized:
            self.initialize_params
        
        if not torch.is_tensor(x):
            print(
                f"Found a non-tensor object: {type(x)}. Converting to tensor."
            )
            x = torch.tensor(x)

        flattened_x = x.flatten()
        # Get weighted sum + bias
        weighted_sum = (
            self.weights @ flattened_x.reshape(flattened_x.shape[0], 1)
            + self.biases
        )
        if self.activation is None:
            return weighted_sum
        else:
            return self.activation(weighted_sum)

In [4]:
class ConvNet:
    def __init__(self) -> None:
        self.layers = []

    def add_layer(self, layer) -> None:
        """Add Layers to ConvNet Model

        Args:
            layer (ConvLayer, Sequential): Layer to add to the model
        """
        if len(self.layers) > 0:
            last_layer = self.layers[-1]
            # If last layer is convolutional layer
            if isinstance(last_layer, ConvLayer):
                input_dim_for_curr_layer = (
                    len(last_layer.filters),
                    last_layer.input_dim[1],
                    last_layer.input_dim[2],
                )
                # If current layer is also conv layer set the correct input dim
                if isinstance(layer, ConvLayer):
                    layer.input_dim = input_dim_for_curr_layer

                elif isinstance(layer, SequentialLayer):
                    # Get the flatten layer dimension
                    flattened_n = math.prod(
                        input_dim_for_curr_layer
                    )
                    print(f"flattened n: {flattened_n}")
                    layer.n_input_per_neuron = 

            # If last layer was convolutional
            elif isinstance(last_layer, SequentialLayer) and isinstance(
                layer, SequentialLayer
            ):
                # Set inputs of current sequential layer to the number of outputs of the last layer
                layer.n_input_per_neuron = last_layer.neurons_per_layer

        elif isinstance(layer, SequentialLayer):
            print("Can't Start with a Sequential Layer.")
            return None

        layer.initialize_params()
        self.layers.append(layer)
    
    def forward(self, x_input: torch.tensor) -> torch.tensor:
        """Do forward pass of the entire model

        Args:
            x_input (torch.tensor): Input Image

        Returns:
            torch.tensor: Output of the forward pass
        """
        layer_input = x_input
        last_layer_type = ConvLayer
        for layer in self.layers:
            if isinstance(layer, SequentialLayer) and isinstance(layer, ConvLayer):
                x_input = x_input.flatten()
            layer_input = layer.forward_pass(layer_input)
            last_layer_type = type(layer)
        
        return layer_input


SyntaxError: invalid syntax (521731785.py, line 30)

In [105]:
mnist_trainset = MNIST(root='./data', train=True, download=True, transform=torchvision.transforms.ToTensor()
                    )

In [121]:
conv_net = ConvNet()

# Add Some Conv Layers
conv_net.add_layer(ConvLayer(input_dim=(1, 28, 28), filter_size=3, n_filters=16))
conv_net.add_layer(ConvLayer(filter_size=3, n_filters=32))
conv_net.add_layer(ConvLayer(filter_size=3, n_filters=64))
conv_net.add_layer(ConvLayer(filter_size=3, n_filters=128))
conv_net.add_layer(SequentialLayer(100, activation=torch.relu))
# conv_net.add_layer(SequentialLayer(10, activation=None))


In [122]:
conv_net.layers[-1].weights

tensor([], size=(100, 0), requires_grad=True)

In [123]:
x = conv_net.forward(mnist_trainset[0][0])

RuntimeError: mat1 and mat2 shapes cannot be multiplied (100x0 and 100352x1)

In [117]:
x.shape

torch.Size([128, 28, 28])

In [100]:
b = ConvLayer(filter_size=3, n_filters=32, input_dim=(16, 28, 28))

In [101]:
b.forward_pass(x)

Set Image Channels: 16; Img Size: 30
Curr Image Channels: 16; Img Size: 30


tensor([[[ 8.9977,  6.4394,  6.4394,  ...,  6.4394,  6.4394,  0.8392],
         [ 7.4137,  5.5592,  5.5592,  ...,  5.5592,  5.5592,  0.0000],
         [ 7.4137,  5.5592,  5.5592,  ...,  5.5592,  5.5592,  0.0000],
         ...,
         [ 7.4137,  5.5592,  5.4245,  ...,  5.5592,  5.5592,  0.0000],
         [ 7.4137,  5.5592,  4.2788,  ...,  5.5592,  5.5592,  0.0000],
         [ 1.1926,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000]],

        [[ 3.3620,  5.3266,  5.3266,  ...,  5.3266,  5.3266,  0.8295],
         [ 8.6206,  7.6284,  7.6284,  ...,  7.6284,  7.6284,  0.2067],
         [ 8.6206,  7.6284,  7.6284,  ...,  7.6284,  7.6284,  0.2067],
         ...,
         [ 8.6206,  7.6284,  5.7642,  ...,  7.6284,  7.6284,  0.2067],
         [ 8.6206,  7.6284,  7.7404,  ...,  7.6284,  7.6284,  0.2067],
         [10.9487,  9.7599,  9.7599,  ...,  9.7599,  9.7599,  4.1060]],

        [[10.1675,  5.4927,  5.4927,  ...,  5.4927,  5.4927,  2.9021],
         [10.0439,  4.4333,  4.4333,  ...,  4

In [71]:
conv_net.layers[1].filters

[tensor([], size=(0, 3, 3), requires_grad=True),
 tensor([], size=(0, 3, 3), requires_grad=True),
 tensor([], size=(0, 3, 3), requires_grad=True),
 tensor([], size=(0, 3, 3), requires_grad=True),
 tensor([], size=(0, 3, 3), requires_grad=True),
 tensor([], size=(0, 3, 3), requires_grad=True),
 tensor([], size=(0, 3, 3), requires_grad=True),
 tensor([], size=(0, 3, 3), requires_grad=True),
 tensor([], size=(0, 3, 3), requires_grad=True),
 tensor([], size=(0, 3, 3), requires_grad=True),
 tensor([], size=(0, 3, 3), requires_grad=True),
 tensor([], size=(0, 3, 3), requires_grad=True),
 tensor([], size=(0, 3, 3), requires_grad=True),
 tensor([], size=(0, 3, 3), requires_grad=True),
 tensor([], size=(0, 3, 3), requires_grad=True),
 tensor([], size=(0, 3, 3), requires_grad=True),
 tensor([], size=(0, 3, 3), requires_grad=True),
 tensor([], size=(0, 3, 3), requires_grad=True),
 tensor([], size=(0, 3, 3), requires_grad=True),
 tensor([], size=(0, 3, 3), requires_grad=True),
 tensor([], size=(0,