In [234]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from sklearn.model_selection import train_test_split 
# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/digit-recognizer/sample_submission.csv
/kaggle/input/digit-recognizer/train.csv
/kaggle/input/digit-recognizer/test.csv


In [235]:
train = pd.read_csv('/kaggle/input/digit-recognizer/train.csv')
test = pd.read_csv('/kaggle/input/digit-recognizer/test.csv')
submission=pd.read_csv('/kaggle/input/digit-recognizer/sample_submission.csv')
submission

Unnamed: 0,ImageId,Label
0,1,0
1,2,0
2,3,0
3,4,0
4,5,0
...,...,...
27995,27996,0
27996,27997,0
27997,27998,0
27998,27999,0


In [236]:
y=train["label"].to_numpy()
x=train.drop(columns=['label']).to_numpy()
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.20, random_state=0)

In [237]:
class Linear:
    def __init__(self, input_dim, output_dim):
        """
        Initialize the Linear layer with random weights and zero biases.

        Parameters:
        input_dim (int): Dimension of the input data.
        output_dim (int): Dimension of the output data.
        """
        self.W = np.random.randn(input_dim, output_dim) * 0.01
        self.b = np.zeros((1, output_dim))

    def forward(self, X):
        """
        Perform the forward pass of the Linear layer.

        Parameters:
        X (numpy.ndarray): Input data.

        Returns:
        numpy.ndarray: Output of the forward pass.
        """
        self.X = X
        return np.dot(X, self.W) + self.b

    def backward(self, dA):
        """
        Perform the backward pass of the Linear layer.

        Parameters:
        dA (numpy.ndarray): Gradient of the loss with respect to the output.

        Returns:
        numpy.ndarray: Gradient of the loss with respect to the input.
        """
        m = self.X.shape[0]
        self.dW = np.dot(self.X.T, dA) / m
        self.db = np.sum(dA, axis=0, keepdims=True) / m
        return np.dot(dA, self.W.T)

In [239]:
class ReLu:
    def forward(self, inputs):
        """
        Compute the forward pass of the ReLU activation function.

        Args:
            inputs (numpy.ndarray): The input array for the ReLU activation.

        Returns:
            numpy.ndarray: The output of the ReLU activation (same shape as input).
        """
        # Store the input for use in backward pass
        self.input = inputs
        # Compute the ReLU activation: max(0, input)
        self.output = np.maximum(0, inputs)
        return self.output
        
    def backward(self, gradient_output):
        """
        Compute the backward pass (gradient) of the ReLU activation function.

        Args:
            gradient_output (numpy.ndarray): The gradient of the loss with respect to the output of this layer.

        Returns:
            numpy.ndarray: The gradient of the loss with respect to the input of this layer.
        """
        # Compute the gradient of the ReLU function
        # If the input was positive, the gradient is 1; otherwise, it is 0
        self.diffv = np.where(self.input > 0, gradient_output, 0)
        return self.diffv


class Sigmoid:
    def forward(self, input):
        """
        Compute the forward pass of the Sigmoid activation function.

        Args:
            input (numpy.ndarray): The input array for the Sigmoid activation.

        Returns:
            numpy.ndarray: The output of the Sigmoid activation (same shape as input).
        """
        # Compute the Sigmoid activation: 1 / (1 + exp(-input))
        self.output = 1 / (1 + np.exp(-input))
        return self.output
        
    def backward(self, gradient_output):
        """
        Compute the backward pass (gradient) of the Sigmoid activation function.

        Args:
            gradient_output (numpy.ndarray): The gradient of the loss with respect to the output of this layer.

        Returns:
            numpy.ndarray: The gradient of the loss with respect to the input of this layer.
        """
        # Compute the gradient of the Sigmoid function
        # The gradient is: gradient_output * (1 - output) * output
        self.diffv = gradient_output * (1 - self.output) * self.output
        return self.diffv


class Tanh:
    def forward(self, input):
        """
        Compute the forward pass of the Tanh activation function.

        Args:
            input (numpy.ndarray): The input array for the Tanh activation.

        Returns:
            numpy.ndarray: The output of the Tanh activation (same shape as input).
        """
        # Store the input for use in backward pass
        self.input = input
        # Compute the Tanh activation: np.tanh(input)
        self.output = np.tanh(self.input)
        return self.output
        
    def backward(self, gradient_output):
        """
        Compute the backward pass (gradient) of the Tanh activation function.

        Args:
            gradient_output (numpy.ndarray): The gradient of the loss with respect to the output of this layer.

        Returns:
            numpy.ndarray: The gradient of the loss with respect to the input of this layer.
        """
        # Compute the gradient of the Tanh function
        # The gradient is: gradient_output * (1 - np.power(output, 2))
        self.diffv = gradient_output * (1.0 - np.power(self.output, 2))
        return self.diffv


In [240]:
class Softmax:
    def forward(self, inputs):
        """
        Perform the forward pass of the Softmax activation function.

        Args:
            inputs (np.ndarray): Input data of shape (batch_size, num_classes).

        Returns:
            np.ndarray: Output data after applying the Softmax activation function, of the same shape as the input.
        """
        # Compute exponentiated values, with numerical stability
        exp_inputs = np.exp(inputs - np.max(inputs, axis=1, keepdims=True))
        # normalize by the sum of all the exp values
        self.outputs = exp_inputs / np.sum(exp_inputs, axis=1, keepdims=True)
        return self.outputs

    def backward(self, d_outputs):
        """
        Perform the backward pass of the Softmax activation function to compute gradients.

        Args:
            d_outputs (np.ndarray): Gradient of the loss with respect to the output of this layer, of shape (batch_size, num_classes).

        Returns:
            np.ndarray: Gradient of the loss with respect to the input of this layer, of the same shape as the input.
        """
        batch_size = self.outputs.shape[0]
        num_classes = self.outputs.shape[1]

        # Initialize gradient of the input
        d_inputs = np.zeros_like(d_outputs)

        # Compute gradients for each sample in the batch
        for i in range(batch_size):
            single_output = self.outputs[i].reshape(-1, 1)
            single_grad_output = d_outputs[i]

            # Jacobian matrix for the softmax function
            jacobian_matrix = np.diagflat(single_output) - np.dot(single_output, single_output.T)
            # Compute gradient for the current sample
            d_inputs[i] = np.dot(jacobian_matrix, single_grad_output)

        return d_inputs


In [241]:
class CrossEntropyLoss:
    def forward(self, targets, predictions):
        """
        Perform the forward pass of the Cross-Entropy Loss function.

        Args:
            targets (np.ndarray): True labels, one-hot encoded, of shape (batch_size, num_classes).
            predictions (np.ndarray): Predicted probabilities, of shape (batch_size, num_classes).

        Returns:
            float: The computed cross-entropy loss.
        """
        # Ensure numerical stability by subtracting the max value from predictions
        p_max = np.max(predictions, axis=1, keepdims=True)
        exps = np.exp(predictions - p_max)
        self.softmax = exps / np.sum(exps, axis=1, keepdims=True)
        self.targets = targets

        # Compute the loss using the cross-entropy formula
        batch_size = predictions.shape[0]
        self.loss = -np.sum(targets * np.log(self.softmax + 1e-10)) / batch_size  # Add epsilon for numerical stability
        return self.loss

    def backward(self):
        """
        Perform the backward pass of the Cross-Entropy Loss function to compute gradients.

        Returns:
            np.ndarray: Gradient of the loss with respect to the predictions, of shape (batch_size, num_classes).
        """
        batch_size = self.targets.shape[0]
        # Compute gradient of the loss with respect to the predictions
        d_predictions = (self.softmax - self.targets) / batch_size
        return d_predictions



In [252]:
class SGD:
    def __init__(self, learning_rate):
        """
        Initialize the Stochastic Gradient Descent (SGD) optimizer.

        Args:
            learning_rate (float): Learning rate for the optimizer.
        """
        self.learning_rate = learning_rate

    def step(self, layers):
        """
        Perform a single optimization step by updating the weights and biases of the given layers.

        Args:
            layers (list of objects): List of layers in the network. Each layer should have attributes
                                      `W` (weights), `b` (biases), `dW` (gradient of weights), and `db` (gradient of biases).

        This method iterates over each layer and updates its weights and biases using the computed gradients.
        """
        for layer in layers:
            if hasattr(layer, 'W'):
                # Update weights and biases for layers with weight and bias attributes
                layer.W -= self.learning_rate * layer.dW
                layer.b -= self.learning_rate * layer.db

class one_hot:
    def __init__(self,num_classes):
        self.num_classes=num_classes
        
    
    def one_hot_to_label(self,one_hot_matrix):
        """
        Convert a one-hot encoded matrix to class labels.

        Args:
            y_one_hot (np.ndarray): One-hot encoded array of shape (num_samples, num_classes).

        Returns:
            np.ndarray: Array of class labels of shape (num_samples,).
        """
        return np.argmax(one_hot_matrix, axis=1)

    def convert_to_one_hot(self,vector):
        """
        Convert a vector of integer class labels to one-hot encoded format.

        Args:
            vector (np.ndarray): 1-D array of integer class labels, shape (num_samples,).
            num_classes (int, optional): Number of classes. If None, it is set to the maximum value in the vector + 1.

        Returns:
            np.ndarray: 2-D array of one-hot encoded labels, shape (num_samples, num_classes).
        """
        result = np.zeros((len(vector), self.num_classes), dtype=int)
        result[np.arange(len(vector)), vector] = 1
        return result
  

In [251]:
class Model:
    def __init__(self):
        """
        Initialize the Model class.
        This class manages the layers, loss function, and optimizer for training and inference.
        """
        self.layers = []

    def add_layer(self, layer):
        """
        Add a layer to the model.

        Args:
            layer (object): A layer object that has `forward` and `backward` methods. The layer should 
                            also have attributes like `W` and `b` if it contains learnable parameters.
        """
        self.layers.append(layer)

    def compile(self, loss, optimizer):
        """
        Compile the model by specifying the loss function and optimizer.

        Args:
            loss (object): An instance of a loss class that has `forward` and `backward` methods.
            optimizer (object): An instance of an optimizer class that has a `step` method.
        """
        self.loss = loss
        self.optimizer = optimizer

    def forward(self, X):
        """
        Perform a forward pass through the model.

        Args:
            X (np.ndarray): Input data of shape (batch_size, ...).

        Returns:
            np.ndarray: The output of the model after passing through all layers.
        """
        for layer in self.layers:
            X = layer.forward(X)
        return X

    def backward(self, dA):
        """
        Perform a backward pass through the model to compute gradients.

        Args:
            dA (np.ndarray): Gradient of the loss with respect to the model's output.

        Returns:
            None: The method updates the gradients of the layers in place.
        """
        for layer in reversed(self.layers):
            dA = layer.backward(dA)

    def train(self, X, y, epochs, batch_size):
        """
        Train the model using mini-batch gradient descent.

        Args:
            X (np.ndarray): Training data of shape (num_samples, ...).
            y (np.ndarray): True labels, one-hot encoded, of shape (num_samples, num_classes).
            epochs (int): Number of training epochs.
            batch_size (int): Size of each mini-batch.
        """
        num_samples = X.shape[0]
        for epoch in range(epochs):
            for i in range(0, num_samples, batch_size):
                X_batch = X[i:i+batch_size]
                y_batch = y[i:i+batch_size]

                # Forward pass
                y_pred = self.forward(X_batch)

                # Compute loss
                loss = self.loss.forward(y_batch, y_pred)

                # Backward pass
                dA = self.loss.backward()
                self.backward(dA)

                # Update weights and biases
                self.optimizer.step(self.layers)

            print(f'Epoch {epoch+1}/{epochs}, Loss: {loss:.4f}')

    def predict(self, X):
        """
        Make predictions using the trained model.

        Args:
            X (np.ndarray): Input data of shape (num_samples, ...).

        Returns:
            np.ndarray: Predicted probabilities of shape (num_samples, num_classes).
        """
        return self.forward(X)

    def evaluate(self, X, y):
        """
        Evaluate the model on a test set.

        Args:
            X (np.ndarray): Test data of shape (num_samples, ...).
            y (np.ndarray): True labels, one-hot encoded, of shape (num_samples, num_classes).

        Returns:
            tuple: A tuple containing:
                - np.ndarray: Predicted probabilities of shape (num_samples, num_classes).
                - float: Loss value on the test set.
                - float: Accuracy percentage on the test set.
        """
        y_pred = self.predict(X)
        loss = self.loss.forward(y, y_pred)
        accuracy = np.mean(np.argmax(y_pred, axis=1) == np.argmax(y, axis=1)) * 100
        return loss, accuracy

    def save(self, path):
        """
        Save the model weights to a file.

        Args:
            path (str): Path to the file where the model weights and bias will be saved.
        """
        weights_bias = {}
        for i, layer in enumerate(self.layers):
            if hasattr(layer, 'W'):
                weights_bias[f'W{i+1}'] = layer.W
                weights_bias[f'b{i+1}'] = layer.b
        np.savez(path, **weights_bias)

    def load(self, path):
        """
        Load a model from a file.

        Args:
            path (str): Path to the file where the model is saved.

        Returns:
            Model: The loaded model
        """
        data = np.load(path)
        weights_bias = {key: data[key] for key in data.files}
        
        for i, layer in enumerate(self.layers):
            if hasattr(layer, 'W'):
                key_W = f'W{i+1}'
                key_b = f'b{i+1}'
                layer.W = weights_bias[key_W]
                layer.b = weights_bias[key_b]

In [253]:
model = Model()
model.add_layer(Linear(784, 128))
model.add_layer(ReLU())
model.add_layer(Linear(128, 10))
model.add_layer(Softmax())


loss = CrossEntropyLoss()
optimizer = SGD(learning_rate=.1)
model.compile(loss, optimizer)
one_hot=one_hot(10)

y_train_one_hot=one_hot.convert_to_one_hot(y_train)
y_test_one_hot=one_hot.convert_to_one_hot(y_test)
# Assume x_train, y_train, x_test, y_test are preprocessed and available
model.train(x_train, y_train_one_hot, epochs=20, batch_size=64)
test_loss, test_accuracy = model.evaluate(x_test, y_test_one_hot)
print(f'Test Loss: {test_loss}, Test Accuracy: {test_accuracy}')
model.train(x_test,y_test_one_hot,epochs=20,batch_size=42)
test_loss, test_accuracy  = model.evaluate(x_test, y_test_one_hot)
print(f'Test Loss: {test_loss}, Test Accuracy: {test_accuracy}')
test_array=model.predict(test)
y_ans=one_hot.one_hot_to_label(test_array)
ans=pd.DataFrame({
    'ImageId':submission["ImageId"].to_numpy(),
    'Label':y_ans
})

Epoch 1/20, Loss: 1.5449
Epoch 2/20, Loss: 1.5137
Epoch 3/20, Loss: 1.5102
Epoch 4/20, Loss: 1.5088
Epoch 5/20, Loss: 1.5077
Epoch 6/20, Loss: 1.5047
Epoch 7/20, Loss: 1.4998
Epoch 8/20, Loss: 1.4946
Epoch 9/20, Loss: 1.4926
Epoch 10/20, Loss: 1.4909
Epoch 11/20, Loss: 1.4884
Epoch 12/20, Loss: 1.4865
Epoch 13/20, Loss: 1.4857
Epoch 14/20, Loss: 1.4851
Epoch 15/20, Loss: 1.4840
Epoch 16/20, Loss: 1.4822
Epoch 17/20, Loss: 1.4812
Epoch 18/20, Loss: 1.4810
Epoch 19/20, Loss: 1.4810
Epoch 20/20, Loss: 1.4807
Test Loss: 1.5018744449324075, Test Accuracy: 96.16666666666667
Epoch 1/20, Loss: 1.4811
Epoch 2/20, Loss: 1.4688
Epoch 3/20, Loss: 1.4616
Epoch 4/20, Loss: 1.4615
Epoch 5/20, Loss: 1.4619
Epoch 6/20, Loss: 1.4623
Epoch 7/20, Loss: 1.4620
Epoch 8/20, Loss: 1.4619
Epoch 9/20, Loss: 1.4618
Epoch 10/20, Loss: 1.4619
Epoch 11/20, Loss: 1.4616
Epoch 12/20, Loss: 1.4616
Epoch 13/20, Loss: 1.4615
Epoch 14/20, Loss: 1.4614
Epoch 15/20, Loss: 1.4614
Epoch 16/20, Loss: 1.4614
Epoch 17/20, Loss:

In [254]:
ans




Unnamed: 0,ImageId,Label
0,1,2
1,2,0
2,3,9
3,4,9
4,5,3
...,...,...
27995,27996,9
27996,27997,7
27997,27998,3
27998,27999,9


In [255]:
ans.to_csv('/kaggle/working/submission.csv',index=False)

In [256]:
path="/kaggle/working/model.npz"
model.save(path)

In [257]:
model1=Model()
model1.add_layer(Linear(784, 128))
model1.add_layer(ReLU())
model1.add_layer(Linear(128, 10))
model1.add_layer(Softmax())
model1.load("/kaggle/working/model.npz")

In [258]:
loss = CrossEntropyLoss()
optimizer = SGD(learning_rate=.01)
model1.compile(loss, optimizer)
test_loss, test_accuracy  = model1.evaluate(x_test, y_test_one_hot)
print(f'Test Loss: {test_loss}, Test Accuracy: {test_accuracy}')

Test Loss: 1.4756042990686693, Test Accuracy: 98.59523809523809
