In [178]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import pickle
from sklearn.model_selection import train_test_split 
# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/digit-recognizer/sample_submission.csv
/kaggle/input/digit-recognizer/train.csv
/kaggle/input/digit-recognizer/test.csv


In [179]:
train = pd.read_csv('/kaggle/input/digit-recognizer/train.csv')
test = pd.read_csv('/kaggle/input/digit-recognizer/test.csv')
submission=pd.read_csv('/kaggle/input/digit-recognizer/sample_submission.csv')
submission

Unnamed: 0,ImageId,Label
0,1,0
1,2,0
2,3,0
3,4,0
4,5,0
...,...,...
27995,27996,0
27996,27997,0
27997,27998,0
27998,27999,0


In [180]:
y=train["label"].to_numpy()
x=train.drop(columns=['label']).to_numpy()
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.20, random_state=0)

In [181]:
import numpy as np

class Linear:
    def __init__(self, input_dim, output_dim):
        
        self.W = np.random.randn(input_dim, output_dim) * 0.01
        self.b = np.zeros((1, output_dim))

    def forward(self, X):
        self.X = X
        return np.dot(X, self.W) + self.b

    def backward(self, dA):
        m = self.X.shape[0]
        self.dW = np.dot(self.X.T, dA) / m
        self.db = np.sum(dA, axis=0, keepdims=True) / m
        return  np.dot(dA, self.W.T)

In [182]:
class ReLU:
    def forward(self, X):
        self.X = X
        
        return np.maximum(0, X)

    def backward(self, dA):
        dX = dA.copy()
        dX[self.X <= 0] = 0
        return dX


In [183]:
class Sigmoid:
    def forward(self, X):
        self.A = 1 / (1 + np.exp(-X))
        return self.A

    def backward(self, dA):
        
        return dA * self.A * (1 - self.A)


In [184]:
import numpy as np

class Softmax:
    def forward(self, inputs):
        """
        Perform the forward pass of the Softmax activation function.

        Args:
            inputs (np.ndarray): Input data of shape (batch_size, num_classes).

        Returns:
            np.ndarray: Output data after applying the Softmax activation function, of the same shape as the input.
        """
        # Compute exponentiated values, with numerical stability
        exp_inputs = np.exp(inputs - np.max(inputs, axis=1, keepdims=True))
        # normalize by the sum of all the exp values
        self.outputs = exp_inputs / np.sum(exp_inputs, axis=1, keepdims=True)
        return self.outputs

    def backward(self, d_outputs):
        """
        Perform the backward pass of the Softmax activation function to compute gradients.

        Args:
            d_outputs (np.ndarray): Gradient of the loss with respect to the output of this layer, of shape (batch_size, num_classes).

        Returns:
            np.ndarray: Gradient of the loss with respect to the input of this layer, of the same shape as the input.
        """
        batch_size = self.outputs.shape[0]
        num_classes = self.outputs.shape[1]

        # Initialize gradient of the input
        d_inputs = np.zeros_like(d_outputs)

        # Compute gradients for each sample in the batch
        for i in range(batch_size):
            single_output = self.outputs[i].reshape(-1, 1)
            single_grad_output = d_outputs[i]

            # Jacobian matrix for the softmax function
            jacobian_matrix = np.diagflat(single_output) - np.dot(single_output, single_output.T)
            # Compute gradient for the current sample
            d_inputs[i] = np.dot(jacobian_matrix, single_grad_output)

        return d_inputs


In [185]:
class CrossEntropyLoss:
    def forward(self, targets, predictions):
        """
        Perform the forward pass of the Cross-Entropy Loss function.

        Args:
            targets (np.ndarray): True labels, one-hot encoded, of shape (batch_size, num_classes).
            predictions (np.ndarray): Predicted probabilities, of shape (batch_size, num_classes).

        Returns:
            float: The computed cross-entropy loss.
        """
        # Ensure numerical stability by subtracting the max value from predictions
        p_max = np.max(predictions, axis=1, keepdims=True)
        exps = np.exp(predictions - p_max)
        self.softmax = exps / np.sum(exps, axis=1, keepdims=True)
        self.targets = targets

        # Compute the loss using the cross-entropy formula
        batch_size = predictions.shape[0]
        self.loss = -np.sum(targets * np.log(self.softmax + 1e-10)) / batch_size  # Add epsilon for numerical stability
        return self.loss

    def backward(self):
        """
        Perform the backward pass of the Cross-Entropy Loss function to compute gradients.

        Returns:
            np.ndarray: Gradient of the loss with respect to the predictions, of shape (batch_size, num_classes).
        """
        batch_size = self.targets.shape[0]
        # Compute gradient of the loss with respect to the predictions
        d_predictions = (self.softmax - self.targets) / batch_size
        return d_predictions



In [186]:
class SGD:
    def __init__(self, learning_rate):
        """
        Initialize the Stochastic Gradient Descent (SGD) optimizer.

        Args:
            learning_rate (float): Learning rate for the optimizer.
        """
        self.learning_rate = learning_rate

    def step(self, layers):
        """
        Perform a single optimization step by updating the weights and biases of the given layers.

        Args:
            layers (list of objects): List of layers in the network. Each layer should have attributes
            `W` (weights), `b` (biases), `dW` (gradient of weights), and `db` (gradient of biases).

        This method iterates over each layer and updates its weights and biases using the computed gradients.
        """
        for layer in layers:
            if hasattr(layer, 'W') and hasattr(layer, 'dW'):
                # Update weights and biases for layers with weight and bias attributes
                layer.W -= self.learning_rate * layer.dW
                layer.b -= self.learning_rate * layer.db

class one_hot:
    def __init__(self,num_classes):
        self.num_classes=num_classes
        
    
    def one_hot_to_label(self,one_hot_matrix):
        """
        Convert a one-hot encoded matrix to class labels.

        Args:
            y_one_hot (np.ndarray): One-hot encoded array of shape (num_samples, num_classes).

        Returns:
            np.ndarray: Array of class labels of shape (num_samples,).
        """
        return np.argmax(one_hot_matrix, axis=1)

    def convert_to_one_hot(self,vector):
        """
        Convert a vector of integer class labels to one-hot encoded format.

        Args:
            vector (np.ndarray): 1-D array of integer class labels, shape (num_samples,).
            num_classes (int, optional): Number of classes. If None, it is set to the maximum value in the vector + 1.

        Returns:
            np.ndarray: 2-D array of one-hot encoded labels, shape (num_samples, num_classes).
        """
        result = np.zeros((len(vector), self.num_classes), dtype=int)
        result[np.arange(len(vector)), vector] = 1
        return result
  

In [187]:
class Model:
    def __init__(self):
        """
        Initialize the Model class.
        This class manages the layers, loss function, and optimizer for training and inference.
        """
        self.layers = []

    def add_layer(self, layer):
        """
        Add a layer to the model.

        Args:
            layer (object): A layer object that has `forward` and `backward` methods. The layer should 
                            also have attributes like `W` and `b` if it contains learnable parameters.
        """
        self.layers.append(layer)

    def compile(self, loss, optimizer):
        """
        Compile the model by specifying the loss function and optimizer.

        Args:
            loss (object): An instance of a loss class that has `forward` and `backward` methods.
            optimizer (object): An instance of an optimizer class that has a `step` method.
        """
        self.loss = loss
        self.optimizer = optimizer

    def forward(self, X):
        """
        Perform a forward pass through the model.

        Args:
            X (np.ndarray): Input data of shape (batch_size, ...).

        Returns:
            np.ndarray: The output of the model after passing through all layers.
        """
        for layer in self.layers:
            X = layer.forward(X)
        return X

    def backward(self, dA):
        """
        Perform a backward pass through the model to compute gradients.

        Args:
            dA (np.ndarray): Gradient of the loss with respect to the model's output.

        Returns:
            None: The method updates the gradients of the layers in place.
        """
        for layer in reversed(self.layers):
            dA = layer.backward(dA)

    def train(self, X, y, epochs, batch_size):
        """
        Train the model using mini-batch gradient descent.

        Args:
            X (np.ndarray): Training data of shape (num_samples, ...).
            y (np.ndarray): True labels, one-hot encoded, of shape (num_samples, num_classes).
            epochs (int): Number of training epochs.
            batch_size (int): Size of each mini-batch.
        """
        num_samples = X.shape[0]
        for epoch in range(epochs):
            for i in range(0, num_samples, batch_size):
                X_batch = X[i:i+batch_size]
                y_batch = y[i:i+batch_size]

                # Forward pass
                y_pred = self.forward(X_batch)

                # Compute loss
                loss = self.loss.forward(y_batch, y_pred)

                # Backward pass
                dA = self.loss.backward()
                self.backward(dA)

                # Update weights and biases
                self.optimizer.step(self.layers)

            print(f'Epoch {epoch+1}/{epochs}, Loss: {loss:.4f}')

    def predict(self, X):
        """
        Make predictions using the trained model.

        Args:
            X (np.ndarray): Input data of shape (num_samples, ...).

        Returns:
            np.ndarray: Predicted probabilities of shape (num_samples, num_classes).
        """
        return self.forward(X)

    def evaluate(self, X, y):
        """
        Evaluate the model on a test set.

        Args:
            X (np.ndarray): Test data of shape (num_samples, ...).
            y (np.ndarray): True labels, one-hot encoded, of shape (num_samples, num_classes).

        Returns:
            tuple: A tuple containing:
                - np.ndarray: Predicted probabilities of shape (num_samples, num_classes).
                - float: Loss value on the test set.
                - float: Accuracy percentage on the test set.
        """
        y_pred = self.predict(X)
        loss = self.loss.forward(y, y_pred)
        accuracy = np.mean(np.argmax(y_pred, axis=1) == np.argmax(y, axis=1)) * 100
        return y_pred, loss, accuracy

    def save(self, path):
        """
        Save the model to a file.

        Args:
            path (str): Path to the file where the model will be saved.
        """
        with open(path, 'wb') as file:
            pickle.dump(self, file)

    def load(path):
        """
        Load a model from a file.

        Args:
            path (str): Path to the file where the model is saved.

        Returns:
            Model: The loaded model.
        """
        with open(path, 'rb') as file:
            return pickle.load(file)


In [188]:
model = Model()
model.add_layer(Linear(784, 128))
model.add_layer(ReLU())
model.add_layer(Linear(128, 10))
model.add_layer(Softmax())


loss = CrossEntropyLoss()
optimizer = SGD(learning_rate=.01)
model.compile(loss, optimizer)
one_hot=one_hot(10)

y_train_one_hot=one_hot.convert_to_one_hot(y_train)
y_test_one_hot=one_hot.convert_to_one_hot(y_test)
# Assume x_train, y_train, x_test, y_test are preprocessed and available
model.train(x_train, y_train_one_hot, epochs=20, batch_size=64)

test_array,test_loss, test_accuracy = model.evaluate(x_test, y_test_one_hot)
print(f'Test Loss: {test_loss}, Test Accuracy: {test_accuracy}')
model.train(x_test,y_test_one_hot,epochs=20,batch_size=42)
test_array,test_loss, test_accuracy  = model.evaluate(x_test, y_test_one_hot)
print(f'Test Loss: {test_loss}, Test Accuracy: {test_accuracy}')
test_array=model.predict(test)
y_ans=one_hot.one_hot_to_label(test_array)
ans=pd.DataFrame({
    'ImageId':submission["ImageId"].to_numpy(),
    'Label':y_ans
})

Epoch 1/20, Loss: 1.9278
Epoch 2/20, Loss: 1.8618
Epoch 3/20, Loss: 1.8317
Epoch 4/20, Loss: 1.8127
Epoch 5/20, Loss: 1.8013
Epoch 6/20, Loss: 1.7943
Epoch 7/20, Loss: 1.7897
Epoch 8/20, Loss: 1.7868
Epoch 9/20, Loss: 1.7848
Epoch 10/20, Loss: 1.7834
Epoch 11/20, Loss: 1.7824
Epoch 12/20, Loss: 1.7816
Epoch 13/20, Loss: 1.7810
Epoch 14/20, Loss: 1.7806
Epoch 15/20, Loss: 1.7801
Epoch 16/20, Loss: 1.7797
Epoch 17/20, Loss: 1.7793
Epoch 18/20, Loss: 1.7789
Epoch 19/20, Loss: 1.7785
Epoch 20/20, Loss: 1.7780
Test Loss: 1.7121698606004518, Test Accuracy: 75.30952380952381
Epoch 1/20, Loss: 1.8155
Epoch 2/20, Loss: 1.8145
Epoch 3/20, Loss: 1.8130
Epoch 4/20, Loss: 1.8114
Epoch 5/20, Loss: 1.8097
Epoch 6/20, Loss: 1.8080
Epoch 7/20, Loss: 1.8064
Epoch 8/20, Loss: 1.8050
Epoch 9/20, Loss: 1.8035
Epoch 10/20, Loss: 1.8021
Epoch 11/20, Loss: 1.8006
Epoch 12/20, Loss: 1.7992
Epoch 13/20, Loss: 1.7977
Epoch 14/20, Loss: 1.7962
Epoch 15/20, Loss: 1.7946
Epoch 16/20, Loss: 1.7930
Epoch 17/20, Loss:

In [189]:
ans

Unnamed: 0,ImageId,Label
0,1,2
1,2,5
2,3,9
3,4,7
4,5,3
...,...,...
27995,27996,9
27996,27997,7
27997,27998,3
27998,27999,9


In [190]:
ans.to_csv('/kaggle/working/submission.csv',index=False)