IMPORTING PACKAGES BELOW-

In [1]:
# creating a neural network for mnist image classification in the DigitRecognizer competition on kaggle
import numpy as np
import pandas as pd
import pickle
from sklearn.model_selection import train_test_split


Linear Layer Class:

Implements a fully connected layer.

Methods: forward, backward.

In [2]:
class Layer:
    def forward(self, inputs):
        """
        Compute the forward pass.
        """
        raise NotImplementedError

    def backward(self, gradient):
        """
        Compute the backward pass.
        """
        raise NotImplementedError

class Linear(Layer):
    """Fully connected layer."""

    def __init__(self, input_size, output_size):
        """
        Initialize the layer.
        """
        self.weights = np.random.randn(input_size, output_size) * 0.01
        self.biases = np.zeros((1, output_size))
        self.inputs = None
        self.gradients = {'weights': None, 'biases': None}

    def forward(self, inputs):
        """Compute forward pass."""
        self.inputs = inputs
        return np.dot(inputs, self.weights) + self.biases

    def backward(self, gradient):
        """Compute backward pass."""
        self.gradients['weights'] = np.dot(self.inputs.T, gradient)
        self.gradients['biases'] = np.sum(gradient, axis=0, keepdims=True)
        return np.dot(gradient, self.weights.T)

ReLU Activation Class:

Implements the ReLU activation function.

Methods: forward, backward.


In [3]:
class ReLU(Layer):
    """ReLU activation layer."""
    
    def __init__(self):
        self.inputs = None
        
    def forward(self, inputs):
        """Apply ReLU activation."""
        self.inputs = inputs
        return np.maximum(0, inputs)

    def backward(self, gradient):
        """Compute gradient of ReLU."""
        return gradient * (self.inputs > 0)


Sigmoid Activation Class:

Implements the Sigmoid activation function.

Methods: forward, backward.

In [4]:
class Sigmoid:
    def __init__(self):
        self.output = None

    def forward(self, inputs):
        """
        Perform the forward pass of the sigmoid function.
        
        :param inputs: Input values (can be a scalar or an array)
        :return: Output of the sigmoid function
        """
        self.output = 1 / (1 + np.exp(-inputs))
        return self.output

    def backward(self, values):
        """
        Perform the backward pass of the sigmoid function.
        
        :param dvalues: Gradient of the loss with respect to the output of the sigmoid
        :return: Gradient of the loss with respect to the input of the sigmoid
        """
        return values * self.output * (1 - self.output)

Tanh Activation Class:

Implements the Tanh activation function.

Methods: forward, backward.

In [5]:
class Tanh:
    def __init__(self):
        self.output = None

    def forward(self, inputs):
        """
        Perform the forward pass of the tanh function.
        
        :param inputs: Input values (can be a scalar or an array)
        :return: Output of the tanh function
        """
        self.output = np.tanh(inputs)
        return self.output

    def backward(self, dvalues):
        """
        Perform the backward pass of the tanh function.
        
        :param dvalues: Gradient of the loss with respect to the output of tanh
        :return: Gradient of the loss with respect to the input of tanh
        """
        return dvalues * (1 - np.square(self.output))

Softmax Activation Class:

Implements the Softmax activation function.

Methods: forward, backward.

In [6]:
class Softmax(Layer):
    """Softmax activation layer."""

    def forward(self, inputs):
        """Apply Softmax activation."""
        exp_values = np.exp(inputs - np.max(inputs, axis=1, keepdims=True))
        self.probabilities = exp_values / np.sum(exp_values, axis=1, keepdims=True)
        return self.probabilities

    def backward(self, gradient):
        """Compute gradient of Softmax."""
        return gradient  # Gradient calculation is handled by CrossEntropyLoss

1. Cross-Entropy Loss Class:

   Implements the cross-entropy loss function. You can use the fusion method described in the PDF as    well. See how nn.CrossEntropyLoss in PyTorch works.

   Methods: forward, backward.

2. Mean Squared Error (MSE) Loss Class:

   Implements the MSE loss function.

   Methods: forward, backward


In [7]:
class Loss:
    """Base class for loss functions."""

    def forward(self, predictions, targets):
        """Compute the loss."""
        raise NotImplementedError

    def backward(self, predictions, targets):
        """Compute the gradient of the loss."""
        raise NotImplementedError
        
class CrossEntropyLoss(Loss):
    """Cross-entropy loss function."""

    def forward(self, predictions, targets):
        """Compute cross-entropy loss."""
        eps = 1e-15  # To avoid log(0) errors
        predictions = np.clip(predictions, eps, 1 - eps)
        return -np.sum(targets * np.log(predictions)) / predictions.shape[0]

    def backward(self, predictions, targets):
        """Compute gradient of cross-entropy loss."""
        eps = 1e-15
        predictions = np.clip(predictions, eps, 1 - eps)
        return (predictions - targets) / predictions.shape[0]
    
class MSELoss:
    def __init__(self):
        self.dinputs = None
        self.y_pred = None
        self.y_true = None

    def forward(self, y_pred, y_true):
        """
        Compute the Mean Squared Error loss.

        :param y_pred: Predicted values
        :param y_true: True values
        :return: MSE loss
        """
        self.y_pred = y_pred
        self.y_true = y_true
        
        # Calculate loss
        sample_losses = np.mean((y_true - y_pred)**2, axis=-1)
        return np.mean(sample_losses)

    def backward(self, dvalues=1):
        """
        Compute the gradient of the loss with respect to the inputs.

        :param dvalues: Gradient of the loss with respect to the output of this layer.
                        Usually 1 unless we're using this as part of a larger network.
        :return: Gradient of the loss with respect to the inputs (y_pred)
        """
        # Number of samples
        samples = len(self.y_pred)
        # Number of outputs in every sample
        outputs = len(self.y_pred[0])
        
        # Gradient on predictions
        self.dinputs = -2 * (self.y_true - self.y_pred) / outputs
        # Normalize gradient
        self.dinputs = self.dinputs / samples
        
        return self.dinputs

SGD Optimizer Class:

Implements the stochastic gradient descent optimizer.

Methods: step.


In [8]:
class SGD:
    """Stochastic Gradient Descent optimizer."""

    def __init__(self, learning_rate):
        """
        Initialize the optimizer.
        """
        self.learning_rate = learning_rate

    def step(self, layer):
        """
        Perform a single optimization step.
        """
        layer.weights -= self.learning_rate * layer.gradients['weights']
        layer.biases -= self.learning_rate * layer.gradients['biases']

Model Class:

Wraps everything into a cohesive model.

Methods: add_layer, compile, train, predict, evaluate, save, and load.

In [9]:
class Model:
    """Neural network model."""

    def __init__(self):
        """Initialize the model."""
        self.layers = []
        self.loss = None
        self.optimizer = None

    def add_layer(self, layer):
        """Add a layer to the model."""
        self.layers.append(layer)

    def compile(self, loss, optimizer):
        """Compile the model."""
        self.loss = loss
        self.optimizer = optimizer

    def forward(self, inputs):
        """Perform forward pass through all layers."""
        for layer in self.layers:
            inputs = layer.forward(inputs)
        return inputs

    def backward(self, gradient):
        """Perform backward pass through all layers."""
        for layer in reversed(self.layers):
            gradient = layer.backward(gradient)

    def train(self, X, y, epochs, batch_size):
        """Train the model."""
        for epoch in range(epochs):
            epoch_loss = 0
            indices = np.arange(X.shape[0])
            np.random.shuffle(indices)
            X = X[indices]
            y = y[indices]

            for i in range(0, len(X), batch_size):
                X_batch = X[i:i+batch_size]
                y_batch = y[i:i+batch_size]

                predictions = self.forward(X_batch)
                batch_loss = self.loss.forward(predictions, y_batch)
                epoch_loss += batch_loss

                gradient = self.loss.backward(predictions, y_batch)
                self.backward(gradient)

                for layer in self.layers:
                    if hasattr(layer, 'weights') and hasattr(layer, 'biases'):
                        self.optimizer.step(layer)

            print(f"Epoch {epoch+1}/{epochs}, Loss: {epoch_loss/len(X):.4f}")

    def predict(self, X):
        """Make predictions on new data."""
        return self.forward(X)

    def evaluate(self, X, y):
        """Evaluate the model."""
        predictions = self.predict(X)
        loss = self.loss.forward(predictions, y)
        accuracy = np.mean(np.argmax(predictions, axis=1) == np.argmax(y, axis=1))
        return loss, accuracy
    
    def save(self, filepath):
        """Save the model weights to a file."""
        weights = []
        for layer in self.layers:
            if hasattr(layer, 'weights') and hasattr(layer, 'biases'):
                weights.append((layer.weights, layer.biases))
        with open(filepath, 'wb') as f:
            pickle.dump(weights, f)
        print(f"Weights saved to {filepath}")

    def load(self, filepath):
        """Load the model weights from a file."""
        with open(filepath, 'rb') as f:
            weights = pickle.load(f)
    
        weight_index = 0  # Counter for the weights list
    
        for layer in self.layers:
            if hasattr(layer, 'weights') and hasattr(layer, 'biases'):
                try:
                    layer.weights, layer.biases = weights[weight_index]
                    weight_index += 1  # Increment the counter only if weights were loaded
                except IndexError:
                    print(f"Error: Mismatch in the number of layers. Could not load weights for layer {layer}.")
                    break
        print(f"Weights loaded from {filepath}")

One hot encoding - 

In [10]:
def one_hot_encode(y, num_classes):
    return np.eye(num_classes)[y.astype(int).reshape(-1)]

Loading and preprocessing data - 


In [11]:
training_data = pd.read_csv('/kaggle/input/digit-recognizer/train.csv')
X_train = training_data.iloc[:, 1:].values
y_train = training_data.iloc[:, 0].values

X_train = X_train.astype('float32') / 255
y_train = y_train.astype('int')

# One-hot encode the labels
y_train = one_hot_encode(y_train, 10)

Initializing, compiling and training the model - 


In [12]:
model = Model()
model.add_layer(Linear(784, 128))
model.add_layer(ReLU())
model.add_layer(Linear(128, 10))
model.add_layer(Softmax())

loss = CrossEntropyLoss()
optimizer = SGD(learning_rate=0.01)
model.compile(loss, optimizer)

# Train the model
model.train(X_train, y_train, epochs=20, batch_size=64)

Epoch 1/20, Loss: 0.0321
Epoch 2/20, Loss: 0.0150
Epoch 3/20, Loss: 0.0090
Epoch 4/20, Loss: 0.0071
Epoch 5/20, Loss: 0.0063
Epoch 6/20, Loss: 0.0058
Epoch 7/20, Loss: 0.0054
Epoch 8/20, Loss: 0.0052
Epoch 9/20, Loss: 0.0050
Epoch 10/20, Loss: 0.0048
Epoch 11/20, Loss: 0.0047
Epoch 12/20, Loss: 0.0045
Epoch 13/20, Loss: 0.0044
Epoch 14/20, Loss: 0.0043
Epoch 15/20, Loss: 0.0042
Epoch 16/20, Loss: 0.0041
Epoch 17/20, Loss: 0.0040
Epoch 18/20, Loss: 0.0039
Epoch 19/20, Loss: 0.0038
Epoch 20/20, Loss: 0.0037


Now , it's time for the Test dataset -

In [13]:
test_data = pd.read_csv('/kaggle/input/digit-recognizer/test.csv')
X_test = np.array(test_data)
X_test = X_test.astype('float32') / 255
y_predicted = model.predict(X_test)
y_predicted = np.argmax(y_predicted, axis=1)
print(y_predicted.T.shape)

(28000,)


Creating a csv file with the Image ID and the label for submission in the competition -  

In [14]:
import csv
with open('predictions.csv', 'w') as fileObj:
    writerObj = csv.writer(fileObj)
    writerObj.writerow(['ImageId','Label'])
    for i in range(1,28001):
        writerObj.writerow([i, y_predicted[i-1]])

Saving the model - 

In [15]:
model.save('/kaggle/working/model_weights')

Weights saved to /kaggle/working/model_weights


Loading the weights and biases - 

In [16]:
model.load('/kaggle/working/model_weights')

Weights loaded from /kaggle/working/model_weights
