In [5]:
# Imports
import numpy as np
import torchvision
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.utils.data import DataLoader
from torch import nn

In [6]:
# helper functions
def sigmoid(x):
    """Logistic sigmoid ``1 / (1 + exp(-x))``, applied element-wise.

    The value is evaluated as ``exp(-log(1 + exp(-x)))`` through
    ``np.logaddexp`` so that large negative inputs do not overflow ``exp``
    (the naive formula triggers a RuntimeWarning in that regime).

    Args:
        x: Scalar or NumPy array of real numbers.

    Returns:
        Sigmoid of ``x``, same shape as ``x``, with values in [0, 1].
    """
    # logaddexp(0, -x) == log(exp(0) + exp(-x)), computed without overflow.
    return np.exp(-np.logaddexp(0, -x))

def sigmoid_derivative(x):
    """Derivative of the sigmoid expressed through its *output*.

    Note that ``x`` is expected to already be a sigmoid activation
    ``s = sigmoid(z)`` (not the pre-activation ``z``); the derivative with
    respect to ``z`` is then ``s * (1 - s)``.

    Args:
        x: Sigmoid activation(s), scalar or NumPy array.

    Returns:
        ``x * (1 - x)``, same shape as ``x``.
    """
    complement = 1 - x
    return x * complement

def softmax(x):
    """Row-wise softmax of a 2-D array.

    Each row has its maximum subtracted before exponentiation so that the
    exponentials cannot overflow; this does not change the result.

    Args:
        x: Array of scores with classes along axis 1.

    Returns:
        Array of the same shape whose rows are non-negative and sum to 1.
    """
    row_max = np.max(x, axis=1, keepdims=True)
    unnormalized = np.exp(x - row_max)
    normalizer = np.sum(unnormalized, axis=1, keepdims=True)
    return unnormalized / normalizer

def compute_loss(y_true, y_pred, eps=1e-12):
    """Mean categorical cross-entropy between targets and predictions.

    For every sample, the predicted probability of the target class (the
    arg-max of the corresponding ``y_true`` row) is selected and its negative
    log is averaged over the batch.

    Args:
        y_true: Array of shape (m, classes); the arg-max of each row is used
            as the target class index (e.g. one-hot labels).
        y_pred: Array of shape (m, classes) with predicted probabilities.
        eps: Lower bound applied to the selected probabilities before taking
            the log, so that a predicted probability of exactly 0 yields a
            large finite loss instead of ``inf``.

    Returns:
        The average negative log-likelihood over the ``m`` samples.
    """
    m = y_true.shape[0]
    target_probs = y_pred[np.arange(m), y_true.argmax(axis=1)]
    # Floor the probabilities: log(0) would give -inf and a divide warning.
    log_likelihood = -np.log(np.maximum(target_probs, eps))
    loss = np.sum(log_likelihood) / m
    return loss

In [7]:
# defining a neural network class
class SimpleNN:
    """Two-layer fully connected classifier trained by full-batch gradient descent.

    The network is ``input -> sigmoid hidden layer -> softmax output``. It
    relies on the module-level helpers ``sigmoid``, ``sigmoid_derivative``,
    ``softmax`` and ``compute_loss``.
    """

    def __init__(self, input_size, hidden_size, output_size, learning_rate, seed=None):
        """Create the parameters of the network.

        Args:
            input_size: Number of input features.
            hidden_size: Number of hidden units.
            output_size: Number of output classes.
            learning_rate: Step size used by ``backward``.
            seed: Optional seed for the weight initialisation. ``None`` (the
                default) draws from NumPy's global random state, exactly as
                before; an integer makes the initial weights reproducible.
        """
        # RandomState(seed).randn matches np.random.seed(seed); np.random.randn.
        rng = np.random if seed is None else np.random.RandomState(seed)

        # Small random weights, zero biases
        self.W1 = rng.randn(input_size, hidden_size) * 0.01
        self.b1 = np.zeros((1, hidden_size))
        self.W2 = rng.randn(hidden_size, output_size) * 0.01
        self.b2 = np.zeros((1, output_size))
        self.learning_rate = learning_rate

    def forward(self, X):
        """Run the forward pass and cache the intermediate activations.

        Args:
            X: Array of shape (m, input_size).

        Returns:
            Softmax output of shape (m, output_size). ``Z1``, ``A1``, ``Z2``
            and ``A2`` are stored on the instance for use by ``backward``.
        """
        self.Z1 = np.dot(X, self.W1) + self.b1
        self.A1 = sigmoid(self.Z1)
        self.Z2 = np.dot(self.A1, self.W2) + self.b2
        self.A2 = softmax(self.Z2)
        return self.A2

    def backward(self, X, y, output):
        """Back-propagate the loss and apply one gradient-descent step in place.

        Must be called after ``forward`` on the same ``X`` because it reads
        the cached hidden activation ``self.A1``.

        Args:
            X: Inputs used for the forward pass, shape (m, input_size).
            y: Targets with the same shape as ``output``.
            output: Value returned by ``forward(X)``.
        """
        m = y.shape[0]
        # Gradient of the loss w.r.t. the output pre-activation (softmax + cross-entropy)
        dZ2 = output - y
        dW2 = np.dot(self.A1.T, dZ2) / m
        db2 = np.sum(dZ2, axis=0, keepdims=True) / m
        # sigmoid_derivative takes the activation A1, not the pre-activation Z1
        dZ1 = np.dot(dZ2, self.W2.T) * sigmoid_derivative(self.A1)
        dW1 = np.dot(X.T, dZ1) / m
        db1 = np.sum(dZ1, axis=0, keepdims=True) / m

        # Update weights and biases
        self.W1 -= self.learning_rate * dW1
        self.b1 -= self.learning_rate * db1
        self.W2 -= self.learning_rate * dW2
        self.b2 -= self.learning_rate * db2

    def train(self, X, y, epochs, log_every=100):
        """Train on the whole of ``X`` for ``epochs`` gradient steps.

        Args:
            X: Training inputs, shape (m, input_size).
            y: Training targets, same shape as the network output.
            epochs: Number of full-batch updates to perform.
            log_every: Print the loss every ``log_every`` epochs. A falsy
                value (``0`` or ``None``) disables logging.
        """
        for epoch in range(epochs):
            output = self.forward(X)
            self.backward(X, y, output)
            # The loss is only needed for the log line, so skip it otherwise.
            # It is computed from the pre-update output of this epoch.
            if log_every and (epoch + 1) % log_every == 0:
                loss = compute_loss(y, output)
                print(f'Epoch {epoch + 1}/{epochs}, Loss: {loss:.4f}')

    def predict(self, X):
        """Return the index of the most probable class for every row of ``X``.

        Note: this runs ``forward`` and therefore overwrites the cached
        activations on the instance.
        """
        output = self.forward(X)
        return np.argmax(output, axis=1)

In [9]:
# Transform each image into a flattened array of 784 features
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Lambda(lambda x: x.view(-1))  # Flatten the tensor to a 784-dimensional vector
])

# Download (if needed) and load the training and test datasets
train_dataset = datasets.MNIST(root='dataset', train=True, transform=transform, download=True)
test_dataset = datasets.MNIST(root='dataset', train=False, transform=transform, download=True)

# Data loaders.
# These loaders are only used to drain each split into NumPy arrays for
# full-batch training, so shuffling brings no benefit; keeping the dataset
# order makes the row order of the resulting arrays reproducible across runs.
train_loader = DataLoader(dataset=train_dataset, batch_size=64, shuffle=False)
test_loader = DataLoader(dataset=test_dataset, batch_size=64, shuffle=False)

# Drain a loader into a pair of NumPy arrays
def load_data(loader):
    """Collect every batch yielded by ``loader`` into two NumPy arrays.

    Args:
        loader: Iterable of ``(images, labels)`` pairs whose elements expose
            a ``.numpy()`` method (e.g. a DataLoader yielding tensors).

    Returns:
        ``(data, targets)``: the image batches stacked vertically and the
        label batches concatenated horizontally, in iteration order.
    """
    # Single pass over the loader; convert each batch as it arrives.
    batches = [(images.numpy(), labels.numpy()) for images, labels in loader]
    data = np.vstack([features for features, _ in batches])
    targets = np.hstack([classes for _, classes in batches])
    return data, targets

# Materialise both splits as NumPy arrays (training split first, then test)
(X_train, y_train), (X_test, y_test) = load_data(train_loader), load_data(test_loader)

# One-hot encode the integer labels by picking rows of a 10x10 identity matrix
y_train, y_test = np.eye(10)[y_train], np.eye(10)[y_test]

In [10]:
%%time
# Hyperparameters for the network and the training run
input_size, hidden_size, output_size = 784, 50, 10
learning_rate = 0.1
epochs = 1000

# NOTE(review): binding the model to `nn` rebinds the name brought in by
# `from torch import nn` in the imports cell. The name is kept because the
# evaluation cell refers to it; consider renaming both together.
nn = SimpleNN(
    input_size=input_size,
    hidden_size=hidden_size,
    output_size=output_size,
    learning_rate=learning_rate,
)
nn.train(X=X_train, y=y_train, epochs=epochs)

Epoch 100/1000, Loss: 2.2874
Epoch 200/1000, Loss: 2.1702
Epoch 300/1000, Loss: 1.7493
Epoch 400/1000, Loss: 1.3145
Epoch 500/1000, Loss: 1.0437
Epoch 600/1000, Loss: 0.8697
Epoch 700/1000, Loss: 0.7481
Epoch 800/1000, Loss: 0.6604
Epoch 900/1000, Loss: 0.5955
Epoch 1000/1000, Loss: 0.5461


In [11]:
%%time
# evaluation
def evaluate(model, X, y):
    """Fraction of samples whose predicted class matches the target class.

    Args:
        model: Object with a ``predict(X)`` method returning class indices.
        X: Inputs forwarded unchanged to ``model.predict``.
        y: Array of targets with classes along axis 1; the arg-max of each
            row is taken as the true class (e.g. one-hot labels).

    Returns:
        Accuracy as a value between 0 and 1.
    """
    predicted_classes = model.predict(X)
    true_classes = y.argmax(axis=1)
    return np.mean(predicted_classes == true_classes)

# Score the trained model on the training split first, then on the test split
train_accuracy, test_accuracy = (
    evaluate(nn, features, labels)
    for features, labels in ((X_train, y_train), (X_test, y_test))
)

# Report both accuracies as percentages with two decimals
print(f'Accuracy on training set: {train_accuracy * 100:.2f}')
print(f'Accuracy on test set: {test_accuracy * 100:.2f}')

Accuracy on training set: 86.23
Accuracy on test set: 86.60
