In [4]:
import numpy as np

In [5]:
#Activation Functions
class sigmoid:
  """Logistic sigmoid activation: sigma(z) = 1 / (1 + exp(-z))."""

  def forward(self, Z):
    """Apply the sigmoid element-wise.

    The value is computed from exp(-|Z|), which never exceeds 1, so np.exp is
    never called with a large positive argument and cannot overflow. The
    naive form 1 / (1 + exp(-Z)) overflows for very negative Z.

    Args:
      Z: pre-activation values (NumPy array or scalar).

    Returns:
      (A, Z): the activations and the unmodified input, which serves as the
      cache consumed by `backward`.
    """
    decay = np.exp(-np.abs(Z))  # exp(-|z|), always within [0, 1]
    # z >= 0:  1 / (1 + e^-z)          -> numerator 1
    # z <  0:  e^z / (1 + e^z)         -> numerator e^z == decay
    A = np.where(Z >= 0, 1.0, decay) / (1 + decay)
    return A, Z

  def backward(self, dA, Z):
    """Back-propagate through the sigmoid.

    Args:
      dA: gradient of the cost with respect to the activations.
      Z: the pre-activation values cached by `forward`.

    Returns:
      dZ = dA * sigma(Z) * (1 - sigma(Z)).
    """
    # The activation is recomputed from Z rather than cached.
    A, _ = self.forward(Z)
    return dA * A * (1 - A)

class Relu:
  """Rectified linear unit activation: max(0, z) element-wise."""

  def forward(self, Z):
    """Return (max(0, Z), Z); the second item is the cache for `backward`."""
    rectified = np.maximum(0, Z)
    return rectified, Z

  def backward(self, dA, Z):
    """Pass dA through where Z > 0 and zero it elsewhere.

    Works on a copy, so the caller's dA is left untouched.
    """
    inactive = Z <= 0
    grad = np.array(dA, copy=True)
    grad[inactive] = 0
    return grad

In [6]:
#Dense Layers
class Layer:
  """Dense (fully connected) layer computing activation(W . A_prev + b)."""

  def __init__(self, size, activation, input_size):
    """Create the layer's parameters.

    Args:
      size: number of units in this layer.
      activation: object exposing forward(Z) -> (A, cache) and
        backward(dA, cache) -> dZ.
      input_size: number of units feeding into this layer.
    """
    self.size = size
    self.activation = activation
    # Small random weights, zero biases.
    self.W = np.random.randn(self.size, input_size) * 0.01
    self.b = np.zeros((self.size, 1))

  def forward(self, A_prev):
    """Run the layer forward.

    Returns:
      (A, cache) where cache = (A_prev, W, b, activation_cache) holds what
      `backward` needs.
    """
    pre_activation = np.dot(self.W, A_prev) + self.b
    output, act_cache = self.activation.forward(pre_activation)
    return output, (A_prev, self.W, self.b, act_cache)

  def backward(self, dA, cache):
    """Compute gradients for this layer.

    Args:
      dA: gradient of the cost with respect to this layer's output.
      cache: the tuple produced by `forward`.

    Returns:
      (dA_prev, dW, db); dW and db are averaged over the columns of A_prev.
    """
    A_prev, W, _, act_cache = cache
    dZ = self.activation.backward(dA, act_cache)

    # Number of columns in A_prev, i.e. the batch size used for averaging.
    scale = 1 / A_prev.shape[1]
    grad_W = scale * np.dot(dZ, A_prev.T)
    grad_b = scale * np.sum(dZ, axis=1, keepdims=True)
    grad_A_prev = np.dot(W.T, dZ)

    return grad_A_prev, grad_W, grad_b

In [7]:
#Neural Network

class NeuralNetwork:
  """Feed-forward network of dense layers trained by full-batch gradient descent.

  Every layer but the last uses ReLU; the last layer uses a sigmoid. The cost
  is binary cross-entropy summed over output units and averaged over samples.
  """

  # Output probabilities are clipped to [_EPS, 1 - _EPS] before being passed
  # to log() or used as a divisor, so a saturated sigmoid (exactly 0 or 1)
  # cannot produce inf/NaN in the cost or in the gradients.
  _EPS = 1e-15

  def __init__(self, layer_dims, learning_rate):
    """Build the layer stack.

    Args:
      layer_dims: sequence of sizes, starting with the input size; each
        following entry creates one layer of that many units.
      learning_rate: step size used by `update_parameters`.
    """
    self.Layers = []
    self.learning_rate = learning_rate

    for i in range(1, len(layer_dims)):
      activation = Relu() if i < len(layer_dims) - 1 else sigmoid()
      self.Layers.append(Layer(layer_dims[i], activation, layer_dims[i - 1]))

  def forward_propagation(self, X):
    """Run X through every layer.

    Returns:
      (AL, caches): the last layer's output and the per-layer caches, in
      layer order.
    """
    A = X
    caches = []
    for layer in self.Layers:
      A, cache = layer.forward(A)
      caches.append(cache)
    return A, caches

  def compute_cost(self, AL, Y):
    """Return the binary cross-entropy between AL and Y as a Python float.

    The sum runs over all entries and is divided by the number of columns of
    Y (the sample count).
    """
    m = Y.shape[1]
    AL = np.clip(AL, self._EPS, 1 - self._EPS)
    cost = (-1 / m) * np.sum(Y * np.log(AL) + (1 - Y) * np.log(1 - AL))
    return float(np.squeeze(cost))

  def backward_propagation(self, AL, Y, caches):
    """Back-propagate the cross-entropy cost through all layers.

    Args:
      AL: network output from `forward_propagation`.
      Y: targets; reshaped to AL's shape.
      caches: per-layer caches from `forward_propagation`.

    Returns:
      List of (dW, db) tuples in layer order.
    """
    Y = Y.reshape(AL.shape)
    # Clip exactly as compute_cost does: without it, an output of exactly
    # 0 or 1 makes the divisions below evaluate to inf or 0/0 = NaN, which
    # then poisons every parameter on the next update.
    AL = np.clip(AL, self._EPS, 1 - self._EPS)
    dAL = -(np.divide(Y, AL) - np.divide(1 - Y, 1 - AL))

    gradients = []
    dA = dAL

    # Walk the layers last-to-first, feeding each layer's dA_prev to the one
    # before it.
    for layer, cache in reversed(list(zip(self.Layers, caches))):
      dA, dW, db = layer.backward(dA, cache)
      gradients.append((dW, db))

    return list(reversed(gradients))

  def update_parameters(self, gradients):
    """Apply one gradient-descent step in place to every layer's W and b."""
    for i, layer in enumerate(self.Layers):
      dW, db = gradients[i]
      layer.W -= self.learning_rate * dW
      layer.b -= self.learning_rate * db

  def train(self, X, Y, num_iterations):
    """Run `num_iterations` full-batch gradient-descent steps.

    Prints the cost every 100 iterations.

    Returns:
      List with the cost measured at each iteration (before that
      iteration's parameter update).
    """
    costs = []
    for i in range(num_iterations):
      AL, caches = self.forward_propagation(X)
      cost = self.compute_cost(AL, Y)
      gradients = self.backward_propagation(AL, Y, caches)
      self.update_parameters(gradients)
      costs.append(cost)
      if i % 100 == 0:
        print(f"Cost after iteration {i}: {cost}")
    return costs

  def predict(self, X, threshold = 0.5):
    """Return an int array marking which outputs exceed `threshold`.

    Each output unit is thresholded independently, so with several output
    units a column may contain zero or multiple ones.
    """
    AL, _ = self.forward_propagation(X)
    predictions = (AL > threshold).astype(int)
    return predictions

In [8]:
# Smoke test on synthetic data.
# Seed the global NumPy RNG first: the data below and the weight
# initialization inside NeuralNetwork all draw from it, in this order.
np.random.seed(1)
X = np.random.randn(5, 100)  # 100 samples with 5 features
# Labels come from a separate random draw and do not depend on X, so there is
# no signal to learn; a cost staying close to ln(2) ~= 0.693 is expected here.
Y = (np.random.randn(1, 100) > 0).astype(int)


# First entry is the input size; the remaining three entries (4, 3, 1) each
# become one trainable layer, the last being a single sigmoid output unit.
layer_dims = [5, 4, 3, 1]  # 4 sizes: input + 3 trainable layers
nn = NeuralNetwork(layer_dims, learning_rate=0.0075)


# Returns the per-iteration cost history; also prints every 100 iterations.
costs = nn.train(X, Y, num_iterations=2500)

Cost after iteration 0: 0.6931472236781854
Cost after iteration 100: 0.691582435525438
Cost after iteration 200: 0.6905072294439459
Cost after iteration 300: 0.6897682428606673
Cost after iteration 400: 0.6892601746351852
Cost after iteration 500: 0.6889107478122627
Cost after iteration 600: 0.6886703447216688
Cost after iteration 700: 0.688504895816492
Cost after iteration 800: 0.6883909978575475
Cost after iteration 900: 0.6883125681923888
Cost after iteration 1000: 0.6882585494485142
Cost after iteration 1100: 0.6882213364083377
Cost after iteration 1200: 0.6881956963056259
Cost after iteration 1300: 0.6881780274832565
Cost after iteration 1400: 0.6881658502346053
Cost after iteration 1500: 0.6881574568064726
Cost after iteration 1600: 0.688151670998625
Cost after iteration 1700: 0.688147682393836
Cost after iteration 1800: 0.6881449325777105
Cost after iteration 1900: 0.6881430366943535
Cost after iteration 2000: 0.6881417295024356
Cost after iteration 2100: 0.6881408281716107
Cost

In [15]:
import numpy as np
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder

# Load MNIST data
def load_mnist():
    """Download MNIST from OpenML and return standardized pixels and one-hot labels.

    Returns:
        (X, Y): both transposed relative to scikit-learn's output, so that
        samples run along the second axis. X is float32 pixel data passed
        through StandardScaler; Y is the one-hot encoding of the labels.
    """
    dataset = fetch_openml("mnist_784", version=1, as_frame=False)
    pixels = dataset.data.astype(np.float32)
    labels = dataset.target.astype(np.int32)

    # Standardize every pixel column; the scaler is fitted on the full dataset.
    pixels = StandardScaler().fit_transform(pixels)

    # OneHotEncoder wants a 2-D column of labels.
    label_column = labels.reshape(-1, 1)
    one_hot = OneHotEncoder(sparse_output=False).fit_transform(label_column)

    return pixels.T, one_hot.T

def preprocess_data():
    """Load MNIST, keep a 500-sample subset, and split it 80/20 into train/test.

    Returns:
        (X_train, X_test, Y_train, Y_test), each transposed so that samples
        run along the second axis.
    """
    features, labels = load_mnist()

    # train_test_split slices along the first axis, so transpose back to
    # samples-first before sampling. Only the 500-sample "train" part of this
    # first split is kept, which keeps training and testing fast.
    subset_X, _, subset_Y, _ = train_test_split(
        features.T, labels.T, train_size=500, random_state=42
    )

    # Second split: 80% training, 20% testing.
    parts = train_test_split(subset_X, subset_Y, test_size=0.2, random_state=42)
    return tuple(part.T for part in parts)

if __name__ == "__main__":
    # Seed the global NumPy RNG so weight initialization is repeatable.
    np.random.seed(1)

    # Preprocess data
    X_train, X_test, Y_train, Y_test = preprocess_data()

    # Define the network architecture:
    # 784 inputs, two hidden layers (64, 32) and 10 output units.
    layer_dims = [784, 64, 32, 10]
    nn = NeuralNetwork(layer_dims, learning_rate=0.01)

    # Train the neural network
    print("Training the neural network on a subset of MNIST...")
    costs = nn.train(X_train, Y_train, num_iterations=5000)

    # Test the model
    # Per-unit 0/1 decisions at the default 0.5 threshold; the name is kept
    # in case later cells use it.
    predictions = nn.predict(X_test)

    # For the class label, take argmax over the raw output probabilities.
    # argmax over the thresholded 0/1 matrix would silently pick class 0
    # whenever no unit exceeds 0.5, and the lowest-index unit whenever
    # several do.
    probabilities, _ = nn.forward_propagation(X_test)
    predicted_labels = np.argmax(probabilities, axis=0)
    true_labels = np.argmax(Y_test, axis=0)
    accuracy = np.mean(predicted_labels == true_labels)
    print(f"Test Accuracy: {accuracy * 100:.2f}%")


Training the neural network on a subset of MNIST...
Cost after iteration 0: 6.931828854498803
Cost after iteration 100: 5.662947294453207
Cost after iteration 200: 4.853196181381265
Cost after iteration 300: 4.21748157223142
Cost after iteration 400: 3.5246640008211
Cost after iteration 500: 3.2619701694394685
Cost after iteration 600: 3.2335876351457427
Cost after iteration 700: 3.212205463962775
Cost after iteration 800: 3.1678420411922605
Cost after iteration 900: 3.060260760963288
Cost after iteration 1000: 2.873945367753305
Cost after iteration 1100: 2.7133638699591383
Cost after iteration 1200: 2.590873613542833
Cost after iteration 1300: 2.4833465265540506
Cost after iteration 1400: 2.3686205170482544
Cost after iteration 1500: 2.216162780480342
Cost after iteration 1600: 2.0222770768973577
Cost after iteration 1700: 1.8381703062672086
Cost after iteration 1800: 1.6540883838749825
Cost after iteration 1900: 1.4357751586067633
Cost after iteration 2000: 1.2172484500218284
Cost af