In [1]:
print('hello world')

hello world


In [14]:
#Install Packages
!pip install numpy
!pip install pandas
!pip install matplotlib
!pip install scikit-learn
!pip install keras
!pip install tensorflow

Collecting tensorflow
  Downloading tensorflow-2.20.0-cp312-cp312-macosx_12_0_arm64.whl.metadata (4.5 kB)
Collecting astunparse>=1.6.0 (from tensorflow)
  Downloading astunparse-1.6.3-py2.py3-none-any.whl.metadata (4.4 kB)
Collecting flatbuffers>=24.3.25 (from tensorflow)
  Downloading flatbuffers-25.2.10-py2.py3-none-any.whl.metadata (875 bytes)
Collecting gast!=0.5.0,!=0.5.1,!=0.5.2,>=0.2.1 (from tensorflow)
  Downloading gast-0.6.0-py3-none-any.whl.metadata (1.3 kB)
Collecting google_pasta>=0.1.1 (from tensorflow)
  Downloading google_pasta-0.2.0-py3-none-any.whl.metadata (814 bytes)
Collecting libclang>=13.0.0 (from tensorflow)
  Downloading libclang-18.1.1-1-py2.py3-none-macosx_11_0_arm64.whl.metadata (5.2 kB)
Collecting opt_einsum>=2.3.2 (from tensorflow)
  Downloading opt_einsum-3.4.0-py3-none-any.whl.metadata (6.3 kB)
Collecting protobuf>=5.28.0 (from tensorflow)
  Downloading protobuf-6.32.0-cp39-abi3-macosx_10_9_universal2.whl.metadata (593 bytes)
Collecting requests<3,>=2.21

In [8]:
import numpy as np
import sklearn

In [7]:
# === LAYER ===
class Layer():
    def __init__(self, num_inputs, num_outputs):
      self.weights = np.random.randn(num_inputs, num_outputs) * np.sqrt(2.0 / (num_inputs+num_outputs)) 
      self.biases = np.random.randn(1, num_outputs) 

    def forward(self, inputs):
      self.inputs = inputs
      self.outputs = np.dot(self.inputs, self.weights) + self.biases 

    def backward(self, del_z):
      batch_size = self.inputs.shape[0]

      self.del_w = np.dot(self.inputs.T, del_z) / batch_size
      self.del_b = np.sum(del_z, axis=0, keepdims=True) / batch_size # Sum over batch dimension
      self.del_z_prev = np.dot(del_z, self.weights.T) 

# === ACTIVATION: RELU ===
class ReLU():
    def forward(self, inputs):
      self.inputs = inputs
      self.output = np.maximum(0, inputs)

    def backward(self, del_z):
      self.del_z_prev = del_z.copy()
      self.del_z_prev[self.inputs <= 0] = 0

# === ACTIVATION: SOFTMAX ===
class Softmax():
    def forward(self, inputs):
      exp_z = np.exp(inputs - np.max(inputs, axis=1, keepdims=True))
      self.output = exp_z / np.sum(exp_z, axis=1, keepdims=True) 

# === LOSS: CROSS-ENTROPY ===
class CategoricalCrossentropy:
    def forward(self, y_pred, y_true):
        y_pred_clipped = np.clip(y_pred, 1e-7, 1 - 1e-7)
        if len(y_true.shape) == 2:
            correct_confidences = np.sum(y_pred_clipped * y_true, axis=1)
        else:
            correct_confidences = y_pred_clipped[range(len(y_pred_clipped)), y_true]
        return -np.mean(np.log(correct_confidences))

# === COMBINED SOFTMAX + CROSS-ENTROPY BACKWARD ===
class Softmax_CategoricalCrossentropy:
    def backward(self, y_pred, y_true):
        samples = len(y_pred)
        if len(y_true.shape) == 2:
            y_true = np.argmax(y_true, axis=1)
        self.del_z_prev = y_pred.copy()
        self.del_z_prev[range(samples), y_true] -= 1 
        self.del_z_prev = self.del_z_prev / samples

# === OPTIMIZER: SGD ===
class SGD:
    def __init__(self, learning_rate=0.01):
        self.lr = learning_rate

    def update_params(self, layer):
        layer.weights -= self.lr * layer.del_w
        layer.biases  -= self.lr * layer.del_b

In [11]:
# === MLP ===
# Allow for flexible number of hidden layers
class MLP:
    def __init__(self, layers:list):
      self.layer_count = len(layers)
      self.hidden_layers = self.layer_count - 1
      self.layers = [Layer(layers[i], layers[i+1]) for i in range(self.hidden_layers)]
      self.activations = [ReLU() for _ in range(self.hidden_layers - 1)] + [Softmax()]
      self.loss = CategoricalCrossentropy()
      self.loss_activation = Softmax_CategoricalCrossentropy() 
      self.optimizer = SGD()


    # Forward Propagation
    def forward(self, x):
      current = x
      for layer, activation in zip(self.layers, self.activations):
        layer.forward(current)              
        activation.forward(layer.outputs)   
        current = activation.output         
      return current


    def backward(self, y_true):
        self.loss_activation.backward(self.activations[-1].output, y_true)
        grad_from_next_layer = self.loss_activation.del_z_prev 
        self.layers[-1].backward(grad_from_next_layer)
        grad_from_next_layer = self.layers[-1].del_z_prev 

        self.optimizer.update_params(self.layers[-1])

        for i in reversed(range(len(self.layers) - 1)):
            current_activation = self.activations[i]  
            current_layer = self.layers[i]            
            current_activation.backward(grad_from_next_layer)
            grad_to_layer_backward = current_activation.del_z_prev 
            current_layer.backward(grad_to_layer_backward)
            grad_from_next_layer = current_layer.del_z_prev 

    def fit(self, X_train, y_train, X_test, y_test, epochs=10, batch_size=64):
        
        # Data preparation (normalization)
        X_train = X_train / 255.0
        X_test = X_test / 255.0
        
        # One-hot encode the labels
        y_train_one_hot = np.zeros((y_train.size, y_train.max() + 1))
        y_train_one_hot[np.arange(y_train.size), y_train] = 1
        
        y_test_one_hot = np.zeros((y_test.size, y_test.max() + 1))
        y_test_one_hot[np.arange(y_test.size), y_test] = 1

        num_samples = len(X_train)

        for epoch in range(epochs):
            
            indices = np.arange(num_samples)
            np.random.shuffle(indices)            
            epoch_loss = 0
            
            # Mini-batch loop
            for i in range(0, num_samples, batch_size):
                batch_indices = indices[i:i + batch_size]
                X_batch = X_train[batch_indices]
                y_batch = y_train_one_hot[batch_indices]

                y_pred = self.forward(X_batch)
                
                # Backward pass
                self.backward(y_batch)
                
                # Calculate and accumulate loss
                loss = self.loss.forward(y_pred, y_batch)
                epoch_loss += loss
            
            # Calculate metrics
            avg_loss = epoch_loss / (len(X_train) / batch_size)
            
            # Training accuracy
            y_pred_train = self.forward(X_train)
            y_pred_train_labels = np.argmax(y_pred_train, axis=1)
            train_accuracy = np.mean(y_pred_train_labels == y_train)   

            # Validation accuracy
            y_pred_test = self.forward(X_test)
            y_pred_test_labels = np.argmax(y_pred_test, axis=1)
            val_accuracy = np.mean(y_pred_test_labels == y_test)

            print(f"Epoch {epoch+1}/{epochs} | Loss: {avg_loss:.4f} | Train Acc: {train_accuracy:.4f} | Val Acc: {val_accuracy:.4f}")    


In [15]:
from keras.datasets import fashion_mnist
from sklearn.model_selection import train_test_split

# Load the dataset
(X_train_full, y_train_full), (X_test, y_test) = fashion_mnist.load_data()

# Flatten the images and normalize pixel values (moved into fit function)
# Images are 28x28, so each input sample has 784 features
X_train_full = X_train_full.reshape(-1, 28*28)
X_test = X_test.reshape(-1, 28*28)

# Split a validation set from the training data
X_train, X_val, y_train, y_val = train_test_split(
    X_train_full, y_train_full, test_size=0.1, random_state=42
)

print(f"Training data shape: {X_train.shape}, labels shape: {y_train.shape}")
print(f"Validation data shape: {X_val.shape}, labels shape: {y_val.shape}")
print(f"Test data shape: {X_test.shape}, labels shape: {y_test.shape}")

Matplotlib is building the font cache; this may take a moment.


Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-labels-idx1-ubyte.gz
[1m29515/29515[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1us/step
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-images-idx3-ubyte.gz
[1m26421880/26421880[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 0us/step
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-labels-idx1-ubyte.gz
[1m5148/5148[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-images-idx3-ubyte.gz
[1m4422102/4422102[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Training data shape: (54000, 784), labels shape: (54000,)
Validation data shape: (6000, 784), labels shape: (6000,)
Test data shape: (10000, 784), labels shape: (10000,)


In [17]:
mlp = MLP(layers=[784, 128, 64, 10])
mlp.fit(X_train, y_train, X_val, y_val, epochs=1000, batch_size=128)

Epoch 1/1000 | Loss: 3.5863 | Train Acc: 0.0998 | Val Acc: 0.1038
Epoch 2/1000 | Loss: 3.3266 | Train Acc: 0.1013 | Val Acc: 0.1045
Epoch 3/1000 | Loss: 3.1331 | Train Acc: 0.1040 | Val Acc: 0.1065
Epoch 4/1000 | Loss: 2.9831 | Train Acc: 0.1080 | Val Acc: 0.1100
Epoch 5/1000 | Loss: 2.8631 | Train Acc: 0.1128 | Val Acc: 0.1147
Epoch 6/1000 | Loss: 2.7644 | Train Acc: 0.1177 | Val Acc: 0.1195
Epoch 7/1000 | Loss: 2.6819 | Train Acc: 0.1240 | Val Acc: 0.1257
Epoch 8/1000 | Loss: 2.6119 | Train Acc: 0.1322 | Val Acc: 0.1342
Epoch 9/1000 | Loss: 2.5520 | Train Acc: 0.1426 | Val Acc: 0.1467
Epoch 10/1000 | Loss: 2.5000 | Train Acc: 0.1529 | Val Acc: 0.1558
Epoch 11/1000 | Loss: 2.4549 | Train Acc: 0.1629 | Val Acc: 0.1663
Epoch 12/1000 | Loss: 2.4154 | Train Acc: 0.1717 | Val Acc: 0.1752
Epoch 13/1000 | Loss: 2.3806 | Train Acc: 0.1789 | Val Acc: 0.1832
Epoch 14/1000 | Loss: 2.3498 | Train Acc: 0.1854 | Val Acc: 0.1910
Epoch 15/1000 | Loss: 2.3226 | Train Acc: 0.1901 | Val Acc: 0.1960
Epoc

In [18]:
# --- Test the trained model using scikit-learn's metric ---

print("\nEvaluating model on test data...")

# Import the necessary function
from sklearn.metrics import accuracy_score

# Make predictions on the test set
y_pred_test = mlp.forward(X_test / 255.0)

# Convert predicted probabilities to class labels
y_pred_test_labels = np.argmax(y_pred_test, axis=1)

# Calculate final test accuracy using sklearn
test_accuracy = accuracy_score(y_test, y_pred_test_labels)

print(f"Final Test Accuracy (with sklearn): {test_accuracy:.4f}")


Evaluating model on test data...
Final Test Accuracy (with sklearn): 0.6451
