In [6]:
import numpy as np
import tarfile
import pickle
import os
import matplotlib.pyplot as plt
import seaborn as sns


# Important for the import of cifar 10
import sys
sys.path.append("../")
from dataloader import load_cifar10
from dataloader import load_mnist



# Section 1: Dataloading + converting to variable with gradient

### Load the cifar-10

In [8]:
# Load CIFAR-10
#X_train, y_train, X_val, y_val, X_test, y_test = load_cifar10("../data/cifar-10-python.tar.gz")
# Load MNIST
X_train, y_train, X_val, y_val, X_test, y_test = load_mnist("/workspaces/02456-deep-learning/data/mnist")

### THE SHAPE OF CIFAR IS:    (RxGxB = 3) x 32 x 32       x      (no. pictures)

Train set: (54000, 1, 28, 28) (54000,)
Validation set: (6000, 1, 28, 28) (6000,)
Test set: (10000, 1, 28, 28) (10000,)


In [101]:
X_flat = X_train.reshape(X_train.shape[0], -1)  # (54000, 784)
X_flat.shape

(54000, 784)

So the input for each picture will be: 1 * 28 * 28 = 784

In [92]:
X_train[0].shape

(1, 28, 28)

In [48]:
ex = X_train[0].flatten()
ex.shape

(784,)

# Section 2: FFN

> ### Forward method of layer 

In [None]:
def activation(activationName, Z):
    if activationName == "RELU":
        return np.maximum(0, Z)
    
    if activationName == "Softmax":
        # x: (batch_size, n_classes)
        x_shifted = Z - np.max(Z, axis=-1, keepdims=True)
        e_x = np.exp(x_shifted)
        return e_x / e_x.sum(axis=-1, keepdims=True)




In [132]:
class Layer:
    def __init__(self, input_dim, output_dim, activation = "RELU"):
        print(f'Layer with input_dim: {input_dim}, output dim, {output_dim}')
        self.inp = input_dim
        self.out = output_dim
        self.activation = activation

        self.a = None
        self.z = None
        
        #Vi ganger initialiseingne med meget små vægte, for at undgå at softmax bare bliver (0,1)
        self.W = np.random.rand(output_dim, input_dim) * 0.01
        self.b = np.random.rand(output_dim) * 0.01
    
    #This forward will either calculate for a single vector (z) or a batch
    def forward(self, z):
        # z can be (input_dim,) or (batch_size, input_dim)
        if z.ndim == 1:
            self.a = self.W @ z + self.b
        else:
            self.a = z @ self.W.T + self.b  # batch forward
        self.z = activation(self.activation, self.a)
        return self.z


In [137]:
layer1 = Layer(784, 5, "RELU")
layer2 = Layer(5, 10, "Softmax")

layer1.forward(X_flat)
layer2.forward(layer1.z)

layer2.z[1]



Layer with input_dim: 784, output dim, 5
Layer with input_dim: 5, output dim, 10


array([0.21218628, 0.24674147, 0.13244197, 0.06904621, 0.01833682,
       0.00784111, 0.07248769, 0.05375045, 0.01783634, 0.16933166])

> ### Forward method of layer + backwards 

1. We are working with multi-classification, so the only last layer activation function will be softmax
2. We want to be able to use cross-entropy loss function

$\rightarrow$  We only have to derive  $\frac{\partial E_{cross}(z)}{2}$

# Draft

In [None]:
import numpy as np
from typing import List, Optional, Tuple, Union

# --------------------
# Activations
# --------------------
def sigmoid(Z):
    return 1.0 / (1.0 + np.exp(-Z))

def sigmoid_prime(Z):
    s = sigmoid(Z)
    return s * (1 - s)

def relu(Z):
    return np.maximum(0, Z)

def relu_prime(Z):
    return (Z > 0).astype(float)

def tanh(Z):
    return np.tanh(Z)

def tanh_prime(Z):
    return 1 - np.tanh(Z)**2

def softmax(Z):
    Zc = Z - np.max(Z, axis=0, keepdims=True)
    expZ = np.exp(Zc)
    return expZ / np.sum(expZ, axis=0, keepdims=True)

# map activation name to function/derivative
_ACTIVATIONS = {
    'sigmoid': (sigmoid, sigmoid_prime),
    'relu':    (relu, relu_prime),
    'tanh':    (tanh, tanh_prime),
    'softmax': (softmax, None)  # softmax derivative handled together with CE loss
}

# --------------------
# Initializer
# --------------------
class initializer:
    @staticmethod
    def init_weights(n_in: int, n_out: int, act_fn: str):
        # He for ReLU, Xavier for tanh/sigmoid/softmax
        if act_fn == 'relu':
            return np.random.randn(n_out, n_in) * np.sqrt(2.0 / n_in)
        else:
            # Xavier/Glorot
            return np.random.randn(n_out, n_in) * np.sqrt(1.0 / n_in)

    @staticmethod
    def init_bias(n_out: int):
        return np.zeros((n_out, 1))


# --------------------
# Utilities
# --------------------
def to_one_hot(y: np.ndarray, num_classes: int) -> np.ndarray:
    """
    y: shape (m,) with integer labels [0..num_classes-1]
    returns: (num_classes, m)
    """
    m = y.shape[0]
    Y = np.zeros((num_classes, m))
    Y[y, np.arange(m)] = 1
    return Y



In [None]:

# --------------------
# Layer (fully connected)
# --------------------
class Layer:
    def __init__(self, n_in: int, n_out: int, act_fn: str = 'relu'):
        assert act_fn in _ACTIVATIONS, f"Unsupported activation '{act_fn}'"
        self.n_in = n_in
        self.n_out = n_out
        self.act_name = act_fn
        self.act, self.act_prime = _ACTIVATIONS[act_fn]

        # parameters
        self.W = initializer.init_weights(n_in, n_out, act_fn)
        self.b = initializer.init_bias(n_out)


    def forward(self, A_prev: np.ndarray) -> np.ndarray:
        """
        A_prev: (n_in, m)
        returns A: (n_out, m)
        """
        self.Z = np.dot(self.W, A_prev) + self.b  # (n_out, m)
        if self.act_name == 'softmax':
            self.A = self.act(self.Z)
        else:
            self.A = self.act(self.Z)
        return self.A



In [None]:

# --------------------
# Neural Network
# --------------------
class NeuralNetwork:
    def __init__(self):
        self.layers: List[Layer] = []

    def add_layer(self, n_in: int, n_out: int, act_fn: str = 'relu'):
        """
        Add a fully-connected layer. Provide n_in for the layer.
        (n_in must match previous layer's n_out if stacking.)
        """
        layer = Layer(n_in, n_out, act_fn)
        self.layers.append(layer)

    def forward(self, X: np.ndarray) -> np.ndarray:
        """
        X: (n_x, m)
        returns A_last: (n_last, m)
        """
        A = X
        for layer in self.layers:
            A = layer.forward(A)
        return A


# --------------------
# Example usage with flattened image input (e.g. CIFAR-10: 32*32*3 = 3072)
# --------------------
if __name__ == "__main__":
    # --- load CIFAR-10 (you provided this) ---
    # X_train, y_train, X_val, y_val, X_test, y_test = load_cifar10("../data/cifar-10-python.tar.gz")
    # For this example we assume the above line executed already and variables exist.

    # Quick helper to convert many possible CIFAR shapes into (n_features, m)
    def prepare_X_for_forward(X):
        """
        Returns X_proc with shape (n_features, m)
        Accepts:
          - (3,32,32,m)
          - (m,3,32,32)
          - (m,32,32,3)
          - (m, 3072)
        """
        X = np.asarray(X)
        if X.ndim == 4:
            # Case: (3,32,32,m)
            if X.shape[0] == 3 and X.shape[1] == 32 and X.shape[2] == 32:
                # reorder to (m, 3, 32, 32)
                X = np.transpose(X, (3, 0, 1, 2))
            # Now possible shapes: (m, 3, 32, 32) or (m, 32, 32, 3)
            m = X.shape[0]
            if X.shape[1] == 3 and X.shape[2] == 32 and X.shape[3] == 32:
                # channels-first (m,3,32,32)
                X_flat = X.reshape(m, -1)   # (m, 3072)
            elif X.shape[1] == 32 and X.shape[2] == 32 and X.shape[3] == 3:
                # channels-last (m,32,32,3)
                X_flat = X.reshape(m, -1)
            else:
                raise ValueError(f"Unrecognized 4D shape: {X.shape}")
        elif X.ndim == 2:
            # Already flattened e.g. (m, 3072) or (3072, m)
            if X.shape[1] == 3072:
                X_flat = X
            elif X.shape[0] == 3072:
                # already (3072, m) — transpose to (m,3072) for consistency below
                X_flat = X.T
            else:
                raise ValueError(f"Unrecognized 2D shape: {X.shape}")
        else:
            raise ValueError(f"Unrecognized X ndim: {X.ndim}")

        # Produce (n_features, m)
        X_proc = X_flat.T  # now (3072, m)
        return X_proc

    # ---- prepare data ----
    # Use a small batch for demo to save memory/time
    # (you can swap in the full X_train if you want)
    # Example: take first 128 train examples (if present)
    num_examples = 128
    try:
        X_sample = X_train[:num_examples]
    except NameError:
        raise RuntimeError("X_train not found. Make sure you ran load_cifar10() before this script.")

    X_proc = prepare_X_for_forward(X_sample)   # shape (3072, m_sample)
    print("Prepared X_proc shape (features, m):", X_proc.shape)

    # ---- build a small FFN ----
    nn = NeuralNetwork()
    # Input layer must match flattened size 3072
    nn.add_layer(3072, 512, act_fn='relu')
    nn.add_layer(512, 256, act_fn='relu')
    # Final layer for CIFAR-10 multiclass -> softmax with 10 outputs
    nn.add_layer(256, 10, act_fn='softmax')

    # ---- forward pass only ----
    A_last = nn.forward(X_proc)   # shape (10, m_sample)
    print("A_last shape (n_out, m):", A_last.shape)

    # If softmax final layer, show probabilities and predictions for first few examples
    if nn.layers[-1].act_name == 'softmax':
        probs = A_last  # already probabilities
        preds = np.argmax(probs, axis=0)  # (m,)
        print("First 6 predicted labels:", preds[:6])
        print("First example softmax probs (first 10 classes):")
        print(probs[:, 0])   # full vector of 10 probs for example 0
    else:
        # For binary output
        print("First 6 outputs (sigmoid):", A_last[:, :6])


Prepared X_proc shape (features, m): (3072, 128)
A_last shape (n_out, m): (10, 128)
First 6 predicted labels: [2 6 6 2 2 2]
First example softmax probs (first 10 classes):
[2.23224749e-055 9.84803441e-090 1.00000000e+000 2.48666246e-128
 2.23522770e-168 5.11786727e-195 9.03688840e-011 2.24619768e-100
 3.98539195e-239 2.31292698e-197]


# Section 3: Loss / Backpropgation