In [150]:
from sklearn import datasets
from sklearn.preprocessing import OneHotEncoder

# Assuming you have a numpy array of integers representing classes


def onehot_helper(Y):
    classes = Y

    # Determine the number of unique classes
    n_classes = np.unique(classes).size

    # Create a zero-filled matrix where rows correspond to samples and columns to classes
    onehot_encoded = np.zeros((classes.size, n_classes), dtype=int)
    # logging.log(\1, self.level)
    # Set the appropriate indices to 1
    onehot_encoded[np.arange(classes.size), classes] = 1
    return onehot_encoded


def prepare_data():
    iris = datasets.load_iris()
    X = np.array(iris["data"])
    Y = np.array(iris["target"])
    # logging.log(\1, self.level)
    # logging.log(\1, self.level)
    # n, n_cols = iris_df.shape
    # d = n_cols - 1
    rng = np.random.default_rng(seed=2147483647)

    # # Shuffle values
    permutation = rng.permutation(n)
    X = X[permutation]
    Y = Y[permutation]
    n_train = int(0.80 * n)

    X_train = X[0:n_train, :]
    Y_train = Y[0:n_train]
    X_test = X[n_train:n, :]
    Y_test = Y[n_train:n]
    # logging.log(\1, self.level)
    # logging.log(\1, self.level)
    # # Create a zero-filled matrix where rows correspond to samples and columns to classes
    Y_train_onehot_encoded = onehot_helper(Y_train)
    Y_test_onehot_encoded = onehot_helper(Y_test)

    # logging.log(\1, self.level)
    # logging.log(\1, self.level)
    # logging.log(\1, self.level)
    # logging.log(\1, self.level)
    return X_train, Y_train_onehot_encoded, X_test, Y_test_onehot_encoded

In [151]:
X_train, Y_train, X_test, Y_test = prepare_data()
logging.log(\1, self.level)

(120, 4) (120, 3) (30, 4) (30, 3)


In [152]:
X_train[0:5]

array([[5.1, 3.3, 1.7, 0.5],
       [6.4, 3.2, 5.3, 2.3],
       [5.3, 3.7, 1.5, 0.2],
       [5.1, 3.7, 1.5, 0.4],
       [6. , 3.4, 4.5, 1.6]])

In [153]:
Y_train[0:5]

array([[1, 0, 0],
       [0, 0, 1],
       [1, 0, 0],
       [1, 0, 0],
       [0, 1, 0]])

In [236]:
import numpy as np


class Module:
    def forward(self, inputs):
        raise NotImplementedError

    def backward(self, gradwrtoutput):
        raise NotImplementedError


class Linear(Module):
    name = "linear"

    def __init__(self, input_dim, output_dim, learning_rate=0.07, debug_level=logging.INFO):
        np.random.seed(42)
        self.weights = (
            np.random.randn(input_dim, output_dim) * (5 / 3) / ((input_dim) ** 0.5)
        )
        self.bias = np.random.randn(1, output_dim) * 0.1
        self.learning_rate = learning_rate
        self.level = debug_level

    def forward(self, inputs):
        assert inputs.shape[1] == self.weights.shape[0], "Input dimensions do not match"
        self.X = inputs  # N * D dimensional array.
        return np.dot(self.X, self.weights) + self.bias

    def backward(self, gradwrtoutput):
        # gradwrtoutput is N * C dimensional array.
        logging.log(self.level, f"\tIncoming Gradient Shape={gradwrtoutput.shape}")
        logging.log(self.level, f"\tIncoming Gradient Norm: {np.sqrt(np.sum(np.power(gradwrtoutput, 2), 1))}")
        assert (
            gradwrtoutput.shape[0] == self.X.shape[0]
        ), f"Mismatch in first dimension. The dimensions of incoming gradient  = {gradwrtoutput.shape} self.X = {self.X.shape}"

        assert (
            gradwrtoutput.shape[1] == self.weights.shape[1]
        ), "Mismatch in  gradient dimension, it should be {0} but is {1}".format(
            self.weights.shape[1], gradwrtoutput.shape[1]
        )
        # size should always be
        self.gradwrtinput = np.dot(self.X.T, gradwrtoutput)
        assert self.gradwrtinput.shape == self.weights.shape

        # Compute gradients for weights and bias here.
        backpropagated_gradient = np.dot(gradwrtoutput, self.weights.T)
        # The weights are updated *after* the computation of the gradient
        logging.log(self.level, f"\t weights before update:\n\t{self.weights}")
        logging.log(self.level, f"\t bias before :\n\t{self.bias}")
        self.weights -= self.learning_rate * self.gradwrtinput
        self.bias -= self.learning_rate * gradwrtoutput.sum(0)
        logging.log(self.level, f"\t Weights after update:\n\t{self.weights}")
        logging.log(self.level, f"\t Bias:\n\t{self.bias}")
        logging.log(self.level, f"\tBackpropagated gradient shape {backpropagated_gradient.shape}")
        return backpropagated_gradient


class ReLU(Module):

    name = "relu"

    def __init__(self, debug_level = logging.INFO):
        self.inputs = None
        self.level = debug_level

    def forward(self, inputs):

        self.inputs = inputs
        # np.maximum(0, np.array([[1, 2], [-1, 12], [-2, 3]]))
        # array([[ 1,  2],
        #    [ 0, 12],
        #    [ 0,  3]])
        return np.maximum(0, inputs)

    def backward(self, gradwrtoutput):  # dLoss/da'
        logging.log(self.level, f"\tIncoming Gradient shape={gradwrtoutput.shape}") # Shape is like self.inputs.shape.
        # logging.log(self.level, self.inputs.shape)
        # The below op is a bit subtle in the complexity it "hides" the full jacobian.
        # In reality the gradient is the da/dz in the expression: `dloss/dZ = dloss/da * da/dz` = gradwrtoutput * da/dz
        # where z is input to the RELU and a is output of the relu. If we take a single example both are vectors.
        # da/dz is the local gradient and is actually a jacobian but we never actually compute the full
        self.local_gradient = np.where(self.inputs > 0, 1, 0)
        backpropagated_gradient = (
            gradwrtoutput * self.local_gradient
        )  # Element wise product
        logging.log(self.level, f"\tBackpropagated Gradient shape={backpropagated_gradient.shape}")
        return backpropagated_gradient


class Sequential(Module):

    def __init__(self, *args, debug_level=logging.INFO):
        self.modules = list(args)
        self.level = debug_level

    def forward(self, inputs):
        for i, module in enumerate(self.modules):
            logging.log(self.level, f'Forward Prop through  {module.name} layer {i+1}')
            inputs = module.forward(inputs)
        return inputs

    def backward(self, gradwrtoutput):
        for i, module in enumerate(reversed(self.modules)):
            logging.log(self.level, f'Backward Prop through  {module.name} layer {len(self.modules) - i}')
            gradwrtoutput = module.backward(gradwrtoutput)
        return gradwrtoutput


class CrossEntropyLoss:
    """Also contains the softmax and so computes the dLoss/dLogits"""

    def __init__(self, logits, Y, model: Sequential):
        self.logits = logits  # N * C dimensional array.
        self.Y = Y  # N * C dimensional one-hot encoded array of ground truth.
        self.loss, self.activations = ce_loss_helper(logits, Y)
        self.model = model

    def backward(self):
        gradwrtoutput = self.activations - self.Y  #
        assert gradwrtoutput.shape == self.Y.shape == self.activations.shape
        self.model.backward(gradwrtoutput)
        return gradwrtoutput  # N * C dimensional array.

    def __str__(self):
        return str(self.loss)


def ce_loss_helper(logits, Y):
    exps = np.exp(logits - np.max(logits))  # Numeric stability.
    probs = exps / np.sum(exps, axis=1, keepdims=True) # Careful with the axis
    return -np.mean(np.sum(Y * np.log(probs + 1e-9), axis=1), axis=0), probs

In [242]:
LR = 0.035
# LOGGING_LEVEL = logging.DEBUG
logging.basicConfig(level=logging.INFO)
nn_model = Sequential(
    Linear(4, 8, learning_rate=LR, debug_level=logging.DEBUG), 
    ReLU(debug_level=logging.DEBUG), 
    Linear(8, 3, learning_rate=LR, debug_level=logging.DEBUG),
    debug_level=logging.DEBUG,
)

In [243]:
# model = Sequential(Linear(2, 3), ReLU(), Linear(3, 2))
# Loop over epochs
#   Do a forward pass
#   Compute the loss
#   Do a backward pass
for i in range(100):
    logits = nn_model.forward(X_train)  # logits are the penultimate layer's output
    loss = CrossEntropyLoss(
        logits, Y_train, nn_model
    )  # Return the loss, what about the derivative?

    loss.backward()
    print("Loss=", loss)

    logits = nn_model.forward(X_test)

    # Measure validation loss
    validation_loss_val, validation_probs = ce_loss_helper(logits, Y_test)
    print("Validation Loss=", validation_loss_val)

# Do the forward pass.

Loss= 4.250118412320548
Validation Loss= 16.578612669357135
Loss= 13.124735029699396
Validation Loss= 12.433959501767848
Loss= 14.160898321596713
Validation Loss= 4.102106969801691
Loss= 4.059757110273953
Validation Loss= 2.4845954474287684
Loss= 2.0785242164009476
Validation Loss= 1.6756881126368588
Loss= 1.499290782475049
Validation Loss= 1.2401635614029363
Loss= 1.1370848012678654
Validation Loss= 1.0909962343526791
Loss= 1.101196357857616
Validation Loss= 1.1360384906047591
Loss= 1.0970630842887876
Validation Loss= 1.1138279778438325
Loss= 1.0963545923014864
Validation Loss= 1.1237926356910057
Loss= 1.0961931540603738
Validation Loss= 1.1191168327593581
Loss= 1.096160110045188
Validation Loss= 1.121268211805386
Loss= 1.0961529168444135
Validation Loss= 1.1202692813309119
Loss= 1.0961513905156752
Validation Loss= 1.1207311755848963
Loss= 1.096151062235832
Validation Loss= 1.1205171882322789
Loss= 1.0961509920218948
Validation Loss= 1.120616236433261
Loss= 1.0961509769598432
Validati

In [229]:
a = np.random.randn(2, 3)

norm = np.sqrt(np.sum(np.power(a, 2), 1))
print(norm)
print(a)
np.where(a > 0, 0, 1)

[1.8126753  2.37655405]
[[-1.05771093  0.82254491 -1.22084365]
 [ 0.2088636  -1.95967012 -1.32818605]]


array([[1, 0, 1],
       [0, 1, 1]])

In [318]:
import logging

# Set up basic configuration for logging
# Adjust the level as per your requirement: DEBUG, INFO, WARNING, ERROR, CRITICAL
# logging.basicConfig(level=logging.DEBUG)
logger = logging.Logger("Jupyter", level=logging.INFO)

In [319]:
logger.log(logging.INFO, "Some message")

In [288]:
logging.INFO

20

In [277]:
logging.Logger?

[0;31mInit signature:[0m [0mlogging[0m[0;34m.[0m[0mLogger[0m[0;34m([0m[0mname[0m[0;34m,[0m [0mlevel[0m[0;34m=[0m[0;36m0[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0;31mDocstring:[0m     
Instances of the Logger class represent a single logging channel. A
"logging channel" indicates an area of an application. Exactly how an
"area" is defined is up to the application developer. Since an
application can have any number of areas, logging channels are identified
by a unique string. Application areas can be nested (e.g. an area
of "input processing" might include sub-areas "read CSV files", "read
XLS files" and "read Gnumeric files"). To cater for this natural nesting,
channel names are organized into a namespace hierarchy where levels are
separated by periods, much like the Java or Python package namespace. So
in the instance given above, channel names might be "input" for the upper
level, and "input.csv", "input.xls" and "input.gnu" for the sub-levels.
There is no arbi