In [1]:
import numpy as np

code-neuron


In [2]:
class Neuron:
    def __init__(self, n_inputs):
        # small random weights
        self.w = np.random.randn(n_inputs) * 0.01
        self.b = 0.0

    def forward(self, x):
        # x shape: (n_inputs,)
        z = np.dot(x, self.w) + self.b
        return z


In [3]:
n = Neuron(n_inputs=3)
x = np.array([1.0, 2.0, -1.0])
print("Output:", n.forward(x))


Output: 0.021043225740068748


In [4]:
def relu(z):
    return np.maximum(0, z)


In [5]:
class Neuron:
    def __init__(self, n_inputs):
        self.w = np.random.randn(n_inputs) * 0.01
        self.b = 0.0

    def forward(self, x):
        z = np.dot(x, self.w) + self.b
        a = relu(z)
        return a


dense-layer

In [6]:
class DenseLayer:
    def __init__(self, n_inputs, n_neurons):
        self.W = np.random.randn(n_inputs, n_neurons) * 0.01
        self.b = np.zeros((1, n_neurons))

    def forward(self, X):
        # X shape: (batch_size, n_inputs)
        Z = np.dot(X, self.W) + self.b
        return Z


In [7]:
layer = DenseLayer(n_inputs=3, n_neurons=4)

X = np.array([
    [1.0, 2.0, -1.0],
    [0.5, -0.2, 3.0]
])

Z = layer.forward(X)
print("Z shape:", Z.shape)
print(Z)


Z shape: (2, 4)
[[-0.04037728 -0.04297277 -0.00931055  0.0064748 ]
 [ 0.02036559 -0.0005658  -0.00338519 -0.01383834]]


In [8]:
class ReLU:
    def forward(self, Z):
        return np.maximum(0, Z)


In [9]:
relu_layer = ReLU()
A = relu_layer.forward(Z)
print(A)


[[0.         0.         0.         0.0064748 ]
 [0.02036559 0.         0.         0.        ]]


Layer + Activation Together

In [11]:
dense1 = DenseLayer(3, 5)
act1 = ReLU()

dense2 = DenseLayer(5, 2)   # output layer (2 classes example)

X = np.array([[1.0, 2.0, -1.0]])

Z1 = dense1.forward(X)
A1 = act1.forward(Z1)

Z2 = dense2.forward(A1)
print("Final logits:", Z2)


Final logits: [[-6.72164687e-06  1.91492196e-04]]


backdrop

In [12]:
class DenseLayer:
    def __init__(self, n_inputs, n_neurons):
        self.W = np.random.randn(n_inputs, n_neurons) * 0.01
        self.b = np.zeros((1, n_neurons))
        self.X = None  # cache for backward

    def forward(self, X):
        self.X = X
        Z = np.dot(X, self.W) + self.b
        return Z

    def backward(self, dZ):
        # dZ shape: (batch_size, n_neurons)
        m = self.X.shape[0]

        dW = np.dot(self.X.T, dZ) / m
        db = np.sum(dZ, axis=0, keepdims=True) / m
        dX = np.dot(dZ, self.W.T)

        return dX, dW, db


backprop for relu

In [13]:
class ReLU:
    def __init__(self):
        self.Z = None

    def forward(self, Z):
        self.Z = Z
        return np.maximum(0, Z)

    def backward(self, dA):
        dZ = dA * (self.Z > 0)
        return dZ


2 layer Neural nertowrk

In [14]:
def mse_loss(y_pred, y_true):
    return np.mean((y_pred - y_true) ** 2)

def mse_grad(y_pred, y_true):
    m = y_true.shape[0]
    return (2.0 * (y_pred - y_true)) / m


training


In [15]:
np.random.seed(0)

# Dummy dataset: y = 3x1 - 2x2 + noise
m = 200
X = np.random.randn(m, 2)
y = (3 * X[:, 0] - 2 * X[:, 1] + 0.1 * np.random.randn(m)).reshape(-1, 1)

# Model
dense1 = DenseLayer(n_inputs=2, n_neurons=16)
relu1 = ReLU()
dense2 = DenseLayer(n_inputs=16, n_neurons=1)

lr = 0.05
epochs = 200

for epoch in range(epochs):
    # -------- forward ----------
    Z1 = dense1.forward(X)
    A1 = relu1.forward(Z1)
    y_pred = dense2.forward(A1)

    # -------- loss ----------
    loss = mse_loss(y_pred, y)

    # -------- backward ----------
    dY = mse_grad(y_pred, y)          # dLoss/dOutput
    dA1, dW2, db2 = dense2.backward(dY)
    dZ1 = relu1.backward(dA1)
    dX, dW1, db1 = dense1.backward(dZ1)

    # -------- update ----------
    dense2.W -= lr * dW2
    dense2.b -= lr * db2

    dense1.W -= lr * dW1
    dense1.b -= lr * db1

    if epoch % 20 == 0:
        print(f"Epoch {epoch}, Loss: {loss:.4f}")


Epoch 0, Loss: 13.3410
Epoch 20, Loss: 13.3395
Epoch 40, Loss: 13.3380
Epoch 60, Loss: 13.3365
Epoch 80, Loss: 13.3350
Epoch 100, Loss: 13.3335
Epoch 120, Loss: 13.3321
Epoch 140, Loss: 13.3307
Epoch 160, Loss: 13.3293
Epoch 180, Loss: 13.3279


softmax

In [17]:
def softmax(Z):
    Z_shift = Z - np.max(Z, axis=1, keepdims=True)  # stability
    exp = np.exp(Z_shift)
    return exp / np.sum(exp, axis=1, keepdims=True)


cross-entropy

In [18]:
def cross_entropy_loss(Y_pred, Y_true):
    eps = 1e-12
    Y_pred = np.clip(Y_pred, eps, 1.0 - eps)
    return -np.mean(np.sum(Y_true * np.log(Y_pred), axis=1))


Softmax + Cross Entropy Gradient

one-hot-encoding

In [19]:
def one_hot(y, num_classes):
    Y = np.zeros((y.shape[0], num_classes))
    Y[np.arange(y.shape[0]), y] = 1
    return Y


In [20]:
def accuracy(Y_pred_probs, y_true_labels):
    preds = np.argmax(Y_pred_probs, axis=1)
    return np.mean(preds == y_true_labels)


Full Neural Network (Dense + ReLU + Dense + Softmax)

In [21]:
class DenseLayer:
    def __init__(self, n_inputs, n_neurons):
        self.W = np.random.randn(n_inputs, n_neurons) * 0.01
        self.b = np.zeros((1, n_neurons))
        self.X = None

    def forward(self, X):
        self.X = X
        return np.dot(X, self.W) + self.b

    def backward(self, dZ):
        m = self.X.shape[0]
        dW = np.dot(self.X.T, dZ) / m
        db = np.sum(dZ, axis=0, keepdims=True) / m
        dX = np.dot(dZ, self.W.T)
        return dX, dW, db


In [22]:
class ReLU:
    def __init__(self):
        self.Z = None

    def forward(self, Z):
        self.Z = Z
        return np.maximum(0, Z)

    def backward(self, dA):
        return dA * (self.Z > 0)


dataset


In [23]:
def make_blobs(n_samples_per_class=200, seed=0):
    np.random.seed(seed)
    centers = [(-2, 0), (2, 0), (0, 2.5)]
    X_list, y_list = [], []

    for i, c in enumerate(centers):
        Xc = np.random.randn(n_samples_per_class, 2) * 0.7 + np.array(c)
        yc = np.full(n_samples_per_class, i)
        X_list.append(Xc)
        y_list.append(yc)

    X = np.vstack(X_list)
    y = np.concatenate(y_list)

    idx = np.random.permutation(X.shape[0])
    return X[idx], y[idx]


In [24]:
def iterate_minibatches(X, y, batch_size=32, shuffle=True):
    m = X.shape[0]
    idx = np.arange(m)

    if shuffle:
        np.random.shuffle(idx)

    for start in range(0, m, batch_size):
        batch_idx = idx[start:start + batch_size]
        yield X[batch_idx], y[batch_idx]


In [25]:
import numpy as np

# ---------- helpers ----------
def softmax(Z):
    Z_shift = Z - np.max(Z, axis=1, keepdims=True)
    exp = np.exp(Z_shift)
    return exp / np.sum(exp, axis=1, keepdims=True)

def cross_entropy_loss(Y_pred, Y_true):
    eps = 1e-12
    Y_pred = np.clip(Y_pred, eps, 1.0 - eps)
    return -np.mean(np.sum(Y_true * np.log(Y_pred), axis=1))

def one_hot(y, num_classes):
    Y = np.zeros((y.shape[0], num_classes))
    Y[np.arange(y.shape[0]), y] = 1
    return Y

def accuracy(Y_pred_probs, y_true_labels):
    preds = np.argmax(Y_pred_probs, axis=1)
    return np.mean(preds == y_true_labels)

def iterate_minibatches(X, y, batch_size=32, shuffle=True):
    m = X.shape[0]
    idx = np.arange(m)
    if shuffle:
        np.random.shuffle(idx)
    for start in range(0, m, batch_size):
        batch_idx = idx[start:start + batch_size]
        yield X[batch_idx], y[batch_idx]

def make_blobs(n_samples_per_class=200, seed=0):
    np.random.seed(seed)
    centers = [(-2, 0), (2, 0), (0, 2.5)]
    X_list, y_list = [], []
    for i, c in enumerate(centers):
        Xc = np.random.randn(n_samples_per_class, 2) * 0.7 + np.array(c)
        yc = np.full(n_samples_per_class, i)
        X_list.append(Xc)
        y_list.append(yc)
    X = np.vstack(X_list)
    y = np.concatenate(y_list)
    idx = np.random.permutation(X.shape[0])
    return X[idx], y[idx]

# ---------- layers ----------
class DenseLayer:
    def __init__(self, n_inputs, n_neurons):
        self.W = np.random.randn(n_inputs, n_neurons) * 0.01
        self.b = np.zeros((1, n_neurons))
        self.X = None

    def forward(self, X):
        self.X = X
        return np.dot(X, self.W) + self.b

    def backward(self, dZ):
        m = self.X.shape[0]
        dW = np.dot(self.X.T, dZ) / m
        db = np.sum(dZ, axis=0, keepdims=True) / m
        dX = np.dot(dZ, self.W.T)
        return dX, dW, db

class ReLU:
    def __init__(self):
        self.Z = None

    def forward(self, Z):
        self.Z = Z
        return np.maximum(0, Z)

    def backward(self, dA):
        return dA * (self.Z > 0)

# ---------- training ----------
np.random.seed(42)

# Data
X, y = make_blobs(n_samples_per_class=250, seed=1)
num_classes = len(np.unique(y))

# Train/Val split
split = int(0.8 * X.shape[0])
X_train, y_train = X[:split], y[:split]
X_val, y_val = X[split:], y[split:]

Y_train = one_hot(y_train, num_classes)
Y_val = one_hot(y_val, num_classes)

# Model: 2 -> 32 -> 3
dense1 = DenseLayer(2, 32)
relu1 = ReLU()
dense2 = DenseLayer(32, num_classes)

lr = 0.1
epochs = 200
batch_size = 64

for epoch in range(1, epochs + 1):
    epoch_loss = 0.0
    nb = 0

    for Xb, yb in iterate_minibatches(X_train, y_train, batch_size=batch_size, shuffle=True):
        Yb = one_hot(yb, num_classes)

        # Forward
        Z1 = dense1.forward(Xb)
        A1 = relu1.forward(Z1)
        Z2 = dense2.forward(A1)          # logits
        Y_pred = softmax(Z2)             # probabilities

        # Loss
        loss = cross_entropy_loss(Y_pred, Yb)
        epoch_loss += loss
        nb += 1

        # Backward (Softmax + CE gradient)
        m = Xb.shape[0]
        dZ2 = (Y_pred - Yb) / m

        dA1, dW2, db2 = dense2.backward(dZ2)
        dZ1 = relu1.backward(dA1)
        _, dW1, db1 = dense1.backward(dZ1)

        # Update
        dense2.W -= lr * dW2
        dense2.b -= lr * db2
        dense1.W -= lr * dW1
        dense1.b -= lr * db1

    # Validate
    Z1_val = dense1.forward(X_val)
    A1_val = relu1.forward(Z1_val)
    Z2_val = dense2.forward(A1_val)
    Y_val_pred = softmax(Z2_val)

    val_acc = accuracy(Y_val_pred, y_val)

    if epoch % 20 == 0 or epoch == 1:
        print(f"Epoch {epoch:3d} | Loss {epoch_loss/nb:.4f} | Val Acc {val_acc:.4f}")


Epoch   1 | Loss 1.0993 | Val Acc 0.0133
Epoch  20 | Loss 1.0985 | Val Acc 0.3133
Epoch  40 | Loss 1.0975 | Val Acc 0.3133
Epoch  60 | Loss 1.0964 | Val Acc 0.3133
Epoch  80 | Loss 1.0950 | Val Acc 0.3200
Epoch 100 | Loss 1.0930 | Val Acc 0.7267
Epoch 120 | Loss 1.0901 | Val Acc 0.9200
Epoch 140 | Loss 1.0854 | Val Acc 0.9200
Epoch 160 | Loss 1.0789 | Val Acc 0.9667
Epoch 180 | Loss 1.0690 | Val Acc 0.9733
Epoch 200 | Loss 1.0549 | Val Acc 0.9733


In [26]:
def confusion_matrix(y_true, y_pred, num_classes):
    cm = np.zeros((num_classes, num_classes), dtype=int)
    for t, p in zip(y_true, y_pred):
        cm[t, p] += 1
    return cm


In [27]:
def precision_recall_f1(cm):
    num_classes = cm.shape[0]
    precision = np.zeros(num_classes)
    recall = np.zeros(num_classes)
    f1 = np.zeros(num_classes)

    for k in range(num_classes):
        TP = cm[k, k]
        FP = np.sum(cm[:, k]) - TP
        FN = np.sum(cm[k, :]) - TP

        precision[k] = TP / (TP + FP + 1e-12)
        recall[k] = TP / (TP + FN + 1e-12)
        f1[k] = 2 * precision[k] * recall[k] / (precision[k] + recall[k] + 1e-12)

    return precision, recall, f1


In [28]:
def macro_weighted_scores(cm, precision, recall, f1):
    support = np.sum(cm, axis=1)  # samples per true class
    total = np.sum(support)

    macro_p = np.mean(precision)
    macro_r = np.mean(recall)
    macro_f1 = np.mean(f1)

    weighted_p = np.sum(precision * support) / (total + 1e-12)
    weighted_r = np.sum(recall * support) / (total + 1e-12)
    weighted_f1 = np.sum(f1 * support) / (total + 1e-12)

    return {
        "macro_precision": macro_p,
        "macro_recall": macro_r,
        "macro_f1": macro_f1,
        "weighted_precision": weighted_p,
        "weighted_recall": weighted_r,
        "weighted_f1": weighted_f1
    }


In [29]:
def log_loss(Y_pred_probs, y_true, num_classes):
    Y_true_oh = np.zeros((y_true.shape[0], num_classes))
    Y_true_oh[np.arange(y_true.shape[0]), y_true] = 1
    return cross_entropy_loss(Y_pred_probs, Y_true_oh)


In [30]:
def top_k_accuracy(Y_pred_probs, y_true, k=2):
    top_k = np.argsort(Y_pred_probs, axis=1)[:, -k:]  # top k indices
    correct = 0
    for i in range(y_true.shape[0]):
        if y_true[i] in top_k[i]:
            correct += 1
    return correct / y_true.shape[0]



In [31]:
def avg_confidence(Y_pred_probs):
    return np.mean(np.max(Y_pred_probs, axis=1))


In [32]:
def expected_calibration_error(Y_pred_probs, y_true, n_bins=10):
    confidences = np.max(Y_pred_probs, axis=1)
    predictions = np.argmax(Y_pred_probs, axis=1)
    accuracies = (predictions == y_true).astype(float)

    ece = 0.0
    bin_edges = np.linspace(0.0, 1.0, n_bins + 1)

    for i in range(n_bins):
        low, high = bin_edges[i], bin_edges[i + 1]
        mask = (confidences > low) & (confidences <= high)

        if np.sum(mask) == 0:
            continue

        bin_acc = np.mean(accuracies[mask])
        bin_conf = np.mean(confidences[mask])
        ece += (np.sum(mask) / len(y_true)) * np.abs(bin_acc - bin_conf)

    return ece


In [33]:
def evaluate_model(Y_pred_probs, y_true, num_classes):
    y_pred = np.argmax(Y_pred_probs, axis=1)

    acc = np.mean(y_pred == y_true)
    cm = confusion_matrix(y_true, y_pred, num_classes)

    precision, recall, f1 = precision_recall_f1(cm)
    avg_scores = macro_weighted_scores(cm, precision, recall, f1)

    loss = log_loss(Y_pred_probs, y_true, num_classes)
    top2 = top_k_accuracy(Y_pred_probs, y_true, k=2)

    conf = avg_confidence(Y_pred_probs)
    ece = expected_calibration_error(Y_pred_probs, y_true, n_bins=10)

    return {
        "accuracy": acc,
        "log_loss": loss,
        "top2_accuracy": top2,
        "avg_confidence": conf,
        "ece": ece,
        "confusion_matrix": cm,
        "precision_per_class": precision,
        "recall_per_class": recall,
        "f1_per_class": f1,
        **avg_scores
    }


In [34]:
# Y_val_pred_probs = softmax(Z2_val)
results = evaluate_model(Y_val_pred, y_val, num_classes)

print("Accuracy:", results["accuracy"])
print("Log Loss:", results["log_loss"])
print("Top-2 Acc:", results["top2_accuracy"])
print("Avg Confidence:", results["avg_confidence"])
print("ECE:", results["ece"])

print("\nConfusion Matrix:\n", results["confusion_matrix"])
print("\nF1 per class:", results["f1_per_class"])
print("Macro F1:", results["macro_f1"])
print("Weighted F1:", results["weighted_f1"])


Accuracy: 0.9733333333333334
Log Loss: 1.051850390446666
Top-2 Acc: 1.0
Avg Confidence: 0.34944069118327026
ECE: 0.6238926421500631

Confusion Matrix:
 [[47  0  0]
 [ 1 49  2]
 [ 1  0 50]]

F1 per class: [0.97916667 0.97029703 0.97087379]
Macro F1: 0.9734458275919486
Weighted F1: 0.973272279897367
