<a href="https://colab.research.google.com/github/inderpreetsingh01/ml_machine_coding/blob/main/DL_code.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# dropout

In [None]:
import numpy as np

class Dropout:
    """
    Inverted-dropout layer.

    In training mode each element of the input is zeroed with probability
    ``p`` and the survivors are divided by ``1 - p`` so the expected value of
    the output matches the input. In evaluation mode the input is returned
    unchanged.
    """

    def __init__(self, p=0.5):
        """
        Dropout layer.
        p : float
            Probability of dropping a unit (0 <= p < 1).

        Raises
        ------
        ValueError
            If ``p`` is outside ``[0, 1)`` (this also rejects NaN).
        """
        # Explicit raise instead of ``assert``: assertions are removed under
        # ``python -O``, which would let p == 1 reach a division by zero.
        if not 0 <= p < 1:
            raise ValueError("Dropout probability must be in [0, 1).")
        self.p = p
        self.mask = None       # keep-mask sampled by the last training forward
        self.training = True   # Default: training mode

    def forward(self, X):
        """
        Apply dropout to ``X``.

        In training mode a fresh keep-mask with the shape of ``X`` is sampled
        from NumPy's global RNG and stored in ``self.mask``; the masked input
        is rescaled by ``1 / (1 - p)``. In evaluation mode ``X`` itself is
        returned.
        """
        if not self.training:
            # In evaluation mode, just return input
            return X

        # Create mask: 1 with probability (1-p), 0 with probability p
        self.mask = (np.random.rand(*X.shape) > self.p).astype(np.float32)

        # Scale activations to maintain expected value
        return (X * self.mask) / (1.0 - self.p)

    def backward(self, d_out):
        """
        Backprop through dropout (apply same mask).

        In training mode the upstream gradient is multiplied by the mask
        stored by ``forward`` and by the same ``1 / (1 - p)`` factor; in
        evaluation mode it is passed through unchanged.
        """
        if not self.training:
            return d_out
        return (d_out * self.mask) / (1.0 - self.p)

    def eval(self):
        """Switch to inference mode (no dropout applied)."""
        self.training = False

    def train(self):
        """Switch to training mode (dropout applied)."""
        self.training = True

In [None]:
# Seed NumPy's global RNG so the sampled dropout mask is reproducible
np.random.seed(42)

dropout = Dropout(p=0.3)

X = np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])

# Training mode: a random mask is applied and the survivors are rescaled
dropout.train()
out_train = dropout.forward(X)
print("Training output:\n", out_train)

# Evaluation mode: the input is returned unchanged
dropout.eval()
out_eval = dropout.forward(X)
print("Eval output:\n", out_eval)

In [None]:
# ReLU

In [None]:
import numpy as np

class ReLU:
    """Rectified linear unit layer that caches a 0/1 gate for backprop."""

    def __init__(self):
        # float32 array holding 1.0 where the last forward input was > 0,
        # 0.0 elsewhere; None until forward() has been called
        self.mask = None

    def forward(self, X):
        """Zero out non-positive entries of X and remember which ones passed."""
        gate = (X > 0).astype(np.float32)
        self.mask = gate
        return X * gate

    def backward(self, d_out):
        """Let the upstream gradient through only where the input was > 0."""
        gate = self.mask
        return d_out * gate


In [None]:
relu = ReLU()

X = np.array([[-1, 2, -3], [4, -5, 6]])

# Forward pass: non-positive entries are zeroed
out_forward = relu.forward(X)
print("Forward output:\n", out_forward)

# Backward pass, feeding an all-ones gradient as if from the next layer
d_out = np.ones_like(X)
out_backward = relu.backward(d_out)
print("Backward output (gradients):\n", out_backward)

In [None]:
# Sigmoid

In [None]:
import numpy as np

class Sigmoid:
    """Logistic sigmoid activation with a cached output for backprop."""

    def __init__(self):
        self.out = None  # store forward output for backward

    def forward(self, X):
        """
        Forward pass of Sigmoid activation.

        Computes ``1 / (1 + exp(-X))`` element-wise without overflowing:
        only ``exp(-|X|)`` is evaluated, which always lies in (0, 1].

        X: array-like of real values.
        Returns an array of the same shape with values in [0, 1]; the result
        is also stored in ``self.out``.
        """
        X = np.asarray(X)
        decay = np.exp(-np.abs(X))
        # For X >= 0 this is the textbook formula; for X < 0 the equivalent
        # exp(X) / (1 + exp(X)) is used so exp never sees a large positive
        # argument.
        self.out = np.where(X >= 0, 1 / (1 + decay), decay / (1 + decay))
        return self.out

    def backward(self, d_out):
        """
        Backward pass: apply derivative of sigmoid.

        Uses the output cached by ``forward``: d(sigmoid)/dX = out * (1 - out).
        """
        return d_out * (self.out * (1 - self.out))


In [None]:
sigmoid = Sigmoid()

X = np.array([[-1, 0, 1], [2, -2, 3]])

# Forward pass: squash every entry into (0, 1)
out_forward = sigmoid.forward(X)
print("Forward output:\n", out_forward)

# Backward pass, feeding an all-ones gradient as if from the next layer
d_out = np.ones_like(X)
out_backward = sigmoid.backward(d_out)
print("Backward output (gradients):\n", out_backward)


In [None]:
# softmax

In [None]:
import numpy as np

class Softmax:
    """Row-wise softmax activation (normalises along axis 1)."""

    def __init__(self):
        self.out = None  # store probabilities

    def forward(self, X):
        """
        Forward pass of Softmax activation.

        X: (n_samples, n_classes) scores.
        Returns the probabilities, normalised along axis 1; they are also
        stored in ``self.out`` for the backward pass.
        """
        # Shift values for numerical stability
        shift_X = X - np.max(X, axis=1, keepdims=True)
        exp_X = np.exp(shift_X)
        self.out = exp_X / np.sum(exp_X, axis=1, keepdims=True)
        return self.out

    def backward(self, d_out):
        """
        Backward pass of softmax.
        d_out: gradient from next layer (usually loss)

        For one sample with probabilities y the Jacobian is
        diag(y) - y y^T, so the Jacobian-vector product reduces to
        ``y * (d_out - sum(d_out * y))``. That form is evaluated for the
        whole batch at once instead of building a matrix per sample.

        The result takes its dtype from the float probabilities, so an
        integer ``d_out`` no longer truncates the gradient.
        """
        d_out = np.asarray(d_out)
        weighted_sum = np.sum(d_out * self.out, axis=1, keepdims=True)
        return self.out * (d_out - weighted_sum)

In [None]:
softmax = Softmax()

X = np.array([[1, 2, 3], [0.5, 1.5, -1]])

# Forward pass: each row becomes a probability distribution
out_forward = softmax.forward(X)
print("Forward output (probabilities):\n", out_forward)

# Backward pass, feeding an all-ones gradient as if from the next layer
d_out = np.ones_like(X)
out_backward = softmax.backward(d_out)
print("Backward output (gradients):\n", out_backward)

In [None]:
# binary cross entropy loss

In [None]:
import numpy as np

class BinaryCrossEntropyLoss:
    """Mean binary cross-entropy between labels and predicted probabilities."""

    def __init__(self):
        self.y_true = None  # labels from the last forward call (float64 array)
        self.y_pred = None  # clipped predictions from the last forward call

    def forward(self, y_true, y_pred):
        """
        Compute binary cross-entropy loss.
        y_true: (n_samples,) ground truth labels (0 or 1)
        y_pred: (n_samples,) predicted probabilities (0 < p < 1)

        Both arguments may be any array-like. Returns the loss averaged over
        all elements.
        """
        # Work in float64: in float32 the upper clip bound 1 - 1e-15 rounds
        # to exactly 1.0, so clipping would not prevent log(0).
        self.y_true = np.asarray(y_true, dtype=np.float64)
        self.y_pred = np.clip(
            np.asarray(y_pred, dtype=np.float64), 1e-15, 1 - 1e-15
        )  # prevent log(0)
        loss = -np.mean(
            self.y_true * np.log(self.y_pred)
            + (1 - self.y_true) * np.log(1 - self.y_pred)
        )
        return loss

    def backward(self):
        """
        Compute gradient of BCE loss w.r.t. predictions.
        Gradient is averaged over batch.

        Uses the labels and clipped predictions stored by ``forward`` and
        returns an array of the same shape as the predictions.
        """
        # forward() averages over every element, so divide by the element
        # count; for the documented 1-D inputs this equals n_samples.
        n = self.y_true.size
        grad = (self.y_pred - self.y_true) / (self.y_pred * (1 - self.y_pred) * n)
        return grad

In [None]:
bce = BinaryCrossEntropyLoss()

# Ground-truth labels and the matching predicted probabilities
y_true = np.array([0, 1, 1, 0])
y_pred = np.array([0.1, 0.9, 0.8, 0.2])

# Forward pass: scalar loss
loss = bce.forward(y_true, y_pred)
print("BCE Loss:", loss)

# Backward pass: gradient from the values cached by forward()
grad = bce.backward()
print("Gradients:", grad)

In [None]:
# cross entropy loss

In [None]:
import numpy as np

class CrossEntropyLoss:
    """Multi-class cross-entropy on probabilities (i.e. applied after softmax)."""

    def __init__(self):
        self.y_true = None    # class indices from the last forward call
        self.y_pred = None    # clipped probabilities from the last forward call
        self._targets = None  # (n_samples, n_classes) target distribution

    def forward(self, y_true, y_pred):
        """
        Compute multi-class cross entropy loss.

        y_true: (n_samples,) class indices or (n_samples, n_classes) one-hot
        y_pred: (n_samples, n_classes) predicted probabilities (after softmax)

        Returns the loss averaged over samples.
        """
        y_true = np.asarray(y_true)
        # float64 so that the clip bounds are representable (in float32,
        # 1 - 1e-15 rounds to 1.0).
        self.y_pred = np.clip(np.asarray(y_pred, dtype=np.float64), 1e-15, 1 - 1e-15)
        n_samples = self.y_pred.shape[0]

        if y_true.ndim == 1:
            # Labels given as class indices: expand to one-hot rows
            self.y_true = y_true
            targets = np.zeros_like(self.y_pred)
            targets[np.arange(n_samples), y_true] = 1.0
        else:
            # One-hot encoded labels
            self.y_true = np.argmax(y_true, axis=1)
            targets = y_true.astype(np.float64)

        self._targets = targets
        return -np.mean(np.sum(targets * np.log(self.y_pred), axis=1))

    def backward(self):
        """
        Compute gradient of loss w.r.t predictions.

        With L = -(1/n) * sum_ij t_ij * log(p_ij) the derivative is
        dL/dp_ij = -t_ij / (n * p_ij). This is the gradient with respect to
        the probabilities, not the logits, so it can be fed to a softmax
        layer's backward pass. (The fused SoftmaxCrossEntropyLoss returns
        the gradient w.r.t. logits directly.)
        """
        n_samples = self.y_pred.shape[0]
        return -self._targets / (self.y_pred * n_samples)

In [None]:
# Rows are per-sample class probabilities (each row sums to 1)
softmax_out = np.array([[0.1, 0.7, 0.2], [0.8, 0.1, 0.1], [0.2, 0.2, 0.6]])

# The same labels in two encodings: class indices ...
y_true_idx = np.array([1, 0, 2])

# ... and one-hot rows
y_true_onehot = np.array([[0, 1, 0], [1, 0, 0], [0, 0, 1]])

cel = CrossEntropyLoss()

# Forward pass using the index encoding
loss_idx = cel.forward(y_true_idx, softmax_out)
print("Cross-Entropy Loss (indices):", loss_idx)

# Backward pass from the values cached by forward()
grad = cel.backward()
print("Gradients (w.r.t predictions):\n", grad)

In [None]:
# softmax + cross entropy loss
import numpy as np

class SoftmaxCrossEntropyLoss:
    """Fused softmax + cross-entropy loss operating directly on logits."""

    def __init__(self):
        self.probs = None   # softmax probabilities from the last forward call
        self.y_true = None  # class indices from the last forward call

    def forward(self, logits, y_true):
        """
        Compute softmax + cross entropy loss in one step.

        logits: (n_samples, n_classes) raw scores
        y_true: (n_samples,) class indices or (n_samples, n_classes) one-hot

        Returns the loss averaged over samples. The softmax probabilities
        are stored in ``self.probs``.
        """
        logits = np.asarray(logits)
        y_true = np.asarray(y_true)

        # Shift logits for numerical stability
        shift_logits = logits - np.max(logits, axis=1, keepdims=True)
        exp_logits = np.exp(shift_logits)
        normalizer = np.sum(exp_logits, axis=1, keepdims=True)
        self.probs = exp_logits / normalizer

        # Log-softmax computed from the shifted logits rather than as
        # log(probs): a probability that underflows to 0 would give -inf
        # (and NaN once multiplied by a 0 in a one-hot row). The normalizer
        # is >= 1 because the row maximum contributes exp(0), so its log is
        # always finite.
        log_probs = shift_logits - np.log(normalizer)

        n_samples = logits.shape[0]

        if y_true.ndim == 1:  # class indices
            self.y_true = y_true
            loss = -np.mean(log_probs[np.arange(n_samples), y_true])
        else:  # one-hot encoded
            self.y_true = np.argmax(y_true, axis=1)
            loss = -np.mean(np.sum(y_true * log_probs, axis=1))

        return loss

    def backward(self):
        """
        Gradient of softmax + cross entropy w.r.t logits.

        Equals (probs - one_hot(y_true)) / n_samples, using the values
        cached by ``forward``.
        """
        n_samples = self.probs.shape[0]
        grad = self.probs.copy()
        grad[np.arange(n_samples), self.y_true] -= 1
        grad /= n_samples
        return grad

In [None]:
# Raw scores, one row per sample
logits = np.array([[1.0, 2.0, 3.0], [1.0, 3.0, 0.5], [2.0, 1.0, 0.1]])

# Target class index for each row
y_true = np.array([2, 1, 0])

sce = SoftmaxCrossEntropyLoss()

# Forward pass: scalar loss
loss = sce.forward(logits, y_true)
print("Softmax + CrossEntropy Loss:", loss)

# Backward pass from the values cached by forward()
grad = sce.backward()
print("Gradients (w.r.t logits):\n", grad)

In [None]:
# MLP

In [1]:
import numpy as np

# ---------- Utility Functions ----------
def relu(x):
    """Element-wise ReLU: the larger of 0 and each entry of x."""
    floor = 0
    rectified = np.maximum(floor, x)
    return rectified

def relu_backward(dout, x):
    """
    Backprop through ReLU.

    Returns a copy of ``dout`` with entries zeroed wherever the forward
    input ``x`` was <= 0; ``dout`` itself is left untouched.
    """
    inactive = x <= 0
    grad_in = dout.copy()
    grad_in[inactive] = 0
    return grad_in

# ---------- Loss: Softmax + CrossEntropy ----------
class SoftmaxCrossEntropyLoss:
    """Fused softmax + cross-entropy loss operating directly on logits."""

    def __init__(self):
        self.probs = None   # softmax probabilities from the last forward call
        self.y_true = None  # class indices from the last forward call

    def forward(self, logits, y_true):
        """
        Return the mean cross-entropy of softmax(logits) against y_true.

        logits: (n_samples, n_classes) raw scores
        y_true: (n_samples,) class indices or (n_samples, n_classes) one-hot

        The softmax probabilities are stored in ``self.probs``.
        """
        logits = np.asarray(logits)
        y_true = np.asarray(y_true)

        # Shift logits for numerical stability
        shift_logits = logits - np.max(logits, axis=1, keepdims=True)
        exp_logits = np.exp(shift_logits)
        normalizer = np.sum(exp_logits, axis=1, keepdims=True)
        self.probs = exp_logits / normalizer

        # Log-softmax from the shifted logits instead of log(probs): a
        # probability that underflows to 0 would otherwise give -inf, or NaN
        # after multiplying by a 0 in a one-hot row. The normalizer is >= 1
        # (the row maximum contributes exp(0)), so its log is finite.
        log_probs = shift_logits - np.log(normalizer)

        n_samples = logits.shape[0]

        if y_true.ndim == 1:  # class indices
            self.y_true = y_true
            loss = -np.mean(log_probs[np.arange(n_samples), y_true])
        else:  # one-hot labels
            self.y_true = np.argmax(y_true, axis=1)
            loss = -np.mean(np.sum(y_true * log_probs, axis=1))
        return loss

    def backward(self):
        """
        Gradient of the loss w.r.t. the logits.

        Equals (probs - one_hot(y_true)) / n_samples, using the values
        cached by ``forward``.
        """
        n_samples = self.probs.shape[0]
        grad = self.probs.copy()
        grad[np.arange(n_samples), self.y_true] -= 1
        grad /= n_samples
        return grad

# ---------- Simple MLP ----------
class MLP:
    """
    Two-layer perceptron: linear -> ReLU -> linear, producing logits.

    ``backward`` both back-propagates and applies a plain gradient-descent
    step with learning rate ``lr``; it relies on the activations cached by
    the most recent ``forward`` call.
    """

    def __init__(self, input_dim, hidden_dim, output_dim, lr=0.1):
        self.lr = lr
        # Gaussian weights scaled by 1/sqrt(fan_in), zero biases. The first
        # layer is drawn before the second, which fixes the order in which
        # the global RNG is consumed.
        self.W1, self.b1 = self._init_layer(input_dim, hidden_dim)
        self.W2, self.b2 = self._init_layer(hidden_dim, output_dim)

    @staticmethod
    def _init_layer(fan_in, fan_out):
        """Return (weights, bias) for one fully connected layer."""
        weights = np.random.randn(fan_in, fan_out) / np.sqrt(fan_in)
        bias = np.zeros((1, fan_out))
        return weights, bias

    def forward(self, X):
        """Compute logits for X, caching X, z1, a1 and z2 for backward()."""
        self.X = X
        hidden_pre = X @ self.W1 + self.b1
        hidden_act = relu(hidden_pre)
        scores = hidden_act @ self.W2 + self.b2
        self.z1, self.a1, self.z2 = hidden_pre, hidden_act, scores
        return scores  # logits

    def backward(self, dlogits):
        """Back-propagate dlogits and update all parameters in place."""
        # Gradient reaching the hidden layer, then through the ReLU
        d_hidden_act = dlogits @ self.W2.T
        d_hidden_pre = relu_backward(d_hidden_act, self.z1)

        # Every gradient is computed before any parameter is modified
        updates = (
            (self.W1, self.X.T @ d_hidden_pre),
            (self.b1, np.sum(d_hidden_pre, axis=0, keepdims=True)),
            (self.W2, self.a1.T @ dlogits),
            (self.b2, np.sum(dlogits, axis=0, keepdims=True)),
        )

        # In-place gradient-descent step
        for param, gradient in updates:
            param -= self.lr * gradient

# ---------- Training Example ----------
# Seed the global RNG so the weight initialisation is reproducible
np.random.seed(42)

# XOR toy problem: four 2-D points, two classes
X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
y = np.array([0, 1, 1, 0])

# Model and loss
mlp = MLP(input_dim=2, hidden_dim=4, output_dim=2, lr=0.1)
criterion = SoftmaxCrossEntropyLoss()

# Full-batch training; mlp.backward() also applies the parameter update
for epoch in range(200):
    logits = mlp.forward(X)
    loss = criterion.forward(logits, y)
    grad = criterion.backward()
    mlp.backward(grad)

    # Report only on every 50th epoch
    if (epoch + 1) % 50 != 0:
        continue
    preds = criterion.probs.argmax(axis=1)
    acc = (preds == y).mean()
    print(f"Epoch {epoch+1}, Loss={loss:.4f}, Acc={acc:.2f}")

Epoch 50, Loss=0.5897, Acc=0.75
Epoch 100, Loss=0.4609, Acc=1.00
Epoch 150, Loss=0.3563, Acc=1.00
Epoch 200, Loss=0.2676, Acc=1.00
