<a href="https://colab.research.google.com/github/beingshub02/Deep-Learning-Summer-School-IIITDM/blob/main/25DLS455ASSIGNMENT3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

P-1. Data Preparation

In [6]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.model_selection import train_test_split

# Load the Iris dataset
iris = load_iris()
X = iris.data
y = iris.target.reshape(-1, 1)  # Reshape to column vector for encoding

# One-hot encode the labels (compatible with newer sklearn)
encoder = OneHotEncoder(sparse_output=False)
y_encoded = encoder.fit_transform(y)

# Normalize features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Split the dataset
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, y_encoded, test_size=0.2, random_state=42
)

# Print shapes
print("X_train shape:", X_train.shape)
print("X_test shape:", X_test.shape)
print("y_train shape:", y_train.shape)
print("y_test shape:", y_test.shape)


X_train shape: (120, 4)
X_test shape: (30, 4)
y_train shape: (120, 3)
y_test shape: (30, 3)


P-2. Activation function

In [2]:
def get_activation_funcs(name):
    if name == "sigmoid":
        return lambda x: 1 / (1 + np.exp(-x)), lambda x: (x * (1 - x))  # Use output for derivative
    elif name == "relu":
        return lambda x: np.maximum(0, x), lambda x: (x > 0).astype(float)
    elif name == "tanh":
        return lambda x: np.tanh(x), lambda x: 1 - np.tanh(x)**2
    elif name == "leaky_relu":
        return lambda x: np.where(x > 0, x, 0.01 * x), lambda x: np.where(x > 0, 1, 0.01)
    else:
        raise ValueError("Unsupported activation")

# Test activations
sample_input = np.array([[0, 1, -1]])

sigmoid, _ = get_activation_funcs("sigmoid")
relu, _ = get_activation_funcs("relu")

print("Sigmoid output:", sigmoid(sample_input))
print("ReLU output:", relu(sample_input))


Sigmoid output: [[0.5        0.73105858 0.26894142]]
ReLU output: [[0 1 0]]


P-3. Train the MLP Model on Iris Data

In [7]:
def train_model(X_train, y_train, X_test, y_test, activation_name, epochs=1000, lr=0.01, hidden_units=10):
    act, act_deriv = get_activation_funcs(activation_name)

    input_dim = X_train.shape[1]
    output_dim = y_train.shape[1]

    # Initialize weights
    W1 = np.random.randn(input_dim, hidden_units) * 0.1
    b1 = np.zeros((1, hidden_units))
    W2 = np.random.randn(hidden_units, output_dim) * 0.1
    b2 = np.zeros((1, output_dim))

    losses = []

    for epoch in range(epochs):
        # Forward pass
        z1 = X_train @ W1 + b1
        a1 = act(z1)
        z2 = a1 @ W2 + b2
        out = 1 / (1 + np.exp(-z2))  # Output activation (sigmoid)

        # Loss (MSE)
        loss = np.mean((out - y_train) ** 2)
        losses.append(loss)

        # Backward pass
        d_out = (out - y_train) * out * (1 - out)
        dW2 = a1.T @ d_out
        db2 = np.sum(d_out, axis=0, keepdims=True)

        d_hidden = d_out @ W2.T * act_deriv(a1)
        dW1 = X_train.T @ d_hidden
        db1 = np.sum(d_hidden, axis=0, keepdims=True)

        # Gradient descent
        W1 -= lr * dW1
        b1 -= lr * db1
        W2 -= lr * dW2
        b2 -= lr * db2

    # Define predict function
    def predict(X):
        z1 = X @ W1 + b1
        a1 = act(z1)
        z2 = a1 @ W2 + b2
        probs = 1 / (1 + np.exp(-z2))
        return np.argmax(probs, axis=1)

    y_pred = predict(X_test)
    y_true = np.argmax(y_test, axis=1)
    accuracy = np.mean(y_pred == y_true)

    return losses, accuracy, predict


P-4. Report Accuracy of Each Activation

In [4]:
print("Accuracies:")
print(accuracies)


Accuracies:
{}


P-5. Predict Class of a New Sample

In [12]:
import numpy as np
from sklearn.preprocessing import StandardScaler

# P-5: Predict Class of a New Sample

# New sample
sample = np.array([[5.1, 3.5, 1.4, 0.2]])

# Use the same scaler used in P-1
sample_scaled = scaler.transform(sample)

# Use the trained ReLU model from P-3 (retrain if needed)
_, _, model_predict_relu = train_model(X_train, y_train, X_test, y_test, activation_name="relu")

# Predict the class
pred_class = model_predict_relu(sample_scaled)

# Print the predicted class
print("\nPredicted class index for sample [5.1, 3.5, 1.4, 0.2]:", pred_class[0])



Predicted class index for sample [5.1, 3.5, 1.4, 0.2]: 0
