# Task 1 - AND Gate Perceptron Model

In [None]:
import numpy as np

In [None]:
def activation_function(x):
    """Step activation: return 1 for a non-negative input, otherwise 0."""
    if x >= 0:
        return 1
    return 0

def perceptron_model(x, w, b):
    """Return the perceptron output for input x.

    The weighted sum np.dot(x, w) + b is passed through the step
    activation, so the result is either 0 or 1.
    """
    weighted_sum = np.dot(x, w) + b
    return activation_function(weighted_sum)

In [None]:
# Truth table of the two-input AND gate: every (x1, x2) combination in
# the order (0, 0), (0, 1), (1, 0), (1, 1).
X = np.array([[x1, x2] for x1 in (0, 1) for x2 in (0, 1)])
# Target output: 1 only when both inputs are 1.
Y = np.array([x1 & x2 for x1, x2 in X.tolist()])

In [None]:
# Step size of each weight/bias correction and number of passes over the data.
learning_rate, epochs = 0.1, 10

# Initial parameters: one weight per input (both 0.5) and a zero bias.
w = np.full(2, 0.5)
b = 0.0

Perceptron Model:

In [None]:
for epoch in range(epochs):
    # Report the parameters as they stand at the start of this epoch.
    print("Epoch:", epoch)
    print("  w:", w, "  b:", b)
    for x, y in zip(X, Y):
        y_pred = perceptron_model(x, w, b)
        # Perceptron learning rule: the error is -1, 0 or +1, so the
        # parameters only move when the sample is misclassified.
        error = y - y_pred
        step = learning_rate * error
        w = w + step * x
        b = b + step

Epoch: 0
  w: [0.5 0.5]   b: 0.0
Epoch: 1
  w: [0.4 0.4]   b: -0.30000000000000004
Epoch: 2
  w: [0.3 0.3]   b: -0.5
Epoch: 3
  w: [0.3 0.3]   b: -0.5
Epoch: 4
  w: [0.3 0.3]   b: -0.5
Epoch: 5
  w: [0.3 0.3]   b: -0.5
Epoch: 6
  w: [0.3 0.3]   b: -0.5
Epoch: 7
  w: [0.3 0.3]   b: -0.5
Epoch: 8
  w: [0.3 0.3]   b: -0.5
Epoch: 9
  w: [0.3 0.3]   b: -0.5


# Task 2 - Iris Dataset

In [None]:
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder

In [None]:
# Load the Iris features and integer class labels.
iris = load_iris()
X, y = iris.data, iris.target

# Hold out 20% of the samples; the fixed random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# One-hot encode the training labels. The encoder expects a 2-D column,
# and .toarray() turns its sparse result into a dense array.
encoder = OneHotEncoder()
y_train_column = y_train.reshape(-1, 1)
y_encoder = encoder.fit(y_train_column)
y_train_encoded = y_encoder.transform(y_train_column).toarray()

In [None]:
def sigmoid(x):
    """Logistic function 1 / (1 + exp(-x)), evaluated element-wise.

    The input is limited to [-500, 500] first so that np.exp cannot
    overflow for inputs of very large magnitude.
    """
    bounded = np.clip(x, -500, 500)
    denominator = 1 + np.exp(-bounded)
    return 1 / denominator

def sigmoid_derivative(x):
    """Derivative of the sigmoid at pre-activation x: s * (1 - s) with s = sigmoid(x)."""
    s = sigmoid(x)
    return s * (1 - s)

def perceptron_model(x, w, b):
    """Return the sigmoid activation of the weighted sum np.dot(x, w) + b."""
    weighted_sum = np.dot(x, w) + b
    return sigmoid(weighted_sum)

Training function:

In [None]:
def train(X, y, learning_rate, epochs, hidden_nodes):
    """Train a one-hidden-layer sigmoid network with full-batch gradient descent.

    Args:
        X: 2-D feature matrix, one row per sample; X.shape[1] sets the
            number of input nodes.
        y: 2-D target matrix, one row per sample; y.shape[1] sets the
            number of output nodes. This file passes one-hot labels.
        learning_rate: Step size applied to every parameter update.
        epochs: Number of forward/backward passes over the whole of X.
        hidden_nodes: Number of nodes in the hidden layer.

    Returns:
        The tuple (w1, b1, w2, b2): hidden-layer weights and bias followed
        by output-layer weights and bias.

    Side effects:
        Prints the accuracy of the returned parameters on (X, y).
    """
    input_nodes = X.shape[1]
    output_nodes = y.shape[1]
    # Weights start from standard-normal noise, biases from zero.
    w1 = np.random.randn(input_nodes, hidden_nodes)
    b1 = np.zeros((1, hidden_nodes))  # bias row vector for the hidden layer
    w2 = np.random.randn(hidden_nodes, output_nodes)
    b2 = np.zeros((1, output_nodes))

    for _ in range(epochs):
        # Forward propagation
        z1 = np.dot(X, w1) + b1
        a1 = sigmoid(z1)
        z2 = np.dot(a1, w2) + b2
        a2 = sigmoid(z2)

        # Backward propagation. sigmoid_derivative expects the
        # pre-activation (z), not the activation (a): passing a would
        # apply the sigmoid a second time and give the wrong slope.
        error = y - a2
        delta2 = error * sigmoid_derivative(z2)
        delta1 = np.dot(delta2, w2.T) * sigmoid_derivative(z1)

        # error is (target - output), so adding the scaled gradient
        # moves the output towards the target.
        w2 += learning_rate * np.dot(a1.T, delta2)
        b2 += learning_rate * np.sum(delta2, axis=0, keepdims=True)
        w1 += learning_rate * np.dot(X.T, delta1)
        b1 += learning_rate * np.sum(delta1, axis=0, keepdims=True)

    # Score the parameters that are actually returned: run one more
    # forward pass after the last update (this also covers epochs == 0).
    a1 = sigmoid(np.dot(X, w1) + b1)
    a2 = sigmoid(np.dot(a1, w2) + b2)

    # Compare against the labels that were passed in, not a global.
    y_pred = np.argmax(a2, axis=1)
    y_true = np.argmax(y, axis=1)
    accuracy = np.mean(y_pred == y_true)
    print("Accuracy:", accuracy)

    return w1, b1, w2, b2

np.argmax() converts the network output — a matrix with one row per sample and one sigmoid activation (score) per class — into a vector of predicted class labels. For each row it returns the index of the largest score, which is the predicted class for that sample. This vector of predictions is then compared element-wise with the true class labels of the training set, and the fraction of matches is reported as the accuracy.

In [None]:
# Baseline configuration: learning rate 0.1, 1000 epochs, 10 hidden nodes.
# train() draws its initial weights with np.random.randn and no seed is
# set here, so the printed accuracy differs from run to run.
learning_rate, epochs, hidden_nodes = 0.1, 1000, 10
w1, b1, w2, b2 = train(
    X_train,
    y_train_encoded,
    learning_rate=learning_rate,
    epochs=epochs,
    hidden_nodes=hidden_nodes,
)

Accuracy: 0.7916666666666666


In [None]:
# Same epochs and hidden nodes as the first run, but with a ten times
# smaller learning rate (0.01 instead of 0.1).
learning_rate, epochs, hidden_nodes = 0.01, 1000, 10
w1, b1, w2, b2 = train(
    X_train,
    y_train_encoded,
    learning_rate=learning_rate,
    epochs=epochs,
    hidden_nodes=hidden_nodes,
)

Accuracy: 0.975


In [None]:
# Same learning rate and hidden nodes as the first run, but with half
# the epochs (500 instead of 1000).
learning_rate, epochs, hidden_nodes = 0.1, 500, 10
w1, b1, w2, b2 = train(
    X_train,
    y_train_encoded,
    learning_rate=learning_rate,
    epochs=epochs,
    hidden_nodes=hidden_nodes,
)

Accuracy: 0.6583333333333333


In [None]:
# Same learning rate and epochs as the first run, but with a smaller
# hidden layer (5 nodes instead of 10).
learning_rate, epochs, hidden_nodes = 0.1, 1000, 5
w1, b1, w2, b2 = train(
    X_train,
    y_train_encoded,
    learning_rate=learning_rate,
    epochs=epochs,
    hidden_nodes=hidden_nodes,
)

Accuracy: 0.3416666666666667


# For 2 hidden layers

In [None]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder

# Iris features and integer class labels
iris = load_iris()
X, y = iris.data, iris.target

# 80/20 train-test split; the fixed random_state makes it repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit the one-hot encoder on the training labels, reshaped into the
# 2-D column the encoder expects
encoder = OneHotEncoder()
y_train_column = y_train.reshape(-1, 1)
y_encoder = encoder.fit(y_train_column)

# Dense one-hot matrix of the training labels
y_train_encoded = y_encoder.transform(y_train_column).toarray()

# Define the activation function
def sigmoid(x):
    """Logistic function 1 / (1 + exp(-x)), evaluated element-wise.

    The input is clipped to [-500, 500] before exponentiation, matching
    the earlier sigmoid definition in this file. Without the clip,
    np.exp(-x) overflows to inf (with a RuntimeWarning) for very
    negative x.
    """
    x = np.clip(x, -500, 500)
    return 1 / (1 + np.exp(-x))

# Define the derivative of the activation function
def sigmoid_derivative(x):
    """Slope of the sigmoid at pre-activation x, i.e. s * (1 - s) where s = sigmoid(x)."""
    s = sigmoid(x)
    return s * (1 - s)

# Define the perceptron function
def perceptron_model(x, w, b):
    """Apply the sigmoid to the weighted sum np.dot(x, w) + b and return it."""
    weighted_sum = np.dot(x, w) + b
    return sigmoid(weighted_sum)

# Define the training function
def train(X, y, learning_rate, epochs, hidden_nodes1, hidden_nodes2):
    """Train a two-hidden-layer sigmoid network with full-batch gradient descent.

    Args:
        X: 2-D feature matrix, one row per sample; X.shape[1] sets the
            number of input nodes.
        y: 2-D target matrix, one row per sample; y.shape[1] sets the
            number of output nodes. This file passes one-hot labels.
        learning_rate: Step size applied to every parameter update.
        epochs: Number of forward/backward passes over the whole of X.
        hidden_nodes1: Number of nodes in the first hidden layer.
        hidden_nodes2: Number of nodes in the second hidden layer.

    Returns:
        The tuple (w1, b1, w2, b2, w3, b3): weights and bias of the first
        hidden layer, the second hidden layer and the output layer.

    Side effects:
        Prints the accuracy of the returned parameters on (X, y).
    """
    input_nodes = X.shape[1]
    output_nodes = y.shape[1]
    # Weights start from standard-normal noise, biases from zero.
    w1 = np.random.randn(input_nodes, hidden_nodes1)
    b1 = np.zeros((1, hidden_nodes1))
    w2 = np.random.randn(hidden_nodes1, hidden_nodes2)
    b2 = np.zeros((1, hidden_nodes2))
    w3 = np.random.randn(hidden_nodes2, output_nodes)
    b3 = np.zeros((1, output_nodes))

    for _ in range(epochs):
        # Forward propagation
        z1 = np.dot(X, w1) + b1
        a1 = sigmoid(z1)
        z2 = np.dot(a1, w2) + b2
        a2 = sigmoid(z2)
        z3 = np.dot(a2, w3) + b3
        a3 = sigmoid(z3)

        # Backward propagation. sigmoid_derivative expects the
        # pre-activation (z), not the activation (a): passing a would
        # apply the sigmoid a second time and give the wrong slope.
        error = y - a3
        delta3 = error * sigmoid_derivative(z3)
        delta2 = np.dot(delta3, w3.T) * sigmoid_derivative(z2)
        delta1 = np.dot(delta2, w2.T) * sigmoid_derivative(z1)

        # Weight updates. error is (target - output), so adding the
        # scaled gradient moves the output towards the target.
        w3 += learning_rate * np.dot(a2.T, delta3)
        b3 += learning_rate * np.sum(delta3, axis=0, keepdims=True)
        w2 += learning_rate * np.dot(a1.T, delta2)
        b2 += learning_rate * np.sum(delta2, axis=0, keepdims=True)
        w1 += learning_rate * np.dot(X.T, delta1)
        b1 += learning_rate * np.sum(delta1, axis=0, keepdims=True)

    # Score the parameters that are actually returned: run one more
    # forward pass after the last update (this also covers epochs == 0).
    a1 = sigmoid(np.dot(X, w1) + b1)
    a2 = sigmoid(np.dot(a1, w2) + b2)
    a3 = sigmoid(np.dot(a2, w3) + b3)

    # Compare against the labels that were passed in, not a global.
    y_pred = np.argmax(a3, axis=1)
    y_true = np.argmax(y, axis=1)
    accuracy = np.mean(y_pred == y_true)
    print("Accuracy:", accuracy)

    return w1, b1, w2, b2, w3, b3

# Hyperparameters for the two-hidden-layer network (10 nodes per hidden
# layer), followed by the training run
learning_rate, epochs = 0.1, 1000
hidden_nodes1 = hidden_nodes2 = 10
w1, b1, w2, b2, w3, b3 = train(
    X_train,
    y_train_encoded,
    learning_rate=learning_rate,
    epochs=epochs,
    hidden_nodes1=hidden_nodes1,
    hidden_nodes2=hidden_nodes2,
)

# Evaluate on the held-out test set: forward pass through both hidden
# layers and the output layer using the trained parameters
z1 = X_test.dot(w1) + b1
a1 = sigmoid(z1)
z2 = a1.dot(w2) + b2
a2 = sigmoid(z2)
z3 = a2.dot(w3) + b3
a3 = sigmoid(z3)

# The predicted class is the output node with the largest activation;
# accuracy is the fraction of test samples predicted correctly
y_pred = a3.argmax(axis=1)
accuracy = (y_pred == y_test).mean()
print("Accuracy:", accuracy)
