<a href="https://colab.research.google.com/github/fatmaT2001/CustomNeuralNetwork_Implementation/blob/main/NeuralNetworkFromScratch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn import datasets
from keras.datasets import mnist
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.utils import shuffle
import pandas as pd
from sklearn.preprocessing import OneHotEncoder
from keras.models import Sequential
from keras.layers import Dense




Implement the input, hidden, and output layers according to the input and output shapes.

In [None]:
def one_hot_encode(class_label, num_classes):
    """
    Convert a class label into a one-hot encoded vector.

    Parameters:
    class_label (int): Zero-based class index; must satisfy 0 <= class_label < num_classes.
    num_classes (int): The total number of classes.

    Returns:
    numpy.ndarray: A float vector of shape (num_classes,) that is 1 at
    position class_label and 0 everywhere else.

    Raises:
    IndexError: If class_label is outside the range [0, num_classes).
    """
    # NumPy indexing would silently wrap a negative label around to the end of
    # the vector and encode the wrong class, so reject it explicitly.
    if not 0 <= class_label < num_classes:
        raise IndexError(
            f"class_label {class_label} is out of range for {num_classes} classes"
        )

    one_hot_vector = np.zeros(num_classes)
    one_hot_vector[class_label] = 1
    return one_hot_vector

# Activation functions

## ReLU

In [None]:
def Relu(sum):
    """
    Rectified linear unit, applied element-wise.

    Parameters:
    sum: Scalar or array of pre-activation values.

    Returns:
    The input with every negative entry replaced by 0.
    """
    lower_bound = 0
    return np.maximum(sum, lower_bound)


## sigmoid

In [None]:
def sigmoid(sum):
    """
    Logistic sigmoid 1 / (1 + exp(-x)), applied element-wise.

    Parameters:
    sum: Scalar or array-like of pre-activation values.

    Returns:
    Values in [0, 1] with the same shape as the input.
    """
    z = np.asarray(sum, dtype=float)
    # 1 / (1 + exp(-z)) == exp(-log(1 + exp(-z))) == exp(-logaddexp(0, -z)).
    # logaddexp never forms exp(-z) directly, so large negative inputs no
    # longer overflow (and no longer emit a RuntimeWarning).
    return np.exp(-np.logaddexp(0.0, -z))

# NN Module

## NN layers and weight initialization

In [None]:
import numpy as np

def nn_layers(data, label, hidden_layers, seed=None):
    """
    Initialize the weight matrices of a fully connected neural network.

    Each matrix has shape (neurons_in_layer, neurons_in_previous_layer), is
    drawn from a standard normal distribution and rounded to 4 decimals.

    Parameters:
    data: The input data, shape (M, N). Only data.shape[1] is used, as the
          size of the input layer; no bias column is added by this function.
    label: The output labels; the number of distinct values sets the size of
           the output layer.
    hidden_layers: Non-empty list with the number of neurons in each hidden layer.
    seed: Optional integer. When given, the weights come from a dedicated
          RandomState seeded with it, so the result is reproducible. When None
          (the default), NumPy's global random state is used.

    Returns:
    weights: List of weight matrices, one per layer transition
             (input->hidden[0], hidden[i-1]->hidden[i], hidden[-1]->output).
    """
    # np.random (module) and RandomState (instance) both expose randn, so the
    # default path draws exactly as before while a seed gives an isolated stream.
    rng = np.random if seed is None else np.random.RandomState(seed)

    input_layer_neurons = data.shape[1]
    output_layer_neurons = len(np.unique(label))

    # Input layer -> first hidden layer
    weights = [rng.randn(hidden_layers[0], input_layer_neurons)]

    # Hidden layer -> next hidden layer
    for previous_size, current_size in zip(hidden_layers, hidden_layers[1:]):
        weights.append(rng.randn(current_size, previous_size))

    # Last hidden layer -> output layer
    weights.append(rng.randn(output_layer_neurons, hidden_layers[-1]))

    # Round the weights to 4 decimals
    return [np.round(layer_weights, 4) for layer_weights in weights]



## Forward propagation

In [None]:
def forward(weights, record):
    """
    Run one forward pass through the network.

    The input is passed through ReLU, every hidden layer and the output layer
    use a sigmoid activation, and all returned activations are rounded to
    4 decimals.

    Parameters:
    weights: List of weight matrices, one per layer transition; weights[-1]
             maps the last hidden layer to the output layer.
    record: Input data, shape (n_input_features, n_samples).

    Returns:
    hidden_layer_output: List with the rounded activations of each hidden layer.
    predictions: Rounded activations of the output layer,
                 shape (n_output_units, n_samples).
    """
    # First hidden layer sees the ReLU-rectified input
    activations = [sigmoid(weights[0] @ Relu(record))]

    # Remaining hidden layers each consume the previous layer's activations
    for layer_weights in weights[1:-1]:
        activations.append(sigmoid(layer_weights @ activations[-1]))

    # Output layer
    output = sigmoid(weights[-1] @ activations[-1])

    # Rounding happens only after the whole pass, so intermediate products
    # are computed from the unrounded activations.
    rounded_hidden = [np.round(layer_activation, 4) for layer_activation in activations]
    return rounded_hidden, np.round(output, 4)



## Backward propagation

In [None]:
def Backward(input_data, target, learning_rate, weights, hidden_layer_outputs, predictions):
    """
    Perform the backward pass of the network and update the weights.

    Every layer is assumed to use a sigmoid activation, so each delta carries
    the factor a * (1 - a). The error signal is (target - predictions), and
    the update is weights += learning_rate * delta @ layer_input.T.

    Parameters:
    input_data: Input data, shape (n_input_features, n_samples)
    target: Expected target output, shape (n_output_units, n_samples)
    learning_rate: Scalar value indicating the step size during gradient descent
    weights: List of weight matrices for each layer in the neural network.
             The list entries are replaced with the updated matrices.
    hidden_layer_outputs: List of outputs for each hidden layer
    predictions: Final output predictions of the neural network

    Returns:
    The same weights list, holding the updated matrices rounded to 4 decimals.
    """
    # forward() feeds Relu(record) into the first layer, so the gradient of the
    # first weight matrix must use the rectified input as well.
    first_layer_input = np.maximum(input_data, 0)

    # Delta of the output layer
    delta = predictions * (1 - predictions) * (target - predictions)

    for i in range(len(weights) - 1, 0, -1):
        layer_input = hidden_layer_outputs[i - 1]
        # Back-propagate through the weights that produced the forward pass,
        # i.e. BEFORE this layer's update is applied.
        previous_delta = (weights[i].T @ delta) * layer_input * (1 - layer_input)
        weights[i] = np.round(weights[i] + learning_rate * (delta @ layer_input.T), 4)
        delta = previous_delta

    # Update for the first hidden layer
    weights[0] = np.round(weights[0] + learning_rate * (delta @ first_layer_input.T), 4)

    return weights



# Test My model on both datasets

In [None]:
def evaluate_model(X_test, y_test, weights, num_classes, verbose=True):
    """
    Evaluate the neural network model on the test data.

    Each row of X_test is fed through forward() as a column vector and the
    index of the largest output is taken as the predicted class.

    Parameters:
    X_test: Test data, DataFrame or 2-D numpy array (one sample per row)
    y_test: True integer labels for the test data, Series or numpy array
    weights: Learned weights from the neural network
    num_classes: Number of classes in the classification task. Not used by
                 the computation; kept so existing calls keep working.
    verbose: When True (default), print one line per record with the output
             activations, the predicted class and the true class.

    Returns:
    accuracy: Fraction of correctly classified records, in [0, 1].

    Raises:
    ZeroDivisionError: If X_test contains no records.
    """
    # Convert once so DataFrames/Series and plain arrays are handled alike
    features = np.asarray(X_test)
    labels = np.asarray(y_test).ravel()

    correct_predictions = 0
    total_predictions = len(features)

    for record in range(total_predictions):
        # Reshape a row of X_test to a column vector
        data = features[record].reshape(-1, 1)

        # Perform a forward pass
        _, predictions = forward(weights, data)

        # Index of the largest output activation is the predicted class
        predicted_class = np.argmax(predictions, axis=0)[0]

        # Check if prediction matches the true label
        true_class = int(labels[record])
        if predicted_class == true_class:
            correct_predictions += 1

        if verbose:
            prediction_probabilities = predictions.flatten()
            print(f"Record {record + 1}/{total_predictions} - Prediction Probabilities: {prediction_probabilities}, Predicted Class: {predicted_class}, True Class: {true_class}")

    # Calculate accuracy
    accuracy = correct_predictions / total_predictions
    print(f"\nModel Accuracy: {accuracy * 100:.2f}%")
    return accuracy


## iris dataset

In [None]:
# Load Iris dataset
# Load the Iris dataset into a single frame: four feature columns plus 'target'
iris = load_iris()
iris_df = pd.DataFrame(iris.data, columns=iris.feature_names).assign(target=iris.target)

# Separate the feature matrix from the class labels
iris_label = iris_df['target']
data_label = iris_df.drop(columns=['target'])




In [None]:
# Seed NumPy's global RNG so the random weight initialization below is
# reproducible. sklearn.utils.shuffle called without random_state also draws
# from this global RNG, so running the notebook top to bottom gives the same
# training run every time.
np.random.seed(42)

# One hidden layer with 5 neurons
hidden_layers = [5]
weights_iris = nn_layers(data_label, iris_label, hidden_layers)

# Hold out 20% of the records for testing
X_train_iris, X_test_iris, y_train_iris, y_test_iris = train_test_split(data_label, iris_label, test_size=0.2, random_state=42)

In [None]:

# Training hyperparameters
iterations = 1000
learning_rate = 0.2
num_classes = len(np.unique(iris_label))

# Per-sample gradient descent: one weight update after every training record
for i in range(iterations):
    # Visit the records in a new random order on every pass
    X_train_shuffled_iris, y_train_shuffled_iris = shuffle(X_train_iris, y_train_iris)
    shuffled_rows = zip(X_train_shuffled_iris.values, y_train_shuffled_iris.values)

    for record, (row, class_label) in enumerate(shuffled_rows):
        # Column vectors: features (n_features, 1) and one-hot target (n_classes, 1)
        data = row.reshape(-1, 1)
        target = one_hot_encode(class_label, num_classes).reshape(-1, 1)

        hidden_layer_outputs, predictions = forward(weights_iris, data)
        weights_iris = Backward(data, target, learning_rate, weights_iris, hidden_layer_outputs, predictions)



In [None]:
# Score the trained Iris network on the held-out records
accuracy = evaluate_model(
    X_test=X_test_iris,
    y_test=y_test_iris,
    weights=weights_iris,
    num_classes=num_classes,
)
print("Model Accuracy:", accuracy)


Record 1/30 - Prediction Probabilities: [0.0134 0.9902 0.0054], Predicted Class: 1, True Class: 1
Record 2/30 - Prediction Probabilities: [0.9852 0.0159 0.0016], Predicted Class: 0, True Class: 0
Record 3/30 - Prediction Probabilities: [9.00e-04 4.62e-02 9.57e-01], Predicted Class: 2, True Class: 2
Record 4/30 - Prediction Probabilities: [0.0131 0.9895 0.0058], Predicted Class: 1, True Class: 1
Record 5/30 - Prediction Probabilities: [0.0139 0.9915 0.0047], Predicted Class: 1, True Class: 1
Record 6/30 - Prediction Probabilities: [0.985  0.0161 0.0016], Predicted Class: 0, True Class: 0
Record 7/30 - Prediction Probabilities: [0.0161 0.9915 0.0041], Predicted Class: 1, True Class: 1
Record 8/30 - Prediction Probabilities: [0.001  0.0553 0.9486], Predicted Class: 2, True Class: 2
Record 9/30 - Prediction Probabilities: [0.001  0.0625 0.9443], Predicted Class: 2, True Class: 1
Record 10/30 - Prediction Probabilities: [0.0149 0.9922 0.0041], Predicted Class: 1, True Class: 1
Record 11/30 

## MNIST-style digits dataset (scikit-learn `load_digits`, 8x8 images)

In [None]:
import pandas as pd
from sklearn import datasets

# Load scikit-learn's bundled handwritten-digits dataset.
# NOTE: the name `mnist` rebinds the `keras.datasets.mnist` import from the first cell.
mnist = datasets.load_digits()

# Features as a DataFrame (one flattened image per row), labels as a Series
mnist_data, mnist_labels = pd.DataFrame(mnist.data), pd.Series(mnist.target)

mnist_data.shape, mnist_labels.shape



((1797, 64), (1797,))

In [None]:
# Seed NumPy's global RNG so the random weight initialization below is
# reproducible. sklearn.utils.shuffle called without random_state also draws
# from this global RNG.
np.random.seed(42)

# Two hidden layers with 25 and 20 neurons
hidden_layers = [25, 20]
weights_mnist = nn_layers(mnist_data, mnist_labels, hidden_layers)

# Hold out 20% of the records for testing
X_train_mnist, X_test_mnist, y_train_mnist, y_test_mnist = train_test_split(mnist_data, mnist_labels, test_size=0.2, random_state=42)

# Normalize the data to [0, 1]. load_digits pixel intensities range from
# 0 to 16 (not 0 to 255), so 16 is the correct scale factor.
X_train_mnist = X_train_mnist / 16.0
X_test_mnist = X_test_mnist / 16.0

In [None]:
from sklearn.utils import shuffle

# Training hyperparameters
iterations = 1000
learning_rate = 0.3
num_classes = len(np.unique(mnist_labels))

# Per-sample gradient descent: one weight update after every training record
for i in range(iterations):
    # Visit the records in a new random order on every pass
    X_train_shuffled_mnist, y_train_shuffled_mnist = shuffle(X_train_mnist, y_train_mnist)
    shuffled_rows = zip(X_train_shuffled_mnist.values, y_train_shuffled_mnist.values)

    for record, (row, class_label) in enumerate(shuffled_rows):
        # Column vectors: features (n_features, 1) and one-hot target (n_classes, 1)
        data = row.reshape(-1, 1)
        target = one_hot_encode(class_label, num_classes).reshape(-1, 1)

        hidden_layer_outputs, predictions = forward(weights_mnist, data)
        weights_mnist = Backward(data, target, learning_rate, weights_mnist, hidden_layer_outputs, predictions)



In [None]:
# Score the trained digits network on the held-out records
accuracy = evaluate_model(
    X_test=X_test_mnist,
    y_test=y_test_mnist,
    weights=weights_mnist,
    num_classes=num_classes,
)
print("Model Accuracy:", accuracy)


Record 1/360 - Prediction Probabilities: [1.000e-04 0.000e+00 0.000e+00 0.000e+00 0.000e+00 0.000e+00 9.988e-01
 0.000e+00 3.000e-04 0.000e+00], Predicted Class: 6, True Class: 6
Record 2/360 - Prediction Probabilities: [0.000e+00 0.000e+00 0.000e+00 5.000e-04 0.000e+00 3.600e-03 0.000e+00
 0.000e+00 0.000e+00 9.989e-01], Predicted Class: 9, True Class: 9
Record 3/360 - Prediction Probabilities: [0.e+00 0.e+00 0.e+00 1.e+00 0.e+00 0.e+00 0.e+00 0.e+00 4.e-04 0.e+00], Predicted Class: 3, True Class: 3
Record 4/360 - Prediction Probabilities: [0.     0.     0.     0.     0.     0.0036 0.     0.7652 0.     0.0099], Predicted Class: 7, True Class: 7
Record 5/360 - Prediction Probabilities: [0.000e+00 0.000e+00 9.589e-01 1.230e-02 0.000e+00 0.000e+00 5.000e-04
 0.000e+00 1.000e-04 0.000e+00], Predicted Class: 2, True Class: 2
Record 6/360 - Prediction Probabilities: [0.000e+00 8.151e-01 2.658e-01 0.000e+00 0.000e+00 3.000e-04 1.430e-02
 0.000e+00 5.000e-04 0.000e+00], Predicted Class: 1, Tr

# Bonus


## Compare my model with the Keras neural network model



IRIS

In [None]:
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from keras.models import Sequential
from keras.layers import Dense



# One-hot encode the target labels.
# OneHotEncoder returns a sparse matrix by default; .toarray() densifies it.
# This avoids the `sparse=` keyword, which newer scikit-learn releases replaced
# with `sparse_output=`, so the cell runs on old and new versions alike.
encoder = OneHotEncoder()
label_one_hot = encoder.fit_transform(iris_label.values.reshape(-1, 1)).toarray()

# Split the dataset into training and testing sets
X_train_iris_bonus, X_test_iris_bonus, y_train_iris_bonus, y_test_iris_bonus = train_test_split(data_label, label_one_hot, test_size=0.2, random_state=42)

# Define the neural network model: a ReLU layer on the 4 inputs, then sigmoid
# layers of 5 and 3 units, mirroring the activations of the from-scratch network
model = Sequential([
    Dense(4, activation='relu', input_shape=(4,)),
    Dense(5, activation='sigmoid'),
    Dense(3, activation='sigmoid'),
])

model.compile(optimizer='sgd',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# Train the model on the Iris dataset.
# verbose=0 keeps 1000 per-epoch log lines out of the notebook; the per-epoch
# metrics remain available in hist.history.
hist = model.fit(X_train_iris_bonus, y_train_iris_bonus, epochs=1000, verbose=0)

# Evaluate the model on the test set
loss, accuracy = model.evaluate(X_test_iris_bonus, y_test_iris_bonus)

# Print the test accuracy
print(f"Test accuracy: {accuracy * 100:.2f}%")


Epoch 1/1000




Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
Epoch 28/1000
Epoch 29/1000
Epoch 30/1000
Epoch 31/1000
Epoch 32/1000
Epoch 33/1000
Epoch 34/1000
Epoch 35/1000
Epoch 36/1000
Epoch 37/1000
Epoch 38/1000
Epoch 39/1000
Epoch 40/1000
Epoch 41/1000
Epoch 42/1000
Epoch 43/1000
Epoch 44/1000
Epoch 45/1000
Epoch 46/1000
Epoch 47/1000
Epoch 48/1000
Epoch 49/1000
Epoch 50/1000
Epoch 51/1000
Epoch 52/1000
Epoch 53/1000
Epoch 54/1000
Epoch 55/1000
Epoch 56/1000
Epoch 57/1000
Epoch 58/1000
Epoch 59/1000
Epoch 60/1000
Epoch 61/1000
Epoch 62/1000
Epoch 63/1000
Epoch 64/1000
Epoch 65/1000
Epoch 66/1000
Epoch 67/1000
Epoch 68/1000
Epoch 69/1000
Epoch 70/1000
Epoch 71/1000
Epoch 72/1000
Epoch 73/1000




Test accuracy: 83.33%


MNIST (digits)

In [None]:
# One-hot encode the target labels.
# OneHotEncoder returns a sparse matrix by default; .toarray() densifies it.
# This avoids the `sparse=` keyword, which newer scikit-learn releases replaced
# with `sparse_output=`, so the cell runs on old and new versions alike.
encoder = OneHotEncoder()
label_one_hot = encoder.fit_transform(mnist_labels.values.reshape(-1, 1)).toarray()

X_train_mnist_bonus, X_test_mnist_bonus, y_train_mnist_bonus, y_test_mnist_bonus = train_test_split(mnist_data, label_one_hot, test_size=0.2, random_state=42)

# Define the neural network model: a ReLU layer on the 64 inputs, then sigmoid
# layers of 25, 20 and 10 units, mirroring the from-scratch network
model = Sequential([
    Dense(64, activation='relu', input_shape=(64,)),
    Dense(25, activation='sigmoid'),
    Dense(20, activation='sigmoid'),
    Dense(10, activation='sigmoid'),
])

model.compile(optimizer='sgd',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# Train the model on the digits dataset.
# verbose=0 keeps 200 per-epoch log lines out of the notebook; the per-epoch
# metrics remain available in hist.history.
hist = model.fit(X_train_mnist_bonus, y_train_mnist_bonus, epochs=200, verbose=0)

# Evaluate the model on the test set
loss, accuracy = model.evaluate(X_test_mnist_bonus, y_test_mnist_bonus)

# Print the test accuracy
print(f"Test accuracy: {accuracy * 100:.2f}%")




Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78

## Compare my model with the scikit-learn neural network


Iris

In [None]:
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score


# One-hot encode the target labels.
# OneHotEncoder returns a sparse matrix by default; .toarray() densifies it.
# This avoids the `sparse=` keyword, which newer scikit-learn releases replaced
# with `sparse_output=`, so the cell runs on old and new versions alike.
encoder = OneHotEncoder()
label_one_hot = encoder.fit_transform(iris_label.values.reshape(-1, 1)).toarray()


# Split the dataset into training and testing sets
X_train_iris_bonus, X_test_iris_bonus, y_train_iris_bonus, y_test_iris_bonus = train_test_split(data_label, label_one_hot, test_size=0.2, random_state=42)

# Define the neural network model.
# (5,) is a one-element tuple: a single hidden layer of 5 neurons. The previous
# spelling `(5)` was just the integer 5 in parentheses.
sklearn_mlp = MLPClassifier(hidden_layer_sizes=(5,), activation='relu',
                            solver='sgd', max_iter=200, random_state=42)

# Fit the model on the training data
# Note: the one-hot rows are converted back to integer class labels with argmax before fitting
sklearn_mlp.fit(X_train_iris_bonus, np.argmax(y_train_iris_bonus, axis=1))

# Predict and evaluate the model
predictions = sklearn_mlp.predict(X_test_iris_bonus)
accuracy = accuracy_score(np.argmax(y_test_iris_bonus, axis=1), predictions)

print(f"Test accuracy with Scikit-learn MLPClassifier: {accuracy * 100:.2f}%")


Test accuracy with Scikit-learn MLPClassifier: 80.00%




MNIST (digits)

In [None]:
# One-hot encode the target labels.
# OneHotEncoder returns a sparse matrix by default; .toarray() densifies it.
# This avoids the `sparse=` keyword, which newer scikit-learn releases replaced
# with `sparse_output=`, so the cell runs on old and new versions alike.
encoder = OneHotEncoder()
label_one_hot = encoder.fit_transform(mnist_labels.values.reshape(-1, 1)).toarray()

X_train_mnist_bonus, X_test_mnist_bonus, y_train_mnist_bonus, y_test_mnist_bonus = train_test_split(mnist_data, label_one_hot, test_size=0.2, random_state=42)

# Define the neural network model: two hidden layers of 25 and 20 neurons
sklearn_mlp = MLPClassifier(hidden_layer_sizes=(25,20), activation='relu',
                            solver='sgd', max_iter=200, random_state=42)

# Fit the model on the training data
# Note: the one-hot rows are converted back to integer class labels with argmax before fitting
sklearn_mlp.fit(X_train_mnist_bonus, np.argmax(y_train_mnist_bonus, axis=1))

# Predict and evaluate the model
predictions = sklearn_mlp.predict(X_test_mnist_bonus)
accuracy = accuracy_score(np.argmax(y_test_mnist_bonus, axis=1), predictions)

print(f"Test accuracy with Scikit-learn MLPClassifier: {accuracy * 100:.2f}%")




Test accuracy with Scikit-learn MLPClassifier: 95.83%


