## Pre Proccessing


In [30]:
import tensorflow as tf
import numpy as np
from sklearn.model_selection import train_test_split

# Load MNIST dataset
(X_train, y_train), (X_test, y_test) = tf.keras.datasets.mnist.load_data()

train_filter = (y_train == 0) | (y_train == 1)
test_filter = (y_test == 0) | (y_test == 1)

X_train_binary = X_train[train_filter]
y_train_binary = y_train[train_filter]
X_test_binary = X_test[test_filter]
y_test_binary = y_test[test_filter]

X_train_binary = X_train_binary / 255.0
X_test_binary = X_test_binary / 255.0

X_train_binary = X_train_binary.reshape(-1, 28*28)
X_test_binary = X_test_binary.reshape(-1, 28*28)

y_train_binary = np.where(y_train_binary == 0, 0, 1)
y_test_binary = np.where(y_test_binary == 0, 0, 1)
X_train_binary.shape, y_train_binary.shape

((12665, 784), (12665,))

In [31]:
hidden_layers = int(input("Enter the number of hidden layers: "))
neurons_per_layer = []
for i in range(hidden_layers):
    neurons_per_layer.append(int(input(f"Enter the number of neurons for layer {i+1}: ")))

activation_function = input("Enter the activation function for hidden layers (sigmoid/relu): ").strip().lower()

In [32]:
def derivative_activation_function(z, func):
    if func == 'sigmoid':
        sig = 1 / (1 + np.exp(-z))
        return sig * (1 - sig)
    elif func == 'relu':
        return np.where(z > 0, 1, 0)
    elif func == 'tanh':
        return 1 - np.tanh(z)**2
    else:
        raise ValueError("Unsupported activation function")

In [33]:
def forward_propagation(X, weights, biases, activation_function):
    a = X
    zs = []
    activations = [X]
    for w, b in zip(weights, biases):
        z = w @ a + b
        zs.append(z)
        if activation_function == 'sigmoid':
            a = 1 / (1 + np.exp(-z))
        elif activation_function == 'relu':
            a = np.maximum(0, z)
        elif activation_function == 'tanh':
            a = np.tanh(z)
        else:
            raise ValueError("Unsupported activation function")
        activations.append(a)
    return zs, activations

In [34]:
def back_propagation(zs, activations, weights, y, activation_function):
    deltas = [None] * len(weights)
    deltas[-1] = (activations[-1] - y) * derivative_activation_function(zs[-1], activation_function)
    for l in range(len(deltas) - 2, -1, -1):
        deltas[l] = (weights[l + 1].T @ deltas[l + 1]) * derivative_activation_function(zs[l], activation_function)
    weights_gradients = []
    biases_gradients = []
    for l in range(len(deltas)):
        weights_gradients.append(deltas[l] @ activations[l].T)
        biases_gradients.append(np.sum(deltas[l], axis=1, keepdims=True))
    return weights_gradients, biases_gradients

In [35]:
def update_parameters(weights, biases, weights_gradients, biases_gradients, learning_rate):
    for l in range(len(weights)):
        weights[l] -= learning_rate * weights_gradients[l]
        biases[l] -= learning_rate * biases_gradients[l]
    return weights, biases

In [36]:
def train_neural_network(X_train, y_train, hidden_layers, neurons_per_layer, activation_function, epochs=1000, learning_rate=0.01, batch_size=64):
    input_size = X_train.shape[1]
    output_classes = 1
    weights = []
    biases = []
    layers = [input_size] + neurons_per_layer + [output_classes]
    for i in range(1, hidden_layers + 2):
        w = np.random.randn(layers[i], layers[i - 1]) * 0.01
        b = np.zeros((layers[i], 1))
        weights.append(w)
        biases.append(b)
    # show initial accuracy
    _, train_activations = forward_propagation(X_train.T, weights, biases, activation_function)
    train_predictions = (train_activations[-1] > 0.5).astype(int)
    train_accuracy = np.mean(train_predictions.flatten() == y_train_binary) * 100
    print(f"Initial Accuracy: {train_accuracy:.2f}%")
    for epoch in range(epochs):
        permutation = np.random.permutation(X_train.shape[0])
        X_shuffled = X_train[permutation]
        y_shuffled = y_train[permutation]
        for i in range(0, X_train.shape[0], batch_size):
            X_batch = X_shuffled[i:i + batch_size].T
            y_batch = y_shuffled[i:i + batch_size].reshape(1, -1)
            zs, activations = forward_propagation(X_batch, weights, biases, activation_function)
            weights_gradients, biases_gradients = back_propagation(zs, activations, weights, y_batch, activation_function)
            weights, biases = update_parameters(weights, biases, weights_gradients, biases_gradients, learning_rate)
        # show accuracy per epoch
        _, train_activations = forward_propagation(X_train.T, weights, biases, activation_function)
        train_predictions = (train_activations[-1] > 0.5).astype(int)
        train_accuracy = np.mean(train_predictions.flatten() == y_train_binary) * 100
        print(f"Epoch {epoch + 1}, Accuracy: {train_accuracy:.6f}%")
    return weights, biases

In [37]:
import tensorflow as tf
from tensorflow.keras.applications import mobilenet_v2
from tensorflow.keras.applications.mobilenet_v2 import preprocess_input
def extract_features(X_train):
    model = mobilenet_v2.MobileNetV2(weights='imagenet', include_top=False, pooling='avg', input_shape=(32, 32, 3))
    X_train = X_train.reshape(-1, 28, 28)
    X_train = np.expand_dims(X_train, axis=-1)
    resized_images = tf.image.resize(X_train, [32, 32])
    resized_images = tf.repeat(resized_images, 3, axis=-1)
    preprocessed_images = preprocess_input(resized_images)
    features = model.predict(preprocessed_images)
    return features

In [38]:
extracted_features = extract_features(X_train_binary)
train_neural_network(extracted_features, y_train_binary, hidden_layers, neurons_per_layer, activation_function, 300)

  model = mobilenet_v2.MobileNetV2(weights='imagenet', include_top=False, pooling='avg', input_shape=(32, 32, 3))


[1m396/396[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 17ms/step
Initial Accuracy: 46.77%


MemoryError: Unable to allocate 124. MiB for an array with shape (1280, 12665) and data type float64

In [None]:
from keras.models import Sequential
from keras.layers import Dense, Input
model = Sequential([
    Input(shape=(28*28,)),
    *[Dense(neurons, activation=activation_function) for neurons in neurons_per_layer],
    Dense(1, activation='sigmoid')
])
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model.fit(X_train_binary, y_train_binary, epochs=300, batch_size=64)

Epoch 1/300
[1m198/198[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.9910 - loss: 0.0287
Epoch 2/300
[1m198/198[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.9991 - loss: 0.0035
Epoch 3/300
[1m198/198[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.9993 - loss: 0.0029
Epoch 4/300
[1m198/198[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.9997 - loss: 0.0012    
Epoch 5/300
[1m198/198[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.9991 - loss: 0.0025
Epoch 6/300
[1m198/198[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.9999 - loss: 3.4946e-04
Epoch 7/300
[1m198/198[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 1.0000 - loss: 1.2321e-04
Epoch 8/300
[1m198/198[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 1.0000 - loss: 1.9883e-05
Epoch 9/300
[1m

<keras.src.callbacks.history.History at 0x18a1dd06900>