In [1]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.datasets import fetch_openml

# Fetch the MNIST dataset
mnist = fetch_openml("mnist_784")

# Assign data to X and targets to y, both converted to appropriate types
X = mnist.data.astype('float32')
y = mnist.target.astype('int64')

# Normalize pixel values
X /= 255.0

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define the MLP class
class MLP:
    def __init__(self, input_size, hidden_size, output_size):
        # Initialize weights and biases for the hidden and output layers
        self.W1 = np.random.randn(input_size, hidden_size) * 0.01
        self.b1 = np.zeros((1, hidden_size))
        self.W2 = np.random.randn(hidden_size, output_size) * 0.01
        self.b2 = np.zeros((1, output_size))

    def forward(self, X):
        # Forward pass through the network
        self.z1 = np.dot(X, self.W1) + self.b1
        self.a1 = np.tanh(self.z1)
        self.z2 = np.dot(self.a1, self.W2) + self.b2
        exp_scores = np.exp(self.z2)
        self.probs = exp_scores / np.sum(exp_scores, axis=1, keepdims=True)
        return self.probs

    def loss(self, X, y):
        # Compute the loss
        probs = self.forward(X)
        log_probs = -np.log(probs[range(len(y)), y])
        return np.mean(log_probs)

    def predict(self, X):
        # Predict the class labels
        probs = self.forward(X)
        return np.argmax(probs, axis=1)

    def train(self, X, y, epochs, learning_rate):
        # Train the network
        for epoch in range(epochs):
            probs = self.forward(X)
            delta = probs.copy()
            delta[range(len(y)), y] -= 1
            delta /= len(y)

            # Backpropagation
            dW2 = np.dot(self.a1.T, delta)
            db2 = np.sum(delta, axis=0, keepdims=True)

            delta2 = np.dot(delta, self.W2.T) * (1 - np.power(self.a1, 2))
            dW1 = np.dot(X.T, delta2)
            db1 = np.sum(delta2, axis=0, keepdims=True)

            # Gradient descent
            self.W1 -= learning_rate * dW1
            self.b1 -= learning_rate * db1
            self.W2 -= learning_rate * dW2
            self.b2 -= learning_rate * db2

            # Print loss every 100 epochs
            if epoch % 100 == 0:
                print(f"Epoch {epoch} loss: {self.loss(X, y)}")

# Set the hyperparameters
input_size = X_train.shape[1]
hidden_size = 512
output_size = 50
learning_rate = 0.05
epochs = 2000

# Create and train the MLP
mlp = MLP(input_size, hidden_size, output_size)
mlp.train(X_train, y_train, epochs, learning_rate)

# Evaluate on training set
y_pred_train = mlp.predict(X_train)
train_accuracy = np.mean(y_pred_train == y_train)


Epoch 0 loss: 3.891613111148901
Epoch 100 loss: 1.2797301119568272
Epoch 200 loss: 0.6916709524245601
Epoch 300 loss: 0.5274013672010436
Epoch 400 loss: 0.4530128366518832
Epoch 500 loss: 0.4110486750205873
Epoch 600 loss: 0.38415602912191843
Epoch 700 loss: 0.3653160188675642
Epoch 800 loss: 0.3512372700156132
Epoch 900 loss: 0.34020773833015105
Epoch 1000 loss: 0.3312562002257787
Epoch 1100 loss: 0.3237893764904585
Epoch 1200 loss: 0.3174219398266385
Epoch 1300 loss: 0.31189096884447237
Epoch 1400 loss: 0.30700990025713365
Epoch 1500 loss: 0.3026421531335177
Epoch 1600 loss: 0.29868516609817486
Epoch 1700 loss: 0.29506027391024614
Epoch 1800 loss: 0.2917060351162225
Epoch 1900 loss: 0.28857369479551503
Train accuracy: 0.9177857142857143
Test accuracy: 0.9165


In [4]:
from sklearn.metrics import confusion_matrix

# Define the input size based on the training data
input_size = X_train.shape[1]

# There are 10 classes (0 to 9) in MNIST
output_size = 10  

# Define the learning rate and the number of training epochs
learning_rate = 0.05
epochs = 2000

# Different values for hidden layer size that we want to test
hidden_sizes = [32, 128, 1024]  

# Loop over each hidden layer size
for hidden_size in hidden_sizes:
    print(f"\nTraining MLP with hidden layer size {hidden_size}")

    # Initialize the MLP with the current hidden layer size
    mlp = MLP(input_size, hidden_size, output_size)

    # Train the MLP on the training data
    mlp.train(X_train, y_train, epochs, learning_rate)

    # Predict the labels for the training data
    y_pred_train = mlp.predict(X_train)

    # Calculate and print the training accuracy
    train_accuracy = np.mean(y_pred_train == y_train)
    print(f"Train accuracy: {train_accuracy}")

    # Calculate and print the training confusion matrix
    print("Train confusion matrix:")
    print(confusion_matrix(y_train, y_pred_train))

    # Predict the labels for the test data
    y_pred_test = mlp.predict(X_test)

    # Calculate and print the test accuracy
    test_accuracy = np.mean(y_pred_test == y_test)
    print(f"Test accuracy: {test_accuracy}")

    # Calculate and print the test confusion matrix
    print("Test confusion matrix:")
    print(confusion_matrix(y_test, y_pred_test))




Training MLP with hidden layer size 32
Epoch 0 loss: 2.30253198423809
Epoch 100 loss: 2.076991802790045
Epoch 200 loss: 1.237112446417689
Epoch 300 loss: 0.8374995897909436
Epoch 400 loss: 0.6462378090234638
Epoch 500 loss: 0.54197243122276
Epoch 600 loss: 0.4769855814464789
Epoch 700 loss: 0.4331372083463388
Epoch 800 loss: 0.4018873941514794
Epoch 900 loss: 0.3785301381231243
Epoch 1000 loss: 0.36031290290749685
Epoch 1100 loss: 0.34557206476546276
Epoch 1200 loss: 0.3332704078013054
Epoch 1300 loss: 0.3227400941130983
Epoch 1400 loss: 0.3135369708439419
Epoch 1500 loss: 0.3053563030525885
Epoch 1600 loss: 0.2979828935146342
Epoch 1700 loss: 0.2912607769269981
Epoch 1800 loss: 0.2850742776754602
Epoch 1900 loss: 0.27933581462482504
Train accuracy: 0.9229821428571429
Train confusion matrix:
[[5388    0   30   13   12   29   35    6   43    4]
 [   0 6096   29   21    9   17   10   17   70    8]
 [  52   46 5059   58   77    8  101   68  128   13]
 [  21   27  113 5108    3  193   22 

In [16]:
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split
from collections import Counter
from sklearn.metrics import accuracy_score, confusion_matrix

# Define the KNN class
class KNN:
    def __init__(self, k):
        self.k = k  # Number of neighbors to consider

    def fit(self, X, y):
        self.X_train = X  # Training data
        self.y_train = y  # Training labels

    def predict(self, X):
        # For each example in X, predict its label
        predicted_labels = [self._predict(x) for x in X]
        return np.array(predicted_labels)

    def _predict(self, x):
        # Compute the Euclidean distance from x to each example in the training set
        distances = [np.linalg.norm(x - x_train) for x_train in self.X_train]

        # Get the indices of the k nearest neighbors
        k_indices = np.argsort(distances)[:self.k]
        # Get the labels of the k nearest neighbors
        k_nearest_labels = [self.y_train[i] for i in k_indices]

        # Return the most common label among the k nearest neighbors
        most_common = Counter(k_nearest_labels).most_common(1)
        return most_common[0][0]

# Load MNIST dataset
mnist = datasets.load_digits()

# Create feature and target arrays
X = mnist.data
y = mnist.target

# Split into training and test set
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=123, stratify=y)

# Trying out with different k values
k_values = [3, 5, 7, 9]

for k in k_values:
    print(f"Results for k={k}:")
    # Initialize the KNN classifier
    classifier = KNN(k=k)
    # Fit the classifier to the training data
    classifier.fit(X_train, y_train)
    # Predict the labels of the test set
    y_pred = classifier.predict(X_test)

    # Compute the accuracy of the classifier
    accuracy = accuracy_score(y_test, y_pred)
    print(f"Accuracy: {accuracy}")
    # Compute the confusion matrix of the classifier
    print("Confusion Matrix:")
    print(confusion_matrix(y_test, y_pred))
    print("\n")


Results for k=3:
Accuracy: 0.9861111111111112
Confusion Matrix:
[[36  0  0  0  0  0  0  0  0  0]
 [ 0 36  0  0  0  0  0  0  0  0]
 [ 0  0 34  1  0  0  0  0  0  0]
 [ 0  0  0 37  0  0  0  0  0  0]
 [ 0  0  0  0 36  0  0  0  0  0]
 [ 0  0  0  0  0 36  0  0  0  1]
 [ 0  0  0  0  0  0 36  0  0  0]
 [ 0  0  0  0  0  0  0 35  0  1]
 [ 0  2  0  0  0  0  0  0 33  0]
 [ 0  0  0  0  0  0  0  0  0 36]]


Results for k=5:
Accuracy: 0.9916666666666667
Confusion Matrix:
[[36  0  0  0  0  0  0  0  0  0]
 [ 0 36  0  0  0  0  0  0  0  0]
 [ 0  0 34  1  0  0  0  0  0  0]
 [ 0  0  0 37  0  0  0  0  0  0]
 [ 0  0  0  0 36  0  0  0  0  0]
 [ 0  0  0  0  0 37  0  0  0  0]
 [ 0  0  0  0  0  0 36  0  0  0]
 [ 0  0  0  0  0  0  0 36  0  0]
 [ 0  2  0  0  0  0  0  0 33  0]
 [ 0  0  0  0  0  0  0  0  0 36]]


Results for k=7:
Accuracy: 0.9916666666666667
Confusion Matrix:
[[36  0  0  0  0  0  0  0  0  0]
 [ 0 36  0  0  0  0  0  0  0  0]
 [ 0  0 34  0  0  0  0  1  0  0]
 [ 0  0  0 37  0  0  0  0  0  0]
 [ 0  0  0

In [10]:
import tensorflow as tf
from tensorflow.keras import datasets, layers, models
from sklearn.metrics import confusion_matrix
import numpy as np

# Load and split dataset
(train_images, train_labels), (test_images, test_labels) = datasets.mnist.load_data()

# Normalize pixel values to be between 0 and 1
train_images, test_images = train_images / 255.0, test_images / 255.0

# Reshape the images
train_images = train_images.reshape((-1, 28, 28, 1))
test_images = test_images.reshape((-1, 28, 28, 1))

# Create the convolutional base
model = models.Sequential()
model.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=(28, 28, 1)))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(64, (3, 3), activation='relu'))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(64, (3, 3), activation='relu'))

# Add Dense layers on top
model.add(layers.Flatten())
model.add(layers.Dense(64, activation='relu'))
model.add(layers.Dense(10))

# Compile and train the model
model.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])

model.fit(train_images, train_labels, epochs=5)

# Evaluate the model
test_loss, test_acc = model.evaluate(test_images,  test_labels, verbose=2)
print('\nTest accuracy:', test_acc)

# Compute confusion matrix
y_pred = model.predict(test_images)
y_pred_classes = np.argmax(y_pred, axis=1)

confusion_mtx = confusion_matrix(test_labels, y_pred_classes) 
print('Confusion Matrix:\n', confusion_mtx)


Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
313/313 - 1s - loss: 0.0339 - accuracy: 0.9900 - 1s/epoch - 4ms/step

Test accuracy: 0.9900000095367432
Confusion Matrix:
 [[ 977    0    0    0    0    0    1    2    0    0]
 [   0 1127    2    0    0    0    1    4    0    1]
 [   1    1 1021    0    1    0    0    8    0    0]
 [   0    0    1 1005    0    1    0    3    0    0]
 [   0    0    1    0  974    0    1    0    0    6]
 [   0    0    1   15    0  874    2    0    0    0]
 [   1    4    0    0    3    1  949    0    0    0]
 [   0    1    3    1    0    0    0 1020    0    3]
 [   4    0    3    0    0    2    0    3  957    5]
 [   0    0    0    0    3    3    1    6    0  996]]
