<a href="https://colab.research.google.com/github/amosweckstrom/neural_networks/blob/main/mnist_neural_network.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Training a Neural Network to predict if an handwritten character is a 0 or a 1

## Decription:

 Dataset:
 - The classic mnist dataset is used. But all the handwritten characters  except for the 0s and 1s are removed
 - We have two datasets, the mnist_test.csv and the mnist_train_small.csv



 Neural Network:

 - I used the logistic regression as the activation functions for the units/neurons which uses a sigmoid function to map predictions between 0 and 1

 - To calculate the loss for binary values we use the BinaryCrossEntropy loss function

 - I created 3 layers for the network
   
   Layer 1: 25 neurons\
   Layer 2: 15 neurons\
   Layer 3 (Output Layer): 1 neuron




In [2]:
#Import necessary libraries

import tensorflow as tf
import pandas as pd
import numpy as np
from tensorflow.keras.losses import BinaryCrossentropy
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense

In [3]:
#Import data
df = pd.read_csv("./sample_data/mnist_train_small.csv")
df_test = pd.read_csv("./sample_data/mnist_test.csv")

## Training Data

In [4]:
#Inspect the training data
print("Data dimensions:", df.shape)
print("Data type:", type(df))

Data dimensions: (19999, 785)
Data type: <class 'pandas.core.frame.DataFrame'>


In [5]:
#Create a new dataframe where only the handwritten 0s and 1s are
df_1_0 = df[(df["6"] == 1) | (df["6"] == 0)]

#Create training data by dropping the target data column
X_train = df_1_0.drop("6", axis=1)
X_train = np.array(X_train)

#Create target data
y_train = df_1_0["6"]
y_train = np.array(y_train)

print(
    "Dimensions of the training data:", X_train.shape,
    "\nData type:", type(y_train)

    )
print(
    "Dimensions of the target data:", y_train.shape ,
    "\nData type:", type(y_train)

    )

Dimensions of the training data: (4205, 784) 
Data type: <class 'numpy.ndarray'>
Dimensions of the target data: (4205,) 
Data type: <class 'numpy.ndarray'>


## Test Data

In [6]:
#Inspect the test data
print("Data dimensions:", df_test.shape)
print("Data type:", type(df_test))


Data dimensions: (9999, 785)
Data type: <class 'pandas.core.frame.DataFrame'>


In [7]:
df_test_1_0 = df_test[(df_test["7"] == 1) | (df_test["7"] == 0)]

X_test = df_test_1_0.drop("7", axis=1)
X_test = np.array(X_test)


y_test = df_test_1_0["7"]
y_test = np.array(y_test)

print(
    "Dimensions of the test feature data:", X_train.shape,
    "\nData type:", type(y_train)

    )
print(
    "Dimensions of the test target data:", y_train.shape ,
    "\nData type:", type(y_train)

    )



Dimensions of the test feature data: (4205, 784) 
Data type: <class 'numpy.ndarray'>
Dimensions of the test target data: (4205,) 
Data type: <class 'numpy.ndarray'>


# Neural Network initialization and training

In [8]:
#Initialize a Sequential Neural network with sigmoid activation function

model = Sequential([
    Dense(5, activation="sigmoid"),
    Dense(3, activation="sigmoid"),
    Dense(1, activation="sigmoid")

])

#Set model loss function to BinaryCrossEntropy to get the errors of binary predictions and targets
model.compile(loss=BinaryCrossentropy())

In [9]:
#Train the neural network

model.fit(X_train, y_train, epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x7926ce7571c0>

#Predictions

In [10]:
#Make predictions on the test data
pred = model.predict(X_test)

#Apply a threshhold of 0.5 so that the predictions are binary
predictions = (pred >= 0.5).astype(int).flatten()

print(
    "\n"
    "Predictions:", predictions,
    "\nDimensions of the predictions array:", predictions.shape
)

print(
    "\n",
    "Target values:", y_test,
    "\nDimensions of the target values", y_test.shape
)

print("\nLooks good!!")


Predictions: [1 0 1 ... 1 0 1] 
Dimensions of the predictions array: (2115,)

 Target values: [1 0 1 ... 1 0 1] 
Dimensions of the target values (2115,)

Looks good!!


In [11]:
#Define a function that calculates the accuracy of our predictions

def calculate_accuracy(predictions, actuals):

    predicitons = list(predictions)
    actuals = list(actuals)
    # Check that the two lists are of the same length
    if len(predictions) != len(actuals):
        raise ValueError("The lengths of the prediction and actual lists must be the same.")

    # Count the number of correct predictions
    correct_predictions = sum(pred == actual for pred, actual in zip(predictions, actuals))

    # Calculate the accuracy
    accuracy = correct_predictions / len(predictions)

    return accuracy

In [12]:
print(
    "Model Accuracy:",
    calculate_accuracy(predictions, y_test)
    )

Model Accuracy: 0.9995271867612293
