# MNIST Dataset Digit Recognition using a Neural Network
### Isaac Mattern, Jamie Walters, Noah LaPolt, Sam Gernstetter
CSE 3521 - Intro AI at The Ohio State University

## Load Data

In [4]:
import numpy as np
import pandas as pd

# Import data: this data is from https://www.kaggle.com/c/digit-recognizer/data
# Download both CSVs, then upload them to this Colab project so the relative paths below resolve.
train = pd.read_csv("train.csv")
# The test CSV has no label column. The submission is a CSV; Kaggle will tell us our accuracy.
x_test = pd.read_csv("test.csv")

In [5]:
# Split the training frame in two: the pixel columns become the features,
# and the "label" column (the actual digit) becomes the target.
x_train = train.drop("label", axis=1)
y_train = train["label"]

# The combined frame is no longer needed once it has been split.
del train

## Normalize the data
Currently, there is a value from 0 to 255 to indicate the darkness of the pixel. We will divide this value by 255 to get a decimal value between 0 and 1.

In [6]:
# Rescale every pixel from the 0-255 range down to the 0-1 range.
x_train = x_train.div(255.0)
x_test = x_test.div(255.0)

## Neural Network Function

In [22]:
def nn(x, y, learningRate=0.01, numClasses=10):
  """Train one linear perceptron per class in a single pass over the data.

  The model is a table of weights with one row per class and one column per
  input feature (no hidden layer, no bias term). For each training sample the
  sign of every class score is compared with the desired sign (+1 for the
  true class, -1 for every other class); each row whose sign is wrong is
  nudged towards (or away from) the sample by `learningRate`. A score of
  exactly 0 counts as wrong for every class, so the all-zero starting
  weights are always updated on the first sample.

  Args:
    x: Training samples, one row per sample. A DataFrame or any 2-D
      array-like is accepted.
    y: True class of each sample, in the same order as the rows of `x`.
      Labels are matched by position, so a Series with a non-default index
      (e.g. after shuffling or splitting) is handled correctly.
    learningRate: Step size applied to each weight update.
    numClasses: Number of classes, i.e. rows in the weight table.

  Returns:
    A float array of shape (numClasses, number of features).

  Raises:
    ValueError: If `x` and `y` do not contain the same number of samples.
  """
  samples = np.asarray(x, dtype=float)
  # Convert the labels to a plain array so that indexing is positional; the
  # samples are indexed positionally as well.
  labels = np.asarray(y).ravel()
  if labels.shape[0] != samples.shape[0]:
    raise ValueError(
        "x and y must contain the same number of samples, got %d and %d"
        % (samples.shape[0], labels.shape[0]))

  n = np.zeros((numClasses, samples.shape[1]))  # Table of weights.
  classes = np.arange(numClasses)

  # Loop through all training samples.
  for pixels, label in zip(samples, labels):
    y_hat = np.sign(np.matmul(n, pixels))

    # Desired sign per class: +1 for the true class, -1 for all others.
    target = np.where(classes == label, 1.0, -1.0)

    # Only the rows that produced the wrong sign are corrected.
    wrong = y_hat != target
    n[wrong] += learningRate * target[wrong, np.newaxis] * pixels

  return n

In [23]:
# Train the weight table on the full training set, using the default learning rate.
n = nn(x_train, y_train)

In [18]:
def test(x, n):
  data = []     # The final guesses as a list
  x = x.values  # X as a list.

  # Loop through all test images.
  for i in range(x.shape[0]):
    # Multiple the pixel values by the weights to get a 10x1 matrix of answers.
    guess = np.sign(np.matmul(n, x[i]))
    # Append the index of the highest value to the data.
    data.append([i + 1, np.where(guess == max(guess))[0][0]])

  # Returns data as data frame.
  return pd.DataFrame(data, columns =['InageId', 'Label'])

In [24]:
data = test(x_train, n)

# Compare each predicted label with the true label at the same position.
hits = data["Label"].values == y_train.values
correct = int(hits.sum())
total = len(hits)

print("Train accuracy:", correct / total)


Train accuracy: 0.804952380952381


In [25]:
data = test(x_test, n)

# Save the predictions as a CSV for checking: a header line, then one
# "id,label" line per test image.
with open('results.csv', 'w') as writefile:
  writefile.write("ImageId,Label\n")
  writefile.writelines(
      f"{image_id},{label}\n" for image_id, label in data.values
  )