## Classification of Handwritten Digits using Neural Networks

### Using MNIST dataset and Flux.jl


In [42]:
using Flux, Flux.Data.MNIST
using Flux: onehotbatch, argmax, crossentropy, throttle
using Base.Iterators: repeated

In [6]:
# Load the MNIST digit images together with their integer labels (0-9)
imgs, labels = MNIST.images(), MNIST.labels();

In [7]:
# One-hot encode the labels against the classes 0:9.
# Each label becomes one column of Y with a single `true` in the row of its class.
Y = onehotbatch(labels, 0:9)

10×60000 Flux.OneHotMatrix{Array{Flux.OneHotVector,1}}:
 false   true  false  false  false  …  false  false  false  false  false
 false  false  false   true  false     false  false  false  false  false
 false  false  false  false  false     false  false  false  false  false
 false  false  false  false  false     false   true  false  false  false
 false  false   true  false  false     false  false  false  false  false
  true  false  false  false  false  …  false  false   true  false  false
 false  false  false  false  false     false  false  false   true  false
 false  false  false  false  false     false  false  false  false  false
 false  false  false  false  false      true  false  false  false   true
 false  false  false  false   true     false  false  false  false  false

In [31]:
# Flatten every 28×28 image into a single column of floats, then join those
# columns side by side so that X holds one image per column
X = hcat(map(img -> float(reshape(img, :)), imgs)...);

In [33]:
# Two-layer classifier: 784 inputs -> 32 hidden units (relu) -> 10 outputs,
# with softmax applied to the output of the last Dense layer
m = Chain(Dense(28 * 28, 32, relu), Dense(32, 10), softmax)

## Different methods of building the layers
# Method 2: hand-written layers (arrays wrapped with `param`, as in Method 3)

# W1 = param(rand(32,784))
# b1 = param(rand(32))
# layer1(x) = W1*x .+ b1
# W2 = param(rand(10,32))
# b2 = param(rand(10))
# layer2(x) = W2*x .+ b2
# model(x) = layer2(σ.(layer1(x)))

# Method 3
# struct Affine
#   W
#   b
# end

# Affine(in::Integer, out::Integer) =
#   Affine(param(randn(out, in)), param(randn(out)))

# # Overload call, so the object can be used as a function
# (m::Affine)(x) = m.W * x .+ m.b

# a = Affine(10, 5)

Chain(Dense(784, 32, NNlib.relu), Dense(32, 10), NNlib.softmax)

In [34]:
# Training objective: cross-entropy between the model output for `x` and the
# targets `y` (Flux.mse(m(x), y) can also be used)
function loss(x, y)
    predictions = m(x)
    return crossentropy(predictions, y)
end


loss (generic function with 1 method)

In [35]:
# Fraction of samples classified correctly: undo the one-hot encoding of both
# the model output and the targets, then compare the resulting labels
function accuracy(x, y)
    predicted = argmax(m(x))
    actual = argmax(y)
    return mean(predicted .== actual)
end


accuracy (generic function with 1 method)

In [43]:
# Iterator that yields the same full-batch (X, Y) pair 200 times
# (presumably one training step per repetition -- confirm against Flux.train!)
dataset = repeated((X, Y), 200)

# Zero-argument callback that prints the current loss over the whole of (X, Y)
evalcb = () -> @show(loss(X, Y))

# ADAM optimiser constructed over the parameters collected from the model m
opt = ADAM(params(m))

  likely near In[43]:2


(::#71) (generic function with 1 method)

In [44]:
## Train over `dataset`; throttle limits the loss callback to at most one call every 10 secs

Flux.train!(loss, dataset, opt, cb = throttle(evalcb, 10))


loss(X, Y) = 2.297694826789072 (tracked)
loss(X, Y) = 1.48078379598783 (tracked)
loss(X, Y) = 0.9339084500384616 (tracked)
loss(X, Y) = 0.6621529396381316 (tracked)
loss(X, Y) = 0.5284040448120789 (tracked)
loss(X, Y) = 0.4533253160668058 (tracked)
loss(X, Y) = 0.40580081828071307 (tracked)
loss(X, Y) = 0.37265836959684095 (tracked)
loss(X, Y) = 0.3477831053025163 (tracked)
loss(X, Y) = 0.32811594071214156 (tracked)
loss(X, Y) = 0.3119292983960813 (tracked)
loss(X, Y) = 0.2990586362468767 (tracked)
loss(X, Y) = 0.28707128063597964 (tracked)
loss(X, Y) = 0.27716454393026685 (tracked)


In [45]:
# Accuracy of predicted labels.
# NOTE: X and Y are the same data the model was trained on, so this is
# training accuracy rather than a held-out estimate.
accuracy(X, Y)

0.9254833333333333