# Bogumił Kamiński, 2019-03-25

`.int` file format specification:
* Each image is encoded by 1+28^2 bytes.
* Images are stored consecutively.
* The first byte of each image is its class, from 0 to 9.
* Each of the remaining 28^2 bytes has a value from 0 to 255
  and encodes one entry of the 28x28 image.

In [1]:
using Statistics

In [2]:
# Container for the training set: `class` holds one label byte per image and
# `image` the matching pixel matrices. Both start empty.
const TRAIN = (class = Vector{UInt8}(), image = Vector{Matrix{Int32}}())

(class = UInt8[], image = Array{Int32,2}[])

In [3]:
# Container for the test set: `class` holds one label byte per image and
# `image` the matching pixel matrices. Both start empty.
const TEST = (class = Vector{UInt8}(), image = Vector{Matrix{Int32}}())

(class = UInt8[], image = Array{Int32,2}[])

In [4]:
# Fill TRAIN and TEST from their `.int` files. Each record is one class byte
# followed by 28^2 pixel bytes; records are read until end of file.
for (filename, data) in [("mnist_train.int", TRAIN),
                         ("mnist_test.int", TEST)]
    open(filename) do f
        while !eof(f)
            c = read(f, UInt8)
            # `read(f, n)` returns at most `n` bytes, so a truncated file gives a
            # shorter vector here rather than an error.
            v = read(f, 28^2)
            # Validate before mutating `data`, so `class` and `image` always keep
            # the same length even if the final record is incomplete.
            length(v) == 28^2 || throw(DimensionMismatch(
                "$filename: incomplete image record, got $(length(v)) of $(28^2) bytes"))
            push!(data.class, c)
            # `push!` converts the reshaped UInt8 matrix to the element type of
            # `data.image`.
            push!(data.image, reshape(v, 28, 28))
        end
    end
end

In [5]:
"""
    distance(a, b)

Return the sum of squared element-wise differences between `a` and `b`
(the squared Euclidean distance; no square root is taken).

`a` and `b` must have matching indices. If they do not, `eachindex` throws a
`DimensionMismatch` instead of the loop reading out of bounds.
"""
function distance(a, b)
    d = 0
    # `eachindex(a, b)` checks that both arrays share the same indices, which is
    # what makes the `@inbounds` access below safe.
    @simd for i in eachindex(a, b)
        @inbounds d += (a[i] - b[i]) ^ 2
    end
    return d
end

distance (generic function with 1 method)

In [6]:
"""
    knnacc(i, k=20)

Return the classes of the `k` training images closest to test image `i`,
ordered from nearest to farthest.

Closeness is the value of `distance` between `TEST.image[i]` and each entry of
`TRAIN.image`. `k` defaults to 20, so `knnacc(i)` keeps its previous behaviour.
"""
function knnacc(i, k::Integer=20)
    # `Ref` keeps the single test image from being broadcast over element-wise.
    dist = distance.(Ref(TEST.image[i]), TRAIN.image)
    # Only the `k` smallest distances are sorted; the rest are left unordered.
    knn_loc = partialsortperm(dist, 1:k)
    return TRAIN.class[knn_loc]
end

knnacc (generic function with 1 method)

In [7]:
# For every test image, collect the classes returned by `knnacc` (timed).
@time neivec = map(knnacc, eachindex(TEST.class))

352.095416 seconds (965.67 k allocations: 8.987 GiB, 0.44% gc time)


10000-element Array{Array{UInt8,1},1}:
 [0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00]
 [0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00]
 [0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00]
 [0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00]
 [0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00]
 [0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00]
 [0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00]
 [0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0

In [8]:
# Concatenate the per-test-image class vectors as columns: column `i` holds the
# classes `knnacc` returned for test image `i`, nearest neighbour in row 1.
const NEIGHBOURS = reduce(hcat, neivec)

20×10000 Array{UInt8,2}:
 0x00  0x00  0x00  0x00  0x00  0x00  …  0x09  0x09  0x09  0x09  0x09  0x09
 0x00  0x00  0x00  0x00  0x00  0x00     0x09  0x09  0x09  0x09  0x09  0x09
 0x00  0x00  0x00  0x00  0x00  0x00     0x09  0x09  0x09  0x09  0x09  0x09
 0x00  0x00  0x00  0x00  0x00  0x00     0x09  0x09  0x09  0x09  0x09  0x09
 0x00  0x00  0x00  0x00  0x00  0x00     0x09  0x09  0x09  0x09  0x09  0x09
 0x00  0x00  0x00  0x00  0x00  0x00  …  0x09  0x09  0x09  0x09  0x09  0x09
 0x00  0x00  0x00  0x00  0x00  0x00     0x09  0x09  0x09  0x09  0x09  0x09
 0x00  0x00  0x00  0x00  0x00  0x00     0x09  0x09  0x09  0x09  0x09  0x09
 0x00  0x00  0x00  0x00  0x00  0x00     0x09  0x09  0x09  0x09  0x04  0x09
 0x00  0x00  0x00  0x00  0x00  0x00     0x04  0x09  0x09  0x09  0x09  0x04
 0x00  0x00  0x00  0x00  0x00  0x00  …  0x07  0x09  0x09  0x09  0x09  0x09
 0x00  0x00  0x00  0x00  0x00  0x00     0x07  0x09  0x09  0x09  0x07  0x09
 0x00  0x00  0x00  0x00  0x00  0x00     0x09  0x09  0x09  0x09  0x09  0x09


In [9]:
"""
    acc(classes, testclass)

Score a vote among `classes` (labels 0 to 9) against the true label `testclass`.

Returns `1 / t` when `testclass` is one of the `t` labels tied for the most
votes, and `0.0` otherwise.
"""
function acc(classes, testclass)
    votes = zeros(Int, 10)
    for label in classes
        # Labels are 0-based, Julia arrays are 1-based.
        votes[label + 1] += 1
    end
    top = maximum(votes)
    ties = count(n -> n == top, votes)
    hit = votes[testclass + 1] == top
    return hit / ties
end

acc (generic function with 1 method)

In [10]:
"""
    evalk(k)

Return the mean of `acc` over all columns of `NEIGHBOURS`, using only the first
`k` rows of each column as the voting classes and `TEST.class[i]` as the true
label of column `i`.
"""
function evalk(k)
    # Score of a single test image; the view avoids copying the column slice.
    score(i) = acc(view(NEIGHBOURS, 1:k, i), TEST.class[i])
    return mean(score, axes(NEIGHBOURS, 2))
end

evalk (generic function with 1 method)

In [11]:
# Evaluate `evalk` for every k from 1 up to the number of rows of NEIGHBOURS.
map(evalk, axes(NEIGHBOURS, 1))

20-element Array{Float64,1}:
 0.9691            
 0.96455           
 0.971             
 0.968925          
 0.96895           
 0.9684666666666668
 0.9692333333333332
 0.9677083333333332
 0.9663166666666668
 0.9668833333333334
 0.9668666666666668
 0.9660833333333334
 0.9654666666666666
 0.9640666666666667
 0.9633666666666666
 0.9633666666666668
 0.96315           
 0.9634333333333333
 0.9631166666666666
 0.96235           