# Uses

In [31]:
using Flux, Statistics
using CSV, DataFrames

# Data

In [32]:
train_df = CSV.read("data/train.csv", DataFrame)
train_X = select(train_df, Not(:label))
train_y = train_df.label

test_X = CSV.read("data/test.csv", DataFrame)

result = DataFrame(ImageId = 1:size(test_X, 1))
nothing

# Normalization

In [33]:
function norm(d) 
    d = Float32.(d) ./ 255.0f0
    d = Matrix(d)
    d = Flux.flatten(d)
    d = Flux.flatten(d)
    if size(d, 2) == 784 
        d = permutedims(d)
    end
    d = reshape(d, 28, 28, 1, :)
    return d
end

train_X = norm(train_X)
output_X = test_X
test_X = norm(test_X)

train_y = Flux.onehotbatch(train_y, 0:9)
println(size(output_X))

(28000, 784)


# Model

In [34]:
model = Chain(
    Conv((3, 3), 1=>16, relu, pad=1),
    MaxPool((2, 2)),
    Conv((3, 3), 16=>32, relu, pad=1),
    MaxPool((2, 2)),
    Flux.flatten,
    Dense(32*7*7, 64, relu),
    Dropout(0.2),
    Dense(64, 10)
)


Chain(
  Conv((3, 3), 1 => 16, relu, pad=1),   [90m# 160 parameters[39m
  MaxPool((2, 2)),
  Conv((3, 3), 16 => 32, relu, pad=1),  [90m# 4_640 parameters[39m
  MaxPool((2, 2)),
  Flux.flatten,
  Dense(1568 => 64, relu),              [90m# 100_416 parameters[39m
  Dropout(0.2),
  Dense(64 => 10),                      [90m# 650 parameters[39m
) [90m                  # Total: 8 arrays, [39m105_866 parameters, 414.266 KiB.

# Training

In [35]:
train_loader = Flux.DataLoader((train_X, train_y); batchsize=128, shuffle=true)

op = Adam(0.001)

opt_state = Flux.setup(op, model)

function train_epoch!(model, loader, opt)
    total_loss = 0.0
    for (x, y) in loader 
        loss, grads = Flux.withgradient(model) do m
            Flux.logitcrossentropy(m(x), y)
        end
        Flux.update!(opt, model, grads[1])
        total_loss = 0.0
    end
    return total_loss / length(loader)
end

function accuracy(model, x, y)
    pred = model(x)
    return mean(Flux.onecold(pred, 0:9) .== Flux.onecold(y, 0:9))
end

println("\nStarting training...")
for epoch in 1:15
    avg_loss = train_epoch!(model, train_loader, opt_state)
    test_acc = accuracy(model, train_X, train_y)
    println("Epoch $epoch: Loss = $(round(avg_loss, digits=4)), Test Acc = $(round(test_acc*100, digits=2))%")
end


Starting training...
Epoch 1: Loss = 0.0, Test Acc = 97.07%
Epoch 2: Loss = 0.0, Test Acc = 98.37%
Epoch 3: Loss = 0.0, Test Acc = 98.84%
Epoch 4: Loss = 0.0, Test Acc = 98.95%
Epoch 5: Loss = 0.0, Test Acc = 99.28%
Epoch 6: Loss = 0.0, Test Acc = 99.17%
Epoch 7: Loss = 0.0, Test Acc = 99.35%
Epoch 8: Loss = 0.0, Test Acc = 99.45%
Epoch 9: Loss = 0.0, Test Acc = 99.6%
Epoch 10: Loss = 0.0, Test Acc = 99.66%
Epoch 11: Loss = 0.0, Test Acc = 99.73%
Epoch 12: Loss = 0.0, Test Acc = 99.72%
Epoch 13: Loss = 0.0, Test Acc = 99.77%
Epoch 14: Loss = 0.0, Test Acc = 99.83%
Epoch 15: Loss = 0.0, Test Acc = 99.83%


# Output

In [38]:
pred = Flux.onecold(model(test_X), 0:9)
@assert length(unique(pred)) == 10

result.Label = pred
CSV.write("out.csv", result)

"out.csv"