In [18]:
# Data processing
using MLDatasets;
using MLUtils: DataLoader;
using MLDataPattern;
using ImageCore;
using Augmentor;
using ImageFiltering;
using MappedArrays;
using Random;
using Flux: DataLoader;

In [4]:
using Flux;

# Data Pre-processing

* Inputs: batches of (32 x 32) RGB images
    * Tensor size (32, 32, 3, N) in WHCN dimensions
    * Values between [0, 1]
* For all data: ImageNet normalization
    * Subtract means [0.485, 0.456, 0.406]
    * Divide by standard deviations [0.229, 0.224, 0.225]
* Augment training data only:
    * Permute to CWHN (3, 32, 32, N)
    * Convert to RGB image for Augmentor.jl package to process (32, 32, N)
    * 4 pixel padding on each side (40, 40, N)
    * Random horizontal flip
    * (32 x 32) crop from augmented image (32, 32, N)
    * Convert to tensors (3, 32, 32, N)
    * Permute to WHCN (32, 32, 3, N)
* Batch and shuffle data

In [56]:
train_data = MLDatasets.CIFAR10(Tx=Float32, split=:train)
test_data = MLDatasets.CIFAR10(Tx=Float32, split=:test)

dataset CIFAR10:
  metadata  =>    Dict{String, Any} with 2 entries
  split     =>    :test
  features  =>    32×32×3×10000 Array{Float32, 4}
  targets   =>    10000-element Vector{Int64}

In [57]:
train_x = train_data.features;
train_y = train_data.targets;

test_x = test_data.features;
test_y = test_data.targets;
size(train_x), size(test_x)  # Data is in shape WHCB

((32, 32, 3, 50000), (32, 32, 3, 10000))

In [58]:
# Train-test split
# Copied from https://github.com/JuliaML/MLUtils.jl/blob/v0.2.11/src/splitobs.jl#L65
# obsview doesn't work with this data, so use getobs instead

import MLDataPattern.splitobs;

function splitobs(data; at, shuffle::Bool=false)
    if shuffle
        data = shuffleobs(data)
    end
    n = numobs(data)
    return map(idx -> MLDataPattern.getobs(data, idx), splitobs(n, at))
end

splitobs (generic function with 11 methods)

In [59]:
train, val = splitobs((train_x, train_y), at=0.9, shuffle=true);

train_x, train_y = train;
val_x, val_y = val;

size(train_x), size(val_x)

((32, 32, 3, 45000), (32, 32, 3, 5000))

In [60]:
# Normalize all the data

means = reshape([0.485, 0.465, 0.406], (1, 1, 3, 1))
stdevs = reshape([0.229, 0.224, 0.225], (1, 1, 3, 1))
normalize(x) = (x .- means) ./ stdevs

train_x = normalize(train_x);
val_x = normalize(val_x);
test_x = normalize(test_x);

In [61]:
# Notebook testing: Use less data
train_x, train_y = MLDatasets.getobs((train_x, train_y), 1:500);

val_x, val_y = MLDatasets.getobs((val_x, val_y), 1:50);

test_x, test_y = MLDatasets.getobs((test_x, test_y), 1:50);

# Data augmentation pipeline with Augmentor.jl

By default, batch is the last dimension.

In [62]:
# Pad the training data for further augmentation
train_x_padded = padarray(train_x, Fill(0, (4, 4, 0, 0)));  
size(train_x_padded)  # Should be (40, 40, 3, 50000)

(40, 40, 3, 500)

In [63]:
pl = PermuteDims((3, 1, 2)) |> CombineChannels(RGB) |> Either(FlipX(), NoOp()) |> RCropSize(32, 32) |> SplitChannels() |> PermuteDims((2, 3, 1))


6-step Augmentor.ImmutablePipeline:
 1.) Permute dimension order to (3, 1, 2)
 2.) Combine color channels into colorant RGB
 3.) Either: (50%) Flip the X axis. (50%) No operation.
 4.) Crop random window with size (32, 32)
 5.) Split colorant into its color channels
 6.) Permute dimension order to (2, 3, 1)

In [64]:
# Create an output array for augmented images
outbatch(X) = Array{Float32}(undef, (32, 32, 3, nobs(X)))

outbatch (generic function with 1 method)

In [65]:
# Function that takes a batch (images and targets) and augments the images
augmentbatch((X, y)) = (augmentbatch!(outbatch(X), X, pl), y)

augmentbatch (generic function with 1 method)

In [66]:
# Shuffled and batched dataset of augmented images
train_batch_size = 16

train_batches = mappedarray(augmentbatch, batchview(shuffleobs((train_x_padded, train_y)), size=train_batch_size));

└ @ MLDataPattern /home/araising/.julia/packages/MLDataPattern/2yPuO/src/dataview.jl:205


In [67]:
# Test and Validation data
test_batch_size = 32

val_loader = DataLoader((val_x, val_y), shuffle=true, batchsize=test_batch_size);
test_loader = DataLoader((test_x, test_y), shuffle=true, batchsize=test_batch_size);

## 2D Convolution in Flux


**Flux.Conv — Type** 

Conv(filter, in => out, σ = identity;
     stride = 1, pad = 0, dilation = 1, groups = 1, [bias, init])


Standard convolutional layer. _filter_ is a tuple of integers specifying the size of the convolutional kernel; _in_ and _out_ specify the number of input and output channels.

Image data should be stored in WHCN order (width, height, channels, batch). In other words, a 100×100 RGB image would be a 100×100×3×1 array, and a batch of 50 would be a 100×100×3×50 array. This has N = 2 spatial dimensions, and needs a kernel size like (5,5), a 2-tuple of integers.

To take convolutions along N feature dimensions, this layer expects as input an array with ndims(x) == N+2, where size(x, N+1) == in is the number of input channels, and size(x, ndims(x)) is (as always) the number of observations in a batch. Then:

- filter should be a tuple of N integers.
- Keywords stride and dilation should each be either single integer, or a tuple with N integers.
- Keyword pad specifies the number of elements added to the borders of the data array. It can be
    - a single integer for equal padding all around,
    - a tuple of N integers, to apply the same padding at begin/end of each spatial dimension,
    - a tuple of 2*N integers, for asymmetric padding, or
    - the singleton _SamePad()_, to calculate padding such that size(output,d) == size(x,d) / stride (possibly rounded) for each spatial dimension.
- Keyword groups is expected to be an Int. It specifies the number of groups to divide a convolution into.

Keywords to control initialization of the layer:

- init - Function used to generate initial weights. Defaults to glorot_uniform.
- bias - The initial bias vector is all zero by default. Trainable bias can be disabled entirely by setting this to false, or another vector can be provided such as bias = randn(Float32, out).
            
            
**Flux.Conv - Method**
_Conv(weight::AbstractArray, [bias, activation; stride, pad, dilation])_

Constructs a convolutional layer with the given weight and bias. Accepts the same keywords and has the same defaults as Conv(k::NTuple{N,Integer}, ch::Pair{<:Integer,<:Integer}, σ; ...).

## ResNet Layer

In [6]:
mutable struct ResNetLayer
    conv1::Flux.Conv
    conv2::Flux.Conv
    bn1::Flux.BatchNorm
    bn2::Flux.BatchNorm
    f::Function
    in_channels::Int
    channels::Int
    stride1::Int
    stride2::Int
    pad1::Int
    pad2::Int
end

# @functor ResNetLayer (conv1, conv2, bn1, bn2)


In [7]:
function residual_identity(layer::ResNetLayer, x::AbstractArray{T, 4}) where {T<:Number}
    (w, h, c, b) = size(x)
    stride = layer.stride1
    if stride > 1
        @assert ((w % stride == 0) & (h % stride == 0)) "Spatial dimensions are not divisible by `stride`"
    
        # Strided downsample
        x_id = copy(x[begin:2:end, begin:2:end, :, :])
    else
        x_id = x
    end

    channels = layer.channels
    in_channels = layer.in_channels
    if in_channels < channels
        # Zero padding on extra channels
        (w, h, c, b) = size(x_id)
        pad = zeros(w, h, channels - in_channels, b)
        x_id = cat(x_id, pad; dims=3)
    elseif in_channels > channels
        error("in_channels > out_channels not supported")
    end
    return x_id
end

residual_identity (generic function with 1 method)

In [8]:
function ResNetLayer(in_channels::Int, channels::Int; stride1=1, stride2=1, f=relu, pad1=0, pad2=0)
    bn1 = Flux.BatchNorm(in_channels)
    conv1 = Flux.Conv((3,3), in_channels=>channels, f; stride=stride1, pad=pad1)
    bn2 = Flux.BatchNorm(channels)
    conv2 = Flux.Conv((3,3), channels=>channels, f; stride=stride2, pad=pad2)

    return ResNetLayer(conv1, conv2, bn1, bn2, f, in_channels, channels, stride1, stride2, pad1, pad2)
end

ResNetLayer

In [9]:
function (self::ResNetLayer)(x::AbstractArray)
    identity = residual_identity(self, x)
    z = self.bn1(x)
    z = self.f(z)
    z = self.conv1(z)
    z = self.bn2(z)
    z = self.f(z)
    z = self.conv2(z)

    y = z + identity
    return y
end

In [10]:
l = ResNetLayer(3, 10; stride1=2, stride2=1, pad1=1, pad2=1);

In [11]:
x = randn(Float32, (64, 64, 3, 2));
y = l(x);
size(y)

(32, 32, 10, 2)

# Linear Layer

In [12]:
mutable struct Linear
    W::AbstractMatrix{T} where T
    b::AbstractVector{T} where T
end

# @functor Linear

# Init
function Linear(in_features::Int, out_features::Int)
    k_sqrt = sqrt(1 / in_features)
    d = Uniform(-k_sqrt, k_sqrt)
    return Linear(rand(d, out_features, in_features), rand(d, out_features))
end
Linear(in_out::Pair{Int, Int}) = Linear(in_out[1], in_out[2])

function Base.show(io::IO, l::Linear)
    o, i = size(l.W)
    print(io, "Linear(o)")
end

# Forward
(l::Linear)(x::AbstractArray) where T = l.W * x .+ l.b

# ResNet20 Model

In [13]:
mutable struct ResNetV2
    input_layer::Flux.Conv
    layer_1::ResNetLayer
    layer_2::ResNetLayer
    layer_3::ResNetLayer
    layer_4::ResNetLayer
    layer_5::ResNetLayer
    layer_6::ResNetLayer
    layer_7::ResNetLayer
    layer_8::ResNetLayer
    layer_9::ResNetLayer
    pool::GlobalMeanPool
    linear::Linear
end

In [15]:
function ResNetV2(in_channels::Int, num_classes::Int)
    input_layer = Flux.Conv((3,3), in_channels=>16)
    
    layer_1 = ResNetLayer(16, 16)
    layer_2 = ResNetLayer(16, 16)
    layer_3 = ResNetLayer(16, 16)
    
    layer_4 = ResNetLayer(16, 32, stride1=2, stride2=1)
    layer_5 = ResNetLayer(32, 32)
    layer_6 = ResNetLayer(32, 32)
    
    layer_7 = ResNetLayer(32, 64, stride1=2, stride2=1)
    layer_8 = ResNetLayer(64, 64)
    layer_9 = ResNetLayer(64, 64)
    
    pool = GlobalMeanPool()
    
    linear = Linear(64, num_classes)
end

"""
    ResNet20(x)
Forward function for ResNet20 model

# Arguments
- `x::AbstractArray`: 4D input image tensor of shape (width, height, channels, batch size)

# Returns
- `y::AbstractArray`: 2D output tensor of shape (num classes, batch size)
"""
function (self::ResNetV2)(x::AbstractArray)
    z = self.input_layer(x)
    z = self.layer_1(z)
    z = self.layer_2(z)
    z = self.layer_3(z)
    z = self.layer_4(z)
    z = self.layer_5(z)
    z = self.layer_6(z)
    z = self.layer_7(z)
    z = self.layer_8(z)
    z = self.layer_9(z)
    z = self.pool(z)
    z = dropdims(z, dims=(1, 2))
    y = self.linear(z)
    return y
end

ResNetV2

In [19]:
# Testing ResNet20 model
# Expected output: (10, 4)
m = ResNetV2(3, 10);
inputs = randn(Float32, (32, 32, 3, 4))
outputs = m(inputs);
size(outputs)

LoadError: UndefVarError: Uniform not defined